-
Notifications
You must be signed in to change notification settings - Fork 1
/
fetch_MumbaiWeather.R
135 lines (118 loc) · 5.64 KB
/
fetch_MumbaiWeather.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
# Steps:
# 1. Fetch data month-wise or day-wise.
# 2. Check whether a major chunk of data is missing. If so, retry the
#    download and then combine (rbind) it with the main chunk of data.
# 3. Check for duplicates and remove them.
# 4. Fill missing data by interpolation.
# Mumbai stations: VABB - SANTACRUZ [most entries found here], VAJJ - JUHU
# These functions are taken from weatherundrground_fetch.R
library(data.table)   # fread()
library(fasttime)     # fastPOSIXct()
library(weatherData)  # getWeatherForDate()
library(xts)          # xts(), period.apply(), endpoints(), align.time(), split.xts()
getdetailedData <- function(station = "VABB",
                            start = "2016-10-01",
                            end = "2016-11-24") {
  # Fetch detailed (sub-hourly) weather observations from Weather
  # Underground via the weatherData package.
  #
  # Args:
  #   station: ICAO airport code ("VABB" = Santacruz, "VAJJ" = Juhu).
  #   start, end: "YYYY-MM-DD" date strings bounding the fetch
  #     (defaults reproduce the original hard-coded range).
  #
  # Returns: the data.frame produced by getWeatherForDate(); custom
  #   columns c(2, 4) select temperature and humidity.
  path <- "/Volumes/MacintoshHD2/Users/haroonr/Detailed_datasets/kresit_iitb/weather/"
  data <- getWeatherForDate(station, start, end,
                            opt_detailed = TRUE,
                            opt_custom_columns = TRUE,
                            custom_columns = c(2, 4))
  # write.csv(data, file = paste0(path,"10_Mumbai2016.csv"),row.names = FALSE,quote = FALSE)
  data
}
remove_duplicates <- function(){
# NOTE(review): scratch/dead code — this definition is shadowed by the
# second remove_duplicates() defined below, and it relies on globals
# `xts_df` and `xts_d1` that are not defined anywhere in this file.
# assume xts_df is original xts_object
row_no <- which(duplicated(index(xts_df)))# extract duplicate row no.s
# NOTE(review): if there are no duplicates, row_no is integer(0) and
# xts_df[-row_no,] drops EVERY row; the later definition guards this
# case with a length() check.
clear_ob <- xts_df[-row_no,] # remove duplicates
xts_final <- rbind(clear_ob,xts_d1)  # combined object is never used afterwards
mean_ob <- apply.daily(clear_ob,mean)  # daily means of the de-duplicated series
NROW(mean_ob)  # function's (implicit) return value: number of daily rows
}
remove_duplicates <- function(path = "/Volumes/MacintoshHD2/Users/haroonr/Detailed_datasets/kresit_iitb/weather/") {
  # Remove duplicate timestamps from each downloaded monthly CSV and
  # write the cleaned copies into a "temp/" subdirectory of `path`.
  #
  # Args:
  #   path: directory holding the "*_Mumbai2016.csv" downloads
  #     (default reproduces the original hard-coded location).
  #
  # Returns: invisibly NULL; called for its file-writing side effect.
  #
  # FIXES vs original: the file pattern was a shell glob
  # ("*_Mumbai2016.csv") passed where a regex is expected — anchored
  # regex used instead; the "temp/" output directory is now created if
  # missing; progress output gets a newline.
  files <- list.files(path, pattern = "_Mumbai2016\\.csv$")
  out_dir <- file.path(path, "temp")
  if (!dir.exists(out_dir)) {
    dir.create(out_dir, recursive = TRUE)
  }
  for (f in files) {
    df <- fread(file.path(path, f), header = TRUE)
    xts_df <- xts(
      data.frame(TemperatureC = as.numeric(df$TemperatureC),
                 Humidity = as.numeric(df$Humidity)),
      as.POSIXct(strptime(df$Time, format = "%Y-%m-%d %H:%M:%S"))
    )
    dup_rows <- which(duplicated(index(xts_df)))  # positions of repeated timestamps
    # Guard: xts_df[-integer(0), ] would drop every row, so only
    # subset when duplicates actually exist.
    clear_ob <- if (length(dup_rows) > 0) xts_df[-dup_rows, ] else xts_df
    write.csv(data.frame(Time = index(clear_ob), coredata(clear_ob)),
              file = file.path(out_dir, f), row.names = FALSE)
    message("cleaned: ", f)  # progress indicator
  }
  invisible(NULL)
}
combine_files <- function(path = "/Volumes/MacintoshHD2/Users/haroonr/Detailed_datasets/kresit_iitb/weather/",
                          outfile = "weather_complete2016.csv") {
  # Concatenate the per-month weather CSVs into one chronological CSV.
  #
  # Args:
  #   path: directory holding the "*_Mumbai2016.csv" files; the
  #     combined file is written here as well.
  #   outfile: name of the combined output CSV.
  #
  # Returns: invisibly, the combined xts object (the original returned
  #   the useless value of write.csv()).
  #
  # FIX vs original: the file pattern was a shell glob passed where a
  # regex is expected — anchored regex used instead.
  files <- list.files(path, pattern = "_Mumbai2016\\.csv$")
  dat <- lapply(files, function(f) {
    df <- fread(paste0(path, f), header = TRUE)
    xts(data.frame(TemperatureC = as.numeric(df$TemperatureC),
                   Humidity = as.numeric(df$Humidity)),
        as.POSIXct(strptime(df$Time, format = "%Y-%m-%d %H:%M:%S")))
  })
  combined <- do.call(rbind, dat)  # xts rbind merges in index order
  write.csv(data.frame(Time = index(combined), coredata(combined)),
            file = paste0(path, outfile), row.names = FALSE)
  invisible(combined)
}
create_hourly <- function(file = "/Volumes/MacintoshHD2/Users/haroonr/Detailed_datasets/kresit_iitb/weather/weather_complete2016.csv") {
  # Resample the combined half-hourly weather series to hourly samples.
  #
  # Args:
  #   file: path to the combined CSV produced by combine_files().
  #
  # Returns: a data.frame with columns timestamp, TemperatureC, Humidity
  #   (the original returned nothing useful; its write.csv is kept
  #   commented out as in the source).
  #
  # FIXES vs original: the helper resample_data() was defined AFTER its
  # call site inside this function, so the call only resolved if a
  # same-named global happened to exist — the helper now precedes its
  # use. The unused nested resample_data_daywise() (a working copy
  # lives in create_daily()) was dead code and is removed. The
  # timestamp-adjustment loop, which grew a vector element by element,
  # is vectorized.

  # Average df_xts into xminutes-wide bins and align the bin stamps.
  resample_data <- function(df_xts, xminutes) {
    ds_data <- period.apply(df_xts,
                            INDEX = endpoints(index(df_xts) - 3600 * 0.5,
                                              on = "minutes", k = xminutes),
                            FUN = mean)
    # align data to the x-minute boundary, offset by half an hour
    align.time(ds_data, xminutes * 60 - 3600 * 0.5)
  }

  df <- fread(file, header = TRUE)
  # 19800 s = +05:30 offset; fastPOSIXct parses the stamps as UTC —
  # presumably shifting them to IST wall time (TODO confirm).
  df_xts <- xts(data.frame(df$TemperatureC, df$Humidity),
                fastPOSIXct(df$Time) - 19800)
  sd <- resample_data(df_xts, 60)
  # Shift any stamp landing on the half hour (minute == 30) forward by
  # 30 minutes so every sample sits on a full hour.
  offsets <- ifelse(.indexmin(sd) == 30, 30 * 60, 0)
  sd2 <- xts(coredata(sd),
             as.POSIXct(as.numeric(index(sd)) + offsets,
                        origin = "1970-01-01"))
  sd2 <- data.frame(index(sd2), coredata(sd2))
  colnames(sd2) <- c("timestamp", "TemperatureC", "Humidity")
  # write.csv(sd2,file="/Volumes/MacintoshHD2/Users/haroonr/Detailed_datasets/kresit_iitb/weather/hourlyweather_complete2016.csv",row.names = FALSE)
  sd2
}
create_daily <- function(file = "/Volumes/MacintoshHD2/Users/haroonr/Detailed_datasets/kresit_iitb/weather/weather_complete2016.csv") {
  # Resample the combined half-hourly weather series to daily samples.
  #
  # Args:
  #   file: path to the combined CSV produced by combine_files().
  #
  # Returns: a data.frame with columns timestamp, TemperatureC, Humidity
  #   (the original returned nothing useful; its write.csv is kept
  #   commented out as in the source).
  #
  # FIX vs original: the helper resample_weather_data_daywise() was
  # defined AFTER its call site inside this function, so the call only
  # resolved if a same-named global happened to exist — the helper now
  # precedes its use.

  # Mean temperature/humidity (rounded to 3 dp) per xdays-day window.
  resample_weather_data_daywise <- function(xts_datap, xdays) {
    datas <- split.xts(xts_datap, "days", k = xdays)
    daydata <- lapply(datas, function(x) {
      xts(data.frame(round(mean(x[, 1]), 3), round(mean(x[, 2]), 3)),
          lubridate::date(x[1, ]))
    })
    do.call(rbind, daydata)
  }

  df <- fread(file, header = TRUE)
  # 19800 s = +05:30 offset; fastPOSIXct parses the stamps as UTC —
  # presumably shifting them to IST wall time (TODO confirm).
  df_xts <- xts(data.frame(df$TemperatureC, df$Humidity),
                fastPOSIXct(df$Time) - 19800)
  sd <- resample_weather_data_daywise(df_xts, 1)
  sd <- data.frame(index(sd), coredata(sd))
  colnames(sd) <- c("timestamp", "TemperatureC", "Humidity")
  # write.csv(sd,file="/Volumes/MacintoshHD2/Users/haroonr/Detailed_datasets/kresit_iitb/weather/dailyweather_complete2016.csv",row.names = FALSE)
  sd
}
interpolate_hourly_Series <- function(weatherdat = "/Volumes/MacintoshHD2/Users/haroonr/Detailed_datasets/kresit_iitb/weather/hourlyweather_complete2016.csv") {
  # Build a gap-free hourly series by linearly interpolating missing
  # hours in the resampled weather data.
  #
  # Args:
  #   weatherdat: path to the hourly CSV produced by create_hourly()
  #     (default reproduces the original hard-coded location).
  #
  # Returns: an xts object with one interpolated observation per hour
  #   (the original discarded the result; its write.csv is kept
  #   commented out as in the source).
  df_xts <- fread(weatherdat)
  # Drop the timestamp column into the xts index; shift by +05:30
  # (19800 s) as elsewhere in this file.
  df_xts <- xts(df_xts[, 2:dim(df_xts)[2]],
                fastPOSIXct(df_xts$timestamp) - 19800)
  # Regular hourly grid spanning the observed range (assumes the input
  # is hourly sampled).
  timerange <- seq(start(df_xts), end(df_xts), by = "hour")
  placeholder <- xts(rep(NA, length(timerange)), timerange)
  # Merging injects the missing stamps as NA rows; keep the two data
  # columns only.
  complete_xts <- merge(df_xts, placeholder)[, 1:2]
  tt <- na.approx(complete_xts)  # linear interpolation over the NAs
  # write.csv(data.frame(timestamp = index(tt),coredata(tt)),file= "/Volumes/MacintoshHD2/Users/haroonr/Detailed_datasets/kresit_iitb/weather/hourlyweather_complete2016.csv",row.names = FALSE)
  tt
}