diff --git a/Campylobacter_environment_analysis_subset_variables_hum_rain.R b/Campylobacter_environment_analysis_subset_variables_hum_rain.R deleted file mode 100644 index e688b1240f4215127c14708c9a1c1dbcc4b6ea45..0000000000000000000000000000000000000000 --- a/Campylobacter_environment_analysis_subset_variables_hum_rain.R +++ /dev/null @@ -1,567 +0,0 @@ -# The code does look at how the risk of Campylobacter in humans depends on environmental variables. It consider only two variables at a time: Relative humidity and maximum air temeprature -# It uses original MEDMI data - - -rm(list=ls(all=TRUE)) -# -library(ISOweek) -library(lubridate) -library(ggplot2) -require(MASS) -library(scales) -require(pheno) -library(timeDate) -library(pastecs) -library(stringi) -library(timeSeries) - - -list.of.packages <- c("xts") -new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] -if(length(new.packages)) install.packages(new.packages) -library(xts) - - - -## Variable file - -variable<-"humidity" -variable_df_1<-read.csv(paste("../../Data_Base/OPIE_data_base/",variable,".csv",sep="")) -humidity<-variable_df_1[,-c(1,2)] -#dates<-as.Date(as.character(variable_df_1[-c(1,2),2]),format="%d/%m/%Y") - -dates_s<-as.Date(as.character(variable_df_1[-c(1,2),2]),format="%Y-%m-%d") -dates<-rep(dates_s,times=length(variable_df_1)-2) -All_PC_s<-names(variable_df_1[1,]) -All_PC_s<-All_PC_s[-c(1,2)] -All_PC<-rep(All_PC_s,each=length(dates_s)) - -humidity<-humidity[-c(1,2),] -names(humidity) <- NULL -humidity<-unlist(c(humidity)) -#wt<-which(humidity<=0) -#humidity[wt]<-NA - - -variable<-"max_air_temp" -variable_df_2<-read.csv(paste("../../Data_Base/OPIE_data_base/",variable,".csv",sep="")) -max_temp<-variable_df_2[,-c(1,2)] -max_temp<-max_temp[-c(1,2),] -names(max_temp) <- NULL -max_temp<-unlist(c(max_temp)) -#wt<-which(max_temp>=39) -#max_temp[wt]<-NA - -variable<-"min_air_temp" -variable_df_3<-read.csv(paste("../../Data_Base/OPIE_data_base/",variable,".csv",sep="")) -min_temp<-variable_df_3[,-c(1,2)] -min_temp<-min_temp[-c(1,2),] -names(min_temp) <- NULL -min_temp<-unlist(c(min_temp)) - - -variable<-"rain" -variable_df_4<-read.csv(paste("../../Data_Base/OPIE_data_base/",variable,".csv",sep="")) -rain<-variable_df_4[,-c(1,2)] -rain<-rain[-c(1,2),] -names(rain) <- NULL -rain<-unlist(c(rain)) - -variable<-"wind_speed" -variable_df_5<-read.csv(paste("../../Data_Base/OPIE_data_base/",variable,".csv",sep="")) -wind<-variable_df_5[,-c(1,2)] -wind<-wind[-c(1,2),] -names(wind) <- NULL -wind<-unlist(c(wind)) - - -width<-14 - width_char<-paste(width) - -######################## Catchment areas ################# - -population_df<-read.csv(paste("../../Data_Base/Catchment_areas/LSOA2011Population_SumByLabs.csv",sep="")) -colnames(population_df)<-c("PostCode","col2","col3","residents") - - - -merged_lab_all2<-data.frame(All_PC,dates,humidity,max_temp,min_temp,rain,wind) -colnames(merged_lab_all2)<-c("PostCode","dates","humidity", "max_temp", "min_temp","rain","wind") -merged_lab_all<-merge(merged_lab_all2, population_df,by="PostCode") -# -#merged_lab<-data.frame(dates,humidity,max_temp,min_temp) -merged.data_all<-read.csv("../../Data_Base/OPIE_data_base/OPIE_environment_Campylobacter.csv") - - -merged.data_all[,2]<-as.Date((as.character(merged.data_all[,2]))) -merged.data_all2<-merged.data_all -merged.data_all<-merge(merged.data_all2, population_df,by="PostCode") - - - -########################### Weekly average ###################### - -week_cases<-function(lab_fac){ - lab<-as.character(lab_fac) - merged_sub<-subset(merged.data_all,merged.data_all$PostCode==lab) - - if (length(merged_sub[,1])!=0){ - - merged_sub2<-merged_sub[order(as.Date(merged_sub$Date, format="%Y-%m-%d")),] - - ep <- endpoints(as.xts(merged_sub2$Date),'weeks') - cum_Cases<-period.apply(merged_sub2$Cases, INDEX=ep, FUN=function(x) sum(na.omit(x,na.rm=TRUE))) - - - - PC<-rep(as.character(unique(merged_sub$PostCode)),times=length(ep)-1) - merged_weekly<-data.frame(PC,merged_sub$Date[ep],cum_Cases) - return(merged_weekly)} -} - - -week_PC<-function(lab_fac){ - lab<-as.character(lab_fac) - merged_lab_sub<-subset(merged_lab_all,merged_lab_all$PostCode==lab) - merged_lab_sub2<-merged_lab_sub[order(as.Date(merged_lab_sub$dates, format="%Y-%m-%d")),] - - - - mean_humidity<-rollmean(merged_lab_sub2$humidity,width) - mean_max_temp<-rollmean(merged_lab_sub2$max_temp, width) - mean_min_temp<-rollmean(merged_lab_sub2$min_temp, width) - mean_rain<-rollmean(merged_lab_sub2$rain, width) - cum_rain<-rollsum(merged_lab_sub2$rain, width) - mean_wind<-rollmean(merged_lab_sub2$wind,width) - mean_residents<-rollmean(merged_lab_sub2$residents, width) - - PC<-rep(as.character(unique(merged_lab_sub$PostCode)),times=length(mean_residents)) - ep<-seq(width,length(mean_residents)+width-1) - - merged_lab_weekly<-data.frame(PC,dates[ep],mean_humidity,mean_max_temp,mean_min_temp,mean_rain,cum_rain,mean_wind,mean_residents) - return(merged_lab_weekly) -} - -merged_lab_weekly<-c() -#merged_cases_weekly<-c() - -index_PC<-unique(merged_lab_all$PostCode) -for (i in c(1:length(index_PC))){ - merged_lab_weekly<-rbind(merged_lab_weekly,lapply(index_PC[i], week_PC)[[1]]) - - #merged_cases_weekly<-rbind(merged_cases_weekly,lapply(index_PC[i], week_cases)[[1]]) - print(100*i/length(index_PC)) -} - - - - - - -colnames(merged_lab_weekly)<-c("PostCode","Date","mean_humidity","mean_max_temp","mean_min_temp","mean_rain","cum_rain","mean_wind","mean_residents") -#write.table(merged_lab_weekly,paste("../../Data_Base/OPIE_data_base/Laboratory_",width_char,"_test.csv",sep=""), col.names = FALSE, sep = ",",eol = "\n") - - -merged.data_all$Cases<-1 -merged.data_all_weekly<-merge(merged.data_all, merged_lab_weekly,by=c("PostCode","Date")) -#merged.data_all_weekly<-merge(merged.data_all_weekly2, merged_cases_weekly,by=c("PostCode","Date")) - - -merged_lab_all<-merged_lab_weekly -#sort(merged_lab_all) -merged.data_all<-data.frame(merged.data_all_weekly$PostCode, - merged.data_all_weekly$Date, - merged.data_all_weekly$Cases, - merged.data_all_weekly$mean_humidity, - merged.data_all_weekly$mean_max_temp, - merged.data_all_weekly$mean_min_temp, - merged.data_all_weekly$mean_rain, - merged.data_all_weekly$cum_rain, - merged.data_all_weekly$mean_wind, - merged.data_all_weekly$mean_residents) - -colnames(merged.data_all)<-c("PostCode","Date","Cases","humidity","max_temp","min_temp","rain","cum_rain","wind_speed","residents") -colnames(merged_lab_all)<-c("PostCode","Date","humidity","max_temp","min_temp","rain","cum_rain","wind_speed","residents") - -merged.data_all<-merged.data_all[order(as.Date(merged.data_all$Date)),] - -########################### END Weekly average ###################### - - -#PHE_Centre<-merged.data_all$PHE_Centre_Name -#n_Centre<-length(levels(PHE_Centre)) -#i_centre<-6 -#For Dorset only -#merged.data_PHE<-subset(merged.data_all,merged.data_all$PHE_Centre_Name==levels(PHE_Centre)[i_centre]) -#merged.data<-subset(merged.data_PHE,year(merged.data_PHE$Date)>=2000 & year(merged.data_PHE$Date)<2016) -merged.data<-subset(merged.data_all,year(merged.data_all$Date)>=1990 & year(merged.data_all$Date)<=2015) -merged_lab<-subset(merged_lab_all,year(merged_lab_all$Date)>=1990 & year(merged_lab_all$Date)<=2015) -################### weekly summary - - -################### - -delta_hum<-5 -delta_temp<-1 -delta_rain<-1 -delta_wind<-0.5 -breaks_hum<-seq(max(min(na.omit(merged_lab_weekly$mean_humidity))-10,0),max(na.omit(merged_lab_weekly$mean_humidity))+10,by=delta_hum) #i -breaks_min_temp<-seq(min(na.omit(merged_lab_weekly$mean_min_temp))-2, max(na.omit(merged_lab_weekly$mean_min_temp))+2,by=delta_temp) -breaks_max_temp<-seq(min(na.omit(merged_lab_weekly$mean_max_temp))-2, max(na.omit(merged_lab_weekly$mean_max_temp))+2,by=delta_temp) -breaks_rain<-seq(min(na.omit(merged_lab_weekly$mean_rain))-1, max(na.omit(merged_lab_weekly$mean_rain))+1,by=delta_rain) -breaks_wind<-seq(max(min(na.omit(merged_lab_weekly$mean_wind))-0.5,0),max(na.omit(merged_lab_weekly$mean_wind))+0.5,by=delta_wind) -#breaks_wind<-seq(max(min(na.omit(wind))-2,0),max(na.omit(wind))+2,by=delta_wind) #WARNING -breaks_cum_rain<-seq(min(na.omit(merged_lab_weekly$cum_rain))-1, max(na.omit(merged_lab_weekly$cum_rain))+1,by=delta_rain) -breaks_mean_temp<-seq(min(na.omit(min_temp))-2,max(na.omit(max_temp))+2,by=delta_temp) - - - -# First find right domain where the values have no NA -i_hum_min<-min(which(breaks_hum>=min(na.omit(merged.data$humidity)))) -i_hum_max<-max(which(breaks_hum<=max(na.omit(merged.data$humidity)))) - -i_min_temp_min<-max(min(which(breaks_min_temp>=min(na.omit(merged.data$min_temp))))-1,1) -i_min_temp_max<-max(max(which(breaks_min_temp<=max(na.omit(merged.data$min_temp))))+1,1) - -i_max_temp_min<-max(min(which(breaks_max_temp>=min(na.omit(merged.data$max_temp))))-1,1) -i_max_temp_max<-max(max(which(breaks_max_temp<=max(na.omit(merged.data$max_temp))))+1,1) - -i_rain_min<-max(min(which(breaks_rain>=min(na.omit(merged.data$rain))))-1,1) -i_rain_max<-max(which(breaks_rain<=max(na.omit(merged.data$rain))))+1 - -i_wind_min<-max(min(which(breaks_wind>=min(na.omit(merged.data$wind))))-1,1) -i_wind_max<-max(which(breaks_wind<=max(na.omit(merged.data$wind))))+1 - -print(c(i_hum_min,i_min_temp_min ,i_max_temp_min, i_rain_min,i_wind_min)) -#print(c(i_hum_min,i_hum_max,i_min_temp_min ,i_min_temp_max,i_max_temp_min,i_max_temp_max, i_rain_min,i_rain_max,i_wind_min,i_wind_max)) - - - - - - -############################# General variables ########################### - - - - - - -variable_x<-"max_air_temp" -# -variable<-"rain" - - -var_x_loc_df<-c() - - - -if (variable=="max_air_temp"){ - - breaks_var<-breaks_max_temp - i_var_min<-i_max_temp_min - i_var_max<-i_max_temp_max - merged.data_var<-merged.data$max_temp - merged_lab_var<-merged_lab$max_temp -} -if (variable_x=="max_air_temp"){ - - i_var_x_min<-i_max_temp_min - i_var_x_max<-i_max_temp_max - breaks_var_x<-breaks_max_temp -} - -if (variable=="min_air_temp"){ - - breaks_var<-breaks_min_temp - i_var_min<-i_min_temp_min - i_var_max<-i_min_temp_max - merged.data_var<-merged.data$min_temp - merged_lab_var<-merged_lab$min_temp -} -if (variable_x=="min_air_temp"){ - - i_var_x_min<-i_min_temp_min - i_var_x_max<-i_min_temp_max - breaks_var_x<-breaks_min_temp -} - - -if (variable=="humidity"){ - - breaks_var<-breaks_hum - i_var_min<-i_hum_min - i_var_max<-i_hum_max - merged.data_var<-merged.data$humidity - merged_lab_var<-merged_lab$humidity -} -if (variable_x=="humidity"){ - - i_var_x_min<-i_hum_min - i_var_x_max<-i_hum_max - breaks_var_x<-breaks_hum -} - - - -if (variable=="mean_temp"){ - - breaks_var<-breaks_mean_temp - i_var_min<-i_mean_temp_min - i_var_max<-i_mean_temp_max - merged.data_var<-merged.data$mean_temp - merged_lab_var<-merged_lab$mean_temp -} -if (variable_x=="mean_temp"){ - - i_var_x_min<-i_mean_temp_min - i_var_x_max<-i_mean_temp_max - breaks_var_x<-breaks_mean_temp -} - -if (variable=="rain"){ - - breaks_var<-breaks_rain - i_var_min<-i_rain_min - i_var_max<-i_rain_max - merged.data_var<-merged.data$rain - merged_lab_var<-merged_lab$rain -} -if (variable_x=="rain"){ - - i_var_x_min<-i_rain_min - i_var_x_max<-i_rain_max - breaks_var_x<-breaks_rain -} - - - -if (variable=="wind"){ - - breaks_var<-breaks_wind - i_var_min<-i_wind_min - i_var_max<-i_wind_max - merged.data_var<-merged.data$wind - merged_lab_var<-merged_lab$wind -} -if (variable_x=="wind"){ - - i_var_x_min<-i_wind_min - i_var_x_max<-i_wind_max - breaks_var_x<-breaks_wind -} - -Yt_var_x<-function(i_var) -{ - - Yt1<-subset(merged.data,merged.data_var>=breaks_var[i_var] & merged.data_var<breaks_var[i_var+1]) - Yt2<-Yt1 - - return(as.list(Yt2)) - -} - - -Tot_var_x<-function(i_var) -{ - - Yt1<-subset(merged_lab,merged_lab_var>=breaks_var[i_var] & merged_lab_var<breaks_var[i_var+1]) - Yt2<-Yt1 - - return(as.list(Yt2)) - -} -var_x_loc_df<-c(0) -n_seas<-1 - - -for (i_var in c(i_var_min:i_var_max)) -{ - for (i in c(1:n_seas)) - { - - n_months<-12/n_seas - #if (is.na(Yt_min_temp(i_hum)$min_temp)==FALSE){ - - wt<-which(month(Yt_var_x(i_var)$Date)>(i-1)*n_months & month(Yt_var_x(i_var)$Date)<=i*n_months) - wt_tot<-which(month(Tot_var_x(i_var)$Date)>(i-1)*n_months & month(Tot_var_x(i_var)$Date)<=i*n_months) - - if (variable_x=="min_air_temp"){ - Campylobacter_var_x<-Yt_var_x(i_var)$min_temp[wt] - var_x_tot<-Tot_var_x(i_var)$min_temp[wt_tot] - - } - if (variable_x=="max_air_temp"){ - Campylobacter_var_x<-Yt_var_x(i_var)$max_temp[wt] - var_x_tot<-Tot_var_x(i_var)$max_temp[wt_tot] - - } - - if (variable_x=="mean_temp"){ - Campylobacter_var_x<-Yt_var_x(i_var)$mean_temp[wt] - var_x_tot<-Tot_var_x(i_var)$mean_temp[wt_tot] - - } - - - - if (variable_x=="humidity"){ - Campylobacter_var_x<-Yt_var_x(i_var)$humidity[wt] - var_x_tot<-Tot_var_x(i_var)$humidity[wt_tot] - - } - if (variable_x=="wind"){ - Campylobacter_var_x<-Yt_var_x(i_var)$wind[wt] - var_x_tot<-Tot_var_x(i_var)$wind[wt_tot] - - } - if (variable_x=="rain"){ - Campylobacter_var_x<-Yt_var_x(i_var)$rain[wt] - var_x_tot<-Tot_var_x(i_var)$rain[wt_tot] - - } - - n_var_x_tot<-hist(var_x_tot,breaks=breaks_var_x) - n_var_x<-hist((as.numeric(as.character(Campylobacter_var_x))),breaks=breaks_var_x) - - residents<-rep(0,times=length(n_var_x$counts)) - residents_tot<-rep(0,times=length(n_var_x$counts)) - - wt<-which(n_var_x_tot$counts!=0) - wt2<-which(n_var_x$counts!=0) - - - if (variable_x=="min_air_temp"){ - if(length(wt)>0){ - for (j in c(1:(length(wt)))){ - - ww<-which(Tot_var_x(i_var)$min_temp>=n_var_x_tot$breaks[wt[j]] & - Tot_var_x(i_var)$min_temp<=n_var_x_tot$breaks[wt[j]]+delta_temp ) - residents_tot[wt[j]]<-sum(as.numeric(Tot_var_x(i_var)$residents[ww])) - } - if(length(wt2)>0){ - for (j in c(1:(length(wt2)))){ - - ww<-which(Yt_var_x(i_var)$min_temp>=n_var_x$breaks[wt2[j]] & Yt_var_x(i_var)$min_temp<=n_var_x$breaks[wt2[j]]+delta_temp) - residents[wt2[j]]<-sum(Yt_var_x(i_var)$residents[ww]) - } - } - - } - - - } - if (variable_x=="max_air_temp"){ - - if(length(wt)>0){ - for (j in c(1:(length(wt)))){ - - ww<-which(Tot_var_x(i_var)$max_temp>=n_var_x_tot$breaks[wt[j]] & - Tot_var_x(i_var)$max_temp<=n_var_x_tot$breaks[wt[j]]+delta_temp ) - residents_tot[wt[j]]<-sum(as.numeric(Tot_var_x(i_var)$residents[ww])) - } - if(length(wt2)>0){ - for (j in c(1:(length(wt2)))){ - - ww<-which(Yt_var_x(i_var)$max_temp>=n_var_x$breaks[wt2[j]] & Yt_var_x(i_var)$max_temp<=n_var_x$breaks[wt2[j]]+delta_temp) - residents[wt2[j]]<-sum(Yt_var_x(i_var)$residents[ww]) - } - } - - } - - } - - if (variable_x=="mean_temp"){ - if(length(wt)>0){ - for (j in c(1:(length(wt)))){ - - ww<-which(Tot_var_x(i_var)$mean_temp>=n_var_x_tot$breaks[wt[j]] & - Tot_var_x(i_var)$mean_temp<=n_var_x_tot$breaks[wt[j]]+delta_temp ) - residents_tot[wt[j]]<-sum(as.numeric(Tot_var_x(i_var)$residents[ww])) - } - if(length(wt2)>0){ - for (j in c(1:(length(wt2)))){ - - ww<-which(Yt_var_x(i_var)$mean_temp>=n_var_x$breaks[wt2[j]] & Yt_var_x(i_var)$mean_temp<=n_var_x$breaks[wt2[j]]+delta_temp) - residents[wt2[j]]<-sum(Yt_var_x(i_var)$residents[ww]) - } - } - - } - } - - - if (variable_x=="humidity"){ - if(length(wt)>0){ - for (j in c(1:(length(wt)))){ - - ww<-which(Tot_var_x(i_var)$hum>=n_var_x_tot$breaks[wt[j]] & - Tot_var_x(i_var)$hum<=n_var_x_tot$breaks[wt[j]]+delta_hum ) - residents_tot[wt[j]]<-sum(as.numeric(Tot_var_x(i_var)$residents[ww])) - } - if(length(wt2)>0){ - for (j in c(1:(length(wt2)))){ - - ww<-which(Yt_var_x(i_var)$hum>=n_var_x$breaks[wt2[j]] & Yt_var_x(i_var)$hum<=n_var_x$breaks[wt2[j]]+delta_hum) - residents[wt2[j]]<-sum(Yt_var_x(i_var)$residents[ww]) - } - } - - } - } - if (variable_x=="wind"){ - if(length(wt)>0){ - for (j in c(1:(length(wt)))){ - - ww<-which(Tot_var_x(i_var)$wind>=n_var_x_tot$breaks[wt[j]] & - Tot_var_x(i_var)$wind<=n_var_x_tot$breaks[wt[j]]+delta_wind) - residents_tot[wt[j]]<-sum(as.numeric(Tot_var_x(i_var)$residents[ww])) - } - if(length(wt2)>0){ - for (j in c(1:(length(wt2)))){ - - ww<-which(Yt_var_x(i_var)$wind>=n_var_x$breaks[wt2[j]] & Yt_var_x(i_var)$wind<=n_var_x$breaks[wt2[j]]+delta_wind) - residents[wt2[j]]<-sum(Yt_var_x(i_var)$residents[ww]) - } - } - - } - - } - if (variable_x=="rain"){ - - if(length(wt)>0){ - for (j in c(1:(length(wt)))){ - - ww<-which(Tot_var_x(i_var)$rain>=n_var_x_tot$breaks[wt[j]] & - Tot_var_x(i_var)$rain<=n_var_x_tot$breaks[wt[j]]+delta_rain) - residents_tot[wt[j]]<-sum(as.numeric(Tot_var_x(i_var)$residents[ww])) - } - if(length(wt2)>0){ - for (j in c(1:(length(wt2)))){ - - ww<-which(Yt_var_x(i_var)$rain>=n_var_x$breaks[wt2[j]] & Yt_var_x(i_var)$rain<=n_var_x$breaks[wt2[j]]+delta_rain) - residents[wt2[j]]<-sum(Yt_var_x(i_var)$residents[ww]) - } - } - - } - } - - - # if(length(residents)>0){ - data_df<-data.frame(n_var_x$mids,n_var_x$counts/n_var_x_tot$counts,(n_var_x$counts)/(residents_tot),i,breaks_var[i_var],n_var_x$counts,n_var_x_tot$counts,residents,residents_tot) - - colnames(data_df)<-c(variable_x,"prop","incidence","month",variable,"counts","counts_tot","residents","residents_tot") - var_x_loc_df<-rbind(var_x_loc_df,data_df) - colnames(var_x_loc_df)<-c(variable_x,"prop","incidence","month",variable,"counts","counts_tot","residents","residents_tot") -#} - #} - - - } -} - - -write.table(var_x_loc_df,paste("../../Data_Base/OPIE_data_base/",variable,"_",variable_x,"_",width_char,"_original_MEDMI.csv",sep=""), col.names = FALSE, sep = ",",eol = "\n")