diff --git a/PAPER_Conditional_probability_quantile_original_MEDMI_4_variables_quarter.R b/PAPER_Conditional_probability_quantile_original_MEDMI_4_variables_quarter.R deleted file mode 100644 index 77493adab997755e6ce694acdef58e24d579c52d..0000000000000000000000000000000000000000 --- a/PAPER_Conditional_probability_quantile_original_MEDMI_4_variables_quarter.R +++ /dev/null @@ -1,524 +0,0 @@ -# The code does look at how the risk of Campylobacter in humans depends on environmental variables -#The code uses old MEDMI data (not corrected for altitude) and analysis done on regular division of the range of the environemtal varaibles rather than quantile. - - -rm(list=ls(all=TRUE)) -# -library(ISOweek) -library(lubridate) -library(ggplot2) -require(MASS) -library(scales) -require(pheno) -library(timeDate) -library(pastecs) -library(stringi) -library(timeSeries) -#library(Hmisc) - -#list.of.packages <- c("xts") -#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] -#if(length(new.packages)) install.packages(new.packages) -library(xts) - - - - -## Varaible file - -variable_int<-"humidity" -variable_df_1<-read.csv(paste("../../Data_Base/OPIE_data_base/",variable_int,".csv",sep="")) -humidity<-variable_df_1[,-c(1,2)] -#dates<-as.Date(as.character(variable_df_1[-c(1,2),2]),format="%d/%m/%Y") - -dates_s<-as.Date(as.character(variable_df_1[-c(1,2),2]),format="%Y-%m-%d") -dates<-rep(dates_s,times=length(variable_df_1)-2) -All_PC_s<-names(variable_df_1[1,]) -All_PC_s<-All_PC_s[-c(1,2)] -All_PC<-rep(All_PC_s,each=length(dates_s)) - - -width<-30 -width_char<-paste(width) - - - -variable<-"Maximum_air_temperature" -#variable<-"daylength" -variable_y<-"Mean_Precipitation" -variable_x<-"Relative_humidity" - -#variable_y<-"Mean_Precipitation" -#variable<-"daylength" -#variable<-"Mean_Precipitation" -#"Maximum_air_temperature", -#"Minimum_air_temperature", -#"Mean_wind_speed", -#"Mean_Precipitation", -#"Relative_humidity", -#"daylength" - - -Env_Campylobacter_data_all2<-read.csv(paste("../../Data_Base/Cases_Environment/Simulated_Campylobacter_environment_",width_char,"_original_MEDMI.csv",sep="")) - -Env_Campylobacter_data_all2<-Env_Campylobacter_data_all2[,-1] -colnames(Env_Campylobacter_data_all2)<-c("PostCode","Date","Cases", - "Maximum_air_temperature", - "Minimum_air_temperature", - "Mean_wind_speed", - "Cumul_Precipitation", - "Mean_Precipitation", - "Relative_humidity", - "daylength", - "residents") - -Env_laboratory_weekly<-read.csv(paste("../../Data_Base/Cases_Environment/Simulated_Laboratory_",width_char,"_original_MEDMI.csv",sep="")) -Env_laboratory_weekly<-Env_laboratory_weekly[,-1] -colnames(Env_laboratory_weekly)<-c("PostCode","Date", - "Maximum_air_temperature", - "Minimum_air_temperature", - "Mean_wind_speed", - "Cumul_Precipitation", - "Mean_Precipitation", - "Relative_humidity", - "daylength", - "residents") - - - - -Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2,year(as.Date(Env_Campylobacter_data_all2$Date))>=1990 & year(as.Date(Env_Campylobacter_data_all2$Date))<=2015) -Env_laboratory_int1<-subset(Env_laboratory_weekly,year(as.Date(Env_laboratory_weekly$Date))>=1990 & year(as.Date(Env_laboratory_weekly$Date))<=2015) - - -quarter_year<-4 -quarter_year_char<-paste("_",as.character(quarter_year),"-quarter_year",sep="") - -if (quarter_year==1){ -Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2,month(as.Date(Env_Campylobacter_data_all2$Date))>=1 & month(as.Date(Env_Campylobacter_data_all2$Date))<=3) -Env_laboratory_int1<-subset(Env_laboratory_weekly,month(as.Date(Env_laboratory_weekly$Date))>=1 & month(as.Date(Env_laboratory_weekly$Date))<=3) -} - -if (quarter_year==2){ -Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2,month(as.Date(Env_Campylobacter_data_all2$Date))>=4 & month(as.Date(Env_Campylobacter_data_all2$Date))<=6) -Env_laboratory_int1<-subset(Env_laboratory_weekly,month(as.Date(Env_laboratory_weekly$Date))>=4 & month(as.Date(Env_laboratory_weekly$Date))<=6) -} - -if (quarter_year==3){ -Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2,month(as.Date(Env_Campylobacter_data_all2$Date))>=7 & month(as.Date(Env_Campylobacter_data_all2$Date))<=9) -Env_laboratory_int1<-subset(Env_laboratory_weekly,month(as.Date(Env_laboratory_weekly$Date))>=7 & month(as.Date(Env_laboratory_weekly$Date))<=9) -} - -if (quarter_year==4){ -Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2,month(as.Date(Env_Campylobacter_data_all2$Date))>=10 & month(as.Date(Env_Campylobacter_data_all2$Date))<=12) -Env_laboratory_int1<-subset(Env_laboratory_weekly,month(as.Date(Env_laboratory_weekly$Date))>=10 & month(as.Date(Env_laboratory_weekly$Date))<=12) -} - - - -############# Not elegant but it works for the rest - -Env_laboratory_weekly_sub<-Env_laboratory_int1 -Env_Campylobacter_data_all2_sub<-Env_Campylobacter_data_int1 - - - - -for (quarter in c(1:4)){ - -quarter_char<-paste("_",as.character(quarter),"-quarter",sep="") - - breaks_daylength<-(as.numeric(quantile(na.omit(Env_Campylobacter_data_all2_sub$daylength), probs=seq(0,1, by=0.25), na.rm=TRUE))) - breaks_daylength[length(breaks_daylength)]<-ceiling((as.numeric(quantile(na.omit(Env_Campylobacter_data_all2_sub$daylength), probs=seq(0,1, by=0.25), na.rm=TRUE))))[length(breaks_daylength)] - - breaks_daylength[1]<-floor((as.numeric(quantile(na.omit(Env_Campylobacter_data_all2_sub$daylength), probs=seq(0,1, by=0.25), na.rm=TRUE))))[1] - - - -if (quarter==1){ -Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2_sub,Env_Campylobacter_data_all2_sub$daylength<=breaks_daylength[2]) -Env_laboratory_int1<-subset(Env_laboratory_weekly_sub,Env_laboratory_weekly_sub$daylength<=breaks_daylength[2]) -hours_char<-paste("_",as.character(round(breaks_daylength[2])),"-quarter",sep="") -} - -if (quarter==2){ -Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2_sub,Env_Campylobacter_data_all2_sub$daylength>breaks_daylength[2] & Env_Campylobacter_data_all2_sub$daylength<=breaks_daylength[3]) -Env_laboratory_int1<-subset(Env_laboratory_weekly_sub,Env_laboratory_weekly_sub$daylength>breaks_daylength[2] & Env_laboratory_weekly$daylength<=breaks_daylength[3]) -hours_char<-paste("_",as.character(round(breaks_daylength[3])),"-quarter",sep="") -} - -if (quarter==3){ -Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2_sub,Env_Campylobacter_data_all2_sub$daylength>breaks_daylength[3] & Env_Campylobacter_data_all2_sub$daylength<=breaks_daylength[4]) -Env_laboratory_int1<-subset(Env_laboratory_weekly_sub,Env_laboratory_weekly_sub$daylength>breaks_daylength[3] & Env_laboratory_weekly$daylength<=breaks_daylength[4]) -hours_char<-paste("_",as.character(round(breaks_daylength[4])),"-quarter",sep="") -} - -if (quarter==4){ -Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2_sub,Env_Campylobacter_data_all2_sub$daylength>breaks_daylength[4]) -Env_laboratory_int1<-subset(Env_laboratory_weekly_sub,Env_laboratory_weekly_sub$daylength>breaks_daylength[4]) -hours_char<-paste("_",as.character(round(breaks_daylength[5])),"-quarter",sep="") -} - - - - -################### include latitude and longitude -Coord_laboratory<-read.csv(paste("../../Data_Base/Cases/Lab_PostCodes.csv",sep="")) - - -lat_long_lab<-data.frame(names(Coord_laboratory),as.numeric(Coord_laboratory[1,]),as.numeric(Coord_laboratory[2,]))# -colnames(lat_long_lab)<-c("PostCode","lat","long") - -Env_laboratory_int2<-merge(Env_laboratory_int1,lat_long_lab,by="PostCode") -Env_laboratory_int3<-data.frame(Env_laboratory_int2) - -Env_Campylobacter_data_int2<-merge(Env_Campylobacter_data_int1,lat_long_lab,by="PostCode") -Env_Campylobacter_data_int3<-data.frame(Env_Campylobacter_data_int2) - - - -######################## include daylength ################## - -PC_df<-data.frame(All_PC,as.Date(dates)) -colnames(PC_df)<-c("PostCode","Date") - - -Post_Codes_df<-merge(PC_df,lat_long_lab,by="PostCode") - - -daylength<-function(lat,day_year) -{ - #Latitude measure in degrees - P <- asin(.39795*cos(.2163108 + 2*atan(.9671396*tan(.00860*(day_year-186))))) - Denom<-cos(lat*pi/180)*cos(P) - Numer<-sin(0.8333*pi/180) + sin(lat*pi/180)*sin(P) - D<-24-(24/pi)*acos(Numer/Denom) - return(D) -} - -latitude<-Post_Codes_df$lat -day_of_the_year<-yday(as.Date(Post_Codes_df$Date)) - -daylength_int1<-mapply(daylength, latitude, day_of_the_year) -daylength_df<-data.frame(latitude, day_of_the_year,as.Date(Post_Codes_df$Date),daylength_int1) -colnames(daylength_df)<-c("lat","day_year","Date","daylength") -daylength_df$Date<-as.factor(daylength_df$Date) -daylength_df$lat<-as.factor(daylength_df$lat) -Env_laboratory_int3$Date<-as.factor(Env_laboratory_int3$Date) -Env_laboratory_int3$lat<-as.factor(Env_laboratory_int3$lat) - -#Env_laboratory_int4<-merge(Env_laboratory_int3,daylength_df,by=c("lat","Date")) -#Env_laboratory<-data.frame(Env_laboratory_int4) -Env_laboratory<-data.frame(Env_laboratory_int3) -Env_Campylobacter_data_int3$Date<-as.factor(Env_Campylobacter_data_int3$Date) -Env_Campylobacter_data_int3$lat <-as.factor(Env_Campylobacter_data_int3$lat) - - -#Env_Campylobacter_data_int4<-merge(Env_Campylobacter_data_int3,daylength_df,by=c("lat","Date")) -#Env_Campylobacter_data<-data.frame(Env_Campylobacter_data_int4) -Env_Campylobacter_data<-data.frame(Env_Campylobacter_data_int3) - - - - - -################### Divide the domains of the variables in bins according to quantiles - - -index_C<-which (names(Env_Campylobacter_data)==variable) -index_y_C<-which (names(Env_Campylobacter_data)==variable_y) -index_x_C<-which (names(Env_Campylobacter_data)==variable_x) - -index_res_C<-which (names(Env_Campylobacter_data)=="residents") - - -index<-which (names(Env_laboratory)==variable) -index_y<-which (names(Env_laboratory)==variable_y) -index_x<-which (names(Env_laboratory)==variable_x) -index_res<-which (names(Env_laboratory)=="residents") - - -######################### - - -breaks_z_lab<-function(variable,by_z) -{ - - index<-which (names(Env_laboratory)==variable) - - - breaks_z<-as.numeric(quantile(na.omit(Env_laboratory[,index]), probs=seq(0,1, by=by_z), na.rm=TRUE)) - breaks_z[length(breaks_z)]<-ceiling(as.numeric(quantile(na.omit(Env_laboratory[,index]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[length(breaks_z)] - breaks_z[1]<-floor(as.numeric(quantile(na.omit(Env_laboratory[,index]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[1] - - - return(breaks_z) - -} - - - -breaks_z<-function(variable,by_z) -{ - - index_C<-which (names(Env_Campylobacter_data)==variable) - - breaks_z<-as.numeric(quantile(na.omit(Env_Campylobacter_data[,index_C]), probs=seq(0,1, by=by_z), na.rm=TRUE)) - - breaks_z[length(breaks_z)]<-ceiling(as.numeric(quantile(na.omit(Env_Campylobacter_data[,index_C]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[length(breaks_z)] - breaks_z[1]<-floor(as.numeric(quantile(na.omit(Env_Campylobacter_data[,index_C]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[1] - - - return(breaks_z) - -} - - - - -breaks_y_lab<-function(variable,variable_y,by_z,by_y,j_z) -{ - - index_C<-which (names(Env_Campylobacter_data)==variable) - - index<-which (names(Env_laboratory)==variable) - index_y<-which (names(Env_laboratory)==variable_y) - - - - wt<-(findInterval(Env_Campylobacter_data[,index_C],breaks_z(variable,by_z))) - ww<-which(wt==j_z) - Env_Campylobacter_data_some<-Env_Campylobacter_data[ww,] - - wt<-(findInterval(Env_laboratory[,index],breaks_z(variable,by_z))) - ww<-which(wt==j_z) - Env_laboratory_some<-Env_laboratory[ww,] - - if (length(Env_Campylobacter_data_some[,1])!=0) { - - breaks_y<-as.numeric(quantile(na.omit(Env_laboratory_some[,index_y]), probs=seq(0,1, by=by_y), na.rm=TRUE)) - breaks_y[length(breaks_y)]<-ceiling(as.numeric(quantile(na.omit(Env_laboratory_some[,index_y]), probs=seq(0,1, by=by_y), na.rm=TRUE)))[length(breaks_y)] - breaks_y[1]<-floor(as.numeric(quantile(na.omit(Env_laboratory_some[,index_y]), probs=seq(0,1, by=by_y), na.rm=TRUE)))[1] - - }else{ - - breaks_y<-c() - } - - return(breaks_y) - } - - - -breaks_y<-function(variable,variable_y,by_z,by_y,j_z) -{ - - index_C<-which (names(Env_Campylobacter_data)==variable) - index_y_C<-which (names(Env_Campylobacter_data)==variable_y) - - - - wt<-(findInterval(Env_Campylobacter_data[,index_C],breaks_z(variable,by_z))) - ww<-which(wt==j_z) - Env_Campylobacter_data_some<-Env_Campylobacter_data[ww,] - - - if (length(Env_Campylobacter_data_some[,1])!=0) { - - breaks_y<-as.numeric(quantile(na.omit(Env_Campylobacter_data_some[,index_y_C]), probs=seq(0,1, by=by_y), na.rm=TRUE)) - breaks_y[length(breaks_y)]<-ceiling(as.numeric(quantile(na.omit(Env_Campylobacter_data_some[,index_y_C]), probs=seq(0,1, by=by_y), na.rm=TRUE)))[length(breaks_y)] - breaks_y[1]<-floor(as.numeric(quantile(na.omit(Env_Campylobacter_data_some[,index_y_C]), probs=seq(0,1, by=by_y), na.rm=TRUE)))[1] - - }else{ - - breaks_y<-c() - } - - return(breaks_y) - } - - - -breaks_x_lab<-function(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y) -{ - index_C<-which (names(Env_Campylobacter_data)==variable) - index_y_C<-which (names(Env_Campylobacter_data)==variable_y) - - - index<-which (names(Env_laboratory)==variable) - index_y<-which (names(Env_laboratory)==variable_y) - index_x<-which (names(Env_laboratory)==variable_x) - - if(is.na(breaks_y(variable,variable_y,by_z,by_y,j_z)[j_y])=='FALSE'){ - - wt<-(findInterval(Env_Campylobacter_data[,index_y_C],breaks_y(variable,variable_y,by_z,by_y,j_z))) - ww<-which(wt==j_y) - Env_Campylobacter_data_some<-Env_Campylobacter_data[ww,] - - if (length(Env_Campylobacter_data_some[,1])!=0) { - - wt<-(findInterval(Env_laboratory[,index_y],breaks_y(variable,variable_y,by_z,by_y,j_z))) - ww<-which(wt==j_y) - Env_laboratory_some<-Env_laboratory[ww,] - - - breaks_x<-as.numeric(quantile(na.omit(Env_laboratory_some[,index_x]), probs=seq(0,1, by=by_x), na.rm=TRUE)) - breaks_x[length(breaks_x)]<-ceiling(as.numeric(quantile(na.omit(Env_laboratory_some[,index_x]), probs=seq(0,1, by=by_x), na.rm=TRUE)))[length(breaks_x)] - breaks_x[1]<-floor(as.numeric(quantile(na.omit(Env_laboratory_some[,index_x]), probs=seq(0,1, by=by_x), na.rm=TRUE)))[1] - - } else { - - breaks_x<-c() - } } else { - - breaks_x<-c() - - } - - return(breaks_x) - } - - -breaks_x<-function(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y) -{ - index_C<-which (names(Env_Campylobacter_data)==variable) - index_y_C<-which (names(Env_Campylobacter_data)==variable_y) - index_x_C<-which (names(Env_Campylobacter_data)==variable_x) - - if(is.na(breaks_y(variable,variable_y,by_z,by_y,j_z)[j_y])=='FALSE'){ - - wt<-(findInterval(Env_Campylobacter_data[,index_y_C],breaks_y(variable,variable_y,by_z,by_y,j_z))) - ww<-which(wt==j_y) - Env_Campylobacter_data_some<-Env_Campylobacter_data[ww,] - - if (length(Env_Campylobacter_data_some[,1])!=0) { - - wt<-(findInterval(Env_Campylobacter_data_some[,index_y_C],breaks_y(variable,variable_y,by_z,by_y,j_z))) - ww<-which(wt==j_y) - Env_Campylobacter_data_some2<-Env_Campylobacter_data_some[ww,] - - - breaks_x<-as.numeric(quantile(na.omit(Env_Campylobacter_data_some2[,index_x_C]), probs=seq(0,1, by=by_x), na.rm=TRUE)) - breaks_x[length(breaks_x)]<-ceiling(as.numeric(quantile(na.omit(Env_Campylobacter_data_some2[,index_x_C]), probs=seq(0,1, by=by_x), na.rm=TRUE)))[length(breaks_x)] - breaks_x[1]<-floor(as.numeric(quantile(na.omit(Env_Campylobacter_data_some2[,index_x_C]), probs=seq(0,1, by=by_x), na.rm=TRUE)))[1] - - } else - { breaks_x<-c() - } - } - else { - - breaks_x<-c() - - } - - return(breaks_x) - } - - -################# - - - - -var_x_loc_df<-data.frame(character(), character(),character(),numeric(),numeric(),numeric()) -colnames(var_x_loc_df)<-c(variable,variable_y,variable_x,"counts","residents","residents_tot") - -residents_i_var<-0 -residents_universal<-0 -#i_var_max<-length(breaks_var) -#i_var_min<-1 -#i_var_max_x<-length(breaks_var_x) -#i_var_min_x<-1 - - -##################### -by_z<-0.25 -by_y<-0.25 -by_x<-0.1 - -#i_var_min<-breaks_z(variable,by_z)[1] -#i_var_max<-breaks_z(variable,by_z)[length(breaks_z(variable,by_z))] -j_z_min<-1 -j_z_max<-length(breaks_z(variable,by_z))-1 - - - -for (j_z in c(j_z_min:j_z_max)) -{ - - if (length(Env_Campylobacter_data[,index_C])!=0){ - - wt<-(findInterval((Env_Campylobacter_data[,index_C]),breaks_z(variable,by_z))) - ww<-which(wt==j_z) - Env_Campylobacter_data_z<-Env_Campylobacter_data[ww,] - - wt<-(findInterval((Env_laboratory[,index]),breaks_z(variable,by_z))) - ww<-which(wt==j_z) - Env_laboratory_z<-Env_laboratory[ww,] - - if (length(Env_Campylobacter_data_z[,1])!=0){ - if (length(breaks_y(variable,variable_y,by_z,by_y,j_z))!=0){ - - j_y_min<-1 - j_y_max<-length(breaks_y(variable,variable_y,by_z,by_y,j_z))-1 - - - - for (j_y in c(j_y_min:j_y_max)) - { - - wt<-(findInterval((Env_Campylobacter_data_z[,index_y_C]),breaks_y(variable,variable_y,by_z,by_y,j_z))) - ww<-which(wt==j_y) - Env_Campylobacter_data_y<-Env_Campylobacter_data_z[ww,] - - wt<-(findInterval((Env_laboratory_z[,index_y]),breaks_y(variable,variable_y,by_z,by_y,j_z))) - ww<-which(wt==j_y) - Env_laboratory_y<-Env_laboratory_z[ww,] - - - if (length(breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y))!=0){ - - j_x_min<-1 - j_x_max<- length(breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y))-1 - for (j_x in c(j_x_min:j_x_max)) - { - - - wt<-(findInterval((Env_Campylobacter_data_y[,index_x_C]),breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y))) - ww<-which(wt==j_x) - Yt1<-Env_Campylobacter_data_y[ww,c(1:3,index_C,index_y_C,index_x_C,index_res_C)] - - wt<-(findInterval((Env_laboratory[,index_x]),breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y))) - ww<-which(wt==j_x) - Y_tot<-Env_laboratory_y[ww,c(1:2,index,index_y,index_x,index_res)] - - Total_cases<-sum((as.numeric(na.omit(Yt1$Cases)))) - residents<-sum((as.numeric(na.omit(Yt1$residents)))) - residents_tot<-sum((as.numeric(na.omit(Y_tot$residents)))) - - data_df<-data.frame( - breaks_z(variable,by_z)[j_z], - breaks_y(variable,variable_y,by_z,by_y,j_z)[j_y], - breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y)[j_x], - Total_cases, - residents, - residents_tot) - - - - - - colnames(data_df)<-c(variable,variable_y,variable_x,"counts","residents","residents_tot") - var_x_loc_df<-rbind(var_x_loc_df,data_df) - print(c(j_x,j_y,j_z, Total_cases)) - - }}} - } - - - }} -} - -write.csv(var_x_loc_df,paste("../../Data_Base/Cases_Environment/Conditional_probability_",variable,"_",variable_y,"_",variable_x,"_",width_char,hours_char,quarter_year_char,"_Simulated_for_rec_original_MEDMI_quantile.csv",sep="")) -} \ No newline at end of file