diff --git a/Codes/Function_Input_files.R b/Codes/Function_Input_files.R new file mode 100644 index 0000000000000000000000000000000000000000..abb55b26236580e86149d9fb58789f1cb36097bb --- /dev/null +++ b/Codes/Function_Input_files.R @@ -0,0 +1,45 @@ + +##### Read Simulation of Salmonella cases from the England and Wales. #### +# Reminder that these cases are only for the PC with residents information provided by PHE (missing 117 PC, and about 1/3 of the salmonella cases) +Env_Pathogen_data_all <-read_csv(paste("../Data_Base/Campylobacter_environment_",time_lag_char,".csv",sep="")) +Env_Pathogen_data_all <- as.data.frame(Env_Pathogen_data_all) + +#Env_Pathogen_data_all <-Env_Pathogen_data_all[,-1] +colnames(Env_Pathogen_data_all) <-c("PostCode","Date", + "Maximum_air_temperature", + "Minimum_air_temperature", + "Mean_wind_speed", + "Cumul_Precipitation", + "Mean_Precipitation", + "Relative_humidity", + "daylength", + "residents","Cases") + +Env_Pathogen_data_all$Temperature_amplitude <- Env_Pathogen_data_all$Maximum_air_temperature - Env_Pathogen_data_all$Minimum_air_temperature + +# Select the variables of interest: +Env_Pathogen_data_all$Date<-as.Date(Env_Pathogen_data_all$Date,format = "%d/%m/%Y") +Env_Pathogen_data_all <- distinct(Env_Pathogen_data_all) # Remove possible duplicates Gianni, correct but better to do this when we create the file +Env_Pathogen_data <-(subset(Env_Pathogen_data_all, year(Env_Pathogen_data_all$Date)>=year_first & year(Env_Pathogen_data_all$Date)<=year_last)) + +# Read the weather variables for every PC, averaged for the estimated delay in the past + +Env_laboratory_data_all <-read_csv(paste("../Data_Base/Laboratory_environment_",time_lag_char,".csv",sep="")) +Env_laboratory_data_all <- as.data.frame (Env_laboratory_data_all) +#Env_laboratory_data_all <-Env_laboratory_data_all[,-1] +colnames(Env_laboratory_data_all) <-c("PostCode","Date", + "Maximum_air_temperature", + "Minimum_air_temperature", + "Mean_wind_speed", + "Cumul_Precipitation", + "Mean_Precipitation", + "Relative_humidity", + "daylength", + "residents") + +Env_laboratory_data_all$Temperature_amplitude <- Env_laboratory_data_all$Maximum_air_temperature - Env_laboratory_data_all$Minimum_air_temperature + + +# Select the subset the year of interest + +Env_laboratory_data<-subset(Env_laboratory_data_all, year(as.Date(Env_laboratory_data_all$Date))>=year_first & year(as.Date(Env_laboratory_data_all$Date))<=year_last) diff --git a/Codes/Function_Plot_Reconstruction.R b/Codes/Function_Plot_Reconstruction.R new file mode 100644 index 0000000000000000000000000000000000000000..a8d8a7226e1bb2de4e6efa780aedbb2033b52720 --- /dev/null +++ b/Codes/Function_Plot_Reconstruction.R @@ -0,0 +1,129 @@ +# Plot Reconstruction of Time series and compare with empirical data + +Plot_Reconstruction_time_series<-function(Data_frame_1, Data_frame_2,Pars) +{ + + with(as.list(c(Data_frame_1, Data_frame_2,Pars)), { + + time_series_Post_Codes<-Data_frame_1 + empirical_time_series<-Data_frame_2 + file_name_ts<-Pars[1] + file_name_yearly<-Pars[2] + y_lab_text<-Pars[3] + +colnames(time_series_Post_Codes) <-c("Date","Cases","Lambda","PostCode") +time_series<-aggregate (Cases ~ Date, time_series_Post_Codes, sum) # get the mean of the cases per day for all the PC + +time_series_quantile <-ddply(time_series,~Date, function (x) quantile(x$Cases, c(.25,.5,.75))) +time_series_summary <-data.frame(time_series_quantile, rep("Model",times=length(time_series[,1]))) +colnames(time_series_summary)<-c("Date","f_quant","Cases","s_quant","source") + +#real_cases_mean <-empirical_time_series %>% mutate (rolling_mean= slider::slide_mean(Cases, before=3 , after=3 )) # %>% ungroup() #Gianni Check this, I changed as not sure it worked +#empirical_time_series$rolling_mean <-real_cases_mean$rolling_mean + +real_cases_quantile<-ddply(empirical_time_series,~Date, function (x) quantile(x$Cases, c(.25,.5,.75))) +real_cases_summary<-cbind(real_cases_quantile,rep("Cases",times=length(real_cases_quantile[,1]))) +colnames(real_cases_summary)<-c("Date","f_quant","Cases","s_quant","source") + +#time_series_all <-rbind(time_series_summary[,c(1,2,6)], real_cases_summary[,c(1,2,6)]) + +time_series_all <-rbind(time_series_summary, real_cases_summary) + + +############## Plot Average per day of the year ################# + +pal<-wes_palette("Cavalcanti1") +time_series_plot<-ggplot(time_series_all,aes(x=Date,y=Cases,colour=source)) +#time_series_plot<-time_series_plot+scale_color_brewer(palette=pal) +#time_series_plot<-time_series_plot+geom_line(data=real_cases_summary) +time_series_plot<-time_series_plot+geom_line(size=0.75) +#time_series_plot<-time_series_plot+scale_color_manual(values=c( "#E69F00", "#56B4E9")) +time_series_plot<-time_series_plot+scale_color_manual(values=wes_palette(n=2, name="Cavalcanti1")) + +time_series_plot<-time_series_plot+ xlab("Date")+ ylab("Campylobacter Cases") + +time_series_plot<-time_series_plot+ + theme(legend.position= c(0.125,0.85),legend.title = element_blank(), + legend.text = element_text( size = 10),legend.background = element_blank(), + legend.key=element_rect(colour=NA,fill=NA)) + + +time_series_plot<-time_series_plot+ theme(axis.title.x =element_text( colour="#990000", size=13)) +time_series_plot<-time_series_plot+ theme(axis.title.y =element_text( colour="#990000", size=13)) + + + +time_series_plot<-time_series_plot+ theme(axis.title.x =element_text(size=13)) +time_series_plot<-time_series_plot+ theme(axis.title.y =element_text(size=13)) +print(time_series_plot) + + +tiff(filename = file_name_ts,width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default") +print(time_series_plot) +dev.off() + + +time_series$yday <-as.factor(yday(time_series$Date)) + +time_series_average1 <-ddply(time_series,~yday, summarise, mean=mean(Cases)) # mean of all cases for the same day of the year +time_series_quantile1 <-ddply(time_series,~yday, function (x) quantile(x$Cases, c(.25,.5,.75))) # quantiles of cases for all the years for the same day of the year +time_series_average <-cbind(time_series_average1,time_series_quantile1[,-1]) + +#real_cases$norm<-(real_cases$Cases/sum(real_cases$Cases)) +empirical_time_series$yday<-as.factor(yday(as.Date(as.character(empirical_time_series$Date)))) +real_cases_average1<-ddply(empirical_time_series,~yday,summarise,mean=mean(Cases)) +real_cases_quantile1<-ddply(empirical_time_series,~yday,function (x) quantile(x$Cases, c(.25,.5,.75))) +real_cases_average2<-cbind(real_cases_average1,real_cases_quantile1[,-1]) +real_cases_average<- + data.frame(real_cases_average2[,1],real_cases_average2[,c(2:5)]) + + +df1 <-data.frame(real_cases_average,rep("Cases",times=length(real_cases_average[,1]))) +colnames(df1) <-c("Day","Mean","f_quant","median","s_quant","source") +df2 <-data.frame(time_series_average,rep("Model",times=length(time_series_average[,1]))) +colnames(df2) <-c("Day","Mean","f_quant","median","s_quant","source") + +average_data <-rbind(df1,df2) +average_data$Day <-as.numeric(average_data$Day) + +## We get one of the years as example to plot the date, in this case 2014, but could be any year within our data set. +month_2014 <-month.abb[unique(month(as.Date(average_data$Day, origin = "2014-01-01")))] + +tick_pos<-yday(as.Date(as.character( + c("2014-01-01","2014-02-01","2014-03-01","2014-04-01","2014-05-01","2014-06-01", + "2014-07-01","2014-08-01","2014-09-01","2014-10-01","2014-11-01","2014-12-01") +))) + + + +yearly_average <-ggplot(average_data, aes(x=Day, y=Mean, colour=source))+ + geom_line(size=2)+ + geom_ribbon(aes(ymin=f_quant, ymax=s_quant, fill=source), alpha=0.15) + + xlab("Day of the Year") + ylab(y_lab_text) + + + scale_x_continuous(breaks=tick_pos, labels=month_2014) + + + theme(legend.position= c(0.125,0.75),legend.title = element_text( size = 10), + legend.text = element_text( size = 10),legend.background = element_blank(), + legend.key=element_rect(colour=NA,fill=NA)) + + + theme(axis.title.x =element_text( colour="#990000", size=13)) + + theme(axis.title.y =element_text( colour="#990000", size=13)) +#+ ggtitle(paste(i,",", h,",", k)) + + +print(yearly_average) + + +# Save the plots: +# Yearly average + +tiff(filename = file_name_yearly, width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw", antialias="default") +print(yearly_average) +dev.off() + +return() + +}) +} + diff --git a/Codes/Function_Reconstruction.R b/Codes/Function_Reconstruction.R new file mode 100644 index 0000000000000000000000000000000000000000..cc7aab272c8ab810aa29e82df8399692e3111a57 --- /dev/null +++ b/Codes/Function_Reconstruction.R @@ -0,0 +1,79 @@ +Reconstruction_time_series<-function(Conditional_prevalence_unif_info, Data_frame_2,Pars) +{ + + with(as.list(c(Conditional_prevalence_unif_info, Data_frame_2,Pars)), { + + + Conditional_prevalence_unif<-Conditional_prevalence_unif_info[[1]] + breaks_x<-Conditional_prevalence_unif_info[[2]] + breaks_y<-Conditional_prevalence_unif_info[[3]] + breaks_z<-Conditional_prevalence_unif_info[[4]] + Env_laboratory_data<-Data_frame_2 + variable_x<-Pars[1] + variable_y<-Pars[2] + variable_z<-Pars[3] + info<-Pars[4] + + # z variable + dt_z = data.table(breaks_z, val = breaks_z) + dt_z <- na.omit (dt_z) + setattr(dt_z, "sorted", "breaks_z") + dt_z2 <- dt_z[J(Env_laboratory_data[[variable_z]]), roll = "nearest"] + variable_df <- as.data.frame(Env_laboratory_data[,c("PostCode","Date", "residents")]) + variable_df$variable_z_break<- dt_z2$val + + + # y variable + dt_y = data.table(breaks_y, val = breaks_y) + dt_y <- na.omit (dt_y) + setattr(dt_y, "sorted", "breaks_y") + dt_y2 <- dt_y[J(Env_laboratory_data[[variable_y]]), roll = "nearest"] + + + variable_df$variable_y_break<- dt_y2$val + + + + # x variable + dt_temp = data.table(breaks_x, val = breaks_x) + dt_temp <- na.omit (dt_temp) + setattr(dt_temp, "sorted", "breaks_x") + dt_temp2 <- dt_temp[J(Env_laboratory_data[[variable_x]]), roll = "nearest"] + + variable_df$variable_x_break<- dt_temp2$val + + variable_df2<-data.frame(variable_df$PostCode,variable_df$Date,variable_df$residents,variable_df$variable_z_break, variable_df$variable_y_break, variable_df$variable_x_break) + colnames(variable_df2) <- c("PostCode","Date", "residents", variable_z, variable_y, variable_x) + + dt_df <- data.table(variable_df2) + dt_var <- data.table(Conditional_prevalence_unif) + variable_df_dis3 <- dt_df[dt_var, on= c(variable_z,variable_y,variable_x), allow.cartesian=TRUE] # repeated rows since same conditions apply to more than one PC. Gianni this shoul be a merge + + variable_df_dis3 <- na.omit(variable_df_dis3) %>% distinct() + + + + variable_df_dis3 <-variable_df_dis3[order(as.Date(variable_df_dis3$Date)),] + + + # calculation of all the potential incidence based on the weather conditions of the area. The counts used as numerator correspond to the simulation of cases per weather condition. + variable_df_dis3$prevalence <-variable_df_dis3$counts/variable_df_dis3$residents_tot + #variable_df_dis_excluded<-subset(variable_df_dis3,variable_df_dis3$residents_tot<=cutoff) + #print(variable_df_dis_excluded) + #write.csv(na.omit(variable_df_dis_excluded),"variable_df_dis_excluded.csv") + #variable_df_dis3<- subset(variable_df_dis3,variable_df_dis3$residents_tot>=cutoff) #Gianni discuss this + + # if (length(na.omit(variable_df_dis3[,1])!=0)){ + + comp_cases <-variable_df_dis3$prevalence*variable_df_dis3$residents # estimation of the "real" incidence based on the possible cases due to weather and the residents number in the catchment area + time_series <-data.frame(variable_df_dis3$Date, comp_cases, variable_df_dis3$prevalence, variable_df_dis3$PostCode) + colnames(time_series) <-c("Date","Cases","Lambda","Lab") + + #} + + write.table(time_series, paste("../Data_Base/",variable_x,"_",variable_y,"_",variable_z,"_",time_lag_char,info,sep=""), col.names= NA, row.names= TRUE, sep=",",eol= "\n") # Saved on 17/11 after switching residents_total and residents from the denominator respecting the above attempt. + return(time_series) + + +}) +} diff --git a/Codes/Function_Wavelet_analysis.R b/Codes/Function_Wavelet_analysis.R new file mode 100644 index 0000000000000000000000000000000000000000..df24444eca9a42882d518a36b95cc717d4e46095 --- /dev/null +++ b/Codes/Function_Wavelet_analysis.R @@ -0,0 +1,59 @@ +# This script performs Wavelet analysis for to check for periodicity in the data # +wavelet_analysis<-function(State, Pars) +{ +# Cleaning of cases: +with(as.list(c(State, Pars)), { + + + filename_power<-Pars[1] + filename_power_average<-Pars[2] + + +Env_Pathogen_data0 <- ddply (State, ~Date, summarise, Cases=sum(Cases_computed)) # subset for all postcodes combined per day +# Wavelet spectrum plot +Env_Pathogen_data0$date<-Env_Pathogen_data0$Date +salmonella_data_national <-Env_Pathogen_data0[order(as.Date(Env_Pathogen_data0$Date)),] # Sort from least recent to most recent +dt0<-1 + +my.wt = analyze.wavelet(salmonella_data_national, "Cases", + loess.span=0.75, + dt=dt0, dj=1/20, + lowerPeriod=2.*dt0, + upperPeriod=floor(nrow(salmonella_data_national)/3)*dt0, + make.pval=T, n.sim=100) + + +wt.image(my.wt, color.key = "quantile", n.levels = 250,legend.params = list(lab = "wavelet power levels", mar = 4.7),show.date = T, + color.palette = "rainbow(n.levels, start=0, end=.6)",plot.contour = F, siglvl = 0.05, col.contour = "black", plot.ridge = T, lvl = 0, col.ridge = "black",date.format ="%Y-%m-%d",periodlab = "periods (days)",useRaster=T) + +# Save plot +tiff(filename = filename_power,width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default") + +wt.image(my.wt, color.key = "quantile", n.levels = 250,legend.params = list(lab = "wavelet power levels", mar = 4.7),show.date = T,color.palette = "rainbow(n.levels, start=0, end=.6)",plot.contour = F, siglvl = 0.05, col.contour = "black", plot.ridge = T, lvl = 0, col.ridge = "black",periodlab = "periods (minutes)",useRaster=F) + +dev.off() + + +# Power curve for ease of interpretation of Wavelet Gianni changed name as said detrend + +reconstruct(my.wt, plot.waves = F, lwd = c(1,2), legend.coords = "bottomleft") + +wt.avg(my.wt, siglvl=0.05, sigcol="red") +#wt.phase.image(my.wt, "Cases") + +# Save plot +tiff(filename = filename_power_average,width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default") + +wt.avg(my.wt, + show.siglvl = T, sigpch = 20, + label.avg.axis = T, averagelab = NULL, + label.period.axis = T, periodlab = NULL, + show.legend = T, legend.coords = "topright", + main = NULL, lwd = 0.5, + verbose = F) + +dev.off() + +return(salmonella_data_national[,c(1,2)]) +}) +}