update

2076280c · Lo Iacono, Giovanni Dr (School of Vet Med.) · 02e8d51c · 2076280c · 2076280c · 2076280c
Commit 2076280c authored 2 years ago by Lo Iacono, Giovanni Dr (School of Vet Med.)
--- a/Codes/Function_Input_files.R
+++ b/Codes/Function_Input_files.R
+
+##### Read Simulation of Salmonella cases from the England and Wales. ####
+#  Reminder that these cases are only for the PC with residents information provided by PHE (missing 117 PC, and about 1/3 of the salmonella cases)
+Env_Pathogen_data_all <-read_csv(paste("../Data_Base/Campylobacter_environment_",time_lag_char,".csv",sep=""))
+Env_Pathogen_data_all <- as.data.frame(Env_Pathogen_data_all)
+
+#Env_Pathogen_data_all <-Env_Pathogen_data_all[,-1]
+colnames(Env_Pathogen_data_all) <-c("PostCode","Date",
+                                     "Maximum_air_temperature",
+                                     "Minimum_air_temperature",
+                                     "Mean_wind_speed",
+                                     "Cumul_Precipitation",
+                                     "Mean_Precipitation",
+                                     "Relative_humidity",
+                                     "daylength",
+                                     "residents","Cases")
+
+Env_Pathogen_data_all$Temperature_amplitude <- Env_Pathogen_data_all$Maximum_air_temperature - Env_Pathogen_data_all$Minimum_air_temperature
+
+# Select the variables of interest:
+Env_Pathogen_data_all$Date<-as.Date(Env_Pathogen_data_all$Date,format = "%d/%m/%Y")
+Env_Pathogen_data_all <- distinct(Env_Pathogen_data_all)                         # Remove possible duplicates Gianni, correct but better to do this when we create the file
+Env_Pathogen_data <-(subset(Env_Pathogen_data_all, year(Env_Pathogen_data_all$Date)>=year_first & year(Env_Pathogen_data_all$Date)<=year_last))
+
+# Read the weather variables for every PC, averaged for the estimated delay in the past
+
+Env_laboratory_data_all <-read_csv(paste("../Data_Base/Laboratory_environment_",time_lag_char,".csv",sep=""))
+Env_laboratory_data_all <- as.data.frame (Env_laboratory_data_all)
+#Env_laboratory_data_all <-Env_laboratory_data_all[,-1]
+colnames(Env_laboratory_data_all) <-c("PostCode","Date",
+                                      "Maximum_air_temperature",
+                                      "Minimum_air_temperature",
+                                      "Mean_wind_speed",
+                                      "Cumul_Precipitation",
+                                      "Mean_Precipitation",
+                                      "Relative_humidity",
+                                      "daylength",
+                                      "residents")
+
+Env_laboratory_data_all$Temperature_amplitude <- Env_laboratory_data_all$Maximum_air_temperature - Env_laboratory_data_all$Minimum_air_temperature
+
+
+# Select the subset the year of interest
+
+Env_laboratory_data<-subset(Env_laboratory_data_all, year(as.Date(Env_laboratory_data_all$Date))>=year_first & year(as.Date(Env_laboratory_data_all$Date))<=year_last)
--- a/Codes/Function_Plot_Reconstruction.R
+++ b/Codes/Function_Plot_Reconstruction.R
+# Plot Reconstruction of Time series and compare with empirical data
+
+Plot_Reconstruction_time_series<-function(Data_frame_1, Data_frame_2,Pars)
+{
+  
+  with(as.list(c(Data_frame_1, Data_frame_2,Pars)), {
+    
+    time_series_Post_Codes<-Data_frame_1
+    empirical_time_series<-Data_frame_2
+    file_name_ts<-Pars[1]
+    file_name_yearly<-Pars[2]
+    y_lab_text<-Pars[3]
+    
+colnames(time_series_Post_Codes) <-c("Date","Cases","Lambda","PostCode")
+time_series<-aggregate (Cases ~ Date, time_series_Post_Codes, sum)  # get the mean of the cases per day for all the PC
+
+time_series_quantile <-ddply(time_series,~Date, function (x) quantile(x$Cases, c(.25,.5,.75)))
+time_series_summary <-data.frame(time_series_quantile, rep("Model",times=length(time_series[,1]))) 
+colnames(time_series_summary)<-c("Date","f_quant","Cases","s_quant","source")
+
+#real_cases_mean <-empirical_time_series %>% mutate (rolling_mean= slider::slide_mean(Cases, before=3 , after=3 )) # %>% ungroup() #Gianni Check this, I changed as not sure it worked
+#empirical_time_series$rolling_mean <-real_cases_mean$rolling_mean  
+
+real_cases_quantile<-ddply(empirical_time_series,~Date, function (x) quantile(x$Cases, c(.25,.5,.75)))
+real_cases_summary<-cbind(real_cases_quantile,rep("Cases",times=length(real_cases_quantile[,1])))
+colnames(real_cases_summary)<-c("Date","f_quant","Cases","s_quant","source")              
+
+#time_series_all <-rbind(time_series_summary[,c(1,2,6)], real_cases_summary[,c(1,2,6)])
+
+time_series_all <-rbind(time_series_summary, real_cases_summary)
+
+
+############## Plot Average per day of the year #################
+
+pal<-wes_palette("Cavalcanti1")
+time_series_plot<-ggplot(time_series_all,aes(x=Date,y=Cases,colour=source))
+#time_series_plot<-time_series_plot+scale_color_brewer(palette=pal)
+#time_series_plot<-time_series_plot+geom_line(data=real_cases_summary)
+time_series_plot<-time_series_plot+geom_line(size=0.75)
+#time_series_plot<-time_series_plot+scale_color_manual(values=c( "#E69F00", "#56B4E9"))
+time_series_plot<-time_series_plot+scale_color_manual(values=wes_palette(n=2, name="Cavalcanti1"))
+
+time_series_plot<-time_series_plot+   xlab("Date")+   ylab("Campylobacter Cases")
+
+time_series_plot<-time_series_plot+
+  theme(legend.position= c(0.125,0.85),legend.title =  element_blank(),
+        legend.text = element_text( size = 10),legend.background = element_blank(),
+        legend.key=element_rect(colour=NA,fill=NA))
+
+
+time_series_plot<-time_series_plot+ theme(axis.title.x =element_text( colour="#990000", size=13))
+time_series_plot<-time_series_plot+ theme(axis.title.y =element_text( colour="#990000", size=13))
+
+
+
+time_series_plot<-time_series_plot+ theme(axis.title.x =element_text(size=13))
+time_series_plot<-time_series_plot+ theme(axis.title.y =element_text(size=13))
+print(time_series_plot)
+
+
+tiff(filename = file_name_ts,width = 17.35, height =  17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default")
+print(time_series_plot)
+dev.off()
+
+
+time_series$yday <-as.factor(yday(time_series$Date))
+
+time_series_average1 <-ddply(time_series,~yday, summarise, mean=mean(Cases)) # mean of all cases for the same day of the year
+time_series_quantile1 <-ddply(time_series,~yday, function (x) quantile(x$Cases, c(.25,.5,.75))) # quantiles of cases for all the years for the same day of the year
+time_series_average <-cbind(time_series_average1,time_series_quantile1[,-1])
+
+#real_cases$norm<-(real_cases$Cases/sum(real_cases$Cases))
+empirical_time_series$yday<-as.factor(yday(as.Date(as.character(empirical_time_series$Date))))
+real_cases_average1<-ddply(empirical_time_series,~yday,summarise,mean=mean(Cases))
+real_cases_quantile1<-ddply(empirical_time_series,~yday,function (x) quantile(x$Cases, c(.25,.5,.75)))
+real_cases_average2<-cbind(real_cases_average1,real_cases_quantile1[,-1])
+real_cases_average<-
+  data.frame(real_cases_average2[,1],real_cases_average2[,c(2:5)])
+
+
+df1 <-data.frame(real_cases_average,rep("Cases",times=length(real_cases_average[,1])))
+colnames(df1) <-c("Day","Mean","f_quant","median","s_quant","source")
+df2 <-data.frame(time_series_average,rep("Model",times=length(time_series_average[,1])))
+colnames(df2) <-c("Day","Mean","f_quant","median","s_quant","source")
+
+average_data <-rbind(df1,df2)
+average_data$Day <-as.numeric(average_data$Day)
+
+## We get one of the years as example to plot the date, in this case 2014, but could be any year within our data set.
+month_2014 <-month.abb[unique(month(as.Date(average_data$Day, origin = "2014-01-01")))]
+
+tick_pos<-yday(as.Date(as.character(
+  c("2014-01-01","2014-02-01","2014-03-01","2014-04-01","2014-05-01","2014-06-01",
+    "2014-07-01","2014-08-01","2014-09-01","2014-10-01","2014-11-01","2014-12-01")
+)))
+
+
+
+yearly_average <-ggplot(average_data, aes(x=Day, y=Mean, colour=source))+
+  geom_line(size=2)+
+  geom_ribbon(aes(ymin=f_quant, ymax=s_quant, fill=source), alpha=0.15) +
+  xlab("Day of the Year") + ylab(y_lab_text) +
+  
+  scale_x_continuous(breaks=tick_pos, labels=month_2014) +
+  
+  theme(legend.position= c(0.125,0.75),legend.title =  element_text( size = 10),
+        legend.text = element_text( size = 10),legend.background = element_blank(),
+        legend.key=element_rect(colour=NA,fill=NA)) +
+  
+  theme(axis.title.x =element_text( colour="#990000", size=13)) +
+  theme(axis.title.y =element_text( colour="#990000", size=13)) 
+#+  ggtitle(paste(i,",", h,",", k))
+
+
+print(yearly_average)
+
+
+# Save the plots:
+# Yearly average
+
+tiff(filename = file_name_yearly, width = 17.35, height =  17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw", antialias="default")
+print(yearly_average)
+dev.off()
+
+return()
+
+})
+}
+
--- a/Codes/Function_Reconstruction.R
+++ b/Codes/Function_Reconstruction.R
+Reconstruction_time_series<-function(Conditional_prevalence_unif_info, Data_frame_2,Pars)
+{
+  
+  with(as.list(c(Conditional_prevalence_unif_info, Data_frame_2,Pars)), {
+    
+   
+    Conditional_prevalence_unif<-Conditional_prevalence_unif_info[[1]]
+    breaks_x<-Conditional_prevalence_unif_info[[2]]
+    breaks_y<-Conditional_prevalence_unif_info[[3]]
+    breaks_z<-Conditional_prevalence_unif_info[[4]]
+    Env_laboratory_data<-Data_frame_2
+    variable_x<-Pars[1]
+    variable_y<-Pars[2]
+    variable_z<-Pars[3]
+    info<-Pars[4]  
+    
+    # z variable    
+    dt_z = data.table(breaks_z, val = breaks_z)
+    dt_z <- na.omit (dt_z)
+    setattr(dt_z, "sorted", "breaks_z")
+    dt_z2 <- dt_z[J(Env_laboratory_data[[variable_z]]), roll = "nearest"] 
+    variable_df <- as.data.frame(Env_laboratory_data[,c("PostCode","Date", "residents")])
+    variable_df$variable_z_break<- dt_z2$val
+    
+    
+    # y variable
+    dt_y = data.table(breaks_y, val = breaks_y)
+    dt_y <- na.omit (dt_y)
+    setattr(dt_y, "sorted", "breaks_y")
+    dt_y2 <- dt_y[J(Env_laboratory_data[[variable_y]]), roll = "nearest"]
+    
+    
+    variable_df$variable_y_break<- dt_y2$val
+    
+    
+    
+    # x variable
+    dt_temp = data.table(breaks_x, val = breaks_x)
+    dt_temp <- na.omit (dt_temp)
+    setattr(dt_temp, "sorted", "breaks_x")
+    dt_temp2  <- dt_temp[J(Env_laboratory_data[[variable_x]]), roll = "nearest"]
+    
+    variable_df$variable_x_break<- dt_temp2$val
+    
+    variable_df2<-data.frame(variable_df$PostCode,variable_df$Date,variable_df$residents,variable_df$variable_z_break, variable_df$variable_y_break, variable_df$variable_x_break)
+    colnames(variable_df2) <- c("PostCode","Date", "residents", variable_z, variable_y, variable_x)
+    
+    dt_df <- data.table(variable_df2)
+    dt_var <- data.table(Conditional_prevalence_unif)
+    variable_df_dis3 <- dt_df[dt_var, on= c(variable_z,variable_y,variable_x), allow.cartesian=TRUE] # repeated rows since same conditions apply to more than one PC. Gianni this shoul be a merge
+    
+    variable_df_dis3 <- na.omit(variable_df_dis3) %>% distinct()
+    
+    
+    
+    variable_df_dis3 <-variable_df_dis3[order(as.Date(variable_df_dis3$Date)),]
+    
+    
+    # calculation of all the potential incidence based on the weather conditions of the area. The counts used as numerator correspond to the simulation of cases per weather condition.
+    variable_df_dis3$prevalence <-variable_df_dis3$counts/variable_df_dis3$residents_tot 
+    #variable_df_dis_excluded<-subset(variable_df_dis3,variable_df_dis3$residents_tot<=cutoff)
+    #print(variable_df_dis_excluded)
+    #write.csv(na.omit(variable_df_dis_excluded),"variable_df_dis_excluded.csv")
+    #variable_df_dis3<-        subset(variable_df_dis3,variable_df_dis3$residents_tot>=cutoff) #Gianni discuss this
+    
+    # if (length(na.omit(variable_df_dis3[,1])!=0)){
+    
+    comp_cases <-variable_df_dis3$prevalence*variable_df_dis3$residents # estimation of the "real" incidence based on the possible cases due to weather and the residents number in the catchment area
+    time_series <-data.frame(variable_df_dis3$Date, comp_cases, variable_df_dis3$prevalence, variable_df_dis3$PostCode)
+    colnames(time_series) <-c("Date","Cases","Lambda","Lab")
+    
+    #}
+    
+    write.table(time_series, paste("../Data_Base/",variable_x,"_",variable_y,"_",variable_z,"_",time_lag_char,info,sep=""), col.names= NA, row.names= TRUE, sep=",",eol= "\n") #  Saved on 17/11 after switching residents_total and residents from the denominator respecting the above attempt.  
+    return(time_series)
+    
+
+})
+}
--- a/Codes/Function_Wavelet_analysis.R
+++ b/Codes/Function_Wavelet_analysis.R
+# This script performs Wavelet analysis for  to check for periodicity in the data #
+wavelet_analysis<-function(State, Pars)
+{
+# Cleaning of cases:
+with(as.list(c(State, Pars)), {
+  
+   
+     filename_power<-Pars[1]
+     filename_power_average<-Pars[2]
+     
+       
+Env_Pathogen_data0 <- ddply (State, ~Date, summarise, Cases=sum(Cases_computed)) # subset for all postcodes combined per day
+# Wavelet spectrum plot
+Env_Pathogen_data0$date<-Env_Pathogen_data0$Date
+salmonella_data_national <-Env_Pathogen_data0[order(as.Date(Env_Pathogen_data0$Date)),]  # Sort from least recent to most recent
+dt0<-1
+
+my.wt = analyze.wavelet(salmonella_data_national, "Cases",
+                       loess.span=0.75,
+                       dt=dt0, dj=1/20,
+                       lowerPeriod=2.*dt0,
+                       upperPeriod=floor(nrow(salmonella_data_national)/3)*dt0,
+                       make.pval=T, n.sim=100)
+
+
+wt.image(my.wt, color.key = "quantile", n.levels = 250,legend.params = list(lab = "wavelet power levels", mar = 4.7),show.date = T,
+        color.palette = "rainbow(n.levels, start=0, end=.6)",plot.contour = F, siglvl = 0.05, col.contour = "black", plot.ridge = T, lvl = 0, col.ridge = "black",date.format ="%Y-%m-%d",periodlab = "periods (days)",useRaster=T)
+
+# Save plot
+tiff(filename =  filename_power,width = 17.35, height =  17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default")
+
+wt.image(my.wt, color.key = "quantile", n.levels = 250,legend.params = list(lab = "wavelet power levels", mar = 4.7),show.date = T,color.palette = "rainbow(n.levels, start=0, end=.6)",plot.contour = F, siglvl = 0.05, col.contour = "black", plot.ridge = T, lvl = 0, col.ridge = "black",periodlab = "periods (minutes)",useRaster=F)
+
+dev.off()
+
+
+# Power curve for ease of interpretation of Wavelet Gianni changed name as said detrend
+
+reconstruct(my.wt, plot.waves = F, lwd = c(1,2), legend.coords = "bottomleft")
+
+wt.avg(my.wt, siglvl=0.05, sigcol="red")
+#wt.phase.image(my.wt, "Cases")
+
+# Save plot 
+tiff(filename =  filename_power_average,width = 17.35, height =  17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default")
+
+wt.avg(my.wt,
+      show.siglvl = T, sigpch = 20,
+      label.avg.axis = T, averagelab = NULL,
+      label.period.axis = T, periodlab = NULL,
+      show.legend = T, legend.coords = "topright",
+      main = NULL, lwd = 0.5,
+      verbose = F)
+
+dev.off()
+
+return(salmonella_data_national[,c(1,2)])
+})
+}