From daee2a3728da85ff740a31756e3a643cd203fe9b Mon Sep 17 00:00:00 2001 From: gl0020 <g.loiacono@surrey.ac.uk> Date: Mon, 9 Oct 2023 16:26:41 +0100 Subject: [PATCH] minor correction to include 7-day rolling mean of the reported data --- ...Conditional_incidence_and_reconstruction.R | 8 +- ...nd_reconstruction_for_constant_situation.R | 4 +- ..._incidence_and_reconstruction_one_factor.R | 78 +++++++-- ...incidence_and_reconstruction_two_factors.R | 16 +- Codes/Conditional_incidence_one_factor.R | 80 +++++++++ .../Function_Conditional_Incidence_Uniform.R | 67 ++++++++ Codes/Function_Detrend_analysis.R | 51 ++++++ Codes/Function_Input_files.R | 5 + ...ion_Plot_Conditional_Incidence_Quantiles.R | 35 ++-- ...ction_Plot_Conditional_Incidence_Uniform.R | 89 ++++++++++ Codes/Function_Plot_Reconstruction.R | 14 +- Codes/Function_Reconstruction.R | 160 ++++++++++++++++-- Codes/Function_Wavelet_analysis.R | 10 +- Codes/Pathogen_Linkage_fixed_time_lag.R | 2 +- ...eported_and_adjusted_campylobacter_cases.R | 2 + 15 files changed, 558 insertions(+), 63 deletions(-) create mode 100644 Codes/Conditional_incidence_one_factor.R create mode 100644 Codes/Function_Detrend_analysis.R diff --git a/Codes/Conditional_incidence_and_reconstruction.R b/Codes/Conditional_incidence_and_reconstruction.R index d29dac7..f80944b 100644 --- a/Codes/Conditional_incidence_and_reconstruction.R +++ b/Codes/Conditional_incidence_and_reconstruction.R @@ -46,9 +46,9 @@ All_PC<<-rep(All_PC_s,each=length(dates_s)) #Change HERE depending on the variable object of study, by copy-pasting from the list below variable_x<-"Maximum_air_temperature" -#variable_x<-"Relative_humidity" -variable_y<-"Mean_Precipitation" -#variable_x<-"Mean_wind_speed" +#variable_y<-"Relative_humidity" +#variable_y<-"Mean_Precipitation" +variable_y<-"Mean_wind_speed" variable_z<-"daylength" #variable_z<-"Relative_humidity" @@ -60,7 +60,7 @@ variable_z<-"daylength" delta_variable_x<-1 round_x<-0 -delta_variable_y<-1 +delta_variable_y<-5 round_y<-0 delta_variable_z<-1 round_z<-0 diff --git a/Codes/Conditional_incidence_and_reconstruction_for_constant_situation.R b/Codes/Conditional_incidence_and_reconstruction_for_constant_situation.R index 3d78c57..7dfdcce 100644 --- a/Codes/Conditional_incidence_and_reconstruction_for_constant_situation.R +++ b/Codes/Conditional_incidence_and_reconstruction_for_constant_situation.R @@ -81,8 +81,8 @@ by_x<-0.1 #const_variable<-"const_temp_20_hum_76" -#const_variable<-"const_hum_76_daylength_15" -const_variable<-"const_temp_20_daylength_15" +const_variable<-"const_hum_76_daylength_15" +#const_variable<-"const_temp_20_daylength_15" if (const_variable=="const_hum_76_daylength_15"){ diff --git a/Codes/Conditional_incidence_and_reconstruction_one_factor.R b/Codes/Conditional_incidence_and_reconstruction_one_factor.R index 5a08a50..c87a77a 100644 --- a/Codes/Conditional_incidence_and_reconstruction_one_factor.R +++ b/Codes/Conditional_incidence_and_reconstruction_one_factor.R @@ -41,16 +41,16 @@ All_PC<<-rep(All_PC_s,each=length(dates_s)) # Selection of the variables of interest. -#Change HERE depending on the variable object of study, by copy-pasting from the list below -variables<-c("Maximum_air_temperature","Minimum_air_temperature","Mean_wind_speed","Mean_Precipitation","Relative_humidity","daylength") -time_lags <- c(7,14,30,60,90) -for (i in seq(1:5)) { - time_lag <<-time_lags[i] +time_lag <- 14 + time_lag_char <<-paste(time_lag) -for (j in seq(1:6)) { -variable<-variables[j] +#variable<-"daylength" +variable<-"Mean_wind_speed" +#variable<-"Relative_humidity" +#variable<-"Maximum_air_temperature" +#variable<-"Mean_Precipitation" ### Set the size of the bins for Uniform conditional incidence and resolution in rounding the values of the bins. ### For example: @@ -58,12 +58,28 @@ variable<-variables[j] ## if the size of the bin for precipitation is 0.1 and we want the interval of he bins like ...0,0.1.0.2 ..., we choose delta_variable<-0.1, round_x<-1 ## to distinguishes one decimal point -by_z<-0.05 +by_x<-0.05 +delta_variable_x<-1 +round_x<-0 + +######## Input of data #### ######## Input of data #### source("Function_Input_files.R", chdir=TRUE) +Env_Pathogen_data$Difference_temperature<-Env_Pathogen_data$Maximum_air_temperature-Env_Pathogen_data$Minimum_air_temperature +Env_laboratory_data$Difference_temperature<-Env_laboratory_data$Maximum_air_temperature-Env_laboratory_data$Minimum_air_temperature + +######## Perform Wavelet analysis for the selected 17 years to check for periodicity in the data #### +###### helpful but not necessary. So this is commented +source("Function_Wavelet_analysis.R", chdir=TRUE) + +Pars_wavelet_analysis<-c("../Graphs/Power_Spectrum.tiff","../Graphs/Power_Average.tiff","No") +Env_Pathogen_data$Cases_computed<-Env_Pathogen_data$Cases +campylobacter_data_national<-wavelet_analysis(Env_Pathogen_data, Pars_wavelet_analysis) + + ###### Calculate Conditional incidence Uniform ###### @@ -71,9 +87,45 @@ source("Function_Input_files.R", chdir=TRUE) # Analysis was done following an uniform division of the range of the environmental variables, independently of the number of observations # This is used for the reconstruction -source("Function_Conditional_Incidence_Quantiles.R", chdir=TRUE) -Pars_cond_incidence_quantiles<- c(variable,"_Quantiles",by_z) -Conditional_incidence_quantiles_info<-Conditional_incidence_Quantiles_one_factor(Env_laboratory_data,Env_Pathogen_data,Pars_cond_incidence_quantiles) -}} -# use Plots_Campylobacter_environment_one_variables_quantile.R to plot +# +source("Function_Conditional_Incidence_Uniform.R", chdir=TRUE) +#Conditional incidence, Uniform bins +Pars_cond_incidence <- c(variable,"_Uniform.csv",delta_variable_x,round_x) +Conditional_incidence_unif_info<-Conditional_incidence_Uniform_one_factor(Env_laboratory_data,Env_Pathogen_data,Pars_cond_incidence) + +## plot Conditional incidence for uniform, for diagnostic only +Pars_plot_cond_prev_uniform <- c(variable,time_lag_char) +source("Function_Plot_Conditional_incidence_Uniform.R", chdir=TRUE) +Plot_Conditional_incidence_Uniform_one_factor(Conditional_incidence_unif_info,Pars_plot_cond_prev_uniform) + + + + +#source("Function_Conditional_Incidence_Quantiles.R", chdir=TRUE) +##Conditional incidence, bins divided in quantiles. As by_z=1, there is only one bin for the third variable, so practically we consider two variables +#Pars_cond_prev_quantiles<- c(variable,variable_y,variable_z,"_Quantiles.csv",by_x,by_y,by_z) +#Conditional_incidence_quantiles_info<-Conditional_incidence_Quantiles(Env_laboratory_data,Env_Pathogen_data,Pars_cond_prev_quantiles) + +## plot Conditional incidence for bins divided in quantiles +#Pars_plot_cond_prev_quantiles <- c(variable_x,variable_y) +#source("Function_Plot_Conditional_incidence_Quantiles.R", chdir=TRUE) +#Plot_Conditional_incidence_quantiles_two_factors(Conditional_incidence_quantiles_info,Pars_plot_cond_prev_quantiles) + + +###### Reconstruction ###### + +source("Function_Reconstruction.R") +Pars_reconstruction <- c(variable,"_reconstructed.csv") +Reconstruction_time_series<-Reconstruction_time_series_one_factor(Conditional_incidence_unif_info,Env_laboratory_data,Pars_reconstruction) + + + +############## End reconstructions #################### + +###### Plot Reconstruction ###### +source("Function_Plot_Reconstruction.R") +Pars_Plot_Reconstruction <- c(paste("../Graphs/",variable,time_lag_char,"_time_series.tiff",sep=""),paste("../Graphs/",variable,"_",time_lag_char,"_yearly_average.tiff",sep=""),"Campylobacteriosis") +Plot_Reconstruction_time_series(Reconstruction_time_series,campylobacter_data_national,Pars_Plot_Reconstruction) +# +############## End Plot Reconstruction #################### diff --git a/Codes/Conditional_incidence_and_reconstruction_two_factors.R b/Codes/Conditional_incidence_and_reconstruction_two_factors.R index a49c5ed..6682d3f 100644 --- a/Codes/Conditional_incidence_and_reconstruction_two_factors.R +++ b/Codes/Conditional_incidence_and_reconstruction_two_factors.R @@ -22,7 +22,7 @@ library(zoo) # Set length of the desired time-lag -time_lag <<-14 +time_lag <<-30 time_lag_char <<-paste(time_lag) # Set the years you are focusing on @@ -47,12 +47,14 @@ All_PC<<-rep(All_PC_s,each=length(dates_s)) #variable_x<-"Difference_temperature" #variable_x<-"Minimum_air_temperature" -#variable_y<-"Mean_Precipitation" -variable_y<-"Relative_humidity" +#variable_x<-"Mean_Precipitation" +#variable_y<-"Relative_humidity" #variable_y<-"Mean_wind_speed" -#variable_y<-"daylength" - variable_z<-"daylength" + +#variable_z<-"daylength" +#variable_y<-"Mean_Precipitation" +variable_y<-"Relative_humidity" variable_x<-"Maximum_air_temperature" ### Set the size of the bins for Uniform conditional incidence and resolution in rounding the values of the bins. ### For example: @@ -62,8 +64,8 @@ variable_x<-"Maximum_air_temperature" delta_variable_x<-1 round_x<-0 -delta_variable_y<-0.2 -round_y<-1 +delta_variable_y<-1 +round_y<-0 ## partitions of the quantiles by_z<-1 by_y<-0.25 diff --git a/Codes/Conditional_incidence_one_factor.R b/Codes/Conditional_incidence_one_factor.R new file mode 100644 index 0000000..c2fbc92 --- /dev/null +++ b/Codes/Conditional_incidence_one_factor.R @@ -0,0 +1,80 @@ +# The code estimate the incidence of the diseases conditional to local environmental variables. It perform a series of diagnostic and +# then reconstruct the time series of cases given the local environmental factors in England and Wales + +### Require Relevant R Packages ########### +rm(list=ls(all=TRUE)) + +library(data.table) +library(readr) +library(dplyr) +library(lubridate) +library(plyr) +library(dplR) +library(MALDIquant) +library(ggplot2) +library(slider) +library(WaveletComp) +library(gdata) +library(wesanderson) +library(zoo) + +# Global Parameters + +# Set length of the desired time-lag + + +# Set the years you are focusing on + +year_first<<-1990 +year_last<<-2009 +first_day<<-as.Date(paste(as.character(year_first),"-01-01",sep="")) +last_day<<-as.Date(paste(as.character(year_last),"-12-31",sep="")) +dates_s<<-seq(first_day,last_day,by="1 day") + +## Gather Information on laboratory Postcodes + +variable_df_1<-read.csv("../Data_Base/Laboratories_Post_Codes.csv") + +All_PC_s<-variable_df_1[,1] +dates<-rep(dates_s,times=length(All_PC_s)) +All_PC<<-rep(All_PC_s,each=length(dates_s)) + + +# Selection of the variables of interest. +#Change HERE depending on the variable object of study, by copy-pasting from the list below +variables<-c("Maximum_air_temperature","Minimum_air_temperature","Mean_wind_speed","Mean_Precipitation","Relative_humidity","daylength") +time_lags <- c(7,14,30,60,90) + +for (i in seq(1:5)) { + time_lag <<-time_lags[i] + time_lag_char <<-paste(time_lag) + +for (j in seq(1:6)) { +variable<-variables[j] + +### Set the size of the bins for Uniform conditional incidence and resolution in rounding the values of the bins. +### For example: +## if the size of the bin for humidity is 5 and we want the interval of he bins like ...70,75,80... we choose delta_variable<-5, round<-0 +## if the size of the bin for precipitation is 0.1 and we want the interval of he bins like ...0,0.1.0.2 ..., we choose delta_variable<-0.1, round_x<-1 +## to distinguishes one decimal point + +by_x<-0.05 +delta_variable_x<-0.2 +round_x<-1 + + +######## Input of data #### +source("Function_Input_files.R", chdir=TRUE) + + +###### Calculate Conditional incidence Uniform ###### +# The code does look at how the risk of the disease in humans depends on environmental variables +# Analysis was done following an uniform division of the range of the environmental variables, independently of the number of observations +# This is used for the reconstruction + +source("Function_Conditional_Incidence_Quantiles.R", chdir=TRUE) +Pars_cond_incidence_quantiles<- c(variable,"_Quantiles",by_x) +Conditional_incidence_quantiles_info<-Conditional_incidence_Quantiles_one_factor(Env_laboratory_data,Env_Pathogen_data,Pars_cond_incidence_quantiles) +}} +# use Plots_Campylobacter_environment_one_variables_quantile.R to plot + diff --git a/Codes/Function_Conditional_Incidence_Uniform.R b/Codes/Function_Conditional_Incidence_Uniform.R index f645ee2..a7eeefb 100644 --- a/Codes/Function_Conditional_Incidence_Uniform.R +++ b/Codes/Function_Conditional_Incidence_Uniform.R @@ -238,3 +238,70 @@ Conditional_incidence_Uniform_two_factors<-function(Data_frame_1, Data_frame_2,P }) } + + + + +Conditional_incidence_Uniform_one_factor<-function(Data_frame_1, Data_frame_2,Pars) +{ + + with(as.list(c(Data_frame_1, Data_frame_2,Pars)), { + + variable_x<-Pars[1] + info<-Pars[2] + delta_x<-as.numeric(Pars[3]) + round_digit_x<-as.numeric(Pars[4]) + + # Select the variables of study: + Env_Laboratory_data_df <- Data_frame_1[,c("PostCode","Date", variable_x, + "residents")] + Env_Pathogen_data_df <- Data_frame_2[,c("PostCode","Date", variable_x, + "residents","Cases")] + + + + ################### Divide the domain of weather variable in bins of delta size for the uniform distribution. + + breaks_x <-round(seq(min(na.omit(Env_Laboratory_data_df[[variable_x]])-delta_x), + max(na.omit(Env_Laboratory_data_df[[variable_x]])+delta_x) , by=delta_x), round_digit_x) + + + + #################### Create a data frame to fill with + Conditional_incidence_unif <-data.frame(character(), numeric(), numeric()) + colnames(Conditional_incidence_unif) <-c(variable_x, "counts","residents_tot") + + + j_x_min <-1 + j_x_max <- length( breaks_x) + + for (j_x in c(j_x_min:j_x_max)){ + + wt_x <- match.closest (Env_Pathogen_data_df[[variable_x]], breaks_x) + ww_x <-which(wt_x==j_x) + Pathogen_stratified <-Env_Pathogen_data_df[ww_x,] #where there are cases for specific values of the 3 weather variables. residents is the number of people exposed to these situations during the entire duration of the dataset. It is the number of times the people is exposed to this caracteristics, only when a case is found. It can count a same catchment area twice. Residents total is the same irrespectively of the presence of a case + + + wt_x_t <- match.closest (Env_Laboratory_data_df[[variable_x]], breaks_x) + ww_x_t <-which(wt_x_t==j_x) + Laboratory_stratified <-Env_Laboratory_data_df[ww_x_t, ] #NA in residents at Env_Laboratory_data. we do not care if there is a case reported + + Total_cases <-sum((as.numeric(na.omit(Pathogen_stratified$Cases)))) + residents_tot <-sum((as.numeric(na.omit(Laboratory_stratified$residents)))) #number of people exposed to these weather conditions. Will be used to calculate the incidence + + data_df <-data.frame ( + breaks_x[j_x], + Total_cases, + residents_tot + ) + colnames(data_df) <-c(variable_x, "counts","residents_tot") # + Conditional_incidence_unif <-rbind(Conditional_incidence_unif, data_df) + + print(c(j_x, variable_x)) # check in the output log file if x y and z cover all the conditions established in breaks() + } + write.table(Conditional_incidence_unif, paste("../Data_Base/Conditional_incidence_",variable_x,"_",time_lag_char,info,sep=""), row.names = FALSE , sep = ",",eol = "\n") + + return(list(Conditional_incidence_unif,breaks_x)) + + }) +} diff --git a/Codes/Function_Detrend_analysis.R b/Codes/Function_Detrend_analysis.R new file mode 100644 index 0000000..f7eb8b2 --- /dev/null +++ b/Codes/Function_Detrend_analysis.R @@ -0,0 +1,51 @@ +# Remove temporal trend from the time-series of salmonellosis + +Detrend_analysis<-function(Data_frame_1, Data_frame_2,Pars) +{ + + with(as.list(c(Data_frame_1, Data_frame_2,Pars)), { + + + pathogen_data_national<-Data_frame_1 + pathogen_enviroment_data<-Data_frame_2 + file_name<-Pars + +mean_cases <- mean(pathogen_data_national$Cases) +national_detrended <-detrend.series(y=pathogen_data_national$Cases, y.name = "", make.plot = TRUE, + method = c("Spline"),return.info=TRUE) + +pathogen_df <-data.frame(pathogen_data_national$Date, pathogen_data_national$Cases, national_detrended$series, national_detrended$curves) +colnames(pathogen_df)<-c("Date","cases","detrend","curve") + + +pathogen_cases_detrend <-merge(pathogen_enviroment_data , pathogen_df, by="Date") +# After discussing with GLI, we concluded that applying the national curve data to a smaller PC area is similar to calculating the curve of the specific PC. +pathogen_cases_detrend$Cases_detrend<-pathogen_cases_detrend$Cases/pathogen_cases_detrend$curve # correct to use Cases here +pathogen_cases_detrend$Cases_detrend_adjusted <- pathogen_cases_detrend$Cases_detrend* mean_cases # mean daily cases + +# Plot detrend: +pl3<-ggplot(pathogen_cases_detrend, aes(x=Date,y=Cases_detrend_adjusted))+geom_line(color="darkred",stat = "identity") + +pl3<- pl3 +xlab("Date")+ylab("Number of Occurences")#+scale_x_date(labels = date_format("%d/%m/%y"),breaks = pretty_breaks()) + +pl3<- pl3+ theme(axis.title.y =element_text(face="bold", colour="#990000", size=13)) + +pl3<- pl3+theme(axis.title.x =element_text(face="bold", colour="#990000", size=13)) + ggtitle ("Detrended cases or individual postocodes") + +pl3 + + +tiff(filename = file_name ,width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default") + +pl3 + +dev.off() + +####### Diagnostic plot ########## +time_series <-ddply(pathogen_cases_detrend,~Date, cases=sum(Cases_detrend_adjusted)) +plot(time_series$Date,time_series$cases,type="l",main="AFTER DETREND",xlab="Date",ylab="Cases at National Level") + +return(pathogen_cases_detrend) + +}) +} diff --git a/Codes/Function_Input_files.R b/Codes/Function_Input_files.R index abb55b2..e27c3e3 100644 --- a/Codes/Function_Input_files.R +++ b/Codes/Function_Input_files.R @@ -20,8 +20,13 @@ Env_Pathogen_data_all$Temperature_amplitude <- Env_Pathogen_data_all$Maximum_air # Select the variables of interest: Env_Pathogen_data_all$Date<-as.Date(Env_Pathogen_data_all$Date,format = "%d/%m/%Y") Env_Pathogen_data_all <- distinct(Env_Pathogen_data_all) # Remove possible duplicates Gianni, correct but better to do this when we create the file +Env_Pathogen_data_rolling_mean <-Env_Pathogen_data_all %>% mutate (rolling_mean= slider::slide_mean(Cases, before=7 , after=0 )) # %>% ungroup() +Env_Pathogen_data_all$Cases <-Env_Pathogen_data_rolling_mean$rolling_mean Env_Pathogen_data <-(subset(Env_Pathogen_data_all, year(Env_Pathogen_data_all$Date)>=year_first & year(Env_Pathogen_data_all$Date)<=year_last)) + + + # Read the weather variables for every PC, averaged for the estimated delay in the past Env_laboratory_data_all <-read_csv(paste("../Data_Base/Laboratory_environment_",time_lag_char,".csv",sep="")) diff --git a/Codes/Function_Plot_Conditional_Incidence_Quantiles.R b/Codes/Function_Plot_Conditional_Incidence_Quantiles.R index d94b97c..8133f4e 100644 --- a/Codes/Function_Plot_Conditional_Incidence_Quantiles.R +++ b/Codes/Function_Plot_Conditional_Incidence_Quantiles.R @@ -94,7 +94,7 @@ Plot_Conditional_incidence_quantiles<-function(Data_frame_1, Pars) if (variable_x=="Maximum_air_temperature" & variable_y=="Relative_humidity"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(Relative_humidity),fill=factor(Relative_humidity))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(0,25)) } if (variable_x=="Relative_humidity" & variable_y=="Maximum_air_temperature"){ Conditional_incidence_plot <- @@ -104,7 +104,7 @@ Plot_Conditional_incidence_quantiles<-function(Data_frame_1, Pars) if (variable_x=="Maximum_air_temperature" & variable_y=="Mean_wind_speed"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(Mean_wind_speed),fill=factor(Mean_wind_speed))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(0,25)) } if (variable_x=="Mean_wind_speed" & variable_y=="Maximum_air_temperature"){ Conditional_incidence_plot <- @@ -116,7 +116,7 @@ Plot_Conditional_incidence_quantiles<-function(Data_frame_1, Pars) if (variable_x=="Maximum_air_temperature" & variable_y=="Mean_Precipitation"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(Mean_Precipitation),fill=factor(Mean_Precipitation))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(0,25)) } if (variable_x=="Mean_Precipitation" & variable_y=="Maximum_air_temperature"){ Conditional_incidence_plot <- @@ -127,7 +127,7 @@ Plot_Conditional_incidence_quantiles<-function(Data_frame_1, Pars) if (variable_x=="Maximum_air_temperature" & variable_y=="daylength"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(daylength),fill=factor(daylength))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(0,25)) } @@ -218,7 +218,7 @@ Plot_Conditional_incidence_quantiles<-function(Data_frame_1, Pars) if (variable_x=="Maximum_air_temperature" & variable_y=="Relative_humidity"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(Relative_humidity),fill=factor(Relative_humidity))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(0,25)) } if (variable_x=="Relative_humidity" & variable_y=="Maximum_air_temperature"){ Conditional_incidence_plot <- @@ -228,7 +228,7 @@ Plot_Conditional_incidence_quantiles<-function(Data_frame_1, Pars) if (variable_x=="Maximum_air_temperature" & variable_y=="Mean_wind_speed"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(Mean_wind_speed),fill=factor(Mean_wind_speed))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(0,25)) } if (variable_x=="Mean_wind_speed" & variable_y=="Maximum_air_temperature"){ Conditional_incidence_plot <- @@ -240,7 +240,7 @@ Plot_Conditional_incidence_quantiles<-function(Data_frame_1, Pars) if (variable_x=="Maximum_air_temperature" & variable_y=="Mean_Precipitation"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(Mean_Precipitation),fill=factor(Mean_Precipitation))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(0,25)) } if (variable_x=="Mean_Precipitation" & variable_y=="Maximum_air_temperature"){ Conditional_incidence_plot <- @@ -251,7 +251,7 @@ Plot_Conditional_incidence_quantiles<-function(Data_frame_1, Pars) if (variable_x=="Maximum_air_temperature" & variable_y=="daylength"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(daylength),fill=factor(daylength))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(0,5))+scale_x_continuous(limits=c(0,25)) } if (variable_x=="daylength" & variable_y=="Maximum_air_temperature"){ Conditional_incidence_plot <- @@ -396,25 +396,34 @@ Plot_Conditional_incidence_quantiles_two_factors<-function(Data_frame_1, Pars) if (variable_x=="Maximum_air_temperature" & variable_y=="Relative_humidity"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(Relative_humidity),fill=factor(Relative_humidity))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(5,25)) } if (variable_x=="Maximum_air_temperature" & variable_y=="Mean_wind_speed"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(Mean_wind_speed),fill=factor(Mean_wind_speed))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(0,25)) } if (variable_x=="Maximum_air_temperature" & variable_y=="Mean_Precipitation"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(Mean_Precipitation),fill=factor(Mean_Precipitation))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(0,25)) } if (variable_x=="Maximum_air_temperature" & variable_y=="daylength"){ Conditional_incidence_plot <- ggplot(Conditional_incidence_df,aes(x=Maximum_air_temperature,y=incidence,color=factor(daylength),fill=factor(daylength))) - Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(-5,30)) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(0,25)) } - + if (variable_x=="Relative_humidity" & variable_y=="daylength"){ + Conditional_incidence_plot <- + ggplot(Conditional_incidence_df,aes(x=Relative_humidity,y=incidence,color=factor(daylength),fill=factor(daylength))) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(60,100)) + } + if (variable_x=="Mean_Precipitation" & variable_y=="daylength"){ + Conditional_incidence_plot <- + ggplot(Conditional_incidence_df,aes(x=Mean_Precipitation,y=incidence,color=factor(daylength),fill=factor(daylength))) + Conditional_incidence_plot <- Conditional_incidence_plot+ scale_y_continuous(limits=c(1,4))+scale_x_continuous(limits=c(0,10)) + } if (variable_x=="Minimum_air_temperature" & variable_y=="Relative_humidity"){ Conditional_incidence_plot <- diff --git a/Codes/Function_Plot_Conditional_Incidence_Uniform.R b/Codes/Function_Plot_Conditional_Incidence_Uniform.R index 47bfcd8..51cc713 100644 --- a/Codes/Function_Plot_Conditional_Incidence_Uniform.R +++ b/Codes/Function_Plot_Conditional_Incidence_Uniform.R @@ -204,6 +204,95 @@ Plot_Conditional_incidence_Uniform_two_factors<-function(Data_frame_1, Pars) + +Plot_Conditional_incidence_Uniform_one_factor<-function(Data_frame_1, Pars) +{ + + with(as.list(c(Data_frame_1, Pars)), { + + # Pars is the list of 3 variables of interest + # Select the variables of study: + + variable_x<-Pars[1] + time_lag_char<-Pars[2] + + # Select the file with relevant information: + Conditional_incidence_unif <- Data_frame_1[[1]] + + + #Conditional_incidence_unif<-Conditional_incidence_unif[,-1] + Conditional_incidence_unif$incidence<-Conditional_incidence_unif$counts/Conditional_incidence_unif$residents_tot + Conditional_incidence_unif<-na.omit(Conditional_incidence_unif) + #For visual purposes, we are removing cases when the counts were too low resulting in exceptional large incidence + Conditional_incidence_unif<-subset(Conditional_incidence_unif, Conditional_incidence_unif$counts>100) + + + norm<-1/(1e6) + + + + Conditional_incidence_unif$preval<-Conditional_incidence_unif$incidence/norm + + + + ################### Choose a specific day-length ################### + + + #Conditional_incidence_unif<-na.omit(Conditional_incidence_unif) + ############################# General variables ########################### + if (variable_x=="Relative_humidity"){variable_x_char<-"Relative humidity (%)"} + if (variable_x=="Mean_wind_speed"){variable_x_char<-"Mean Wind Speed (knots)"} + if (variable_x=="Mean_wind_speed"){variable_x_char<-"Mean Wind Speed (knots)"} + if (variable_x=="Mean_Precipitation"){variable_x_char<-"Mean Precipitation (mm)"} + if (variable_x=="daylength"){variable_x_char<-"Day-Length (hours)"} + if (variable_x=="Minimum_air_temperature"){variable_x_char<-expression("Minimum Air Temperature"*~degree*C)} + if (variable_x=="Maximum_air_temperature"){variable_x_char<-expression("Maximum Air Temperature"*~degree*C)} + if (variable_x=="Difference_temperature"){variable_x_char<-expression("Difference Maximum-Minimum Air Temperature"*~degree*C)} + + + + var_x_loc_plot <- ggplot(Conditional_incidence_unif,aes(x=.data[[variable_x]],y=preval)) + + var_x_loc_plot <- var_x_loc_plot+ scale_fill_continuous (type = "viridis") + var_x_loc_plot <- var_x_loc_plot+ scale_colour_continuous(type = "viridis") + + + var_x_loc_plot <- var_x_loc_plot+ylab("Conditional incidence of Campylobacter Cases (per year, per million)") + var_x_loc_plot <- var_x_loc_plot+xlab(variable_x_char) + + var_x_loc_plot <- var_x_loc_plot+ geom_point(size=3.5) + var_x_loc_plot <- var_x_loc_plot+ geom_line(size=1) + + var_x_loc_plot <- var_x_loc_plot+ scale_x_continuous(limits=c(min(Conditional_incidence_unif[,c(variable_x)]),max(Conditional_incidence_unif[,c(variable_x)]))) + + var_x_loc_plot <- var_x_loc_plot+ + theme(legend.position= c(0.125,0.75),legend.title = element_text( size = 10), + legend.text = element_text( size = 10),legend.background = element_blank(), + legend.key=element_rect(colour=NA)) + + var_x_loc_plot <- var_x_loc_plot+ theme(axis.title.x =element_text( colour="#990000", size=13)) + var_x_loc_plot <- var_x_loc_plot+ theme(axis.title.y =element_text( colour="#990000", size=13)) + + var_x_loc_plot <- var_x_loc_plot+ theme(axis.title.x =element_text(size=13)) + var_x_loc_plot <- var_x_loc_plot+ theme(axis.title.y =element_text(size=13)) + + print(var_x_loc_plot) + + + + file_name<-paste("../Graphs/Conditional_probability_",variable_x,"_",time_lag_char,"_uniform.tiff", sep = "") + tiff(filename = file_name,width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 1200,compression = "lzw",antialias="default") + + print(var_x_loc_plot) + + dev.off() + + + }) +} + + + Plot_Conditional_incidence_unif_constant<-function(Data_frame_1, Pars) { diff --git a/Codes/Function_Plot_Reconstruction.R b/Codes/Function_Plot_Reconstruction.R index a8d8a72..38aefd6 100644 --- a/Codes/Function_Plot_Reconstruction.R +++ b/Codes/Function_Plot_Reconstruction.R @@ -14,14 +14,16 @@ Plot_Reconstruction_time_series<-function(Data_frame_1, Data_frame_2,Pars) colnames(time_series_Post_Codes) <-c("Date","Cases","Lambda","PostCode") time_series<-aggregate (Cases ~ Date, time_series_Post_Codes, sum) # get the mean of the cases per day for all the PC + time_series_quantile <-ddply(time_series,~Date, function (x) quantile(x$Cases, c(.25,.5,.75))) time_series_summary <-data.frame(time_series_quantile, rep("Model",times=length(time_series[,1]))) colnames(time_series_summary)<-c("Date","f_quant","Cases","s_quant","source") -#real_cases_mean <-empirical_time_series %>% mutate (rolling_mean= slider::slide_mean(Cases, before=3 , after=3 )) # %>% ungroup() #Gianni Check this, I changed as not sure it worked -#empirical_time_series$rolling_mean <-real_cases_mean$rolling_mean +real_cases_mean <-empirical_time_series %>% mutate (rolling_mean= slider::slide_mean(Cases, before=7 , after=0 )) # %>% ungroup() ##rolling mean to aremove the weekend effects +empirical_time_series$rolling_mean <-real_cases_mean$rolling_mean +real_cases_quantile<-ddply(empirical_time_series,~Date, function (x) quantile(x$rolling_mean, c(.25,.5,.75))) -real_cases_quantile<-ddply(empirical_time_series,~Date, function (x) quantile(x$Cases, c(.25,.5,.75))) +#real_cases_quantile<-ddply(empirical_time_series,~Date, function (x) quantile(x$Cases, c(.25,.5,.75))) real_cases_summary<-cbind(real_cases_quantile,rep("Cases",times=length(real_cases_quantile[,1]))) colnames(real_cases_summary)<-c("Date","f_quant","Cases","s_quant","source") @@ -34,13 +36,14 @@ time_series_all <-rbind(time_series_summary, real_cases_summary) pal<-wes_palette("Cavalcanti1") time_series_plot<-ggplot(time_series_all,aes(x=Date,y=Cases,colour=source)) +time_series_plot<-time_series_plot+ theme_bw(20) #time_series_plot<-time_series_plot+scale_color_brewer(palette=pal) #time_series_plot<-time_series_plot+geom_line(data=real_cases_summary) time_series_plot<-time_series_plot+geom_line(size=0.75) #time_series_plot<-time_series_plot+scale_color_manual(values=c( "#E69F00", "#56B4E9")) time_series_plot<-time_series_plot+scale_color_manual(values=wes_palette(n=2, name="Cavalcanti1")) -time_series_plot<-time_series_plot+ xlab("Date")+ ylab("Campylobacter Cases") +time_series_plot<-time_series_plot+ xlab("Date")+ ylab("Campylobacteriosis") time_series_plot<-time_series_plot+ theme(legend.position= c(0.125,0.85),legend.title = element_blank(), @@ -96,7 +99,7 @@ tick_pos<-yday(as.Date(as.character( -yearly_average <-ggplot(average_data, aes(x=Day, y=Mean, colour=source))+ +yearly_average <-ggplot(average_data, aes(x=Day, y=Mean, colour=source))+ theme_bw(20)+ geom_line(size=2)+ geom_ribbon(aes(ymin=f_quant, ymax=s_quant, fill=source), alpha=0.15) + xlab("Day of the Year") + ylab(y_lab_text) + @@ -126,4 +129,3 @@ return() }) } - diff --git a/Codes/Function_Reconstruction.R b/Codes/Function_Reconstruction.R index cc7aab2..cd78ec1 100644 --- a/Codes/Function_Reconstruction.R +++ b/Codes/Function_Reconstruction.R @@ -1,13 +1,13 @@ -Reconstruction_time_series<-function(Conditional_prevalence_unif_info, Data_frame_2,Pars) +Reconstruction_time_series<-function(Conditional_incidence_unif_info, Data_frame_2,Pars) { - with(as.list(c(Conditional_prevalence_unif_info, Data_frame_2,Pars)), { + with(as.list(c(Conditional_incidence_unif_info, Data_frame_2,Pars)), { - - Conditional_prevalence_unif<-Conditional_prevalence_unif_info[[1]] - breaks_x<-Conditional_prevalence_unif_info[[2]] - breaks_y<-Conditional_prevalence_unif_info[[3]] - breaks_z<-Conditional_prevalence_unif_info[[4]] + + Conditional_incidence_unif<-Conditional_incidence_unif_info[[1]] + breaks_x<-Conditional_incidence_unif_info[[2]] + breaks_y<-Conditional_incidence_unif_info[[3]] + breaks_z<-Conditional_incidence_unif_info[[4]] Env_laboratory_data<-Data_frame_2 variable_x<-Pars[1] variable_y<-Pars[2] @@ -46,7 +46,7 @@ Reconstruction_time_series<-function(Conditional_prevalence_unif_info, Data_fram colnames(variable_df2) <- c("PostCode","Date", "residents", variable_z, variable_y, variable_x) dt_df <- data.table(variable_df2) - dt_var <- data.table(Conditional_prevalence_unif) + dt_var <- data.table(Conditional_incidence_unif) variable_df_dis3 <- dt_df[dt_var, on= c(variable_z,variable_y,variable_x), allow.cartesian=TRUE] # repeated rows since same conditions apply to more than one PC. Gianni this shoul be a merge variable_df_dis3 <- na.omit(variable_df_dis3) %>% distinct() @@ -57,7 +57,7 @@ Reconstruction_time_series<-function(Conditional_prevalence_unif_info, Data_fram # calculation of all the potential incidence based on the weather conditions of the area. The counts used as numerator correspond to the simulation of cases per weather condition. - variable_df_dis3$prevalence <-variable_df_dis3$counts/variable_df_dis3$residents_tot + variable_df_dis3$incidence <-variable_df_dis3$counts/variable_df_dis3$residents_tot #variable_df_dis_excluded<-subset(variable_df_dis3,variable_df_dis3$residents_tot<=cutoff) #print(variable_df_dis_excluded) #write.csv(na.omit(variable_df_dis_excluded),"variable_df_dis_excluded.csv") @@ -65,8 +65,8 @@ Reconstruction_time_series<-function(Conditional_prevalence_unif_info, Data_fram # if (length(na.omit(variable_df_dis3[,1])!=0)){ - comp_cases <-variable_df_dis3$prevalence*variable_df_dis3$residents # estimation of the "real" incidence based on the possible cases due to weather and the residents number in the catchment area - time_series <-data.frame(variable_df_dis3$Date, comp_cases, variable_df_dis3$prevalence, variable_df_dis3$PostCode) + comp_cases <-variable_df_dis3$incidence*variable_df_dis3$residents # estimation of the "real" incidence based on the possible cases due to weather and the residents number in the catchment area + time_series <-data.frame(variable_df_dis3$Date, comp_cases, variable_df_dis3$incidence, variable_df_dis3$PostCode) colnames(time_series) <-c("Date","Cases","Lambda","Lab") #} @@ -74,6 +74,142 @@ Reconstruction_time_series<-function(Conditional_prevalence_unif_info, Data_fram write.table(time_series, paste("../Data_Base/",variable_x,"_",variable_y,"_",variable_z,"_",time_lag_char,info,sep=""), col.names= NA, row.names= TRUE, sep=",",eol= "\n") # Saved on 17/11 after switching residents_total and residents from the denominator respecting the above attempt. return(time_series) + + }) +} + + +Reconstruction_time_series_two_factors<-function(Conditional_incidence_unif_info, Data_frame_2,Pars) +{ + + with(as.list(c(Conditional_incidence_unif_info, Data_frame_2,Pars)), { + + + Conditional_incidence_unif<-Conditional_incidence_unif_info[[1]] + breaks_x<-Conditional_incidence_unif_info[[2]] + breaks_y<-Conditional_incidence_unif_info[[3]] + Env_laboratory_data<-Data_frame_2 + variable_x<-Pars[1] + variable_y<-Pars[2] + info<-Pars[3] + + + variable_df <- as.data.frame(Env_laboratory_data[,c("PostCode","Date", "residents")]) + + # y variable + dt_y = data.table(breaks_y, val = breaks_y) + dt_y <- na.omit (dt_y) + setattr(dt_y, "sorted", "breaks_y") + dt_y2 <- dt_y[J(Env_laboratory_data[[variable_y]]), roll = "nearest"] + + + variable_df$variable_y_break<- dt_y2$val + + + + # x variable + dt_temp = data.table(breaks_x, val = breaks_x) + dt_temp <- na.omit (dt_temp) + setattr(dt_temp, "sorted", "breaks_x") + dt_temp2 <- dt_temp[J(Env_laboratory_data[[variable_x]]), roll = "nearest"] + + variable_df$variable_x_break<- dt_temp2$val + + variable_df2<-data.frame(variable_df$PostCode,variable_df$Date,variable_df$residents, variable_df$variable_y_break, variable_df$variable_x_break) + colnames(variable_df2) <- c("PostCode","Date", "residents", variable_y, variable_x) + + dt_df <- data.table(variable_df2) + dt_var <- data.table(Conditional_incidence_unif) + variable_df_dis3 <- dt_df[dt_var, on= c(variable_y,variable_x), allow.cartesian=TRUE] # repeated rows since same conditions apply to more than one PC. Gianni this shoul be a merge + + variable_df_dis3 <- na.omit(variable_df_dis3) %>% distinct() + + + + variable_df_dis3 <-variable_df_dis3[order(as.Date(variable_df_dis3$Date)),] + + + # calculation of all the potential incidence based on the weather conditions of the area. The counts used as numerator correspond to the simulation of cases per weather condition. + variable_df_dis3$incidence <-variable_df_dis3$counts/variable_df_dis3$residents_tot + #variable_df_dis_excluded<-subset(variable_df_dis3,variable_df_dis3$residents_tot<=cutoff) + #print(variable_df_dis_excluded) + #write.csv(na.omit(variable_df_dis_excluded),"variable_df_dis_excluded.csv") + #variable_df_dis3<- subset(variable_df_dis3,variable_df_dis3$residents_tot>=cutoff) #Gianni discuss this + + # if (length(na.omit(variable_df_dis3[,1])!=0)){ + + comp_cases <-variable_df_dis3$incidence*variable_df_dis3$residents # estimation of the "real" incidence based on the possible cases due to weather and the residents number in the catchment area + time_series <-data.frame(variable_df_dis3$Date, comp_cases, variable_df_dis3$incidence, variable_df_dis3$PostCode) + colnames(time_series) <-c("Date","Cases","Lambda","Lab") + + #} + + write.table(time_series, paste("../Data_Base/",variable_x,"_",variable_y,"_",time_lag_char,info,sep=""), col.names= NA, row.names= TRUE, sep=",",eol= "\n") # Saved on 17/11 after switching residents_total and residents from the denominator respecting the above attempt. + return(time_series) + + + }) +} -}) + + + +Reconstruction_time_series_one_factor<-function(Conditional_incidence_unif_info, Data_frame_2,Pars) +{ + + with(as.list(c(Conditional_incidence_unif_info, Data_frame_2,Pars)), { + + + Conditional_incidence_unif<-Conditional_incidence_unif_info[[1]] + breaks_x<-Conditional_incidence_unif_info[[2]] + Env_laboratory_data<-Data_frame_2 + variable_x<-Pars[1] + info<-Pars[2] + + + variable_df <- as.data.frame(Env_laboratory_data[,c("PostCode","Date", "residents")]) + + + # x variable + dt_temp = data.table(breaks_x, val = breaks_x) + dt_temp <- na.omit (dt_temp) + setattr(dt_temp, "sorted", "breaks_x") + dt_temp2 <- dt_temp[J(Env_laboratory_data[[variable_x]]), roll = "nearest"] + + variable_df$variable_x_break<- dt_temp2$val + + variable_df2<-data.frame(variable_df$PostCode,variable_df$Date,variable_df$residents, variable_df$variable_x_break) + colnames(variable_df2) <- c("PostCode","Date", "residents", variable_x) + + dt_df <- data.table(variable_df2) + dt_var <- data.table(Conditional_incidence_unif) + variable_df_dis3 <- dt_df[dt_var, on= c(variable_x), allow.cartesian=TRUE] # repeated rows since same conditions apply to more than one PC. Gianni this shoul be a merge + + variable_df_dis3 <- na.omit(variable_df_dis3) %>% distinct() + + + + variable_df_dis3 <-variable_df_dis3[order(as.Date(variable_df_dis3$Date)),] + + + # calculation of all the potential incidence based on the weather conditions of the area. The counts used as numerator correspond to the simulation of cases per weather condition. + variable_df_dis3$incidence <-variable_df_dis3$counts/variable_df_dis3$residents_tot + #variable_df_dis_excluded<-subset(variable_df_dis3,variable_df_dis3$residents_tot<=cutoff) + #print(variable_df_dis_excluded) + #write.csv(na.omit(variable_df_dis_excluded),"variable_df_dis_excluded.csv") + #variable_df_dis3<- subset(variable_df_dis3,variable_df_dis3$residents_tot>=cutoff) #Gianni discuss this + + # if (length(na.omit(variable_df_dis3[,1])!=0)){ + + comp_cases <-variable_df_dis3$incidence*variable_df_dis3$residents # estimation of the "real" incidence based on the possible cases due to weather and the residents number in the catchment area + time_series <-data.frame(variable_df_dis3$Date, comp_cases, variable_df_dis3$incidence, variable_df_dis3$PostCode) + colnames(time_series) <-c("Date","Cases","Lambda","Lab") + + #} + + write.table(time_series, paste("../Data_Base/",variable_x,"_",time_lag_char,info,sep=""), col.names= NA, row.names= TRUE, sep=",",eol= "\n") # Saved on 17/11 after switching residents_total and residents from the denominator respecting the above attempt. + return(time_series) + + + }) } diff --git a/Codes/Function_Wavelet_analysis.R b/Codes/Function_Wavelet_analysis.R index df24444..d1d1b06 100644 --- a/Codes/Function_Wavelet_analysis.R +++ b/Codes/Function_Wavelet_analysis.R @@ -9,17 +9,17 @@ with(as.list(c(State, Pars)), { filename_power_average<-Pars[2] -Env_Pathogen_data0 <- ddply (State, ~Date, summarise, Cases=sum(Cases_computed)) # subset for all postcodes combined per day +Env_Pathogen_data0 <- ddply (State, ~Date, summarise, Cases=sum(Cases)) # subset for all postcodes combined per day # Wavelet spectrum plot Env_Pathogen_data0$date<-Env_Pathogen_data0$Date -salmonella_data_national <-Env_Pathogen_data0[order(as.Date(Env_Pathogen_data0$Date)),] # Sort from least recent to most recent +pathogen_data_national <-Env_Pathogen_data0[order(as.Date(Env_Pathogen_data0$Date)),] # Sort from least recent to most recent dt0<-1 -my.wt = analyze.wavelet(salmonella_data_national, "Cases", +my.wt = analyze.wavelet(pathogen_data_national, "Cases", loess.span=0.75, dt=dt0, dj=1/20, lowerPeriod=2.*dt0, - upperPeriod=floor(nrow(salmonella_data_national)/3)*dt0, + upperPeriod=floor(nrow(pathogen_data_national)/3)*dt0, make.pval=T, n.sim=100) @@ -54,6 +54,6 @@ wt.avg(my.wt, dev.off() -return(salmonella_data_national[,c(1,2)]) +return(pathogen_data_national[,c(1,2)]) }) } diff --git a/Codes/Pathogen_Linkage_fixed_time_lag.R b/Codes/Pathogen_Linkage_fixed_time_lag.R index 53938b2..52ab00c 100644 --- a/Codes/Pathogen_Linkage_fixed_time_lag.R +++ b/Codes/Pathogen_Linkage_fixed_time_lag.R @@ -174,7 +174,7 @@ write.table(diagnostic_laboratory_df[-c(length(diagnostic_laboratory_df))],paste ################ Define the time lag here ############################### -time_lag<-90 +time_lag<-14 time_lag_char<-paste(time_lag) ## This create a database for all postocode where there was at least one case diff --git a/Codes/Plot_reported_and_adjusted_campylobacter_cases.R b/Codes/Plot_reported_and_adjusted_campylobacter_cases.R index 2203f6a..1ee4ed3 100644 --- a/Codes/Plot_reported_and_adjusted_campylobacter_cases.R +++ b/Codes/Plot_reported_and_adjusted_campylobacter_cases.R @@ -19,6 +19,8 @@ Campylobacter_adjusted_date_df<-read.csv("../Data_Base/Campylobacter_adjusted_da Campylobacter_adjusted_date_df$Cases<-1 Campylobacter_adjusted_date_df$Reported_Cases<-1 + +Campylobacter_adjusted_date_df$Date<-as.Date(Campylobacter_adjusted_date_df$Date,format="%d/%m/%Y") #This line associates one case for each day and PostCode Campylobacter_adjusted_date_df<-subset(Campylobacter_adjusted_date_df,as.Date(as.character(Campylobacter_adjusted_date_df$Date))>="1989-01-01") -- GitLab