Skip to content
Snippets Groups Projects
Commit 2076280c authored by Lo Iacono, Giovanni Dr (School of Vet Med.)'s avatar Lo Iacono, Giovanni Dr (School of Vet Med.)
Browse files

update

parent 02e8d51c
No related branches found
No related tags found
No related merge requests found
##### Read Simulation of Salmonella cases from the England and Wales. ####
# Reminder that these cases are only for the PC with residents information provided by PHE (missing 117 PC, and about 1/3 of the salmonella cases)
Env_Pathogen_data_all <-read_csv(paste("../Data_Base/Campylobacter_environment_",time_lag_char,".csv",sep=""))
Env_Pathogen_data_all <- as.data.frame(Env_Pathogen_data_all)
#Env_Pathogen_data_all <-Env_Pathogen_data_all[,-1]
colnames(Env_Pathogen_data_all) <-c("PostCode","Date",
"Maximum_air_temperature",
"Minimum_air_temperature",
"Mean_wind_speed",
"Cumul_Precipitation",
"Mean_Precipitation",
"Relative_humidity",
"daylength",
"residents","Cases")
Env_Pathogen_data_all$Temperature_amplitude <- Env_Pathogen_data_all$Maximum_air_temperature - Env_Pathogen_data_all$Minimum_air_temperature
# Select the variables of interest:
Env_Pathogen_data_all$Date<-as.Date(Env_Pathogen_data_all$Date,format = "%d/%m/%Y")
Env_Pathogen_data_all <- distinct(Env_Pathogen_data_all) # Remove possible duplicates Gianni, correct but better to do this when we create the file
Env_Pathogen_data <-(subset(Env_Pathogen_data_all, year(Env_Pathogen_data_all$Date)>=year_first & year(Env_Pathogen_data_all$Date)<=year_last))
# Read the weather variables for every PC, averaged for the estimated delay in the past
Env_laboratory_data_all <-read_csv(paste("../Data_Base/Laboratory_environment_",time_lag_char,".csv",sep=""))
Env_laboratory_data_all <- as.data.frame (Env_laboratory_data_all)
#Env_laboratory_data_all <-Env_laboratory_data_all[,-1]
colnames(Env_laboratory_data_all) <-c("PostCode","Date",
"Maximum_air_temperature",
"Minimum_air_temperature",
"Mean_wind_speed",
"Cumul_Precipitation",
"Mean_Precipitation",
"Relative_humidity",
"daylength",
"residents")
Env_laboratory_data_all$Temperature_amplitude <- Env_laboratory_data_all$Maximum_air_temperature - Env_laboratory_data_all$Minimum_air_temperature
# Select the subset the year of interest
Env_laboratory_data<-subset(Env_laboratory_data_all, year(as.Date(Env_laboratory_data_all$Date))>=year_first & year(as.Date(Env_laboratory_data_all$Date))<=year_last)
# Plot Reconstruction of Time series and compare with empirical data
Plot_Reconstruction_time_series<-function(Data_frame_1, Data_frame_2,Pars)
{
with(as.list(c(Data_frame_1, Data_frame_2,Pars)), {
time_series_Post_Codes<-Data_frame_1
empirical_time_series<-Data_frame_2
file_name_ts<-Pars[1]
file_name_yearly<-Pars[2]
y_lab_text<-Pars[3]
colnames(time_series_Post_Codes) <-c("Date","Cases","Lambda","PostCode")
time_series<-aggregate (Cases ~ Date, time_series_Post_Codes, sum) # get the mean of the cases per day for all the PC
time_series_quantile <-ddply(time_series,~Date, function (x) quantile(x$Cases, c(.25,.5,.75)))
time_series_summary <-data.frame(time_series_quantile, rep("Model",times=length(time_series[,1])))
colnames(time_series_summary)<-c("Date","f_quant","Cases","s_quant","source")
#real_cases_mean <-empirical_time_series %>% mutate (rolling_mean= slider::slide_mean(Cases, before=3 , after=3 )) # %>% ungroup() #Gianni Check this, I changed as not sure it worked
#empirical_time_series$rolling_mean <-real_cases_mean$rolling_mean
real_cases_quantile<-ddply(empirical_time_series,~Date, function (x) quantile(x$Cases, c(.25,.5,.75)))
real_cases_summary<-cbind(real_cases_quantile,rep("Cases",times=length(real_cases_quantile[,1])))
colnames(real_cases_summary)<-c("Date","f_quant","Cases","s_quant","source")
#time_series_all <-rbind(time_series_summary[,c(1,2,6)], real_cases_summary[,c(1,2,6)])
time_series_all <-rbind(time_series_summary, real_cases_summary)
############## Plot Average per day of the year #################
pal<-wes_palette("Cavalcanti1")
time_series_plot<-ggplot(time_series_all,aes(x=Date,y=Cases,colour=source))
#time_series_plot<-time_series_plot+scale_color_brewer(palette=pal)
#time_series_plot<-time_series_plot+geom_line(data=real_cases_summary)
time_series_plot<-time_series_plot+geom_line(size=0.75)
#time_series_plot<-time_series_plot+scale_color_manual(values=c( "#E69F00", "#56B4E9"))
time_series_plot<-time_series_plot+scale_color_manual(values=wes_palette(n=2, name="Cavalcanti1"))
time_series_plot<-time_series_plot+ xlab("Date")+ ylab("Campylobacter Cases")
time_series_plot<-time_series_plot+
theme(legend.position= c(0.125,0.85),legend.title = element_blank(),
legend.text = element_text( size = 10),legend.background = element_blank(),
legend.key=element_rect(colour=NA,fill=NA))
time_series_plot<-time_series_plot+ theme(axis.title.x =element_text( colour="#990000", size=13))
time_series_plot<-time_series_plot+ theme(axis.title.y =element_text( colour="#990000", size=13))
time_series_plot<-time_series_plot+ theme(axis.title.x =element_text(size=13))
time_series_plot<-time_series_plot+ theme(axis.title.y =element_text(size=13))
print(time_series_plot)
tiff(filename = file_name_ts,width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default")
print(time_series_plot)
dev.off()
time_series$yday <-as.factor(yday(time_series$Date))
time_series_average1 <-ddply(time_series,~yday, summarise, mean=mean(Cases)) # mean of all cases for the same day of the year
time_series_quantile1 <-ddply(time_series,~yday, function (x) quantile(x$Cases, c(.25,.5,.75))) # quantiles of cases for all the years for the same day of the year
time_series_average <-cbind(time_series_average1,time_series_quantile1[,-1])
#real_cases$norm<-(real_cases$Cases/sum(real_cases$Cases))
empirical_time_series$yday<-as.factor(yday(as.Date(as.character(empirical_time_series$Date))))
real_cases_average1<-ddply(empirical_time_series,~yday,summarise,mean=mean(Cases))
real_cases_quantile1<-ddply(empirical_time_series,~yday,function (x) quantile(x$Cases, c(.25,.5,.75)))
real_cases_average2<-cbind(real_cases_average1,real_cases_quantile1[,-1])
real_cases_average<-
data.frame(real_cases_average2[,1],real_cases_average2[,c(2:5)])
df1 <-data.frame(real_cases_average,rep("Cases",times=length(real_cases_average[,1])))
colnames(df1) <-c("Day","Mean","f_quant","median","s_quant","source")
df2 <-data.frame(time_series_average,rep("Model",times=length(time_series_average[,1])))
colnames(df2) <-c("Day","Mean","f_quant","median","s_quant","source")
average_data <-rbind(df1,df2)
average_data$Day <-as.numeric(average_data$Day)
## We get one of the years as example to plot the date, in this case 2014, but could be any year within our data set.
month_2014 <-month.abb[unique(month(as.Date(average_data$Day, origin = "2014-01-01")))]
tick_pos<-yday(as.Date(as.character(
c("2014-01-01","2014-02-01","2014-03-01","2014-04-01","2014-05-01","2014-06-01",
"2014-07-01","2014-08-01","2014-09-01","2014-10-01","2014-11-01","2014-12-01")
)))
yearly_average <-ggplot(average_data, aes(x=Day, y=Mean, colour=source))+
geom_line(size=2)+
geom_ribbon(aes(ymin=f_quant, ymax=s_quant, fill=source), alpha=0.15) +
xlab("Day of the Year") + ylab(y_lab_text) +
scale_x_continuous(breaks=tick_pos, labels=month_2014) +
theme(legend.position= c(0.125,0.75),legend.title = element_text( size = 10),
legend.text = element_text( size = 10),legend.background = element_blank(),
legend.key=element_rect(colour=NA,fill=NA)) +
theme(axis.title.x =element_text( colour="#990000", size=13)) +
theme(axis.title.y =element_text( colour="#990000", size=13))
#+ ggtitle(paste(i,",", h,",", k))
print(yearly_average)
# Save the plots:
# Yearly average
tiff(filename = file_name_yearly, width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw", antialias="default")
print(yearly_average)
dev.off()
return()
})
}
Reconstruction_time_series<-function(Conditional_prevalence_unif_info, Data_frame_2,Pars)
{
with(as.list(c(Conditional_prevalence_unif_info, Data_frame_2,Pars)), {
Conditional_prevalence_unif<-Conditional_prevalence_unif_info[[1]]
breaks_x<-Conditional_prevalence_unif_info[[2]]
breaks_y<-Conditional_prevalence_unif_info[[3]]
breaks_z<-Conditional_prevalence_unif_info[[4]]
Env_laboratory_data<-Data_frame_2
variable_x<-Pars[1]
variable_y<-Pars[2]
variable_z<-Pars[3]
info<-Pars[4]
# z variable
dt_z = data.table(breaks_z, val = breaks_z)
dt_z <- na.omit (dt_z)
setattr(dt_z, "sorted", "breaks_z")
dt_z2 <- dt_z[J(Env_laboratory_data[[variable_z]]), roll = "nearest"]
variable_df <- as.data.frame(Env_laboratory_data[,c("PostCode","Date", "residents")])
variable_df$variable_z_break<- dt_z2$val
# y variable
dt_y = data.table(breaks_y, val = breaks_y)
dt_y <- na.omit (dt_y)
setattr(dt_y, "sorted", "breaks_y")
dt_y2 <- dt_y[J(Env_laboratory_data[[variable_y]]), roll = "nearest"]
variable_df$variable_y_break<- dt_y2$val
# x variable
dt_temp = data.table(breaks_x, val = breaks_x)
dt_temp <- na.omit (dt_temp)
setattr(dt_temp, "sorted", "breaks_x")
dt_temp2 <- dt_temp[J(Env_laboratory_data[[variable_x]]), roll = "nearest"]
variable_df$variable_x_break<- dt_temp2$val
variable_df2<-data.frame(variable_df$PostCode,variable_df$Date,variable_df$residents,variable_df$variable_z_break, variable_df$variable_y_break, variable_df$variable_x_break)
colnames(variable_df2) <- c("PostCode","Date", "residents", variable_z, variable_y, variable_x)
dt_df <- data.table(variable_df2)
dt_var <- data.table(Conditional_prevalence_unif)
variable_df_dis3 <- dt_df[dt_var, on= c(variable_z,variable_y,variable_x), allow.cartesian=TRUE] # repeated rows since same conditions apply to more than one PC. Gianni this shoul be a merge
variable_df_dis3 <- na.omit(variable_df_dis3) %>% distinct()
variable_df_dis3 <-variable_df_dis3[order(as.Date(variable_df_dis3$Date)),]
# calculation of all the potential incidence based on the weather conditions of the area. The counts used as numerator correspond to the simulation of cases per weather condition.
variable_df_dis3$prevalence <-variable_df_dis3$counts/variable_df_dis3$residents_tot
#variable_df_dis_excluded<-subset(variable_df_dis3,variable_df_dis3$residents_tot<=cutoff)
#print(variable_df_dis_excluded)
#write.csv(na.omit(variable_df_dis_excluded),"variable_df_dis_excluded.csv")
#variable_df_dis3<- subset(variable_df_dis3,variable_df_dis3$residents_tot>=cutoff) #Gianni discuss this
# if (length(na.omit(variable_df_dis3[,1])!=0)){
comp_cases <-variable_df_dis3$prevalence*variable_df_dis3$residents # estimation of the "real" incidence based on the possible cases due to weather and the residents number in the catchment area
time_series <-data.frame(variable_df_dis3$Date, comp_cases, variable_df_dis3$prevalence, variable_df_dis3$PostCode)
colnames(time_series) <-c("Date","Cases","Lambda","Lab")
#}
write.table(time_series, paste("../Data_Base/",variable_x,"_",variable_y,"_",variable_z,"_",time_lag_char,info,sep=""), col.names= NA, row.names= TRUE, sep=",",eol= "\n") # Saved on 17/11 after switching residents_total and residents from the denominator respecting the above attempt.
return(time_series)
})
}
# This script performs Wavelet analysis for to check for periodicity in the data #
wavelet_analysis<-function(State, Pars)
{
# Cleaning of cases:
with(as.list(c(State, Pars)), {
filename_power<-Pars[1]
filename_power_average<-Pars[2]
Env_Pathogen_data0 <- ddply (State, ~Date, summarise, Cases=sum(Cases_computed)) # subset for all postcodes combined per day
# Wavelet spectrum plot
Env_Pathogen_data0$date<-Env_Pathogen_data0$Date
salmonella_data_national <-Env_Pathogen_data0[order(as.Date(Env_Pathogen_data0$Date)),] # Sort from least recent to most recent
dt0<-1
my.wt = analyze.wavelet(salmonella_data_national, "Cases",
loess.span=0.75,
dt=dt0, dj=1/20,
lowerPeriod=2.*dt0,
upperPeriod=floor(nrow(salmonella_data_national)/3)*dt0,
make.pval=T, n.sim=100)
wt.image(my.wt, color.key = "quantile", n.levels = 250,legend.params = list(lab = "wavelet power levels", mar = 4.7),show.date = T,
color.palette = "rainbow(n.levels, start=0, end=.6)",plot.contour = F, siglvl = 0.05, col.contour = "black", plot.ridge = T, lvl = 0, col.ridge = "black",date.format ="%Y-%m-%d",periodlab = "periods (days)",useRaster=T)
# Save plot
tiff(filename = filename_power,width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default")
wt.image(my.wt, color.key = "quantile", n.levels = 250,legend.params = list(lab = "wavelet power levels", mar = 4.7),show.date = T,color.palette = "rainbow(n.levels, start=0, end=.6)",plot.contour = F, siglvl = 0.05, col.contour = "black", plot.ridge = T, lvl = 0, col.ridge = "black",periodlab = "periods (minutes)",useRaster=F)
dev.off()
# Power curve for ease of interpretation of Wavelet Gianni changed name as said detrend
reconstruct(my.wt, plot.waves = F, lwd = c(1,2), legend.coords = "bottomleft")
wt.avg(my.wt, siglvl=0.05, sigcol="red")
#wt.phase.image(my.wt, "Cases")
# Save plot
tiff(filename = filename_power_average,width = 17.35, height = 17.35, units = "cm", pointsize = 9, res = 600,compression = "lzw",antialias="default")
wt.avg(my.wt,
show.siglvl = T, sigpch = 20,
label.avg.axis = T, averagelab = NULL,
label.period.axis = T, periodlab = NULL,
show.legend = T, legend.coords = "topright",
main = NULL, lwd = 0.5,
verbose = F)
dev.off()
return(salmonella_data_national[,c(1,2)])
})
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment