Skip to content
Snippets Groups Projects
Commit 7d4ec78e authored by Lo Iacono, Giovanni Dr (School of Vet Med.)'s avatar Lo Iacono, Giovanni Dr (School of Vet Med.)
Browse files

Delete PAPER_Conditional_probability_quantile_original_MEDMI.R

parent 5abf54ab
No related branches found
No related tags found
No related merge requests found
# The code does look at how the risk of Campylobacter in humans depends on environmental variables
#The code uses old MEDMI data (not corrected for altitude) and analysis done on regular division of the range of the environemtal varaibles rather than quantile.
rm(list=ls(all=TRUE))
#
library(ISOweek)
library(lubridate)
library(ggplot2)
require(MASS)
library(scales)
require(pheno)
library(timeDate)
library(pastecs)
library(stringi)
library(timeSeries)
#library(Hmisc)
#list.of.packages <- c("xts")
#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
#if(length(new.packages)) install.packages(new.packages)
library(xts)
## Varaible file
variable_int<-"humidity"
variable_df_1<-read.csv(paste("../../Data_Base/OPIE_data_base/",variable_int,".csv",sep=""))
humidity<-variable_df_1[,-c(1,2)]
#dates<-as.Date(as.character(variable_df_1[-c(1,2),2]),format="%d/%m/%Y")
dates_s<-as.Date(as.character(variable_df_1[-c(1,2),2]),format="%Y-%m-%d")
dates<-rep(dates_s,times=length(variable_df_1)-2)
All_PC_s<-names(variable_df_1[1,])
All_PC_s<-All_PC_s[-c(1,2)]
All_PC<-rep(All_PC_s,each=length(dates_s))
width<-30
width_char<-paste(width)
#variable<-"Maximum_air_temperature"
#variable_y<-"Mean_Precipitation"
#variable_x<-"Relative_humidity"
variable_x<-"Minimum_air_temperature"
variable<-"daylength"
variable_y<-"Relative_humidity"
#variable<-"daylength"
#variable<-"Mean_Precipitation"
#"Maximum_air_temperature",
#"Minimum_air_temperature",
#variable_y<-"Mean_wind_speed"
#"Mean_Precipitation",
#"Relative_humidity",
#"daylength"
Env_Campylobacter_data_all2<-read.csv(paste("../../Data_Base/Cases_Environment/Simulated_Campylobacter_environment_",width_char,"_original_MEDMI.csv",sep=""))
Env_Campylobacter_data_all2<-Env_Campylobacter_data_all2[,-1]
colnames(Env_Campylobacter_data_all2)<-c("PostCode","Date","Cases",
"Maximum_air_temperature",
"Minimum_air_temperature",
"Mean_wind_speed",
"Cumul_Precipitation",
"Mean_Precipitation",
"Relative_humidity",
"daylength",
"residents")
Env_laboratory_weekly<-read.csv(paste("../../Data_Base/Cases_Environment/Simulated_Laboratory_",width_char,"_original_MEDMI.csv",sep=""))
Env_laboratory_weekly<-Env_laboratory_weekly[,-1]
colnames(Env_laboratory_weekly)<-c("PostCode","Date",
"Maximum_air_temperature",
"Minimum_air_temperature",
"Mean_wind_speed",
"Cumul_Precipitation",
"Mean_Precipitation",
"Relative_humidity",
"daylength",
"residents")
Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2,year(as.Date(Env_Campylobacter_data_all2$Date))>=1990 & year(as.Date(Env_Campylobacter_data_all2$Date))<=2015)
Env_laboratory_int1<-subset(Env_laboratory_weekly,year(as.Date(Env_laboratory_weekly$Date))>=1990 & year(as.Date(Env_laboratory_weekly$Date))<=2015)
################### include latitude and longitude
Coord_laboratory<-read.csv(paste("../../Data_Base/Cases/Lab_PostCodes.csv",sep=""))
lat_long_lab<-data.frame(names(Coord_laboratory),as.numeric(Coord_laboratory[1,]),as.numeric(Coord_laboratory[2,]))#
colnames(lat_long_lab)<-c("PostCode","lat","long")
Env_laboratory_int2<-merge(Env_laboratory_int1,lat_long_lab,by="PostCode")
Env_laboratory_int3<-data.frame(Env_laboratory_int2)
Env_Campylobacter_data_int2<-merge(Env_Campylobacter_data_int1,lat_long_lab,by="PostCode")
Env_Campylobacter_data_int3<-data.frame(Env_Campylobacter_data_int2)
######################## include daylength ##################
PC_df<-data.frame(All_PC,as.Date(dates))
colnames(PC_df)<-c("PostCode","Date")
Post_Codes_df<-merge(PC_df,lat_long_lab,by="PostCode")
daylength<-function(lat,day_year)
{
#Latitude measure in degrees
P <- asin(.39795*cos(.2163108 + 2*atan(.9671396*tan(.00860*(day_year-186)))))
Denom<-cos(lat*pi/180)*cos(P)
Numer<-sin(0.8333*pi/180) + sin(lat*pi/180)*sin(P)
D<-24-(24/pi)*acos(Numer/Denom)
return(D)
}
latitude<-Post_Codes_df$lat
day_of_the_year<-yday(as.Date(Post_Codes_df$Date))
daylength_int1<-mapply(daylength, latitude, day_of_the_year)
daylength_df<-data.frame(latitude, day_of_the_year,as.Date(Post_Codes_df$Date),daylength_int1)
colnames(daylength_df)<-c("lat","day_year","Date","daylength")
daylength_df$Date<-as.factor(daylength_df$Date)
daylength_df$lat<-as.factor(daylength_df$lat)
Env_laboratory_int3$Date<-as.factor(Env_laboratory_int3$Date)
Env_laboratory_int3$lat<-as.factor(Env_laboratory_int3$lat)
#Env_laboratory_int4<-merge(Env_laboratory_int3,daylength_df,by=c("lat","Date"))
#Env_laboratory<-data.frame(Env_laboratory_int4)
Env_laboratory<-data.frame(Env_laboratory_int3)
Env_Campylobacter_data_int3$Date<-as.factor(Env_Campylobacter_data_int3$Date)
Env_Campylobacter_data_int3$lat <-as.factor(Env_Campylobacter_data_int3$lat)
#Env_Campylobacter_data_int4<-merge(Env_Campylobacter_data_int3,daylength_df,by=c("lat","Date"))
#Env_Campylobacter_data<-data.frame(Env_Campylobacter_data_int4)
Env_Campylobacter_data<-data.frame(Env_Campylobacter_data_int3)
################### Divide the domains of the variables in bins according to quantiles
index_C<-which (names(Env_Campylobacter_data)==variable)
index_y_C<-which (names(Env_Campylobacter_data)==variable_y)
index_x_C<-which (names(Env_Campylobacter_data)==variable_x)
index_res_C<-which (names(Env_Campylobacter_data)=="residents")
index<-which (names(Env_laboratory)==variable)
index_y<-which (names(Env_laboratory)==variable_y)
index_x<-which (names(Env_laboratory)==variable_x)
index_res<-which (names(Env_laboratory)=="residents")
#########################
breaks_z_lab<-function(variable,by_z)
{
index<-which (names(Env_laboratory)==variable)
breaks_z<-as.numeric(quantile(na.omit(Env_laboratory[,index]), probs=seq(0,1, by=by_z), na.rm=TRUE))
breaks_z[length(breaks_z)]<-ceiling(as.numeric(quantile(na.omit(Env_laboratory[,index]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[length(breaks_z)]
breaks_z[1]<-floor(as.numeric(quantile(na.omit(Env_laboratory[,index]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[1]
return(breaks_z)
}
breaks_z<-function(variable,by_z)
{
index_C<-which (names(Env_Campylobacter_data)==variable)
breaks_z<-as.numeric(quantile(na.omit(Env_Campylobacter_data[,index_C]), probs=seq(0,1, by=by_z), na.rm=TRUE))
breaks_z[length(breaks_z)]<-ceiling(as.numeric(quantile(na.omit(Env_Campylobacter_data[,index_C]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[length(breaks_z)]
breaks_z[1]<-floor(as.numeric(quantile(na.omit(Env_Campylobacter_data[,index_C]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[1]
return(breaks_z)
}
breaks_y_lab<-function(variable,variable_y,by_z,by_y,j_z)
{
index_C<-which (names(Env_Campylobacter_data)==variable)
index<-which (names(Env_laboratory)==variable)
index_y<-which (names(Env_laboratory)==variable_y)
wt<-(findInterval(Env_Campylobacter_data[,index_C],breaks_z(variable,by_z)))
ww<-which(wt==j_z)
Env_Campylobacter_data_some<-Env_Campylobacter_data[ww,]
wt<-(findInterval(Env_laboratory[,index],breaks_z(variable,by_z)))
ww<-which(wt==j_z)
Env_laboratory_some<-Env_laboratory[ww,]
if (length(Env_Campylobacter_data_some[,1])!=0) {
breaks_y<-as.numeric(quantile(na.omit(Env_laboratory_some[,index_y]), probs=seq(0,1, by=by_y), na.rm=TRUE))
breaks_y[length(breaks_y)]<-ceiling(as.numeric(quantile(na.omit(Env_laboratory_some[,index_y]), probs=seq(0,1, by=by_y), na.rm=TRUE)))[length(breaks_y)]
breaks_y[1]<-floor(as.numeric(quantile(na.omit(Env_laboratory_some[,index_y]), probs=seq(0,1, by=by_y), na.rm=TRUE)))[1]
}else{
breaks_y<-c()
}
return(breaks_y)
}
breaks_y<-function(variable,variable_y,by_z,by_y,j_z)
{
index_C<-which (names(Env_Campylobacter_data)==variable)
index_y_C<-which (names(Env_Campylobacter_data)==variable_y)
wt<-(findInterval(Env_Campylobacter_data[,index_C],breaks_z(variable,by_z)))
ww<-which(wt==j_z)
Env_Campylobacter_data_some<-Env_Campylobacter_data[ww,]
if (length(Env_Campylobacter_data_some[,1])!=0) {
breaks_y<-as.numeric(quantile(na.omit(Env_Campylobacter_data_some[,index_y_C]), probs=seq(0,1, by=by_y), na.rm=TRUE))
breaks_y[length(breaks_y)]<-ceiling(as.numeric(quantile(na.omit(Env_Campylobacter_data_some[,index_y_C]), probs=seq(0,1, by=by_y), na.rm=TRUE)))[length(breaks_y)]
breaks_y[1]<-floor(as.numeric(quantile(na.omit(Env_Campylobacter_data_some[,index_y_C]), probs=seq(0,1, by=by_y), na.rm=TRUE)))[1]
}else{
breaks_y<-c()
}
return(breaks_y)
}
breaks_x_lab<-function(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y)
{
index_C<-which (names(Env_Campylobacter_data)==variable)
index_y_C<-which (names(Env_Campylobacter_data)==variable_y)
index<-which (names(Env_laboratory)==variable)
index_y<-which (names(Env_laboratory)==variable_y)
index_x<-which (names(Env_laboratory)==variable_x)
if(is.na(breaks_y(variable,variable_y,by_z,by_y,j_z)[j_y])=='FALSE'){
wt<-(findInterval(Env_Campylobacter_data[,index_y_C],breaks_y(variable,variable_y,by_z,by_y,j_z)))
ww<-which(wt==j_y)
Env_Campylobacter_data_some<-Env_Campylobacter_data[ww,]
if (length(Env_Campylobacter_data_some[,1])!=0) {
wt<-(findInterval(Env_laboratory[,index_y],breaks_y(variable,variable_y,by_z,by_y,j_z)))
ww<-which(wt==j_y)
Env_laboratory_some<-Env_laboratory[ww,]
breaks_x<-as.numeric(quantile(na.omit(Env_laboratory_some[,index_x]), probs=seq(0,1, by=by_x), na.rm=TRUE))
breaks_x[length(breaks_x)]<-ceiling(as.numeric(quantile(na.omit(Env_laboratory_some[,index_x]), probs=seq(0,1, by=by_x), na.rm=TRUE)))[length(breaks_x)]
breaks_x[1]<-floor(as.numeric(quantile(na.omit(Env_laboratory_some[,index_x]), probs=seq(0,1, by=by_x), na.rm=TRUE)))[1]
} else {
breaks_x<-c()
} } else {
breaks_x<-c()
}
return(breaks_x)
}
breaks_x<-function(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y)
{
index_C<-which (names(Env_Campylobacter_data)==variable)
index_y_C<-which (names(Env_Campylobacter_data)==variable_y)
index_x_C<-which (names(Env_Campylobacter_data)==variable_x)
if(is.na(breaks_y(variable,variable_y,by_z,by_y,j_z)[j_y])=='FALSE'){
wt<-(findInterval(Env_Campylobacter_data[,index_y_C],breaks_y(variable,variable_y,by_z,by_y,j_z)))
ww<-which(wt==j_y)
Env_Campylobacter_data_some<-Env_Campylobacter_data[ww,]
if (length(Env_Campylobacter_data_some[,1])!=0) {
wt<-(findInterval(Env_Campylobacter_data_some[,index_y_C],breaks_y(variable,variable_y,by_z,by_y,j_z)))
ww<-which(wt==j_y)
Env_Campylobacter_data_some2<-Env_Campylobacter_data_some[ww,]
breaks_x<-as.numeric(quantile(na.omit(Env_Campylobacter_data_some2[,index_x_C]), probs=seq(0,1, by=by_x), na.rm=TRUE))
breaks_x[length(breaks_x)]<-ceiling(as.numeric(quantile(na.omit(Env_Campylobacter_data_some2[,index_x_C]), probs=seq(0,1, by=by_x), na.rm=TRUE)))[length(breaks_x)]
breaks_x[1]<-floor(as.numeric(quantile(na.omit(Env_Campylobacter_data_some2[,index_x_C]), probs=seq(0,1, by=by_x), na.rm=TRUE)))[1]
} else
{ breaks_x<-c()
}
}
else {
breaks_x<-c()
}
return(breaks_x)
}
#################
var_x_loc_df<-data.frame(character(), character(),character(),numeric(),numeric(),numeric())
colnames(var_x_loc_df)<-c(variable,variable_y,variable_x,"counts","residents","residents_tot")
residents_i_var<-0
residents_universal<-0
#i_var_max<-length(breaks_var)
#i_var_min<-1
#i_var_max_x<-length(breaks_var_x)
#i_var_min_x<-1
#####################
by_z<-0.25
by_y<-0.25
by_x<-0.1
#i_var_min<-breaks_z(variable,by_z)[1]
#i_var_max<-breaks_z(variable,by_z)[length(breaks_z(variable,by_z))]
j_z_min<-1
j_z_max<-length(breaks_z(variable,by_z))-1
for (j_z in c(j_z_min:j_z_max))
{
wt<-(findInterval((Env_Campylobacter_data[,index_C]),breaks_z(variable,by_z)))
ww<-which(wt==j_z)
Env_Campylobacter_data_z<-Env_Campylobacter_data[ww,]
wt<-(findInterval((Env_laboratory[,index]),breaks_z(variable,by_z)))
ww<-which(wt==j_z)
Env_laboratory_z<-Env_laboratory[ww,]
if (length(Env_Campylobacter_data_z[,1])!=0){
if (length(breaks_y(variable,variable_y,by_z,by_y,j_z))!=0){
j_y_min<-1
j_y_max<-length(breaks_y(variable,variable_y,by_z,by_y,j_z))-1
for (j_y in c(j_y_min:j_y_max))
{
wt<-(findInterval((Env_Campylobacter_data_z[,index_y_C]),breaks_y(variable,variable_y,by_z,by_y,j_z)))
ww<-which(wt==j_y)
Env_Campylobacter_data_y<-Env_Campylobacter_data_z[ww,]
wt<-(findInterval((Env_laboratory_z[,index_y]),breaks_y(variable,variable_y,by_z,by_y,j_z)))
ww<-which(wt==j_y)
Env_laboratory_y<-Env_laboratory_z[ww,]
if (length(breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y))!=0){
j_x_min<-1
j_x_max<- length(breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y))-1
for (j_x in c(j_x_min:j_x_max))
{
wt<-(findInterval((Env_Campylobacter_data_y[,index_x_C]),breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y)))
ww<-which(wt==j_x)
Yt1<-Env_Campylobacter_data_y[ww,c(1:3,index_C,index_y_C,index_x_C,index_res_C)]
wt<-(findInterval((Env_laboratory[,index_x]),breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y)))
ww<-which(wt==j_x)
Y_tot<-Env_laboratory_y[ww,c(1:2,index,index_y,index_x,index_res)]
Total_cases<-sum((as.numeric(na.omit(Yt1$Cases))))
residents<-sum((as.numeric(na.omit(Yt1$residents))))
residents_tot<-sum((as.numeric(na.omit(Y_tot$residents))))
data_df<-data.frame(
breaks_z(variable,by_z)[j_z],
breaks_y(variable,variable_y,by_z,by_y,j_z)[j_y],
breaks_x(variable,variable_y,variable_x,by_z,by_y,by_x,j_z,j_y)[j_x],
Total_cases,
residents,
residents_tot)
colnames(data_df)<-c(variable,variable_y,variable_x,"counts","residents","residents_tot")
var_x_loc_df<-rbind(var_x_loc_df,data_df)
print(c(j_x,j_y,j_z, Total_cases))
}}}
}
}}
write.csv(var_x_loc_df,paste("../../Data_Base/Cases_Environment/Conditional_probability_",variable,"_",variable_y,"_",variable_x,"_",width_char,"_Simulated_for_rec_original_MEDMI_quantile.csv",sep=""))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment