Skip to content
Snippets Groups Projects
Commit 0afe5601 authored by Lo Iacono, Giovanni Dr (School of Vet Med.)'s avatar Lo Iacono, Giovanni Dr (School of Vet Med.)
Browse files

Delete Campylobacter_environment_analysis_subset_two_variables_quantile.R

parent b4612ab6
No related branches found
No related tags found
No related merge requests found
# The code does look at how the risk of Campylobacter in humans depends on environmental variables
#The code uses old MEDMI data (not corrected for altitude) and analysis done on regular division of the range of the environemtal varaibles rather than quantile.
rm(list=ls(all=TRUE))
#
library(ISOweek)
library(lubridate)
library(ggplot2)
require(MASS)
library(scales)
require(pheno)
library(timeDate)
library(pastecs)
library(stringi)
library(timeSeries)
#library(Hmisc)
#list.of.packages <- c("xts")
#new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
#if(length(new.packages)) install.packages(new.packages)
library(xts)
## Variable file
variable_int<-"humidity"
variable_df_1<-read.csv(paste("../../Data_Base/OPIE_data_base/",variable_int,".csv",sep=""))
humidity<-variable_df_1[,-c(1,2)]
#dates<-as.Date(as.character(variable_df_1[-c(1,2),2]),format="%d/%m/%Y")
dates_s<-as.Date(as.character(variable_df_1[-c(1,2),2]),format="%Y-%m-%d")
dates<-rep(dates_s,times=length(variable_df_1)-2)
All_PC_s<-names(variable_df_1[1,])
All_PC_s<-All_PC_s[-c(1,2)]
All_PC<-rep(All_PC_s,each=length(dates_s))
width<-30
width_char<-paste(width)
variable<-"Maximum_air_temperature"
variable_x<-"daylength"
#variable_y<-"Mean_Precipitation"
#variable<-"daylength"
#variable<-"Mean_Precipitation"
#"Maximum_air_temperature",
#"Minimum_air_temperature",
#"Mean_wind_speed",
#"Mean_Precipitation",
#"Relative_humidity",
#"daylength"
Env_Campylobacter_data_all2<-read.csv(paste("../../Data_Base/Cases_Environment/Simulated_Campylobacter_environment_",width_char,"_original_MEDMI.csv",sep=""))
Env_Campylobacter_data_all2<-Env_Campylobacter_data_all2[,-1]
colnames(Env_Campylobacter_data_all2)<-c("PostCode","Date","Cases",
"Maximum_air_temperature",
"Minimum_air_temperature",
"Mean_wind_speed",
"Cumul_Precipitation",
"Mean_Precipitation",
"Relative_humidity",
"daylength",
"residents")
Env_laboratory_weekly<-read.csv(paste("../../Data_Base/Cases_Environment/Simulated_Laboratory_",width_char,"_original_MEDMI.csv",sep=""))
Env_laboratory_weekly<-Env_laboratory_weekly[,-1]
colnames(Env_laboratory_weekly)<-c("PostCode","Date",
"Maximum_air_temperature",
"Minimum_air_temperature",
"Mean_wind_speed",
"Cumul_Precipitation",
"Mean_Precipitation",
"Relative_humidity",
"daylength",
"residents")
Env_Campylobacter_data_int1<-subset(Env_Campylobacter_data_all2,year(as.Date(Env_Campylobacter_data_all2$Date))>=1990 & year(as.Date(Env_Campylobacter_data_all2$Date))<=2015)
Env_laboratory_int1<-subset(Env_laboratory_weekly,year(as.Date(Env_laboratory_weekly$Date))>=1990 & year(as.Date(Env_laboratory_weekly$Date))<=2015)
################### include latitude and longitude
Coord_laboratory<-read.csv(paste("../../Data_Base/Cases/Lab_PostCodes.csv",sep=""))
lat_long_lab<-data.frame(names(Coord_laboratory),as.numeric(Coord_laboratory[1,]),as.numeric(Coord_laboratory[2,]))#
colnames(lat_long_lab)<-c("PostCode","lat","long")
Env_laboratory_int2<-merge(Env_laboratory_int1,lat_long_lab,by="PostCode")
Env_laboratory_int3<-data.frame(Env_laboratory_int2)
Env_Campylobacter_data_int2<-merge(Env_Campylobacter_data_int1,lat_long_lab,by="PostCode")
Env_Campylobacter_data_int3<-data.frame(Env_Campylobacter_data_int2)
######################## include daylength ##################
PC_df<-data.frame(All_PC,as.Date(dates))
colnames(PC_df)<-c("PostCode","Date")
Post_Codes_df<-merge(PC_df,lat_long_lab,by="PostCode")
daylength<-function(lat,day_year)
{
#Latitude measure in degrees
P <- asin(.39795*cos(.2163108 + 2*atan(.9671396*tan(.00860*(day_year-186)))))
Denom<-cos(lat*pi/180)*cos(P)
Numer<-sin(0.8333*pi/180) + sin(lat*pi/180)*sin(P)
D<-24-(24/pi)*acos(Numer/Denom)
return(D)
}
latitude<-Post_Codes_df$lat
day_of_the_year<-yday(as.Date(Post_Codes_df$Date))
daylength_int1<-mapply(daylength, latitude, day_of_the_year)
daylength_df<-data.frame(latitude, day_of_the_year,as.Date(Post_Codes_df$Date),daylength_int1)
colnames(daylength_df)<-c("lat","day_year","Date","daylength")
daylength_df$Date<-as.factor(daylength_df$Date)
daylength_df$lat<-as.factor(daylength_df$lat)
Env_laboratory_int3$Date<-as.factor(Env_laboratory_int3$Date)
Env_laboratory_int3$lat<-as.factor(Env_laboratory_int3$lat)
#Env_laboratory_int4<-merge(Env_laboratory_int3,daylength_df,by=c("lat","Date"))
#Env_laboratory<-data.frame(Env_laboratory_int4)
Env_laboratory<-data.frame(Env_laboratory_int3)
Env_Campylobacter_data_int3$Date<-as.factor(Env_Campylobacter_data_int3$Date)
Env_Campylobacter_data_int3$lat <-as.factor(Env_Campylobacter_data_int3$lat)
#Env_Campylobacter_data_int4<-merge(Env_Campylobacter_data_int3,daylength_df,by=c("lat","Date"))
#Env_Campylobacter_data<-data.frame(Env_Campylobacter_data_int4)
Env_Campylobacter_data<-data.frame(Env_Campylobacter_data_int3)
################### Divide the domains of the variables in bins according to quantiles
index_C<-which (names(Env_Campylobacter_data)==variable)
index<-which (names(Env_laboratory)==variable)
#########################
breaks_z<-function(variable,by_z)
{
index<-which (names(Env_laboratory)==variable)
breaks_z<-as.numeric(quantile(na.omit(Env_laboratory[,index]), probs=seq(0,1, by=by_z), na.rm=TRUE))
breaks_z[length(breaks_z)]<-ceiling(as.numeric(quantile(na.omit(Env_laboratory[,index]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[length(breaks_z)]
breaks_z[1]<-floor(as.numeric(quantile(na.omit(Env_laboratory[,index]), probs=seq(0,1, by=by_z), na.rm=TRUE)))[1]
return(breaks_z)
}
#################
var_x_loc_df<-data.frame(character(),numeric(),numeric(),numeric())
colnames(var_x_loc_df)<-c(variable,"counts","residents","residents_tot")
residents_i_var<-0
residents_universal<-0
#i_var_max<-length(breaks_var)
#i_var_min<-1
#i_var_max_x<-length(breaks_var_x)
#i_var_min_x<-1
#####################
by_z<-0.05
j_z_min<-1
j_z_max<-length(breaks_z(variable,by_z))-1
for (j_z in c(j_z_min:j_z_max))
{
wt<-(findInterval((Env_Campylobacter_data[,index_C]),breaks_z(variable,by_z)))
ww<-which(wt==j_z)
Env_Campylobacter_data_z<-Env_Campylobacter_data[ww,]
wt<-(findInterval((Env_laboratory[,index]),breaks_z(variable,by_z)))
ww<-which(wt==j_z)
Env_laboratory_z<-Env_laboratory[ww,]
Total_cases<-sum((as.numeric(na.omit(Env_Campylobacter_data_z$Cases))))
residents<-sum((as.numeric(na.omit(Env_Campylobacter_data_z$residents))))
residents_tot<-sum((as.numeric(na.omit(Env_laboratory_z$residents))))
data_df<-data.frame(
breaks_z(variable,by_z)[j_z],
Total_cases,
residents,
residents_tot)
colnames(data_df)<-c(variable,"counts","residents","residents_tot")
var_x_loc_df<-rbind(var_x_loc_df,data_df)
print(c(j_z, Total_cases))
}
write.csv(var_x_loc_df,paste("../../Data_Base/Cases_Environment/Conditional_probability_",variable,"_",width_char,"_Simulated_for_rec_original_MEDMI.csv",sep=""))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment