#################
## Estimating time dependent reproduction number for Nigeria
## Data can be downloaded from 
# https://data.mendeley.com/datasets/nz2gtpmh99/1 
#################
# Load Libraries (packages)
library(ggplot2)
library(EpiEstim)

# Nigeria
rm(list=ls())
# set to your working directory
setwd("~/COVID 19 Incidence Data Nigeria")
# read in the data
ex1<-read.csv("~/COVID_1 Nig.csv", header=T)
# check the data
head(ex1)
# define the mean and standard deviation 
## Mean serial interval for Covid19 (4.7)

mean_cov_si <- 4.8

## SD serial interval for Covid19 (2.9)

sd_cov_si <- 2.3
# run the estimates
rt2 <- estimate_R(ex1[,2:3], method = "parametric_si",
                  
                  config = make_config(list(
                    
                    mean_si = mean_cov_si, std_si = sd_cov_si)))
# get the Rt 
rt_t2<-rt2$R
# Plot the mean Rt
plot(rt_t2$Mean,type="l")

# set the date
date<-seq(from = as.Date("2020-03-23"), to = as.Date("2020-05-27"), by = 'day')
Mean<-rt_t2$Mean
Std<-rt_t2$Std

ggplot(rt_t2, aes(x=date, y=Mean)) +
  
  geom_ribbon(aes(ymin=Mean - Std, ymax=Mean + Std), fill = "grey70")+
  
  geom_line(aes(y=Mean))+
  
  theme_minimal()+
  
  theme(panel.grid.minor.x=element_blank(),
        
        panel.grid.major.x=element_blank())+
  
  labs( y = 'TD - R0', x = "Date")+
  
  ggtitle('COVID_19 weekly-varying Reproduction Number in Nigeria (23rd March - 27th May)')

# Make a summary plot
plot(rt2)

# read in data for the south west
sw1<-read.csv("~/SW1_C19.csv", header=T)

# run the estimate for SW
sw_r <- estimate_R(sw1[,2:3], method = "parametric_si",
                   
                   config = make_config(list(
                     
                     mean_si = mean_cov_si, std_si = sd_cov_si)))
plot(sw_r)

# get the Rt 
swrt_t2<-sw_r$R
sw_Mn<-swrt_t2$Mean
sw_Std<-swrt_t2$Std

ggplot(swrt_t2, aes(x=date, y=sw_Mn)) +
  
  geom_ribbon(aes(ymin=sw_Mn - sw_Std, ymax=sw_Mn + sw_Std), fill = "grey70")+
  
  geom_line(aes(y=sw_Mn))+
  
  theme_minimal()+
  
  theme(panel.grid.minor.x=element_blank(),
        
        panel.grid.major.x=element_blank())+
  
  labs( y = 'Time Dependent Reproduction Number', x = "Date")+
  
  ggtitle("")


# read in data for the North west
nw1<-read.csv("~/NW1_C19.csv", header=T)

# run the estimate for NW
nw_r <- estimate_R(nw1[44:91,2], method = "parametric_si",
                   
                   config = make_config(list(
                     
                     mean_si = mean_cov_si, std_si = sd_cov_si)))
plot(nw_r)

# get the Rt 
nwrt_t2<-nw_r$R
nw_Mn<-nwrt_t2$Mean
nw_Std<-nwrt_t2$Std
d_nw<-seq(from = as.Date("2020-04-17"), to = as.Date("2020-05-27"), by = 'day')

ggplot(nwrt_t2, aes(x=d_nw, y=nw_Mn)) +
  
  geom_ribbon(aes(ymin=nw_Mn - nw_Std, ymax=nw_Mn + nw_Std), fill = "grey70")+
  
  geom_line(aes(y=nw_Mn))+
  
  theme_minimal()+
  
  theme(panel.grid.minor.x=element_blank(),
        
        panel.grid.major.x=element_blank())+
  
  labs( y = 'Time Dependent Reproduction Number', x = "Date")+
  
  ggtitle("")


# read in data for the north central
nc1<-read.csv("~/NC1_C19.csv", header=T)

# run the estimate for NC
nc_r <- estimate_R(nc1[25:91,2], method = "parametric_si",
                   
                   config = make_config(list(
                     
                     mean_si = mean_cov_si, std_si = sd_cov_si)))
plot(nc_r)

# get the Rt 
ncrt_t2<-nc_r$R
nc_Mn<-ncrt_t2$Mean
nc_Std<-ncrt_t2$Std
d_nc<-seq(from = as.Date("2020-03-29"), to = as.Date("2020-05-27"), by = 'day')

# plot TD_R
ggplot(ncrt_t2, aes(x=d_nc, y=nc_Mn)) +
  
  geom_ribbon(aes(ymin=nc_Mn - nc_Std, ymax=nc_Mn + nc_Std), fill = "grey70")+
  
  geom_line(aes(y=nc_Mn))+
  
  theme_minimal()+
  
  theme(panel.grid.minor.x=element_blank(),
        
        panel.grid.major.x=element_blank())+
  
  labs( y = 'Time Dependent Reproduction Number', x = "Date")+
  
  ggtitle("")
#############
# read in data for the north east
ne1<-read.csv("~/NE1_C19.csv", header=T)

# run the estimate for NC
ne_r <- estimate_R(ne1[54:91,2], method = "parametric_si",
                   
                   config = make_config(list(
                     
                     mean_si = mean_cov_si, std_si = sd_cov_si)))
plot(ne_r)

# get the Rt 
nert_t2<-ne_r$R
ne_Mn<-nert_t2$Mean
ne_Std<-nert_t2$Std
d_ne<-seq(from = as.Date("2020-04-27"), to = as.Date("2020-05-27"), by = 'day')

# plot TD_R
ggplot(nert_t2, aes(x=d_ne, y=ne_Mn)) +
  
  geom_ribbon(aes(ymin=ne_Mn - ne_Std, ymax=ne_Mn + ne_Std), fill = "grey70")+
  
  geom_line(aes(y=ne_Mn))+
  
  theme_minimal()+
  
  theme(panel.grid.minor.x=element_blank(),
        
        panel.grid.major.x=element_blank())+
  
  labs( y = 'Time Dependent Reproduction Number', x = "Date")+
  
  ggtitle("")

#############
# read in data for the south south
ss1<-read.csv("~/SS1_C19.csv", header=T)

# run the estimate for SS
ss_r <- estimate_R(ss1[39:91,2], method = "parametric_si",
                   
                   config = make_config(list(
                     
                     mean_si = mean_cov_si, std_si = sd_cov_si)))
plot(ss_r)

# get the Rt 
ssrt_t2<-ss_r$R
ss_Mn<-ssrt_t2$Mean
ss_Std<-ssrt_t2$Std
d_ss<-seq(from = as.Date("2020-04-12"), to = as.Date("2020-05-27"), by = 'day')

# plot TD_R
ggplot(ssrt_t2, aes(x=d_ss, y=ss_Mn)) +
  
  geom_ribbon(aes(ymin=ss_Mn - ss_Std, ymax=ss_Mn + ss_Std), fill = "grey70")+
  
  geom_line(aes(y=ss_Mn))+
  
  theme_minimal()+
  
  theme(panel.grid.minor.x=element_blank(),
        
        panel.grid.major.x=element_blank())+
  
  labs( y = 'Time Dependent Reproduction Number', x = "Date")+
  
  ggtitle("")

#############
# read in data for the south east
se1<-read.csv("~/SE1_C19.csv", header=T)

# run the estimate for SS
se_r <- estimate_R(se1[59:91,2], method = "parametric_si",
                   
                   config = make_config(list(
                     
                     mean_si = mean_cov_si, std_si = sd_cov_si)))
plot(se_r)

# get the Rt 
sert_t2<-se_r$R
se_Mn<-sert_t2$Mean
se_Std<-sert_t2$Std
d_se<-seq(from = as.Date("2020-05-2"), to = as.Date("2020-05-27"), by = 'day')

# plot TD_R
ggplot(sert_t2, aes(x=d_se, y=se_Mn)) +
  
  geom_ribbon(aes(ymin=se_Mn - se_Std, ymax=se_Mn + se_Std), fill = "grey70")+
  
  geom_line(aes(y=se_Mn))+
  
  theme_minimal()+
  
  theme(panel.grid.minor.x=element_blank(),
        
        panel.grid.major.x=element_blank())+
  
  labs( y = 'Time Dependent Reproduction Number', x = "Date")+
  
  ggtitle("")

####################################################
################################ 
# Kruskal- Wallis, Kolmogorv-Smirnov and Student's t tests 

# NC/NE
ks.test(nc_Mn[30:60],ne_Mn)

# NC/NW
ks.test(nc_Mn[20:60],nw_Mn)

#NC/SW
ks.test(nc_Mn,sw_Mn[7:66])

# NC/SE
ks.test(nc_Mn[35:60],se_Mn)

# NC/SS
ks.test(nc_Mn[15:60],ss_Mn)

### NE 31
# Ne/nw
ks.test(ne_Mn,nw_Mn[11:41])

# ne/sw
ks.test(ne_Mn,sw_Mn[36:66])

# ne/se
ks.test(ne_Mn[6:31],se_Mn)

# ne/ss
ks.test(ne_Mn,ss_Mn[16:46])

### NW
# Nw vs SW
ks.test(nw_Mn,sw_Mn[26:66])

# Nw vs se
ks.test(nw_Mn[16:41],se_Mn)

# Nw vs ss
ks.test(nw_Mn,ss_Mn[6:46])

### SW
# sw vs se
ks.test(sw_Mn[41:66],se_Mn)

# sw vs ss
ks.test(sw_Mn[21:66],ss_Mn)

### SE 
# se vs ss
ks.test(se_Mn,ss_Mn[21:46])

## based on the p-values obtained from KS test
## adjust for the type 1 error
ad1<-c(0.41, 0.42,0.047,0.00,0.01,0.96,0.02,0.00,0.00,0.03,0.00,0.03,0.00,0.14,0.5)
p.adjust(ad1,method="bonferroni")

# try all methods
p.adjust.M <- p.adjust.methods[p.adjust.methods != "fdr"]
p.adj    <- sapply(p.adjust.M, function(meth) p.adjust(ad1, meth))

# save result as a table
write.csv(p.adj, "~/type_1 error1.csv")
# plot results of all methods
matplot(ad1, p.adj, ylab="p.adjust(ad1, meth)", type = "l", asp = 1,
        main = "P-value adjustments",lty=1:12)

# bar plots country COVID 19 and across the zones 
zo<-read.csv("~/Geo_zones.csv", header=T)
dates<-seq(from = as.Date("2020-02-27"), to = as.Date("2020-05-27"), by = 'day')
zo<-as.matrix(zo[,2:92])

cl<-c("blue","green","brown","red","yellow", "orange")
reg<-c("NC","NE","NW","SE","SS","SW")

barplot(zo,ylim=c(0,400),names.arg=dates, col=cl,xlab="Dates",ylab= "No. of Infections")
legend("topleft", reg, cex = 1.3, fill = cl)

#########