library("tidyverse")
library("dplyr")
library("plotly")
library("htmlwidgets")
library("lubridate")
library("viridis")

## The folders htmlplots and pngplots are assumed to exist in the working dir.
## The script also reads gisaid-india.tsv and states.tsv from the working dir.

## Reads one metadata TSV. `date` is forced to character because the cleanup
## step below accepts partial dates ("%Y-%m", "%Y"); letting readr guess the
## type per file could yield Date in one file and character in another, which
## rbind() would then coerce silently.
read_metadata <- function(path) {
  read_tsv(path, col_types = cols(date = col_character()))
}

df <- read_metadata("gisaid-india.tsv")
states <- read_tsv("states.tsv")

## Update files: every file in the working dir whose name contains
## "metadata.tsv" (the dot is escaped so it only matches a literal dot).
update_files <- list.files(pattern = "metadata\\.tsv")
update_files <- update_files[file.exists(update_files)]

## If update files exist, merge them into gisaid-india.tsv and delete them.
## Several matches are handled one by one; a vector-valued `if` condition is
## an error in current R.
if (length(update_files) > 0) {
  for (update_file in update_files) {
    update <- read_metadata(update_file)
    df <- df %>%
      rbind(update) %>%
      distinct()
  }
  write_tsv(df, file = "gisaid-india.tsv")
  ## Only remove the updates once the merged file has been written.
  file.remove(update_files)
}

## cleanup
## `mydate` is the parsed collection date; `Month` is a "<year>-<Mon>" label.
## Rows whose date cannot be parsed end up with the literal label "NA-NA",
## because paste0() turns NA into the string "NA".
df <- df %>%
  mutate(
    mydate = parse_date_time(x = date, orders = c("%Y-%m-%d", "%Y-%m", "%Y")),
    Month = paste0(year(mydate), "-", month.abb[month(mydate)])
  )

## Change state names: spelling variant (name) -> canonical name (value).
state_name_fixes <- c(
  "Andhra pradesh" = "Andhra Pradesh",
  "Chhatisgarh" = "Chhattisgarh",
  "Chandighar" = "Chandigarh",
  "Tamilnadu" = "Tamil Nadu",
  "ARUNACHAL PRADESH" = "Arunachal Pradesh",
  "Dadra & Nagar Haveli" = "Dadra and Nagar Haveli and Daman and Diu",
  "Dadra and Nagar Haveli" = "Dadra and Nagar Haveli and Daman and Diu",
  "Dadra And Nagar Haveli" = "Dadra and Nagar Haveli and Daman and Diu",
  "New Delhi" = "Delhi",
  "Jammu & kashmir" = "Jammu and Kashmir",
  "Jammu & Kashmir" = "Jammu and Kashmir",
  "Jammu and Kashmīr" = "Jammu and Kashmir",
  "Jammu" = "Jammu and Kashmir"
)
## match() returns NA for divisions that need no fix (including NA divisions),
## so only the rows with a known variant spelling are overwritten.
fix_index <- match(df$division, names(state_name_fixes))
needs_fix <- !is.na(fix_index)
df$division[needs_fix] <- unname(state_name_fixes)[fix_index[needs_fix]]

## There are around 800 unique lineages.
## Colours and legend entries are derived per plot from the lineages that are
## actually drawn (see lineage_palette). The earlier approach passed an unnamed
## 100-colour ramp plus an unnamed nationwide label vector to
## scale_fill_manual(); ggplot2 matches unnamed values/labels by position, so
## legends showed the wrong lineage names and the bars used only the first few,
## nearly identical, colours of the ramp.

## Chronologically ordered "<year>-<Mon>" labels from `first_year` up to the
## current year (never ending before 2024, the previously hard-coded last year).
month_levels <- function(first_year = 2019L) {
  last_year <- max(2024L, as.integer(format(Sys.Date(), "%Y")))
  paste0(rep(seq(first_year, last_year), each = 12), "-", month.abb)
}

## Named viridis palette with one evenly spaced colour per distinct variant.
## The names make scale_fill_manual() match colours to variants by name.
lineage_palette <- function(variants) {
  variants <- sort(unique(variants[!is.na(variants)]))
  setNames(viridis_pal(option = "D")(length(variants)), variants)
}

## Plots the monthly lineage composition of one Indian state and saves it as
## an interactive html file in htmlplots and as a static png file in pngplots.
##
## state: state name as it appears in states$name and df$division.
##
## Reads the globals `df` (needs columns division, Month, pangolin_lineage)
## and `states` (needs columns name, code). Returns the png path invisibly.
output <- function(state) {
  ## Lineages above this monthly share (percent) get their own bar segment;
  ## everything else is pooled into "Others".
  min_share <- 5

  ## Subsetting given state's data; rows with a missing division are excluded
  ## explicitly so they cannot show up as all-NA rows.
  state_df <- df[!is.na(df$division) & df$division == state, ]

  ## Share of every lineage within each month, in percent.
  shares <- state_df %>%
    count(Month, pangolin_lineage) %>%
    group_by(Month) %>%
    mutate(percent = round(n / sum(n) * 100, 2)) %>%
    ungroup()

  ## Month becomes an ordered factor *before* pooling, so that months outside
  ## the known range (e.g. "NA-NA" from unparseable dates) are dropped from
  ## every row, and the x axis is chronological rather than alphabetical.
  ## (Previously the factor was lost in rbind(), which takes the column class
  ## from its first argument, a character column.)
  shares <- shares %>%
    mutate(
      Month = factor(Month, levels = month_levels()),
      Variant = ifelse(percent > min_share, pangolin_lineage, "Others")
    ) %>%
    filter(!is.na(Month)) %>%
    count(Month, Variant, wt = percent, name = "Percent")

  p <- ggplot(shares) +
    geom_col(mapping = aes(x = Month, y = Percent, fill = Variant)) +
    scale_fill_manual(values = lineage_palette(shares$Variant)) +
    ggtitle(label = paste("Variant Map for", state)) +
    theme(axis.text.x = element_text(angle = 45, hjust = 1)) +
    xlab("Month") +
    ylab("Percentage")

  state_code <- states[states$name == state, ]$code

  ## Interactive version. The directory part is normalised to an absolute
  ## path before it is handed to saveWidget().
  html_file <- paste0("htmlplots/", state_code, ".html")
  html_file <- file.path(normalizePath(dirname(html_file)), basename(html_file))
  saveWidget(ggplotly(p), file = html_file, selfcontained = FALSE,
             libdir = "plotly.html")

  ## Static version: same plot with a two-column legend and a larger title.
  png_plot <- p +
    guides(fill = guide_legend(ncol = 2)) +
    theme(plot.title = element_text(size = 22))
  png_file <- paste0("pngplots/", state_code, ".png")
  ## Pass the plot explicitly instead of relying on last_plot().
  ggsave(png_file, plot = png_plot, height = 11.28, width = 13.54)

  invisible(png_file)
}

for (s in states$name) {
  output(s)
}