Skip to content
Snippets Groups Projects
HBV_RNAs_count.R 11 KiB
Newer Older
aliarifki's avatar
aliarifki committed
#!/bin/Rscript
aliarifki's avatar
aliarifki committed
# Packages loading
aliarifki's avatar
aliarifki committed
library(ggplot2, quietly = TRUE)
library(tidyr, quietly = TRUE)
library(plyr, quietly = TRUE)
library(dplyr, quietly = TRUE)
library(stringr, quietly = TRUE)
library(RColorBrewer)
library(optparse)

# Load files
option_list = list(
  make_option(c("-s", "--SPvariants"), type="character", default=NULL, 
              help="input identified SP variants table (.csv)", metavar="character"),
  make_option(c("-c", "--classification"), type="character", default=NULL, 
              help="input classification of reads file (.txt)", metavar="character"),
  make_option(c("-b", "--barcode"), type="character", default=NULL, 
              help="input barcode", metavar="character"))
aliarifki's avatar
aliarifki committed
opt_parser = OptionParser(option_list=option_list)
opt = parse_args(opt_parser)

# vectors of species & Palettes Colors:
SPvariants <- c("SP01", "SP02", "SP03", "SP04", "SP05", "SP06", "SP07", "SP08",
                "SP09", "SP10", "SP11", "SP12", "SP13", "SP14", "SP15", "SP16",
                "SP17", "SP18", "SP19", "SP20", "SP21", "SP22")
SPvariants <- factor(SPvariants, levels = SPvariants)
colors_SP1_22 <- c("#FADD4E", "#FBE0C3", "#C8FF94", "#9ED7C3", "#CCEDE3", 
                   "#A9E5F0", "#A9C8E6", "#BBC7DF", "#D8C8EB", "#FFBADC", 
                   "#E2C9B5", "#E2B5AB", "#CAC4C5", "#D7D9DF", "#F9B7BA", 
                   "#99DDF9", "#ABA7D1", "#B26572", "#FB9651", "#4F7E7E", 
                   "#4FBCBA", "#608AD2")
palette_SP1_22 <- data.frame(nom = SPvariants, teinte = colors_SP1_22, 
                             stringsAsFactors = FALSE)

TSS_species <- c("preCore", "pgRNA", "preS1", "preS2/S", "HBx")
TSS_species <- factor(TSS_species, levels = TSS_species)
colors_RNAs_species <- c("#712E80", "#006695", "#3B9746", "#1F4F25", "#F5751A")
palette_TSS <- data.frame(nom = TSS_species, teinte = colors_RNAs_species, 
                          stringsAsFactors = FALSE)

new_SP_candidates <- c("new_comb_pg", "new_comb_HBs", "new_comb_HBx", 
                       "new_comb_UnC", "new_SP", "new_SP_HBs", "Undefined")
new_SP_candidates <- factor(new_SP_candidates, levels = new_SP_candidates)
colors_newSP <- c("#AC0E0D", "#CB674F", "#AD565C", "#923222", "#C78A77", 
                  "#A92322", "#964B34")
palette_new_SP <- data.frame(nom = new_SP_candidates, teinte = colors_newSP, 
                             stringsAsFactors = FALSE)

SPxx <- c("SPvariants")
SPxx <- factor(SPxx, levels = SPxx)
color_SPxx <- c("#33C5FF")
palette_SPxx <- data.frame(nom = SPxx, 
                           teinte = color_SPxx, 
                           stringsAsFactors = FALSE)

all_species_name <- c(TSS_species, SPvariants, new_SP_candidates, SPxx)

palette_complete <- rbind.data.frame(palette_TSS, 
                                     palette_SP1_22, 
                                     palette_new_SP,
                                     palette_SPxx,
                                     stringsAsFactors = FALSE)

# Load Start_positions_count files:
aliarifki's avatar
aliarifki committed
identified_SP <- read.table(file = opt$SPvariants,
aliarifki's avatar
aliarifki committed
                            header = TRUE)

clean_SP <- identified_SP[!duplicated(identified_SP$id),] %>% 
  select(id, mapQ, transcript_strand, JAQ, barcode, promoter, junction, SP_name)

countSP <- clean_SP %>% count(SP_name)
countSP <- dplyr::inner_join(palette_complete, 
                      countSP, 
                      by = c("nom" = "SP_name"))
#print(names(palette_complete))
#print(names(countSP))

countSP$nom <- factor(countSP$nom, levels = all_species_name)
aliarifki's avatar
aliarifki committed
countSP <- dplyr::mutate(countSP,
aliarifki's avatar
aliarifki committed
                  proportion = (as.numeric(n)/sum(as.numeric(n))*100))
#print(countSP)
ggplot(countSP, aes(x = "percent", 
                    y = proportion,
                    fill = nom)) +
  geom_col() +
  coord_polar("y") +
  scale_fill_manual(values = countSP$teinte) +
  labs(fill = "spliced-variants")

ggsave(file = paste0(opt$barcode, "_SP_proportion_piechart.png"),
aliarifki's avatar
aliarifki committed
       scale = 2,
       width = 1920,
       height = 1080,
       units = "px",
       dpi = 300)

ggplot(countSP, aes(x = nom, y = proportion, fill = nom)) +
  geom_col() +
  scale_fill_manual(values = countSP$teinte) + 
  theme(axis.text.x = element_text(angle = 45)) +
  labs(fill = "spliced-variants") +
  xlab(label = "spliced-variants") +
  ylab(label = "percent")

ggsave(file = paste0(opt$barcode, "_SP_proportion.png"),
aliarifki's avatar
aliarifki committed
       scale = 2,
       width = 1920,
       height = 1080,
       units = "px",
       dpi = 300)

# TSS not spliced:
aliarifki's avatar
aliarifki committed
classified_reads <- read.table(file = opt$classification, 
aliarifki's avatar
aliarifki committed
                               header = TRUE)

not_spliced <- classified_reads[!(classified_reads$read_ID %in% clean_SP$id),]
aliarifki's avatar
aliarifki committed
not_spliced <- dplyr::mutate(not_spliced,
aliarifki's avatar
aliarifki committed
                      species = not_spliced$promoter)
#print(not_spliced)
not_spliced <- not_spliced %>% select(read_ID, species)
colnames(not_spliced) <- c("id", "species")

clean_SP_type <- clean_SP %>% select(id, SP_name)
colnames(clean_SP_type) <- c("id", "species")

df_species <- rbind.data.frame(not_spliced, clean_SP_type, 
                               stringsAsFactors = FALSE)
count_species <- df_species %>% count(species)
aliarifki's avatar
aliarifki committed
count_species <- dplyr::mutate(count_species,
aliarifki's avatar
aliarifki committed
                        percent = (as.numeric(n)/sum(as.numeric(n))*100))
#print(count_species)
write.table(df_species, file = paste0(opt$barcode, "_all_reads_identified.csv"), 
aliarifki's avatar
aliarifki committed
            sep = "\t", quote = FALSE, row.names = FALSE)

# Null dataset:
species <- c(TSS_species, SPvariants, new_SP_candidates)

canonical_species <- TSS_species[c(1:3,5)]

null_count <- data.frame(species = species, 
                         n = rep(0, times = length(species)),
                         percent = rep(0, times = length(species)), 
                         stringsAsFactors = FALSE)

null_count <- transform(null_count,
                        n = as.integer(n),
                        percent = as.numeric(percent))

# Join:
count_species <- rbind.data.frame(count_species, 
                                  subset(null_count, 
                                         !(species %in% count_species$species)),
                                  stringsAsFactors = FALSE)

# Merge all SP variants:
count_species_SPxx <- data.frame(species = "SPvariants", 
                                 n = sum(count_species[count_species$species 
                                                       %in% SPvariants,]$n))
count_species_SPxx <- rbind.data.frame(count_species_SPxx,
                                       count_species[count_species$species 
                                                     %in% c(canonical_species, 
                                                            SPvariants), 1:2],
                                       stringsAsFactors = FALSE)
count_species_SPxx <- count_species_SPxx[count_species_SPxx$species %in% 
                                           all_species_name[c(1:3,5,35)],]
aliarifki's avatar
aliarifki committed
count_species_SPxx <- dplyr::mutate(count_species_SPxx,
aliarifki's avatar
aliarifki committed
                             percent=(as.numeric(n)/sum(as.numeric(n))*100))
#print(count_species_SPxx)
# save the tab:
write.csv(count_species_SPxx, file = paste0(opt$barcode, "_count_canonical_species_SPxx.csv"))
aliarifki's avatar
aliarifki committed

# prepare to plot:
count_species_SPxx <- dplyr::inner_join(palette_complete,
                                 count_species_SPxx,
                                 by = c("nom" = "species"))
#names(palette_complete)
#names(count_species_SPxx)
count_species_SPxx$nom <- factor(count_species_SPxx$nom, levels = all_species_name)

# Save:
write.csv(count_species, file = paste0(opt$barcode, "_count_species.csv"))
aliarifki's avatar
aliarifki committed

# RNA species composition all species:
count_species <- dplyr::inner_join(palette_complete, count_species,
                                  by = c("nom" = "species"),
                                  suffix = c("",""))
count_species$nom <- factor(count_species$nom, levels = all_species_name)

ggplot(count_species, 
       aes(x = nom, 
           y = percent,
           fill = nom)) +
  geom_col() +
  scale_fill_manual(values = count_species$teinte) +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(fill = "RNA species & spliced-variants") +
  xlab(label = "RNA species & spliced-variants")

ggsave(file = paste0(opt$barcode, "_count_RNAs_species.png"),
aliarifki's avatar
aliarifki committed
       scale = 2,
       width = 1920,
       height = 1080,
       units = "px",
       dpi = 300)

# Filter RNA species canonical + SPvariants:
count_species_clear <- count_species[count_species$species %in% c(canonical_species, SPvariants),]

# RNA species composition all species:
count_species_clear <- dplyr::inner_join(palette_complete, count_species_clear,
                            by = c("nom" = "nom"),
                            suffix = c("",""))
count_species_clear$nom <- factor(count_species_clear$nom, levels = all_species_name)

ggplot(count_species_clear, 
       aes(x = nom, 
           y = percent,
           fill = nom)) +
  geom_col() +
  scale_fill_manual(values = count_species_clear$teinte) +
  theme(axis.text.x = element_text(angle = 45)) +
  labs(fill = "RNA species & spliced-variants") +
  xlab(label = "RNA species & spliced-variants")

ggsave(file = paste0(opt$barcode, "_count_RNAs_species_clear.png"),
aliarifki's avatar
aliarifki committed
       scale = 2,
       width = 1920,
       height = 1080,
       units = "px",
       dpi = 300)

# SP composition clear:
count_clear <- clean_SP[clean_SP$SP_name %in% SPvariants,] %>% count(SP_name)
aliarifki's avatar
aliarifki committed
count_clear <- dplyr::mutate(count_clear,
aliarifki's avatar
aliarifki committed
                      proportion=(as.numeric(n)/sum(as.numeric(n))*100))
#print(count_clear)
count_clear <- dplyr::inner_join(palette_complete,
                          count_clear, 
                          by = c("nom" = "SP_name"))
#names(count_clear)
count_clear$nom <- factor(count_clear$nom, levels = all_species_name)

if(nrow(count_clear)!=0){
  ggplot(count_clear, aes(x = "percent", 
                          y = proportion,
                          fill = nom)) +
    geom_col() +
    coord_polar("y") +
    scale_fill_manual(values = count_clear$teinte) +
    labs(fill = "spliced-variants")
  
  ggsave(file = paste0(opt$barcode, "_SP_clear_proportion_piechart.png"),
         scale = 2,
         width = 1920,
         height = 1080,
         units = "px",
         dpi = 300)}
aliarifki's avatar
aliarifki committed

ggplot(count_clear, aes(x = nom, 
                        y = proportion,
                        fill = nom)) +
  geom_col() +
  scale_fill_manual(values = count_clear$teinte) + 
  theme(axis.text.x = element_text(angle = 45)) +
  labs(fill = "spliced-variants") +
  xlab(label = "spliced-variants") +
  ylab(label = "percent")

ggsave(file = paste0(opt$barcode, "_SP_clear_proportion.png"),
aliarifki's avatar
aliarifki committed
       scale = 2,
       width = 1920,
       height = 1080,
       units = "px",
       dpi = 300)

# Camembert SPxx:
ggplot(count_species_SPxx, aes(x = "species", 
                               y = percent,
                               fill = nom)) +
  geom_col() +
  coord_polar("y") +
  scale_fill_manual(values = count_species_SPxx$teinte) +
  labs(fill = "TSS") +
  ylab(label = "TSS usage") +
  xlab(label = "percent")

ggsave(file = paste0(opt$barcode, "_count_RNAs_species_piechart.png"),
aliarifki's avatar
aliarifki committed
       scale = 2,
       width = 1920,
       height = 1080,
       units = "px",
       dpi = 300)