From ca26a64844c37d8fee1ecd9964e1b924ab8e437b Mon Sep 17 00:00:00 2001 From: aliarifki <aliarifki@outlook.fr> Date: Wed, 14 Jun 2023 10:47:10 +0200 Subject: [PATCH] Ajout des barcodes en option pour les noms de fichiers --- .../r-bolero/1.0/HBV_RNAs_count.R | 24 ++-- .../r-bolero/1.0/Junctions_NanoSplicer.R | 107 +----------------- .../r-bolero/1.0/Start_positions.R | 10 +- 3 files changed, 24 insertions(+), 117 deletions(-) diff --git a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R index 16d67f3..16d8b1d 100644 --- a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R +++ b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R @@ -13,7 +13,9 @@ option_list = list( make_option(c("-s", "--SPvariants"), type="character", default=NULL, help="input identified SP variants table (.csv)", metavar="character"), make_option(c("-c", "--classification"), type="character", default=NULL, - help="input classification of reads file (.txt)", metavar="character")) + help="input classification of reads file (.txt)", metavar="character"), + make_option(c("-b", "--barcode"), type="character", default=NULL, + help="input barcode", metavar="character")) opt_parser = OptionParser(option_list=option_list) opt = parse_args(opt_parser) @@ -85,7 +87,7 @@ ggplot(countSP, aes(x = "percent", scale_fill_manual(values = countSP$teinte) + labs(fill = "spliced-variants") -ggsave(file = "SP_proportion_camembert.png", +ggsave(file = paste0(opt$barcode, "_SP_proportion_piechart.png"), scale = 2, width = 1920, height = 1080, @@ -100,7 +102,7 @@ ggplot(countSP, aes(x = nom, y = proportion, fill = nom)) + xlab(label = "spliced-variants") + ylab(label = "percent") -ggsave(file = "SP_proportion.png", +ggsave(file = paste0(opt$barcode, "_SP_proportion.png"), scale = 2, width = 1920, height = 1080, @@ -127,7 +129,7 @@ count_species <- df_species %>% count(species) count_species <- dplyr::mutate(count_species, percent = (as.numeric(n)/sum(as.numeric(n))*100)) #print(count_species) -write.table(df_species, file = "All_reads_identified.csv", +write.table(df_species, file = paste0(opt$barcode, "_all_reads_identified.csv"), sep = "\t", quote = FALSE, row.names = FALSE) # Null dataset: @@ -165,7 +167,7 @@ count_species_SPxx <- dplyr::mutate(count_species_SPxx, percent=(as.numeric(n)/sum(as.numeric(n))*100)) #print(count_species_SPxx) # save the tab: -write.csv(count_species_SPxx, file = "Count_canonical_species_SPxx.csv") +write.csv(count_species_SPxx, file = paste0(opt$barcode, "_count_canonical_species_SPxx.csv")) # prepare to plot: count_species_SPxx <- dplyr::inner_join(palette_complete, @@ -176,7 +178,7 @@ count_species_SPxx <- dplyr::inner_join(palette_complete, count_species_SPxx$nom <- factor(count_species_SPxx$nom, levels = all_species_name) # Save: -write.csv(count_species, file = "Count_species.csv") +write.csv(count_species, file = paste0(opt$barcode, "_count_species.csv")) # RNA species composition all species: count_species <- dplyr::inner_join(palette_complete, count_species, @@ -194,7 +196,7 @@ ggplot(count_species, labs(fill = "RNA species & spliced-variants") + xlab(label = "RNA species & spliced-variants") -ggsave(file = "Count_RNAs_species.png", +ggsave(file = paste0(opt$barcode, "_count_RNAs_species.png"), scale = 2, width = 1920, height = 1080, @@ -220,7 +222,7 @@ ggplot(count_species_clear, labs(fill = "RNA species & spliced-variants") + xlab(label = "RNA species & spliced-variants") -ggsave(file = "Count_RNAs_species_clear.png", +ggsave(file = paste0(opt$barcode, "_count_RNAs_species_clear.png"), scale = 2, width = 1920, height = 1080, @@ -246,7 +248,7 @@ ggplot(count_clear, aes(x = "percent", scale_fill_manual(values = count_clear$teinte) + labs(fill = "spliced-variants") -ggsave(file = "SP_clear_proportion_camembert.png", +ggsave(file = paste0(opt$barcode, "_SP_clear_proportion_piechart.png"), scale = 2, width = 1920, height = 1080, @@ -263,7 +265,7 @@ ggplot(count_clear, aes(x = nom, xlab(label = "spliced-variants") + ylab(label = "percent") -ggsave(file = "SP_clear_proportion.png", +ggsave(file = paste0(opt$barcode, "_SP_clear_proportion.png"), scale = 2, width = 1920, height = 1080, @@ -281,7 +283,7 @@ ggplot(count_species_SPxx, aes(x = "species", ylab(label = "TSS usage") + xlab(label = "percent") -ggsave(file = "Count_RNAs_species_camembert.png", +ggsave(file = paste0(opt$barcode, "_count_RNAs_species_piechart.png"), scale = 2, width = 1920, height = 1080, diff --git a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R index 5567a37..0685a21 100644 --- a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R +++ b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R @@ -15,7 +15,9 @@ option_list = list( make_option(c("-c", "--classification"), type="character", default="./classification.txt", help="input classification or reads file (.txt)", metavar="character"), make_option(c("-j", "--jwr"), type="character", default=NULL, - help="input nanosplicer results table (.csv)", metavar="character")) + help="input nanosplicer results table (.csv)", metavar="character"), + make_option(c("-b", "--barcode"), type="character", default=NULL, + help="input barcode", metavar="character")) opt_parser = OptionParser(option_list=option_list) opt = parse_args(opt_parser) reads_pos <- read.table(opt$classification, @@ -113,7 +115,7 @@ df$acceptor_site <- sapply(df$pg_acceptor, assignation_acceptor) df <- dplyr::mutate(df, junction = paste0(donor_site, acceptor_site)) -write.table(df, file = "JWR_check_parsed.csv", row.names = FALSE, sep = "\t") +write.table(df, file = paste0(opt$barcode, "_JWR_check_parsed.csv"), row.names = FALSE, sep = "\t") duplicated2 <- function(x){ if (sum(dup <- duplicated(x))==0) @@ -293,107 +295,8 @@ SP_variant_unique <- df_SPvariants %>% select(id, SP_name) SP_variant_unique <- SP_variant_unique[!duplicated(SP_variant_unique$id),] # distinct(SP_variant_unique, id) write.table(df_SPvariants, - "identified_SPvariants.csv", + paste0(opt$barcode, "_identified_SPvariants.csv"), row.names = FALSE, sep = "\t", quote = FALSE) -ggplot(df, aes(x=pg_donor)) + - geom_histogram(aes(y=after_stat(density)),color="darkblue", fill="lightblue") + - geom_density(alpha=.2, fill="lightblue") + - geom_vline(aes(xintercept=median(pg_donor)), - color="blue", linetype="dashed", linewidth=1) + - geom_vline(aes(xintercept=quantile(pg_donor, 0.025)), - linetype="dashed", linewidth=0.25) + - geom_vline(aes(xintercept=quantile(pg_donor, 0.975)), - linetype="dashed", linewidth=0.25) + - geom_vline(aes(xintercept=quantile(pg_donor, 0.01)), - color="green", linetype="dashed", linewidth=0.25) + - geom_vline(aes(xintercept=quantile(pg_donor, 0.99)), - color="green", linetype="dashed", linewidth=0.25) + - geom_vline(aes(xintercept=(median(pg_donor)+sd(pg_donor))), - color="red", linewidth=0.5) + - geom_vline(aes(xintercept=(median(pg_donor)-sd(pg_donor))), - color="red", linewidth=0.5) + - scale_x_continuous(breaks=c(min(df$pg_donor), - quantile(df$pg_donor, 0.025), - quantile(df$pg_donor, 0.005), - median(df$pg_donor)-sd(df$pg_donor), - median(df$pg_donor), - median(df$pg_donor)+sd(df$pg_donor), - quantile(df$pg_donor, 0.975), - quantile(df$pg_donor, 0.995), - max(df$pg_donor)), - label = c(min(df$pg_donor), - floor(quantile(df$pg_donor, 0.025)), - floor(quantile(df$pg_donor, 0.005)), - round(median(df$pg_donor)-sd(df$pg_donor)), - median(df$pg_donor), - round(median(df$pg_donor)+sd(df$pg_donor)), - floor(quantile(df$pg_donor, 0.975))+1, - round(quantile(df$pg_donor, 0.995))+1, - max(df$pg_donor))) + - theme(axis.text.x = element_text(angle = 45)) - -ggsave(filename = "Donor_curve.png", - device = "png", - scale = 1, - width = 1920, - height = 1080, - units = "px", - dpi = 320) - -ggplot(df, aes(x=pg_acceptor)) + - geom_histogram(aes(y=after_stat(density)),color="red", fill="darksalmon") + - geom_density(alpha=.2, fill="darksalmon") + - geom_vline(aes(xintercept=median(pg_acceptor)), - color="red", linetype="dashed", linewidth=1) + - geom_vline(aes(xintercept=quantile(pg_acceptor, 0.025)), - linetype="dashed", linewidth=0.25) + - geom_vline(aes(xintercept=quantile(pg_acceptor, 0.975)), - linetype="dashed", linewidth=0.25) + - geom_vline(aes(xintercept=quantile(pg_acceptor, 0.005)), - color="green", linetype="dashed", linewidth=0.25) + - geom_vline(aes(xintercept=quantile(pg_acceptor, 0.995)), - color="green", linetype="dashed", linewidth=0.25) + - geom_vline(aes(xintercept=(median(pg_acceptor)+sd(pg_acceptor))), - color="blue", linewidth=0.5) + - geom_vline(aes(xintercept=(median(pg_acceptor)-sd(pg_acceptor))), - color="blue", linewidth=0.5) + - scale_x_continuous(breaks=c(min(df$pg_acceptor), - quantile(df$pg_acceptor, 0.025), - quantile(df$pg_acceptor, 0.005), - median(df$pg_acceptor)-sd(df$pg_acceptor), - median(df$pg_acceptor), - median(df$pg_acceptor)+sd(df$pg_acceptor), - quantile(df$pg_acceptor, 0.975), - quantile(df$pg_acceptor, 0.995), - max(df$pg_acceptor)), - label = c(min(df$pg_acceptor), - floor(quantile(df$pg_acceptor, 0.025)), - floor(quantile(df$pg_acceptor, 0.005)), - round(median(df$pg_acceptor)-sd(df$pg_acceptor)), - median(df$pg_acceptor), - round(median(df$pg_acceptor)+sd(df$pg_acceptor)), - floor(quantile(df$pg_acceptor, 0.975))+1, - floor(quantile(df$pg_acceptor, 0.995))+1, - max(df$pg_acceptor))) + - theme(axis.text.x = element_text(angle = 45)) - -ggsave(filename = "Acceptor_curve.png", - device = "png", - scale = 1, - width = 1920, - height = 1080, - units = "px", - dpi = 320) - -# Graphs and tests: - -# sink("test_shapiro.txt") -# print("Normality test: Shapiro-Wilk") -# print("Donor site:") -# print(shapiro.test(df$pg_donor)) -# print("Acceptor site:") -# print(shapiro.test(df$pg_acceptor)) -# sink() \ No newline at end of file diff --git a/src/.docker_modules/r-bolero/1.0/Start_positions.R b/src/.docker_modules/r-bolero/1.0/Start_positions.R index 830c52e..fcffb8e 100644 --- a/src/.docker_modules/r-bolero/1.0/Start_positions.R +++ b/src/.docker_modules/r-bolero/1.0/Start_positions.R @@ -14,7 +14,9 @@ conflict_prefer("lag", "dplyr") # Load Start_positions_count files: option_list = list( make_option(c("-i", "--input"), type="character", default=NULL, - help="input start position file (.txt)", metavar="character") + help="input start position file (.txt)", metavar="character"), + make_option(c("-b", "--barcode"), type="character", default=NULL, + help="input barcode", metavar="character") ) opt_parser = OptionParser(option_list=option_list) @@ -109,7 +111,7 @@ sam_bc01$promoter <- sapply(sam_bc01$start_position, classify_reads) write.table(sam_bc01, - file = "classification_of_reads_per_RNA.txt", + file = paste0(opt$barcode, "_classification_of_reads_per_RNA.txt"), quote = FALSE, sep = "\t", row.names = FALSE) @@ -164,7 +166,7 @@ abs_count_reads <- cbind(c(as.vector(promoters),"total"), abs_count_reads) colnames(abs_count_reads) <- c("promoter", "read_number") write.table(abs_count_reads, - file = "Count_reads_per_promoter.tsv", + file = paste0(opt$barcode, "_count_reads_per_promoter.tsv"), quote = FALSE, sep = "\t", row.names = FALSE) @@ -201,7 +203,7 @@ plot_camembert <- function(barcode, df, tot) { print(camembert) - ggsave(filename = paste0("./Reads_start_promoters_", barcode, "_camembert.png"), + ggsave(filename = paste0("./", opt$barcode, "_reads_start_promoters_piechart.png"), plot = last_plot(), scale = 1, width = 1920, -- GitLab