From 8561de6881624dc0c536c4f5896fdceb919feeff Mon Sep 17 00:00:00 2001 From: xgrand <xavier.grand@ens-lyon.fr> Date: Fri, 4 Aug 2023 14:51:13 +0200 Subject: [PATCH] Debug r-bolero scripts and Docker --- src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R | 15 ++++++++------- .../r-bolero/1.0/Install_packages.R | 2 +- .../r-bolero/1.0/Junctions_NanoSplicer.R | 14 +++++++++----- .../r-bolero/1.0/Start_positions.R | 6 ++---- 4 files changed, 20 insertions(+), 17 deletions(-) diff --git a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R index e3ef7f8..5d0d29c 100644 --- a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R +++ b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R @@ -2,7 +2,6 @@ # Packages loading library(ggplot2, quietly = TRUE) library(tidyr, quietly = TRUE) -library(plyr, quietly = TRUE) library(dplyr, quietly = TRUE) library(stringr, quietly = TRUE) library(RColorBrewer) @@ -135,7 +134,7 @@ write.table(df_species, file = paste0(opt$barcode, "_all_reads_identified.csv"), # Null dataset: species <- c(TSS_species, SPvariants, new_SP_candidates) -canonical_species <- TSS_species[c(1:3,5)] +# canonical_species <- TSS_species[c(1:3,5)] null_count <- data.frame(species = species, n = rep(0, times = length(species)), @@ -158,16 +157,17 @@ count_species_SPxx <- data.frame(species = "SPvariants", %in% SPvariants,]$n)) count_species_SPxx <- rbind.data.frame(count_species_SPxx, count_species[count_species$species - %in% c(canonical_species, + %in% c(TSS_species, SPvariants), 1:2], stringsAsFactors = FALSE) count_species_SPxx <- count_species_SPxx[count_species_SPxx$species %in% - all_species_name[c(1:3,5,35)],] + all_species_name[c(1:5,35)],] # c(1:3,5,35) count_species_SPxx <- dplyr::mutate(count_species_SPxx, percent=(as.numeric(n)/sum(as.numeric(n))*100)) #print(count_species_SPxx) # save the tab: -write.csv(count_species_SPxx, file = paste0(opt$barcode, "_count_canonical_species_SPxx.csv")) +write.csv(count_species_SPxx, + file = paste0(opt$barcode, "_count_canonical_species_SPxx.csv")) # prepare to plot: count_species_SPxx <- dplyr::inner_join(palette_complete, @@ -175,7 +175,8 @@ count_species_SPxx <- dplyr::inner_join(palette_complete, by = c("nom" = "species")) #names(palette_complete) #names(count_species_SPxx) -count_species_SPxx$nom <- factor(count_species_SPxx$nom, levels = all_species_name) +count_species_SPxx$nom <- factor(count_species_SPxx$nom, + levels = all_species_name) # Save: write.csv(count_species, file = paste0(opt$barcode, "_count_species.csv")) @@ -204,7 +205,7 @@ ggsave(file = paste0(opt$barcode, "_count_RNAs_species.png"), dpi = 300) # Filter RNA species canonical + SPvariants: -count_species_clear <- count_species[count_species$species %in% c(canonical_species, SPvariants),] +count_species_clear <- count_species[count_species$nom %in% c(TSS_species, SPvariants),] # RNA species composition all species: count_species_clear <- dplyr::inner_join(palette_complete, count_species_clear, diff --git a/src/.docker_modules/r-bolero/1.0/Install_packages.R b/src/.docker_modules/r-bolero/1.0/Install_packages.R index 384435c..01ade6f 100644 --- a/src/.docker_modules/r-bolero/1.0/Install_packages.R +++ b/src/.docker_modules/r-bolero/1.0/Install_packages.R @@ -1,3 +1,3 @@ -list.of.packages <- c("ggplot2", "tidyr", "plyr", "dplyr", "tidyverse", "stringr", "optparse", "RColorBrewer", "conflicted", "BiocManager", "resshape2", "R.utils") +list.of.packages <- c("ggplot2", "tidyr", "dplyr", "tidyverse", "stringr", "optparse", "RColorBrewer", "conflicted", "BiocManager", "resshape2", "R.utils") new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] if(length(new.packages)) install.packages(new.packages, dependencies = T) diff --git a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R index 0685a21..3b6a147 100644 --- a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R +++ b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R @@ -5,7 +5,6 @@ ################################################################################ library(ggplot2, quietly = TRUE) library(tidyr, quietly = TRUE) -library(plyr, quietly = TRUE) library(dplyr, quietly = TRUE) library(stringr, quietly = TRUE) library(optparse) @@ -165,10 +164,15 @@ SP_assignation_single <- function(site, promoter) { } } -single_junction <- ddply(single_junction, - .(id), - mutate, - SP_name = SP_assignation_single(junction, promoter)) +#dplyr version: +single_junction <- single_junction %>% dplyr::group_by(id) %>% + dplyr::mutate(SP_name = SP_assignation_single(junction, promoter)) + +#plyr version +# single_junction <- ddply(single_junction, +# .(id), +# mutate, +# SP_name = SP_assignation_single(junction, promoter)) SP_assignation_multiple <- function(read_id, combinaison, promoter) { if (promoter == "pgRNA" | promoter == "preCore") { diff --git a/src/.docker_modules/r-bolero/1.0/Start_positions.R b/src/.docker_modules/r-bolero/1.0/Start_positions.R index fcffb8e..9a4890a 100644 --- a/src/.docker_modules/r-bolero/1.0/Start_positions.R +++ b/src/.docker_modules/r-bolero/1.0/Start_positions.R @@ -187,9 +187,7 @@ formated_start_promoters <- pivot_longer(resultats_start_promoters, names_to = "Barcodes", values_to = "nb_reads") -mycolors <- colorRampPalette(brewer.pal(10, "Paired"))(10) -mycolors5 <- c("#712E80", "#006695", "#3B9746", "#1F4F25", "#F5751A") -mycolors6 <- c("#A6CEE3", "#3362ff", "#33c5ff", "#6A3D9A", "#d60000") +mycolors <- c("#712E80", "#006695", "#3B9746", "#1F4F25", "#F5751A") plot_camembert <- function(barcode, df, tot) { camembert <- ggplot(df[df$Barcodes == barcode,], aes(x = barcode, @@ -197,7 +195,7 @@ plot_camembert <- function(barcode, df, tot) { fill=promoters)) + geom_col() + coord_polar("y") + - scale_fill_manual(values = mycolors5) + + scale_fill_manual(values = mycolors) + labs(title = paste0("#reads = ", tot[1,barcode]), x=element_blank(), y=element_blank()) + theme_light() -- GitLab