From 8561de6881624dc0c536c4f5896fdceb919feeff Mon Sep 17 00:00:00 2001
From: xgrand <xavier.grand@ens-lyon.fr>
Date: Fri, 4 Aug 2023 14:51:13 +0200
Subject: [PATCH] Debug r-bolero scripts and Docker

---
 src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R | 15 ++++++++-------
 .../r-bolero/1.0/Install_packages.R               |  2 +-
 .../r-bolero/1.0/Junctions_NanoSplicer.R          | 14 +++++++++-----
 .../r-bolero/1.0/Start_positions.R                |  6 ++----
 4 files changed, 20 insertions(+), 17 deletions(-)

diff --git a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R
index e3ef7f8..5d0d29c 100644
--- a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R
+++ b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R
@@ -2,7 +2,6 @@
 # Packages loading
 library(ggplot2, quietly = TRUE)
 library(tidyr, quietly = TRUE)
-library(plyr, quietly = TRUE)
 library(dplyr, quietly = TRUE)
 library(stringr, quietly = TRUE)
 library(RColorBrewer)
@@ -135,7 +134,7 @@ write.table(df_species, file = paste0(opt$barcode, "_all_reads_identified.csv"),
 # Null dataset:
 species <- c(TSS_species, SPvariants, new_SP_candidates)
 
-canonical_species <- TSS_species[c(1:3,5)]
+# canonical_species <- TSS_species[c(1:3,5)]
 
 null_count <- data.frame(species = species, 
                          n = rep(0, times = length(species)),
@@ -158,16 +157,17 @@ count_species_SPxx <- data.frame(species = "SPvariants",
                                                        %in% SPvariants,]$n))
 count_species_SPxx <- rbind.data.frame(count_species_SPxx,
                                        count_species[count_species$species 
-                                                     %in% c(canonical_species, 
+                                                     %in% c(TSS_species, 
                                                             SPvariants), 1:2],
                                        stringsAsFactors = FALSE)
 count_species_SPxx <- count_species_SPxx[count_species_SPxx$species %in% 
-                                           all_species_name[c(1:3,5,35)],]
+                                           all_species_name[c(1:5,35)],] # c(1:3,5,35)
 count_species_SPxx <- dplyr::mutate(count_species_SPxx,
                              percent=(as.numeric(n)/sum(as.numeric(n))*100))
 #print(count_species_SPxx)
 # save the tab:
-write.csv(count_species_SPxx, file = paste0(opt$barcode, "_count_canonical_species_SPxx.csv"))
+write.csv(count_species_SPxx, 
+          file = paste0(opt$barcode, "_count_canonical_species_SPxx.csv"))
 
 # prepare to plot:
 count_species_SPxx <- dplyr::inner_join(palette_complete,
@@ -175,7 +175,8 @@ count_species_SPxx <- dplyr::inner_join(palette_complete,
                                  by = c("nom" = "species"))
 #names(palette_complete)
 #names(count_species_SPxx)
-count_species_SPxx$nom <- factor(count_species_SPxx$nom, levels = all_species_name)
+count_species_SPxx$nom <- factor(count_species_SPxx$nom, 
+                                 levels = all_species_name)
 
 # Save:
 write.csv(count_species, file = paste0(opt$barcode, "_count_species.csv"))
@@ -204,7 +205,7 @@ ggsave(file = paste0(opt$barcode, "_count_RNAs_species.png"),
        dpi = 300)
 
 # Filter RNA species canonical + SPvariants:
-count_species_clear <- count_species[count_species$species %in% c(canonical_species, SPvariants),]
+count_species_clear <- count_species[count_species$nom %in% c(TSS_species, SPvariants),]
 
 # RNA species composition all species:
 count_species_clear <- dplyr::inner_join(palette_complete, count_species_clear,
diff --git a/src/.docker_modules/r-bolero/1.0/Install_packages.R b/src/.docker_modules/r-bolero/1.0/Install_packages.R
index 384435c..01ade6f 100644
--- a/src/.docker_modules/r-bolero/1.0/Install_packages.R
+++ b/src/.docker_modules/r-bolero/1.0/Install_packages.R
@@ -1,3 +1,3 @@
-list.of.packages <- c("ggplot2", "tidyr", "plyr", "dplyr", "tidyverse", "stringr", "optparse", "RColorBrewer", "conflicted", "BiocManager", "resshape2", "R.utils")
+list.of.packages <- c("ggplot2", "tidyr", "dplyr", "tidyverse", "stringr", "optparse", "RColorBrewer", "conflicted", "BiocManager", "reshape2", "R.utils")
 new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
 if(length(new.packages)) install.packages(new.packages, dependencies = T)
diff --git a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R
index 0685a21..3b6a147 100644
--- a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R
+++ b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R
@@ -5,7 +5,6 @@
 ################################################################################
 library(ggplot2, quietly = TRUE)
 library(tidyr, quietly = TRUE)
-library(plyr, quietly = TRUE)
 library(dplyr, quietly = TRUE)
 library(stringr, quietly = TRUE)
 library(optparse)
@@ -165,10 +164,15 @@ SP_assignation_single <- function(site, promoter) {
   }
 }
 
-single_junction <- ddply(single_junction,
-                         .(id), 
-                         mutate,
-                         SP_name = SP_assignation_single(junction, promoter))
+# dplyr version (replaces the former plyr::ddply call):
+single_junction <- single_junction %>% dplyr::group_by(id) %>%
+  dplyr::mutate(SP_name = SP_assignation_single(junction, promoter))
+
+# Previous plyr version, kept for reference only (plyr is no longer loaded):
+# single_junction <- ddply(single_junction,
+#                          .(id), 
+#                          mutate,
+#                          SP_name = SP_assignation_single(junction, promoter))
 
 SP_assignation_multiple <- function(read_id, combinaison, promoter) {
   if (promoter == "pgRNA" | promoter == "preCore") {
diff --git a/src/.docker_modules/r-bolero/1.0/Start_positions.R b/src/.docker_modules/r-bolero/1.0/Start_positions.R
index fcffb8e..9a4890a 100644
--- a/src/.docker_modules/r-bolero/1.0/Start_positions.R
+++ b/src/.docker_modules/r-bolero/1.0/Start_positions.R
@@ -187,9 +187,7 @@ formated_start_promoters <- pivot_longer(resultats_start_promoters,
                                          names_to = "Barcodes", 
                                          values_to = "nb_reads")
 
-mycolors <- colorRampPalette(brewer.pal(10, "Paired"))(10)
-mycolors5 <- c("#712E80", "#006695", "#3B9746", "#1F4F25", "#F5751A")
-mycolors6 <- c("#A6CEE3", "#3362ff", "#33c5ff", "#6A3D9A", "#d60000")
+mycolors <- c("#712E80", "#006695", "#3B9746", "#1F4F25", "#F5751A")
 
 plot_camembert <- function(barcode, df, tot) {
   camembert <- ggplot(df[df$Barcodes == barcode,], aes(x = barcode, 
@@ -197,7 +195,7 @@ plot_camembert <- function(barcode, df, tot) {
                                                        fill=promoters)) +
     geom_col() +
     coord_polar("y") +
-    scale_fill_manual(values = mycolors5) +
+    scale_fill_manual(values = mycolors) +
     labs(title = paste0("#reads = ", tot[1,barcode]), x=element_blank(), y=element_blank()) +
     theme_light()
   
-- 
GitLab