diff --git a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R
index af077b27a950e5f7af809c14690819db6c5ea880..9f86d2f5aac28f20f962586804286221857953aa 100644
--- a/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R
+++ b/src/.docker_modules/r-bolero/1.0/HBV_RNAs_count.R
@@ -4,8 +4,13 @@ library(ggplot2, quietly = TRUE)
 library(tidyr, quietly = TRUE)
 library(dplyr, quietly = TRUE)
 library(stringr, quietly = TRUE)
-library(RColorBrewer)
-library(optparse)
+library(RColorBrewer, quietly = TRUE)
+library(optparse, quietly = TRUE)
+library(conflicted, quietly = TRUE)
+
+# Resolve the name conflicts between the dplyr and stats packages
+conflict_prefer("filter", "dplyr")
+conflict_prefer("lag", "dplyr")
 
 # Load files
 option_list = list(
diff --git a/src/.docker_modules/r-bolero/1.0/Install_packages.R b/src/.docker_modules/r-bolero/1.0/Install_packages.R
index 01ade6ff3cada50c7dbbfc1db835287d2397103e..1716eee62b8459a845bc574f181c2f026e36cadd 100644
--- a/src/.docker_modules/r-bolero/1.0/Install_packages.R
+++ b/src/.docker_modules/r-bolero/1.0/Install_packages.R
@@ -1,3 +1,3 @@
-list.of.packages <- c("ggplot2", "tidyr", "dplyr", "tidyverse", "stringr", "optparse", "RColorBrewer", "conflicted", "BiocManager", "resshape2", "R.utils")
+list.of.packages <- c("ggplot2", "tidyr", "dplyr", "tidyverse", "stringr", "optparse", "RColorBrewer", "conflicted", "BiocManager", "reshape2", "R.utils", "future", "future.apply")
 new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])]
 if(length(new.packages)) install.packages(new.packages, dependencies = T)
diff --git a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R
index 0efefc1e77e8d72d214d28d18d6a4ff9c7f05bc6..78824c5fea433a1e555c2ebde6c9664f5c2ab52b 100644
--- a/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R
+++ b/src/.docker_modules/r-bolero/1.0/Junctions_NanoSplicer.R
@@ -1,14 +1,19 @@
 #!/bin/Rscript
-
-################################################################################
-### NEED TO ADD A CASE OF NO SPLICED-VARIANTS ARE IDENTIFIED !!!!!!!!!!!!!!! ###
-### PROBLEM IF THERE IS ONLY MULTIPLE JUNCTIONS CONTAINING READS ###############
-################################################################################
 library(ggplot2, quietly = TRUE)
 library(tidyr, quietly = TRUE)
 library(dplyr, quietly = TRUE)
 library(stringr, quietly = TRUE)
-library(optparse)
+library(optparse, quietly = TRUE)
+library(future, quietly = TRUE)
+library(future.apply, quietly = TRUE)
+library(conflicted, quietly = TRUE)
+
+# Parallel plan: at most 18 workers, never more than the cores available
+plan("multisession", workers = min(18L, availableCores()))
+
+# Resolve the name conflicts between the dplyr and stats packages
+conflict_prefer("filter", "dplyr")
+conflict_prefer("lag", "dplyr")
 
 # Load classification per promoter:
 option_list = list(
@@ -118,8 +123,8 @@ assignation_acceptor <- function(pg_acceptor) {
   return(acceptor_site)
 }
 
-df$donor_site <- sapply(df$pg_donor, assignation_donor)
-df$acceptor_site <- sapply(df$pg_acceptor, assignation_acceptor)
+df$donor_site <- future_sapply(df$pg_donor, assignation_donor)
+df$acceptor_site <- future_sapply(df$pg_acceptor, assignation_acceptor)
 
 df <- dplyr::mutate(df, junction = paste0(donor_site, acceptor_site))
 
@@ -286,19 +291,26 @@ SP_assignation_multiple <- function(read_id, combinaison, promoter) {
   return(SP_name)
 }
 
-tmp <- multiple_junction %>% select(id, junction, promoter)
-df_combinaison <- data.frame(matrix(nrow = 0, ncol = 2))
-colnames(df_combinaison) <- c("id", "SP_name")
+tmp <- multiple_junction %>% select(all_of(c("id", "junction", "promoter")))
 
-for (read_id in list_read_multiple) {
+# Empty result carrying the final column names; it is kept as-is when
+# list_read_multiple is empty, so the merge on "id" below still has its key.
+df_combinaison <- data.frame(id = character(0), SP_name = character(0))
+
+# Build the one-row (id, SP_name) result for a single read_id
+process_read_id <- function(read_id) {
   SP_name_computed <- SP_assignation_multiple(read_id,
                                               tmp[tmp$id == read_id,]$junction,
                                               tmp[tmp$id == read_id,]$promoter[1])
-  res_vector <- data.frame(t(c(read_id, SP_name_computed)))
-  colnames(res_vector) <- colnames(df_combinaison)
-  df_combinaison <- rbind(df_combinaison, res_vector)
+  data.frame(id = read_id, SP_name = SP_name_computed)
+}
+
+# Bind all rows in a single call: no rbind() growth in a loop, no row dropped
+if (nrow(multiple_junction) > 0 && length(list_read_multiple) > 0) {
+  df_combinaison <- do.call(rbind,
+                            lapply(list_read_multiple, process_read_id))
 }
 
 # df_combinaison <- df_combinaison[2:length(df_combinaison$id),]
 multiple_junction <- merge(multiple_junction, df_combinaison, by="id")
 
diff --git a/src/.docker_modules/r-bolero/1.0/Start_positions.R b/src/.docker_modules/r-bolero/1.0/Start_positions.R
index 9a4890a9c672256820a60a027108adfa423d2ddc..79fe46fad26ba8521d1a0746fd008c5e2c42e103 100644
--- a/src/.docker_modules/r-bolero/1.0/Start_positions.R
+++ b/src/.docker_modules/r-bolero/1.0/Start_positions.R
@@ -1,11 +1,16 @@
 #!/bin/Rscript
 # Packages loading
-library(dplyr)
-library(ggplot2)
-library(RColorBrewer)
-library(conflicted)
-library(optparse)
-library(tidyverse)
+library(dplyr, quietly = TRUE)
+library(ggplot2, quietly = TRUE)
+library(RColorBrewer, quietly = TRUE)
+library(conflicted, quietly = TRUE)
+library(optparse, quietly = TRUE)
+library(tidyverse, quietly = TRUE)
+library(future, quietly = TRUE)
+library(future.apply, quietly = TRUE)
+
+# Parallel plan: at most 18 workers, never more than the cores available
+plan("multisession", workers = min(18L, availableCores()))
 
 # Résolution de conflits entre les bibliothèques dplyr et stats
 conflict_prefer("filter", "dplyr")
@@ -16,17 +21,16 @@ option_list = list(
   make_option(c("-i", "--input"), type="character", default=NULL,
               help="input start position file (.txt)", metavar="character"),
   make_option(c("-b", "--barcode"), type="character", default=NULL,
-              help="input barcode", metavar="character")
+              help="input barcode", metavar="character"),
+  make_option(c("-s", "--start-positions"), type = "character",
+              default = "103,117,276,1106,1221,1455,1632,2550,2968",
+              help = "comma-separated list of start positions",
+              metavar = "character")
 )
 
 opt_parser = OptionParser(option_list=option_list)
 opt = parse_args(opt_parser)
 
-#opt = ("/home/alia/scripts/Start_position/Start_positions_counts.txt")
-#list_file <- list.files(path=".",
-#                        pattern="*.txt",
-#                        all.files=FALSE,
-#                        full.names=FALSE)
 file_to_load <- opt$input
 splitted <- strsplit(opt$input, split = "[/]")[[1]]
 filename <- strsplit(splitted[length(splitted)], split = "[.]")[[1]][1]
@@ -107,7 +111,7 @@ classify_reads <- function(read_info) {
 }
 
 colnames(sam_bc01) <- c("read_ID", "start_position", "barcode")
-sam_bc01$promoter <- sapply(sam_bc01$start_position,
+sam_bc01$promoter <- future_sapply(sam_bc01$start_position,
                             classify_reads)
 
 write.table(sam_bc01,
@@ -159,7 +163,7 @@ promoters <- factor(c("preCore", "pgRNA", "preS1", "preS2/S", "HBx"),
                     levels = c("preCore", "pgRNA", "preS1", "preS2/S", "HBx"))
 
 abs_count_reads <- data.frame()
-abs_count_reads <- sapply(list_name_samples,
+abs_count_reads <- future_sapply(list_name_samples,
                           abscount_promoter_reads,
                           df_parsed)
 abs_count_reads <- cbind(c(as.vector(promoters),"total"), abs_count_reads)
@@ -171,7 +175,7 @@ write.table(abs_count_reads,
             sep = "\t",
             row.names = FALSE)
 
-resultats_start_promoters <- lapply(list_name_samples,
+resultats_start_promoters <- future_lapply(list_name_samples,
                                     count_promoter_reads,
                                     df_parsed)
 
@@ -210,4 +214,4 @@ plot_camembert <- function(barcode, df, tot) {
          dpi = 300)
 }
 
-lapply(list_name_samples, plot_camembert, formated_start_promoters, totalCountSample)
+future_lapply(list_name_samples, plot_camembert, formated_start_promoters, totalCountSample)