diff --git a/src/.docker_modules/r-docher-test/1.0/Dockerfile b/src/.docker_modules/r-docher-test/1.0/Dockerfile deleted file mode 100644 index 9c3fa5cf773c6b44ed8f61a56acc5924df3edfcd..0000000000000000000000000000000000000000 --- a/src/.docker_modules/r-docher-test/1.0/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -FROM rocker/r-base:4.2.3 - -## copy Rscript files -COPY ./*.R . - -RUN Rscript install_pkgs.R - -# command to run on container start -CMD [ "bash" ] \ No newline at end of file diff --git a/src/.docker_modules/r-docher-test/1.0/install_pkgs.R b/src/.docker_modules/r-docher-test/1.0/install_pkgs.R deleted file mode 100644 index 20def570d4eea82c9488e92f888efacef4a94f6b..0000000000000000000000000000000000000000 --- a/src/.docker_modules/r-docher-test/1.0/install_pkgs.R +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/Rscript -# Packages installation: -list.of.packages <- c("BiocManager", "ggplot2", "dplyr", "reshape2", - "RColorBrewer", "R.utils") -new.packages <- list.of.packages[!(list.of.packages %in% installed.packages()[,"Package"])] -if(length(new.packages)) install.packages(new.packages, dependencies = T) \ No newline at end of file diff --git a/src/.docker_modules/r-docher-test/1.0/start_positions_individuals_2.R b/src/.docker_modules/r-docher-test/1.0/start_positions_individuals_2.R deleted file mode 100755 index 0ba0f18208c4c186d152aad9bb2add79a41ace12..0000000000000000000000000000000000000000 --- a/src/.docker_modules/r-docher-test/1.0/start_positions_individuals_2.R +++ /dev/null @@ -1,202 +0,0 @@ -#!/bin/Rscript -library(dplyr) -library(ggplot2) -library(tidyverse) -library(RColorBrewer) -library(conflicted) -#résolution de conflits entre les bibliothèques dplyr et stats -conflict_prefer("filter", "dplyr") -conflict_prefer("lag", "dplyr") - -# Load Start_positions_count files: - -list_file <- list.files(path=".", - pattern="*.txt", - all.files=FALSE, - full.names=FALSE) -file_to_load <- paste0("./", list_file[1]) -filename <- strsplit(list_file[1], split = "[.]")[[1]][1] - -sam_bc01 <- read.table(file_to_load, header = F) -sam_bc01[3] <- rep(filename, length(sam_bc01[,1])) - -# Function to parse and arrange data: - -parsingData <- function(df) { - binsize <- 10 - pos <- as.data.frame(table(df[,2])) - colnames(pos)[1] <- "Start" - - Start <- as.data.frame(as.factor(seq(0, 3300))) - colnames(Start)[1] = "Start" - - tmp <- dplyr::left_join(Start, pos) - tmp[is.na(tmp)] <- 0 - - tmp$Start <- as.numeric(tmp$Start) - - df2 <- as_tibble(tmp) %>% - mutate(bin = round(Start/binsize)*binsize) %>% - group_by(bin) %>% - summarize(nb_reads = sum(Freq, na.rm = T)) - df2[is.na(df2)] <- 0 - df2[3] <- rep(df[1,3], length(df2$bin)) - colnames(df2) <- c("Start_position", "nb_reads", "Barcode") - df2 -} - -df_parsed <- parsingData(sam_bc01) - -ggplot(df_parsed, aes(Start_position, nb_reads)) + - geom_area(alpha = 0.5, fill = "blue") + - scale_y_sqrt() + - facet_wrap(facets = vars(df_parsed$Barcode)) + - theme_light()+ - scale_x_continuous(breaks = c(0, 127, 1114, 1490, 2554, 2732, 2907, 3421), - label = c("1692", "1819", "2806", "EcoRI", "1065", - "1243", "1418", "1932")) + - theme(axis.text.x = element_text(angle = 45) - ) - -ggsave(paste0(filename,".jpg"), - plot = last_plot(), - scale = 2, - width = 1920, - height = 1080, - units = "px", - dpi = 300, -) - -# Classify reads based on start-position: - -# Separate preCore & pg: -classify_reads <- function(read_info) { - if (read_info <= 103) { - promoter <- "preCore" - } - else if (read_info >= 117 & - read_info <= 276) { - promoter <- "pgRNA" - } - else if (read_info >= 1106 & - read_info <= 1221 ) { - promoter <- "preS1" - } - else if (read_info >= 1455 & - read_info <= 1632 ) { - promoter <- "preS2/S" - } - else if (read_info >= 2550 & - read_info <= 2968 ) { - promoter <- "HBx" - } - else promoter <- "Undefined" -} - -colnames(sam_bc01) <- c("read_ID", "start_position", "barcode") -sam_bc01$promoter <- sapply(sam_bc01$start_position, - classify_reads) - -write.table(sam_bc01, - file = "classification_of_reads_per_RNA.txt", - quote = FALSE, - sep = "\t", - row.names = FALSE) - -# Compute Reads number per promoters: -list_name_samples <- list(filename) - -count_promoter_reads <- function(barcode, df) { - tmpdf <- as.data.frame(df) - tmpdf <- tmpdf[tmpdf$Barcode == barcode,] - preCore <- sum(tmpdf$nb_reads[tmpdf$Start_position <= 103]) - pgRNA <- sum(tmpdf$nb_reads[tmpdf$Start_position >= 117 & - tmpdf$Start_position <= 276]) - preS1 <- sum(tmpdf$nb_reads[tmpdf$Start_position >= 1106 & - tmpdf$Start_position <= 1221]) - preS2S <- sum(tmpdf$nb_reads[tmpdf$Start_position >= 1455 & - tmpdf$Start_position <= 1632]) - HBx <- sum(tmpdf$nb_reads[tmpdf$Start_position >= 2550 & - tmpdf$Start_position <= 2968]) - total <- sum(preCore, pgRNA, preS1, preS2S, HBx) - res <- c(preCore/total*100, pgRNA/total*100, preS1/total*100, - preS2S/total*100, HBx/total*100, total) - return(res) -} - -abscount_promoter_reads <- function(barcode, df) { - tmpdf <- as.data.frame(df) - tmpdf <- tmpdf[tmpdf$Barcode == barcode,] - preCore <- sum(tmpdf$nb_reads[tmpdf$Start_position <= 103]) - pgRNA <- sum(tmpdf$nb_reads[tmpdf$Start_position >= 117 & - tmpdf$Start_position <= 276]) - preS1 <- sum(tmpdf$nb_reads[tmpdf$Start_position >= 1106 & - tmpdf$Start_position <= 1221]) - preS2S <- sum(tmpdf$nb_reads[tmpdf$Start_position >= 1455 & - tmpdf$Start_position <= 1632]) - HBx <- sum(tmpdf$nb_reads[tmpdf$Start_position >= 2550 & - tmpdf$Start_position <= 2968]) - total <- sum(preCore, pgRNA, preS1, preS2S, HBx) - res <- c(preCore, pgRNA, preS1, preS2S, - HBx, total) - return(res) -} - -promoters <- factor(c("preCore", "pgRNA", "preS1", "preS2/S", "HBx"), - levels = c("preCore", "pgRNA", "preS1", "preS2/S", "HBx")) - -abs_count_reads <- data.frame() -abs_count_reads <- sapply(list_name_samples, - abscount_promoter_reads, - df_parsed) -abs_count_reads <- cbind(c(as.vector(promoters),"total"), abs_count_reads) -colnames(abs_count_reads) <- c("promoter", "read_number") - -write.table(abs_count_reads, - file = "Count_reads_per_promoter.tsv", - quote = FALSE, - sep = "\t", - row.names = FALSE) - -resultats_start_promoters <- lapply(list_name_samples, - count_promoter_reads, - df_parsed) - -resultats_start_promoters <- as.data.frame(do.call(cbind, - resultats_start_promoters)) -totalCountSample <- as.data.frame(resultats_start_promoters[6,]) -colnames(totalCountSample) <- c(filename) -resultats_start_promoters <- as.data.frame(resultats_start_promoters[1:5,]) -colnames(resultats_start_promoters) <- as.vector(list_name_samples) -resultats_start_promoters <- cbind(promoters, resultats_start_promoters) -formated_start_promoters <- pivot_longer(resultats_start_promoters, - cols = c(filename), - names_to = "Barcodes", - values_to = "nb_reads") - -mycolors <- colorRampPalette(brewer.pal(10, "Paired"))(10) -mycolors5 <- c("#712E80", "#006695", "#3B9746", "#1F4F25", "#F5751A") -mycolors6 <- c("#A6CEE3", "#3362ff", "#33c5ff", "#6A3D9A", "#d60000") - -plot_camembert <- function(barcode, df, tot) { - camembert <- ggplot(df[df$Barcodes == barcode,], aes(x = barcode, - y = nb_reads, - fill=promoters)) + - geom_col() + - coord_polar("y") + - scale_fill_manual(values = mycolors5) + - labs(title = paste0("#reads = ", tot[1,barcode]), x=element_blank(), y=element_blank()) + - theme_light() - - print(camembert) - - ggsave(filename = paste0("./Reads_start_promoters_", barcode, "_camembert.jpg"), - plot = last_plot(), - scale = 1, - width = 1920, - height = 1080, - units = "px", - dpi = 300) -} - -lapply(list_name_samples, plot_camembert, formated_start_promoters, totalCountSample)