Skip to content
Snippets Groups Projects
Commit 077d1693 authored by xgrand's avatar xgrand
Browse files

remove r-docker-test docker_module

parent d7067099
No related branches found
No related tags found
No related merge requests found
# Base image: rocker/r-base at tag 4.2.3.
FROM rocker/r-base:4.2.3
# Copy every .R script from the build context into the image's working directory.
COPY ./*.R .
# Run install_pkgs.R with Rscript while the image is being built.
RUN Rscript install_pkgs.R
# Command to run on container start.
CMD [ "bash" ]
\ No newline at end of file
#!/bin/Rscript
# Packages installation:
# Install the packages listed below unless they are already present, then
# verify that every one of them is available. install.packages() only emits a
# warning when a package fails to install, so without the final check a broken
# installation would go unnoticed; stop() makes Rscript exit with an error.
list.of.packages <- c("BiocManager", "ggplot2", "dplyr", "reshape2",
                      "RColorBrewer", "R.utils")
new.packages <- setdiff(list.of.packages,
                        installed.packages()[, "Package"])
if (length(new.packages) > 0) {
  install.packages(new.packages, dependencies = TRUE)
}
still.missing <- setdiff(list.of.packages,
                         installed.packages()[, "Package"])
if (length(still.missing) > 0) {
  stop("Failed to install package(s): ",
       paste(still.missing, collapse = ", "),
       call. = FALSE)
}
\ No newline at end of file
#!/bin/Rscript
library(dplyr)
library(ggplot2)
library(tidyverse)
library(RColorBrewer)
library(conflicted)
# Resolve name conflicts between the dplyr and stats packages
conflict_prefer("filter", "dplyr")
conflict_prefer("lag", "dplyr")
# Load Start_positions_count files:
# list.files() expects a regular expression, not a shell glob, so the
# extension is anchored explicitly to select only names ending in ".txt".
list_file <- list.files(path = ".",
                        pattern = "\\.txt$",
                        all.files = FALSE,
                        full.names = FALSE)
# Fail with a clear message instead of trying to read "./NA".
if (length(list_file) == 0) {
  stop("No .txt input file found in the working directory.", call. = FALSE)
}
# Only the first matching file is processed.
file_to_load <- paste0("./", list_file[1])
# Sample name: the part of the file name before its first dot.
filename <- strsplit(list_file[1], split = "[.]")[[1]][1]
sam_bc01 <- read.table(file_to_load, header = FALSE)
# Store the sample name in the third column of every row.
sam_bc01[3] <- rep(filename, nrow(sam_bc01))
# Function to parse and arrange data:
# Count reads per start position, then aggregate the counts into bins.
#
# Args:
#   df: data frame whose 2nd column holds read start positions and whose 3rd
#       column holds the sample label (the value of the first row is used).
#   binsize: bin width; each position is rounded to the nearest multiple.
#   max_position: positions 0..max_position are tabulated; start positions
#       outside that range are not counted.
# Returns:
#   A tibble with columns Start_position (bin), nb_reads and Barcode.
parsingData <- function(df, binsize = 10, max_position = 3300) {
  # Keep positions as character strings: converting a factor with
  # as.numeric() would return level codes (position + 1), not positions.
  pos <- as.data.frame(table(df[, 2]), stringsAsFactors = FALSE)
  colnames(pos)[1] <- "Start"
  Start <- data.frame(Start = as.character(seq(0, max_position)),
                      stringsAsFactors = FALSE)
  tmp <- dplyr::left_join(Start, pos, by = "Start")
  # Positions without any read get a count of 0.
  tmp$Freq[is.na(tmp$Freq)] <- 0
  tmp$Start <- as.numeric(tmp$Start)
  df2 <- as_tibble(tmp) %>%
    mutate(bin = round(Start / binsize) * binsize) %>%
    group_by(bin) %>%
    summarize(nb_reads = sum(Freq, na.rm = TRUE))
  # Label every bin with the sample name taken from the input's first row.
  df2[3] <- rep(df[1, 3], nrow(df2))
  colnames(df2) <- c("Start_position", "nb_reads", "Barcode")
  df2
}
# Bin the start positions of the loaded sample and plot the read counts
# along the x axis (square-root y scale), one facet per barcode.
df_parsed <- parsingData(sam_bc01)
ggplot(df_parsed, aes(Start_position, nb_reads)) +
  geom_area(alpha = 0.5, fill = "blue") +
  scale_y_sqrt() +
  # Refer to the column by name so the facet variable comes from the plot data.
  facet_wrap(facets = vars(Barcode)) +
  theme_light() +
  scale_x_continuous(breaks = c(0, 127, 1114, 1490, 2554, 2732, 2907, 3421),
                     labels = c("1692", "1819", "2806", "EcoRI", "1065",
                                "1243", "1418", "1932")) +
  theme(axis.text.x = element_text(angle = 45))
# Save the plot built above as <sample name>.jpg.
ggsave(paste0(filename, ".jpg"),
       plot = last_plot(),
       scale = 2,
       width = 1920,
       height = 1080,
       units = "px",
       dpi = 300)
# Classify reads based on start-position:
# Separate preCore & pg:
#
# Args:
#   read_info: numeric vector (or single value) of read start positions.
# Returns:
#   Character vector of the same length giving, for each position, the
#   promoter whose window contains it: "preCore" (<= 103), "pgRNA" (117-276),
#   "preS1" (1106-1221), "preS2/S" (1455-1632), "HBx" (2550-2968), or
#   "Undefined" for positions outside every window and for NA.
classify_reads <- function(read_info) {
  promoter <- rep("Undefined", length(read_info))
  # which() drops NA comparisons, so missing positions stay "Undefined".
  promoter[which(read_info <= 103)] <- "preCore"
  promoter[which(read_info >= 117 & read_info <= 276)] <- "pgRNA"
  promoter[which(read_info >= 1106 & read_info <= 1221)] <- "preS1"
  promoter[which(read_info >= 1455 & read_info <= 1632)] <- "preS2/S"
  promoter[which(read_info >= 2550 & read_info <= 2968)] <- "HBx"
  promoter
}
# Name the columns, label every read with its promoter class and export the
# table as a tab-separated file.
colnames(sam_bc01) <- c("read_ID", "start_position", "barcode")
# vapply() always yields a character vector, even for a table without rows
# (sapply() would return an empty list in that case).
sam_bc01$promoter <- vapply(sam_bc01$start_position,
                            classify_reads,
                            character(1),
                            USE.NAMES = FALSE)
write.table(sam_bc01,
            file = "classification_of_reads_per_RNA.txt",
            quote = FALSE,
            sep = "\t",
            row.names = FALSE)
# Compute Reads number per promoters:
# Samples to process: a one-element list holding the sample name derived from
# the input file name.
list_name_samples <- list(filename)
# Sum the binned read counts that fall inside each promoter window for one
# barcode. Shared by count_promoter_reads() and abscount_promoter_reads().
#
# Args:
#   barcode: value matched against the Barcode column of `df`.
#   df: table with columns Start_position, nb_reads and Barcode.
# Returns:
#   Unnamed numeric vector of length 5: preCore, pgRNA, preS1, preS2/S, HBx.
.promoter_read_counts <- function(barcode, df) {
  tmpdf <- as.data.frame(df)
  tmpdf <- tmpdf[tmpdf$Barcode == barcode, ]
  # Window bounds (inclusive), in the order preCore, pgRNA, preS1, preS2/S,
  # HBx. preCore has no lower bound.
  lower <- c(-Inf, 117, 1106, 1455, 2550)
  upper <- c(103, 276, 1221, 1632, 2968)
  vapply(seq_along(lower), function(i) {
    in_window <- tmpdf$Start_position >= lower[i] &
      tmpdf$Start_position <= upper[i]
    as.numeric(sum(tmpdf$nb_reads[in_window]))
  }, numeric(1))
}

# Percentage of reads per promoter for one barcode.
#
# Returns:
#   Unnamed numeric vector of length 6: the percentage of preCore, pgRNA,
#   preS1, preS2/S and HBx reads, followed by the total read count over the
#   five windows.
count_promoter_reads <- function(barcode, df) {
  counts <- .promoter_read_counts(barcode, df)
  total <- sum(counts)
  # With no read in any window the ratio would be 0/0 (NaN); report 0 instead.
  percentages <- if (isTRUE(total == 0)) {
    rep(0, length(counts))
  } else {
    counts / total * 100
  }
  c(percentages, total)
}

# Absolute number of reads per promoter for one barcode.
#
# Returns:
#   Unnamed numeric vector of length 6: the read count of preCore, pgRNA,
#   preS1, preS2/S and HBx, followed by their total.
abscount_promoter_reads <- function(barcode, df) {
  counts <- .promoter_read_counts(barcode, df)
  c(counts, sum(counts))
}
# Promoter classes; the explicit levels fix their order.
promoters <- factor(c("preCore", "pgRNA", "preS1", "preS2/S", "HBx"),
                    levels = c("preCore", "pgRNA", "preS1", "preS2/S", "HBx"))
# Absolute read counts per promoter plus the total: one column per sample,
# six rows. vapply() enforces that shape.
abs_count_reads <- vapply(list_name_samples,
                          abscount_promoter_reads,
                          numeric(6),
                          df_parsed)
# Prepend the row labels and export as a tab-separated file.
abs_count_reads <- cbind(c(as.vector(promoters), "total"), abs_count_reads)
colnames(abs_count_reads) <- c("promoter", "read_number")
write.table(abs_count_reads,
            file = "Count_reads_per_promoter.tsv",
            quote = FALSE,
            sep = "\t",
            row.names = FALSE)
# Percentage of reads per promoter: one column per sample, rows 1-5 are the
# promoter percentages and row 6 is the total read count.
resultats_start_promoters <- lapply(list_name_samples,
                                    count_promoter_reads,
                                    df_parsed)
resultats_start_promoters <- as.data.frame(do.call(cbind,
                                                   resultats_start_promoters))
# drop = FALSE keeps a data frame even when there is a single sample column.
totalCountSample <- resultats_start_promoters[6, , drop = FALSE]
colnames(totalCountSample) <- c(filename)
resultats_start_promoters <- resultats_start_promoters[1:5, , drop = FALSE]
colnames(resultats_start_promoters) <- unlist(list_name_samples)
resultats_start_promoters <- cbind(promoters, resultats_start_promoters)
# Long format: one row per (promoter, sample) pair. all_of() selects the
# column named by the external character vector.
formated_start_promoters <- pivot_longer(resultats_start_promoters,
                                         cols = all_of(filename),
                                         names_to = "Barcodes",
                                         values_to = "nb_reads")
# Colour palettes.
# Ten colours interpolated from the 10-colour RColorBrewer "Paired" palette.
mycolors <- colorRampPalette(brewer.pal(10, "Paired"))(10)
# Five colours used by plot_camembert() as manual fill values.
mycolors5 <- c("#712E80", "#006695", "#3B9746", "#1F4F25", "#F5751A")
# Another five-colour palette (not referenced in the visible part of the script).
mycolors6 <- c("#A6CEE3", "#3362ff", "#33c5ff", "#6A3D9A", "#d60000")
# Draw and save a pie chart ("camembert") of the reads per promoter for one
# sample.
#
# Args:
#   barcode: sample name; selects the rows of `df` through its Barcodes column
#       and the column of `tot` that holds the total read count.
#   df: long-format table with columns promoters, Barcodes and nb_reads.
#   tot: table of total read counts with one column per sample; row 1 is used.
# Side effects:
#   Prints the plot and writes
#   ./Reads_start_promoters_<barcode>_camembert.jpg.
plot_camembert <- function(barcode, df, tot) {
  camembert <- ggplot(df[df$Barcodes == barcode, ],
                      aes(x = barcode, y = nb_reads, fill = promoters)) +
    geom_col() +
    coord_polar("y") +
    scale_fill_manual(values = mycolors5) +
    # NULL removes an axis title in labs(); element_blank() is a theme element.
    labs(title = paste0("#reads = ", tot[1, barcode]), x = NULL, y = NULL) +
    theme_light()
  print(camembert)
  # Save this plot object explicitly instead of relying on last_plot().
  ggsave(filename = paste0("./Reads_start_promoters_", barcode,
                           "_camembert.jpg"),
         plot = camembert,
         scale = 1,
         width = 1920,
         height = 1080,
         units = "px",
         dpi = 300)
}
# Draw and save one pie chart per sample, passing the long-format percentages
# and the per-sample totals to plot_camembert().
lapply(list_name_samples, plot_camembert, formated_start_promoters, totalCountSample)
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment