Skip to content
Snippets Groups Projects
Commit 62d7c8ff authored by nfontrod's avatar nfontrod
Browse files

src/06_compute_tdd_index.R: create barplot of number of peaks in relation to tdd + tdd correlation

parent fb7429a7
Branches
No related tags found
No related merge requests found
...@@ -2,6 +2,7 @@ ...@@ -2,6 +2,7 @@
# The goal of this script is to compute TDD index and TID index of every genes # The goal of this script is to compute TDD index and TID index of every genes
library(MASS)
library(tidyverse) library(tidyverse)
library(genefilter) library(genefilter)
library(gridExtra) library(gridExtra)
...@@ -13,13 +14,19 @@ library(emmeans) ...@@ -13,13 +14,19 @@ library(emmeans)
#' @param treatment The treatment for which we want to compute the TDD #' @param treatment The treatment for which we want to compute the TDD
#' @param mean_gene_count The minimum mean count a gene must have to compute #' @param mean_gene_count The minimum mean count a gene must have to compute
#' its TDDindex #' its TDDindex
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @return The count tablme with the tdd for each replicate and mean TDD and sd #' @return The count tablme with the tdd for each replicate and mean TDD and sd
#' @import tidyverse #' @import tidyverse
#' @import genefilter #' @import genefilter
compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10) { compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10, ercc = F) {
ercc_name <- ""
if (ercc) {
ercc_name <- "_ercc"
}
norm_df <- read_tsv( norm_df <- read_tsv(
paste0( paste0(
"./results/deseq2_normalisation/", treatment, "./results/deseq2_normalisation/", treatment, ercc_name,
"_norm_stable_gene.txt" "_norm_stable_gene.txt"
) )
) )
...@@ -64,7 +71,6 @@ compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10) { ...@@ -64,7 +71,6 @@ compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10) {
} }
#' Create a density figure of TDD index for a given replicate #' Create a density figure of TDD index for a given replicate
#' #'
#' @param rep The replicate name #' @param rep The replicate name
...@@ -97,7 +103,7 @@ create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") { ...@@ -97,7 +103,7 @@ create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") {
#' @import gridExtra #' @import gridExtra
create_density_figs <- function(tdd_tibble, treatment = "BRAF", create_density_figs <- function(tdd_tibble, treatment = "BRAF",
output_folder = "./results/TDD_analysis") { output_folder = "./results/TDD_analysis") {
dir.create(output_folder) dir.create(output_folder, showWarnings = F)
rep <- tdd_tibble %>% rep <- tdd_tibble %>%
select(-gene) %>% select(-gene) %>%
colnames() %>% colnames() %>%
...@@ -128,9 +134,15 @@ create_density_figs <- function(tdd_tibble, treatment = "BRAF", ...@@ -128,9 +134,15 @@ create_density_figs <- function(tdd_tibble, treatment = "BRAF",
#' @param treatment The treatment of samples for which we want to compute #' @param treatment The treatment of samples for which we want to compute
#' the TDD #' the TDD
#' @param output_folder folder where the TDD table and figure will be created #' @param output_folder folder where the TDD table and figure will be created
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @import tidyverse #' @import tidyverse
get_tdd_table <- function(treatment, output_folder = "./results/TDD_analysis") { get_tdd_table <- function(treatment,
tdd_table <- compute_tdd(treatment) mean_gene_count = 10,
output_folder = "./results/TDD_analysis",
ercc = F) {
tdd_table <- compute_tdd(treatment, mean_gene_count, ercc)
create_density_figs(tdd_table, treatment, output_folder) create_density_figs(tdd_table, treatment, output_folder)
write_tsv( write_tsv(
tdd_table, tdd_table,
...@@ -147,18 +159,30 @@ get_tdd_table <- function(treatment, output_folder = "./results/TDD_analysis") { ...@@ -147,18 +159,30 @@ get_tdd_table <- function(treatment, output_folder = "./results/TDD_analysis") {
#' TDD table #' TDD table
#' #'
#' @param output_folder Folder where the figures and TDD tables will be created #' @param output_folder Folder where the figures and TDD tables will be created
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @import tidyverse #' @import tidyverse
get_final_tdd_table <- function(output_folder = "./results/TDD_analysis") { get_final_tdd_table <- function(mean_gene_count = 10,
braf_tdd <- get_tdd_table("BRAF", output_folder) ercc = F,
dmso_tdd <- get_tdd_table("DMSO", output_folder) output_folder = "./results/TDD_analysis") {
braf_tdd <- braf_tdd[, grep("gene|mean|sd", colnames(braf_tdd))] braf_tdd <- get_tdd_table("BRAF", mean_gene_count, output_folder, ercc)
dmso_tdd <- dmso_tdd[, grep("gene|mean|sd", colnames(dmso_tdd))] dmso_tdd <- get_tdd_table("DMSO", mean_gene_count, output_folder, ercc)
braf_tdd <- braf_tdd[, grep("gene|mean|TDD|sd", colnames(braf_tdd))]
dmso_tdd <- dmso_tdd[, grep("gene|mean|TDD|sd", colnames(dmso_tdd))]
tdd_full <- dmso_tdd %>% inner_join(braf_tdd, by = "gene") # nolint tdd_full <- dmso_tdd %>% inner_join(braf_tdd, by = "gene") # nolint
tdd_full <- tdd_full %>% tdd_full <- tdd_full %>%
mutate(diff_TDD = mean_BRAF_TDD - mean_DMSO_TDD) # nolint mutate(diff_TDD = mean_BRAF_TDD - mean_DMSO_TDD) # nolint
ercc_name <- ""
if (ercc) {
ercc_name <- "_erccnorm"
}
write_tsv( write_tsv(
tdd_full, tdd_full,
paste0(output_folder, "/full_TDD_table.txt") paste0(
output_folder, "/", ercc_name, "_", mean_gene_count,
"_full_TDD_table.txt"
)
) )
return(tdd_full) return(tdd_full)
} }
...@@ -180,7 +204,7 @@ compute_stat <- function(tdd_table, output_folder = "./results/TDD_analysis", ...@@ -180,7 +204,7 @@ compute_stat <- function(tdd_table, output_folder = "./results/TDD_analysis",
name = "diag") { name = "diag") {
mod <- lm(TDD ~ group, data = tdd_table) mod <- lm(TDD ~ group, data = tdd_table)
output_folderd <- paste0(output_folder, "/diagnostics") output_folderd <- paste0(output_folder, "/diagnostics")
dir.create(output_folderd) dir.create(output_folderd, showWarnings = F)
simulationOutput <- simulateResiduals(fittedModel = mod, n = 2000) # nolint simulationOutput <- simulateResiduals(fittedModel = mod, n = 2000) # nolint
pdf(paste0(output_folderd, "/", name, "_diag.pdf"), width = 12, height = 8) pdf(paste0(output_folderd, "/", name, "_diag.pdf"), width = 12, height = 8)
print(plot(simulationOutput)) print(plot(simulationOutput))
...@@ -224,7 +248,7 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names, ...@@ -224,7 +248,7 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
target_columns, outfile, title = "groups", target_columns, outfile, title = "groups",
output_folder = "./results/TDD_analysis") { output_folder = "./results/TDD_analysis") {
output_folder <- paste0(output_folder, "/boxplot_figures") output_folder <- paste0(output_folder, "/boxplot_figures")
dir.create(output_folder) dir.create(output_folder, showWarnings = F)
if (length(gene_files) != length(gene_names) | if (length(gene_files) != length(gene_names) |
length(gene_files) != length(target_columns)) { length(gene_files) != length(target_columns)) {
message("The inputs should have the same lengths") message("The inputs should have the same lengths")
...@@ -243,7 +267,9 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names, ...@@ -243,7 +267,9 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
) )
new_tdd <- rbind(new_tdd, tmp_tdd) new_tdd <- rbind(new_tdd, tmp_tdd)
} }
if (!grepl("ercc", outfile, fixed = T)) {
new_tdd <- new_tdd %>% distinct(gene, .keep_all = T) # nolint new_tdd <- new_tdd %>% distinct(gene, .keep_all = T) # nolint
}
compute_stat(new_tdd, output_folder, outfile) compute_stat(new_tdd, output_folder, outfile)
p <- ggplot(new_tdd, mapping = aes(x = group, y = TDD)) + p <- ggplot(new_tdd, mapping = aes(x = group, y = TDD)) +
geom_violin(mapping = aes(fill = group)) + geom_violin(mapping = aes(fill = group)) +
...@@ -255,9 +281,29 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names, ...@@ -255,9 +281,29 @@ create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
return(new_tdd) return(new_tdd)
} }
#' Create a boxplot figure to compare TDD values of 3 different sets of genes
tdd_full <- get_final_tdd_table(output_folder = "./results/TDD_analysis") #'
#' @details Create 3 boxplot figures:
#' 1. Create a boxplot displaying the TDD of down-regulated genes by BRAF with
#' a BRAF peak
#' 2. Create a boxplot displaying the TDD of up-regulated genes by BRAF with
#' a DMSO peak
#' 3. Create a boxplot displaying the TDD of stable genes in DMSO and BRAF
#' conditions
#'
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @param ercc a bolean indicating if we want to use ercc or stable gene
#' normalisation
#' @param output_folder Folder where the figures will be created
create_figures <- function(mean_gene_count = 10, ercc = F,
output_folder = "./results/TDD_analysis") {
tdd_full <- get_final_tdd_table(mean_gene_count, ercc, output_folder)
dir <- "./data/gene_lists/" dir <- "./data/gene_lists/"
ercc_name <- ""
if (ercc) {
ercc_name <- "_erccnorm"
}
gn <- paste0("_geneMean>", mean_gene_count)
r <- create_tdd_fig_on_lists( r <- create_tdd_fig_on_lists(
tdd_df = tdd_full, tdd_df = tdd_full,
gene_files = c( gene_files = c(
...@@ -266,7 +312,7 @@ r <- create_tdd_fig_on_lists( ...@@ -266,7 +312,7 @@ r <- create_tdd_fig_on_lists(
), ),
gene_names = c("BRAF_down_riboRNApeak", "BRAF_down"), gene_names = c("BRAF_down_riboRNApeak", "BRAF_down"),
target_columns = c("mean_BRAF_TDD", "mean_BRAF_TDD"), target_columns = c("mean_BRAF_TDD", "mean_BRAF_TDD"),
outfile = "TDD_BRAF_down-BRAFpeak", outfile = paste0("TDD", ercc_name, "_BRAF_down-BRAFpeak", gn),
title = "down-reglated mRNA without and with ribosome peaks" title = "down-reglated mRNA without and with ribosome peaks"
) )
r <- create_tdd_fig_on_lists( r <- create_tdd_fig_on_lists(
...@@ -277,18 +323,377 @@ r <- create_tdd_fig_on_lists( ...@@ -277,18 +323,377 @@ r <- create_tdd_fig_on_lists(
), ),
gene_names = c("BRAF_up_riboRNApeakDMSO", "BRAF_up"), gene_names = c("BRAF_up_riboRNApeakDMSO", "BRAF_up"),
target_columns = c("mean_DMSO_TDD", "mean_DMSO_TDD"), target_columns = c("mean_DMSO_TDD", "mean_DMSO_TDD"),
outfile = "TDD_BRAF_up-BRAFpeakDMSO", outfile = paste0("TDD", ercc_name, "_BRAF_up_BRAFpeakDMSO", gn),
title = "down-reglated mRNA without and with ribosome peaks" title = "down-reglated mRNA without and with ribosome peaks"
) )
dir <- "./results/stable_genes/" dir <- "./results/stable_genes/"
r <- create_tdd_fig_on_lists( if (!ercc) {
tdd_df = tdd_full, stable_files <- c(
gene_files = c(
paste0(dir, "braf_stable_genes.txt"), paste0(dir, "braf_stable_genes.txt"),
paste0(dir, "dmso_stable_genes.txt") paste0(dir, "dmso_stable_genes.txt")
), )
} else {
stable_files <- c(
paste0(dir, "ercc.txt"),
paste0(dir, "ercc.txt")
)
}
r <- create_tdd_fig_on_lists(
tdd_df = tdd_full,
gene_files = stable_files,
gene_names = c("BRAF_stable", "DMSO_stable"), gene_names = c("BRAF_stable", "DMSO_stable"),
target_columns = c("mean_BRAF_TDD", "mean_DMSO_TDD"), target_columns = c("mean_BRAF_TDD", "mean_DMSO_TDD"),
outfile = "BRAF_DMSO_stable", outfile = paste0("BRAF_DMSO_stable", ercc_name, gn),
title = "BRAF and DMSO stable genes" title = "BRAF and DMSO stable genes"
) )
}
#' perform a t.test on TDD values replicate
#'
#' @param row a row of a TDD dataframe
#' @return The t.test pvalue
t_test_func <- function(row) {
x1 <- as.numeric(row[1:3])
x2 <- as.numeric(row[4:6])
test <- t.test(x1, x2)
return(test$p.value) # nolint
}
#' Get correlation value between BRAF and DMSO TDD of genes
#'
#' @details Get, the correlation values, the slope and the intercept of the
#' best linear model between the TDD of a given set of
#' genes in BRAF conditions or in DMSO conditions
#'
#' @param tdd_full A table with a column with the mean TDD of DMSO
#' and BRAF condition + a group column indicating if the gene is up
#' of down regulated by BRAF
#' @param my_group The group of gene of interest to select
#' @return a list with the correlation value of TDD, its intercept and
#' slope
get_cor_value <- function(tdd_full, my_group = "BRAF_DOWN") {
tdd <- tdd_full %>% filter(group == my_group) ## nolint
c <- cor(
tdd %>% filter(group == my_group) %>% # nolint
select(mean_DMSO_TDD) %>% unlist(), # nolint
tdd %>%
filter(group == my_group) %>% # nolint
select(mean_BRAF_TDD) %>%
unlist() # nolint
)
res <- coef(lm(mean_BRAF_TDD ~ mean_DMSO_TDD, tdd %>%
filter(group == my_group)))
return(list(cor = round(c, 3), intercept = res[1], slope = res[2]))
}
#' Add a column group to a table containing a gene column
#'
#' @param tdd_full a table of tdd with a gene columne
#' @return The table with a column indicating if the genes are up, down or not
#' regulated by BRAF
get_group_columns <- function(tdd_full) {
down_braf <- read.table(paste0(dir, "BRAF_DOWN_1.5_name.txt"))$V1
up_braf <- read.table(paste0(dir, "BRAF_UP_1.5_name.txt"))$V1
tdd_full$group <- rep("CTRL", times = nrow(tdd_full))
tdd_full[tdd_full$gene %in% down_braf, "group"] <- "BRAF_DOWN"
tdd_full[tdd_full$gene %in% up_braf, "group"] <- "BRAF_UP"
return(tdd_full)
}
#' Prepare a dataframe used to compute the correlation between BRAF and DMSO TDD
#'
#' @param mean_gene_count The minimum mean gene counts to compute their TDD
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param output_folder The folder where the results will be created
#' @return the dataframe ready to be used for correlation figures
prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F,
output_folder = "./results/TDD_analysis") {
tdd_full <- get_final_tdd_table(mean_gene_count = 10, ercc = F)
tdd_full <- tdd_full[
,
c("gene", grep("X...|mean", colnames(tdd_full), value = T))
]
tdd_full <- tdd_full %>%
mutate(p_value = apply(
tdd_full %>% select(starts_with("X")), 1,
function(x) {
t_test_func(x)
}
)) # nolint
tdd_full <- tdd_full %>% arrange(-p_value) # nolint
dir <- "./data/gene_lists/"
down_braf <- read.table(paste0(dir, "BRAF_DOWN_1.5_name.txt"))$V1
up_braf <- read.table(paste0(dir, "BRAF_UP_1.5_name.txt"))$V1
tdd_full$group <- rep("CTRL", times = nrow(tdd_full))
tdd_full[tdd_full$gene %in% down_braf, "group"] <- "BRAF_DOWN"
tdd_full[tdd_full$gene %in% up_braf, "group"] <- "BRAF_UP"
tdd_full$sig <- rep("", times = nrow(tdd_full))
tdd_full[tdd_full$p_value <= 0.05, "sig"] <- "_sig"
tdd_full <- tdd_full %>% mutate(kind = paste0(group, sig)) # nolint
tdd_full <- tdd_full %>%
filter(group != "CTRL") %>%
arrange(kind)
write_tsv(tdd_full, paste0(output_folder, "/TDD_correlation_table.txt")) # nolint
return(tdd_full)
}
#' Produce the TDD correlation figures
#'
#' @param mean_gene_count The minimum mean gene counts to compute their TDD
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param output_folder The folder where the results will be created
produce_tdd_cor_figure <- function(mean_gene_count = 10, ercc = F,
output_folder = "./results/TDD_analysis") {
tdd_full <- prepare_df_for_cor(mean_gene_count, ercc, output_folder)
downv <- get_cor_value(tdd_full, my_group = "BRAF_DOWN")
upv <- get_cor_value(tdd_full, my_group = "BRAF_UP")
p <- ggplot(tdd_full, mapping = aes(
x = mean_DMSO_TDD,
y = mean_BRAF_TDD, color = kind
)) +
geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
scale_fill_manual(values = c(
"#4169e1", "darkblue",
"#CD5C5C", "darkred"
)) +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
geom_abline(
intercept = downv$intercept, slope = downv$slope,
color = "#4169e1"
) +
geom_abline(
intercept = upv$intercept, slope = upv$slope,
color = "#CD5C5C"
) +
ggtitle(paste0(
"TDD of genes with BRAF or DMSO treatment\n",
"(R gene BRAF_down = ", downv$cor, ")(R BRAF_up = ",
upv$cor, ")"
))
p_down <- ggplot(tdd_full %>%
filter(group %in% c("BRAF_DOWN", "BRAF_DOWN_sig")),
mapping = aes(x = mean_DMSO_TDD, y = mean_BRAF_TDD, color = kind)
) +
geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
scale_fill_manual(values = c("black", "red")) +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
geom_abline(
intercept = downv$intercept, slope = downv$slope,
color = "#4169e1"
) +
geom_abline(slope = 1, color = "black") +
ggtitle(paste0(
"TDD of genes BRAF down-regulated with BRAF or DMSO",
"treatment\n(R gene BRAF_down = ", downv$cor, ")"
))
p_up <- ggplot(tdd_full %>% filter(group %in% c("BRAF_UP", "BRAF_UP_sig")),
mapping = aes(
x = mean_DMSO_TDD, y = mean_BRAF_TDD,
color = kind
)
) +
geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
scale_fill_manual(values = c("black", "red")) +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
geom_abline(
intercept = downv$intercept,
slope = downv$slope, color = "#CD5C5C"
) +
geom_abline(slope = 1, color = "black") +
ggtitle(paste0(
"TDD of genes BRAF up-regulated with BRAF or DMSO",
"treatment\n(R gene BRAF_up = ", upv$cor, ")"
))
pdf(paste0(output_folder, "/TDD_correlation_figures.pdf"),
width = 12,
height = 17
)
grid.arrange(p, p_down, p_up, ncol = 1) # nolint
dev.off()
}
#' Get the negative binomial regression p-value
#'
#' @details glm negative binomial test to check if the number of peaks
#' between groups of genes is different
#'
#' @param tab a table containing the number of ribosome peaks in genes and
#' their groups
#' @param output_folder folder where the diagnotics will be created
#' @return the p-value of the test
glm_nb_pvalue <- function(tab, pic_col,
output_folder = "./results/TDD_analysis") {
mod <- glm.nb(paste0(pic_col, "~ group"), data = tab) # nolint
output_diag <- paste0(output_folder, "/diagnotics")
dir.create(output_diag, showWarnings = F)
output_fig <- paste0(output_diag, "/glm_nb_", pic_col, ".pdf")
pdf(output_fig, width = 12, height = 8)
plot(simulateResiduals(mod)) # nolint
dev.off()
return(summary(mod)$coeff[2, 4])
}
#' Get the negative binomial regression p-value
#'
#' @details glm logistic test to check if the presence of peaks
#' between groups of genes is different
#'
#' @param tab a table containing the number of ribosome peaks in genes and
#' their groups
#' @param output_folder folder where the diagnotics will be created
#' @return the p-value of the test
glm_log_pvalue <- function(tab, pic_col,
output_folder = "./results/TDD_analysis") {
mod <- glm(paste0(pic_col, "~ group"),
data = tab,
family = binomial(link = "logit")
)
output_diag <- paste0(output_folder, "/diagnotics")
dir.create(output_diag, showWarnings = F)
output_fig <- paste0(output_diag, "/glm_nb_", pic_col, ".pdf")
pdf(output_fig, width = 12, height = 8)
plot(simulateResiduals(mod)) # nolint
dev.off()
return(summary(mod)$coeff[2, 4])
}
#' Create barplots showing if different groups of genes have a different number
#' of ribosome peaks
#'
#' @param tdd_full A table with TDD and number of peaks
#' @param gene_type The selected group of gene on which we want to perform the
#' analysis
#' @param output_folder Folder where the results will be created
#' @param peak_type The type of the peak
create_pic_figs <- function(tdd_full, gene_type = "BRAF_DOWN",
output_folder = "./results/TDD_analysis",
peak_type = "RiboRNApeak") {
treatment <- sub("_.*", "", gene_type)
regulation <- sub(".*_", "", gene_type)
if (regulation == "UP") {
treatment <- "DMSO"
}
target <- paste0("TDD_", treatment)
tdd_filt <- tdd_full %>% filter(group == gene_type) # nolint
tdd_filt <- tdd_filt %>% mutate(group = ifelse(!!as.symbol(target),
paste0(group, "_TDD"), group
))
data_col <- "peak"
res_col <- paste0("mean_", data_col)
has_col <- paste0("has_", data_col)
has_res_col <- paste0("mean_", has_col)
tdd_filt <- tdd_filt %>% mutate(
!!as.symbol(has_col) :=
ifelse(!!as.symbol(data_col) > 0, 1, 0)
)
tdd_fig <- tdd_filt %>%
select(!!as.symbol(data_col), !!as.symbol(has_col), group) %>%
group_by(group) %>%
summarise(
!!as.symbol(res_col) := mean(!!as.symbol(data_col)),
!!as.symbol(has_res_col) := mean(!!as.symbol(has_col))
)
pval <- glm_nb_pvalue(tdd_filt, data_col, output_folder)
p1 <- ggplot(tdd_fig,
mapping = aes_string(x = "group", y = res_col, fill = "group")
) +
geom_bar(stat = "identity") +
ggtitle(paste0(
"Average proportion of ", treatment, " peaks in gene", regulation,
"-regulated by BRAF (p = ", pval, ")"
))
pval <- glm_log_pvalue(tdd_filt, has_col, output_folder)
p2 <- ggplot(tdd_fig, mapping = aes_string(
x = "group", y = has_res_col, fill = "group"
)) +
geom_bar(stat = "identity") +
ggtitle(paste0(
"Average proportion of ", regulation,
"-regulated gene by BRAF having at least a ", treatment,
" peak (p = ", pval, ")"
))
pdf(paste0(output_folder, "/barplot_pic_analysis", gene_type, "_",
peak_type, ".pdf"),
width = 12, height = 12
)
grid.arrange(p1, p2, ncol = 1) #nolint
dev.off()
}
#' function that calcultaed if genes are TDD (more in BRAF or DMSO condition)
#' and if they have peaks
#'
#' @param peak_file A file containing peaks in a bed format
#' @param mean_gene_count The minimum gene count of a gene to be analysed
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
get_tdd_pic_table <- function(peak_file, mean_gene_count = 10, ercc = F) {
peak_list <- read.table(peak_file)$V1
peak_table <- tibble(
gene = names(table(peak_list)),
peak = as.vector(table(peak_list))
)
tdd_full <- get_final_tdd_table(mean_gene_count, ercc)
tdd_full <- tdd_full[,
c("gene", grep("X...", colnames(tdd_full), value = T))]
tdd_full <- tdd_full %>%
mutate(
TDD_BRAF = X276_BRAF_TDD - X276_DMSO_TDD > 0.2 &
X277_BRAF_TDD - X277_DMSO_TDD > 0.2 &
X278_BRAF_TDD - X278_DMSO_TDD > 0.2,
TDD_DMSO = X276_DMSO_TDD - X276_BRAF_TDD > 0.2 &
X277_DMSO_TDD - X277_BRAF_TDD > 0.2 &
X278_DMSO_TDD - X278_BRAF_TDD > 0.2
) %>%
select(gene, TDD_BRAF, TDD_DMSO) #nolint
tdd_full <- tdd_full %>% left_join(peak_table) #nolint
tdd_full <- tdd_full %>% replace_na(list(peak = 0)) #nolint
return(tdd_full)
}
#' Create all peaks fig
#'
#' @param peak_file A file containing peaks
#' @param mean_gene_count The minimum mean count of gene for its tdd to
#' be computed
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param gene_type The selected group of gene on which we want to perform the
#' analysis
#' @param peak_type The type of peak inside the figure
build_peaks_fig <- function(peak_file, mean_gene_count = 10, ercc = F,
gene_type = "BRAF_DOWN",
output_folder = "./results/TDD_analysis",
peak_type = "RiboRNApeak") {
tdd_table <- get_tdd_pic_table(peak_file, mean_gene_count, ercc)
tdd_table <- get_group_columns(tdd_table)
create_pic_figs(tdd_table, gene_type, output_folder, peak_type = peak_type)
}
create_figures(mean_gene_count = 0, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 10, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 20, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 50, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 0, ercc = T, output_folder = "./results/TDD_analysis")
produce_tdd_cor_figure()
build_peaks_fig()
dir <- "./data/gene_lists/"
files <- c("BRAF_vs_DMSO_RepMean.merged.bed", "DMSO_vs_BRAF_RepMean.merged.bed",
"BRAF_sorted_26-40.merged.bed", "DMSO_sorted_26-40.merged.bed")
gene_types <- c("BRAF_DOWN", "BRAF_UP", "BRAF_DOWN", "BRAF_UP")
peak_types <- c("RiboRNAPeak", "RiboRNAPeak", "RiboPeak", "RiboPeak")
for (i in seq_len(length(files))) {
peak_file <- paste0(dir, files[i])
build_peaks_fig(peak_file = peak_file, gene_type = gene_types[i],
peak_type = peak_types[i])
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment