Select Git revision
06_compute_tdd_index.R

nfontrod authored
06_compute_tdd_index.R 27.02 KiB
#!/bin/Rscript
# The goal of this script is to compute TDD index and TID index of every genes
library(MASS)
library(tidyverse)
library(genefilter)
library(gridExtra)
library(DHARMa)
library(emmeans)
source("src/common_functions.R")
#' compute the TDD of every genes for the experiment treatment
#'
#' @param treatment The treatment for which we want to compute the TDD
#' @param mean_gene_count The minimum mean count a gene must have to compute
#' its TDDindex
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @return The count tablme with the tdd for each replicate and mean TDD and sd
#' @import tidyverse
#' @import genefilter
compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10, ercc = F) {
ercc_name <- ""
if (ercc) {
ercc_name <- "_ercc"
}
norm_df <- read_tsv(
paste0(
"./results/deseq2_normalisation/", treatment, ercc_name,
"_norm_stable_gene.txt"
)
)
if (mean_gene_count > 0) {
norm_df$rmean <- norm_df %>%
select(-gene) %>%
rowMeans()
norm_df <- norm_df %>% filter(rmean >= mean_gene_count) # nolint
norm_df <- norm_df %>% select(-rmean) # nolint
}
norm_df$rmin <- norm_df %>%
select(-gene) %>%
apply(1, min)
norm_df <- norm_df %>% filter(rmin > 0) # nolint
norm_df <- norm_df %>% select(-rmin) # nolint
rep <- norm_df %>%
select(-gene) %>%
colnames() %>%
str_extract("X...") %>%
unique()
temp <- norm_df
colnames(temp) <- str_replace(colnames(temp), "DMSO_0", "T_0") # nolint
for (r in rep) {
bc <- paste0(r, "_", treatment)
temp <- temp %>% mutate(!!paste0(r, "_", treatment, "_TDD") := (
(!!as.symbol(paste0(bc, "_TCHX_5")) -
!!as.symbol(paste0(bc, "_T_5")) # nolint
) / !!as.symbol(paste0(bc, "_T_0"))))
}
tdd_cols <- str_replace(
rep, "(X...)",
paste0("\\1_", treatment, "_TDD")
) # nolint
temp <- temp %>%
mutate(
!!paste0("mean_", treatment, "_TDD") := rowMeans(.[, tdd_cols]),
!!paste0("sd_", treatment, "_TDD") := rowSds(.[, tdd_cols])
)
return(temp)
}
#' Create a density figure of TDD index for a given replicate
#'
#' @param rep The replicate name
#' @param tdd_tible The tablle containing TDD
#' @param treatment The treatment used to compute the tDD
#' @import tidyverse
create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") {
col <- paste0(rep, "_", treatment, "_TDD")
tot <- tdd_tibble[, col] %>% nrow() # nolint
good <- sum(tdd_tibble[, col] >= 0 & tdd_tibble[, col] <= 1)
perc <- round((good / tot) * 100, 1)
tit <- paste(
"TDD density rep", rep, "( 0<=TDD<=1 :",
good, "/", tot, "-", perc, "%)"
)
p <- ggplot(tdd_tibble, mapping = aes_string(x = col)) +
geom_freqpoly(binwidth = 0.11) +
coord_cartesian(xlim = c(-1, 2)) +
ggtitle(tit)
return(p)
}
#' Create density figure for every replicate and the mean of replicate
#'
#' @param tdd_tibble A tablle containing the TDD of each replicate
#' @param treatment The treatment to which the tdd_tibblerefers
#' @param output_folder The folder where the figure will be created
#' @param ercc a bolean indicating if the count data was normalized using ercc
#' @import tidyverse
#' @import gridExtra
create_density_figs <- function(tdd_tibble, treatment = "BRAF",
output_folder = "./results/TDD_analysis",
ercc = F) {
dir.create(output_folder, showWarnings = F)
rep <- tdd_tibble %>%
select(-gene) %>%
colnames() %>%
str_extract("X...") %>%
unique()
rep <- rep[!is.na(rep)]
my_plots <- lapply(rep, create_density_rep,
tdd_tibble = tdd_tibble,
treatment = treatment
)
my_plots[[length(my_plots) + 1]] <- create_density_rep(
"mean", tdd_tibble, treatment
)
ercc_name <- ""
if (ercc == T) {
ercc_name = "_ercc"
}
pdf(paste0(output_folder, "/", treatment, ercc_name, "TDD_density.pdf"),
height = 8, width = 12
)
print(do.call("grid.arrange", c(my_plots, ncol = 2)))
dev.off()
}
#' Create the TDD tables and density figures with a treatment given in input
#'
#' @details Compute the tdd table for the sample treated with `treatment`,
#' Then creates the density figures of TDD for each replicates and saves the
#' TDD table
#'
#' @param treatment The treatment of samples for which we want to compute
#' the TDD
#' @param output_folder folder where the TDD table and figure will be created
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @import tidyverse
get_tdd_table <- function(treatment,
mean_gene_count = 10,
output_folder = "./results/TDD_analysis",
ercc = F) {
tdd_table <- compute_tdd(treatment, mean_gene_count, ercc)
create_density_figs(tdd_table, treatment, output_folder, ercc)
write_tsv(
tdd_table,
paste0(output_folder, "/", treatment, "_TDD_table.txt")
)
return(tdd_table)
}
#' Create the TDD table for DMSO and BRAF treatment
#'
#' @details Compute the tdd table for the sample treated with `DMSO` and `BRAF`,
#' Then creates the density figures of TDD for each replicates and saves the
#' TDD table
#'
#' @param output_folder Folder where the figures and TDD tables will be created
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @import tidyverse
get_final_tdd_table <- function(mean_gene_count = 10,
ercc = F,
output_folder = "./results/TDD_analysis") {
braf_tdd <- get_tdd_table("BRAF", mean_gene_count, output_folder, ercc)
dmso_tdd <- get_tdd_table("DMSO", mean_gene_count, output_folder, ercc)
braf_tdd <- braf_tdd[, grep("gene|mean|TDD|sd", colnames(braf_tdd))]
dmso_tdd <- dmso_tdd[, grep("gene|mean|TDD|sd", colnames(dmso_tdd))]
tdd_full <- dmso_tdd %>% inner_join(braf_tdd, by = "gene") # nolint
tdd_full <- tdd_full %>%
mutate(diff_TDD = mean_BRAF_TDD - mean_DMSO_TDD) # nolint
ercc_name <- ""
if (ercc) {
ercc_name <- "_erccnorm"
}
write_tsv(
tdd_full,
paste0(
output_folder, "/", ercc_name, "_", mean_gene_count,
"_full_TDD_table.txt"
)
)
return(tdd_full)
}
#' Compute lm statistical analysis
#'
#' @details Compute statistical analysis with a linar model based on TDD of
#' different groups of genes
#'
#' @param tdd_tabble a table containing TDD values of genes belowing to
#' different groups. It must have the following columns: TDD, groups
#' @param output_folder folder where the stat table will be ccreated
#' @param name The name of the stat table
#' @import tidyverse
#' @import DHARMa
#' @import emmeans
compute_stat <- function(tdd_table, output_folder = "./results/TDD_analysis",
name = "diag") {
mod <- lm(TDD ~ group, data = tdd_table)
output_folderd <- paste0(output_folder, "/diagnostics")
dir.create(output_folderd, showWarnings = F)
simulationOutput <- simulateResiduals(fittedModel = mod, n = 2000) # nolint
pdf(paste0(output_folderd, "/", name, "_diag.pdf"), width = 12, height = 8)
print(plot(simulationOutput))
dev.off()
comp <- emmeans(mod, ~group) # nolint
tab <- as.data.frame(pairs(comp))
tab <- tab[, c("contrast", "estimate", "SE", "t.ratio", "p.value")]
grp1_mean <- NULL
grp2_mean <- NULL
groups <- str_split(tab$contrast, " - ") # nolint
for (i in seq_len(length(groups))) {
grp1 <- groups[[i]][1]
grp2 <- groups[[i]][2]
mean_grp1 <- mean(tdd_table[tdd_table$group %in% grp1, ][["TDD"]])
mean_grp2 <- mean(tdd_table[tdd_table$group %in% grp2, ][["TDD"]])
grp1_mean[i] <- mean_grp1
grp2_mean[i] <- mean_grp2
}
tab$grp1_mean <- grp1_mean
tab$grp2_mean <- grp2_mean
colnames(tab) <- c(
"grp1_grp2", "Estimate", "Std.Error",
"t.ratio", "p.value", "grp1_mean", "grp2_mean"
)
write_tsv(tab, file = paste0(output_folder, "/", name, "_stat.txt")) # nolint
}
#' Create a boxplot figure to compare TDD values of different sets of genes
#'
#' @param tdd_df A TDD table for every treatment of interest
#' @param gene_files A list of files containing gene names
#' @param gene_names A list of names for each gene files
#' @param target_columns A list of string corresponding to the column to
#' to which we need to get TDD values for those genes (i.e The treatment of
#' interest)
#' @param outfile The name of the figure (and stats table) to produce
#' @param title The end of the figure title
#' @param output_folder The figure to create
create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
target_columns, outfile, title = "groups",
output_folder = "./results/TDD_analysis") {
output_folder <- paste0(output_folder, "/boxplot_figures")
dir.create(output_folder, showWarnings = F)
if (length(gene_files) != length(gene_names) |
length(gene_files) != length(target_columns)) {
message("The inputs should have the same lengths")
return(1)
}
new_tdd <- data.frame(TDD = c(), group = c(), gene = c(), col = c())
for (i in seq_len(length(gene_files))) {
gene_selected <- read_tsv(gene_files[i], col_names = F)$X1 # nolint
tdd_val <- tdd_df[tdd_df$gene %in% gene_selected, ][[target_columns[i]]]
genes <- tdd_df[tdd_df$gene %in% gene_selected, ][["gene"]]
groups <- rep(gene_names[i], times = length(genes))
cols <- rep(target_columns[i], times = length(genes))
tmp_tdd <- data.frame(
TDD = tdd_val, group = groups,
gene = genes, col = cols
)
new_tdd <- rbind(new_tdd, tmp_tdd)
}
if (!grepl("ercc", outfile, fixed = T)) {
new_tdd <- new_tdd %>% distinct(gene, .keep_all = T) # nolint
}
compute_stat(new_tdd, output_folder, outfile)
p <- ggplot(new_tdd, mapping = aes(x = group, y = TDD)) +
geom_violin(mapping = aes(fill = group)) +
geom_boxplot(width = 0.05) +
ggtitle(paste("TDD index comparison between ", title))
pdf(paste0(output_folder, "/", outfile, ".pdf"), height = 8, width = 12)
print(p)
dev.off()
return(new_tdd)
}
#' Create a boxplot figure to compare TDD values of 3 different sets of genes
#'
#' @details Create 3 boxplot figures:
#' 1. Create a boxplot displaying the TDD of down-regulated genes by BRAF with
#' a BRAF peak
#' 2. Create a boxplot displaying the TDD of up-regulated genes by BRAF with
#' a DMSO peak
#' 3. Create a boxplot displaying the TDD of stable genes in DMSO and BRAF
#' conditions
#'
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @param ercc a bolean indicating if we want to use ercc or stable gene
#' normalisation
#' @param output_folder Folder where the figures will be created
create_figures <- function(mean_gene_count = 10, ercc = F,
output_folder = "./results/TDD_analysis") {
tdd_full <- get_final_tdd_table(mean_gene_count, ercc, output_folder)
dir <- "./data/gene_lists/"
ercc_name <- ""
if (ercc) {
ercc_name <- "_erccnorm"
}
gn <- paste0("_geneMean>", mean_gene_count)
r <- create_tdd_fig_on_lists(
tdd_df = tdd_full,
gene_files = c(
paste0(dir, "mRNADOWN_riboRNApeakBRAF.txt"),
paste0(dir, "BRAF_DOWN_1.5_name.txt")
),
gene_names = c("BRAF_down_riboRNApeak", "BRAF_down"),
target_columns = c("mean_BRAF_TDD", "mean_BRAF_TDD"),
outfile = paste0("TDD", ercc_name, "_BRAF_down-BRAFpeak", gn),
title = "down-reglated mRNA without and with ribosome peaks"
)
r <- create_tdd_fig_on_lists(
tdd_df = tdd_full,
gene_files = c(
paste0(dir, "mRNAUP_riboRNApeakDMSO.txt"),
paste0(dir, "BRAF_UP_1.5_name.txt")
),
gene_names = c("BRAF_up_riboRNApeakDMSO", "BRAF_up"),
target_columns = c("mean_DMSO_TDD", "mean_DMSO_TDD"),
outfile = paste0("TDD", ercc_name, "_BRAF_up_BRAFpeakDMSO", gn),
title = "down-reglated mRNA without and with ribosome peaks"
)
dir <- "./results/stable_genes/"
if (!ercc) {
stable_files <- c(
paste0(dir, "braf_stable_genes.txt"),
paste0(dir, "dmso_stable_genes.txt")
)
} else {
stable_files <- c(
paste0(dir, "ercc.txt"),
paste0(dir, "ercc.txt")
)
}
r <- create_tdd_fig_on_lists(
tdd_df = tdd_full,
gene_files = stable_files,
gene_names = c("BRAF_stable", "DMSO_stable"),
target_columns = c("mean_BRAF_TDD", "mean_DMSO_TDD"),
outfile = paste0("BRAF_DMSO_stable", ercc_name, gn),
title = "BRAF and DMSO stable genes"
)
}
#' perform a t.test on TDD values replicate
#'
#' @param row a row of a TDD dataframe
#' @return The t.test pvalue
t_test_func <- function(row) {
x1 <- as.numeric(row[1:3])
x2 <- as.numeric(row[4:6])
test <- t.test(x1, x2, paired = T)
return(test$p.value) # nolint
}
#' Get correlation value between BRAF and DMSO TDD of genes
#'
#' @details Get, the correlation values, the slope and the intercept of the
#' best linear model between the TDD of a given set of
#' genes in BRAF conditions or in DMSO conditions
#'
#' @param tdd_full A table with a column with the mean TDD of DMSO
#' and BRAF condition + a group column indicating if the gene is up
#' of down regulated by BRAF
#' @param my_group The group of gene of interest to select
#' @return a list with the correlation value of TDD, its intercept and
#' slope
get_cor_value <- function(tdd_full, my_group = "BRAF_DOWN") {
tdd <- tdd_full %>% filter(group == my_group) ## nolint
c <- cor(
tdd %>% filter(group == my_group) %>% # nolint
select(mean_DMSO_TDD) %>% unlist(), # nolint
tdd %>%
filter(group == my_group) %>% # nolint
select(mean_BRAF_TDD) %>%
unlist() # nolint
)
res <- coef(lm(mean_BRAF_TDD ~ mean_DMSO_TDD, tdd %>%
filter(group == my_group)))
return(list(cor = round(c, 3), intercept = res[1], slope = res[2]))
}
#' Prepare a dataframe used to compute the correlation between BRAF and DMSO TDD
#'
#' @param mean_gene_count The minimum mean gene counts to compute their TDD
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param output_folder The folder where the results will be created
#' @return the dataframe ready to be used for correlation figures
prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F,
output_folder = "./results/TDD_analysis") {
tdd_full <- get_final_tdd_table(mean_gene_count = 10, ercc = F)
tdd_full <- tdd_full[
,
c("gene", grep("X...|mean", colnames(tdd_full), value = T))
]
tdd_full <- tdd_full %>%
mutate(p_value = apply(
tdd_full %>% select(starts_with("X")), 1,
function(x) {
t_test_func(x)
}
)) # nolint
tdd_full <- tdd_full %>% arrange(-p_value) # nolint
dir <- "./data/gene_lists/"
down_braf <- read.table(paste0(dir, "BRAF_DOWN_1.5_name.txt"))$V1
up_braf <- read.table(paste0(dir, "BRAF_UP_1.5_name.txt"))$V1
tdd_full$group <- rep("CTRL", times = nrow(tdd_full))
tdd_full[tdd_full$gene %in% down_braf, "group"] <- "BRAF_DOWN"
tdd_full[tdd_full$gene %in% up_braf, "group"] <- "BRAF_UP"
tdd_full$sig <- rep("", times = nrow(tdd_full))
tdd_full[tdd_full$p_value <= 0.05, "sig"] <- "_sig"
tdd_full <- tdd_full %>% mutate(kind = paste0(group, sig)) # nolint
tdd_full <- tdd_full %>%
filter(group != "CTRL") %>%
arrange(kind)
write_tsv(tdd_full, paste0(output_folder, "/TDD_correlation_table.txt")) # nolint
tdd_full %>%
filter(kind == "BRAF_DOWN_sig", mean_BRAF_TDD > mean_DMSO_TDD) %>%
select(gene) %>%
write_tsv(paste0(output_folder, "/TDD_BRAF_BRAF_DOWN.txt"))
tdd_full %>%
filter(kind == "BRAF_UP_sig", mean_DMSO_TDD > mean_BRAF_TDD ) %>%
select(gene) %>%
write_tsv(paste0(output_folder, "/TDD_DMSO_BRAF_UP.txt"))
return(tdd_full)
}
#' Produce the TDD correlation figures
#'
#' @param mean_gene_count The minimum mean gene counts to compute their TDD
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param output_folder The folder where the results will be created
produce_tdd_cor_figure <- function(mean_gene_count = 10, ercc = F,
output_folder = "./results/TDD_analysis") {
tdd_full <- prepare_df_for_cor(mean_gene_count, ercc, output_folder)
downv <- get_cor_value(tdd_full, my_group = "BRAF_DOWN")
upv <- get_cor_value(tdd_full, my_group = "BRAF_UP")
p <- ggplot(tdd_full, mapping = aes(
x = mean_DMSO_TDD,
y = mean_BRAF_TDD, color = kind
)) +
geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
scale_fill_manual(values = c(
"#4169e1", "darkblue",
"#CD5C5C", "darkred"
)) +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
geom_abline(
intercept = downv$intercept, slope = downv$slope,
color = "#4169e1"
) +
geom_abline(
intercept = upv$intercept, slope = upv$slope,
color = "#CD5C5C"
) +
ggtitle(paste0(
"TDD of genes with BRAF or DMSO treatment\n",
"(R gene BRAF_down = ", downv$cor, ")(R BRAF_up = ",
upv$cor, ")"
))
p_down <- ggplot(tdd_full %>%
filter(group %in% c("BRAF_DOWN", "BRAF_DOWN_sig")),
mapping = aes(x = mean_DMSO_TDD, y = mean_BRAF_TDD, color = kind, label = gene)
) +
geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
scale_fill_manual(values = c("black", "red")) +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
geom_label_repel(data = tdd_full %>%
filter(kind == "BRAF_DOWN_sig"), size = 1,
mapping = aes(mean_DMSO_TDD, mean_BRAF_TDD, label = gene)) +
geom_abline(
intercept = downv$intercept, slope = downv$slope,
color = "#4169e1"
) +
geom_abline(slope = 1, color = "black") +
ggtitle(paste0(
"TDD of genes BRAF down-regulated with BRAF or DMSO",
"treatment\n(R gene BRAF_down = ", downv$cor, ")"
))
p_up <- ggplot(tdd_full %>% filter(group %in% c("BRAF_UP", "BRAF_UP_sig")),
mapping = aes(
x = mean_DMSO_TDD, y = mean_BRAF_TDD,
color = kind
)
) +
geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
scale_fill_manual(values = c("black", "red")) +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
geom_label_repel(data = tdd_full %>%
filter(kind == "BRAF_UP_sig"), size = 1,
mapping = aes(mean_DMSO_TDD, mean_BRAF_TDD, label = gene)) +
geom_abline(
intercept = downv$intercept,
slope = downv$slope, color = "#CD5C5C"
) +
geom_abline(slope = 1, color = "black") +
ggtitle(paste0(
"TDD of genes BRAF up-regulated with BRAF or DMSO",
"treatment\n(R gene BRAF_up = ", upv$cor, ")"
))
pdf(paste0(output_folder, "/TDD_correlation_figures.pdf"),
width = 12,
height = 17
)
grid.arrange(p, p_down, p_up, ncol = 1) # nolint
dev.off()
}
#' Get the negative binomial regression p-value
#'
#' @details glm negative binomial test to check if the number of peaks
#' between groups of genes is different
#'
#' @param tab a table containing the number of ribosome peaks in genes and
#' their groups
#' @param output_folder folder where the diagnotics will be created
#' @return the p-value of the test
glm_nb_pvalue <- function(tab, pic_col,
output_folder = "./results/TDD_analysis") {
mod <- glm.nb(paste0(pic_col, "~ group"), data = tab) # nolint
output_diag <- paste0(output_folder, "/diagnotics")
dir.create(output_diag, showWarnings = F)
output_fig <- paste0(output_diag, "/glm_nb_", pic_col, ".pdf")
pdf(output_fig, width = 12, height = 8)
plot(simulateResiduals(mod)) # nolint
dev.off()
return(summary(mod)$coeff[2, 4])
}
#' Get the negative binomial regression p-value
#'
#' @details glm logistic test to check if the presence of peaks
#' between groups of genes is different
#'
#' @param tab a table containing the number of ribosome peaks in genes and
#' their groups
#' @param output_folder folder where the diagnotics will be created
#' @return the p-value of the test
glm_log_pvalue <- function(tab, pic_col,
output_folder = "./results/TDD_analysis") {
mod <- glm(paste0(pic_col, "~ group"),
data = tab,
family = binomial(link = "logit")
)
output_diag <- paste0(output_folder, "/diagnotics")
dir.create(output_diag, showWarnings = F)
output_fig <- paste0(output_diag, "/glm_nb_", pic_col, ".pdf")
pdf(output_fig, width = 12, height = 8)
plot(simulateResiduals(mod)) # nolint
dev.off()
return(summary(mod)$coeff[2, 4])
}
#' Create barplots showing if different groups of genes have a different number
#' of ribosome peaks
#'
#' @param tdd_full A table with TDD and number of peaks
#' @param gene_type The selected group of gene on which we want to perform the
#' analysis
#' @param output_folder Folder where the results will be created
#' @param peak_type The type of the peak
create_pic_figs <- function(tdd_full, gene_type = "BRAF_DOWN",
output_folder = "./results/TDD_analysis",
peak_type = "RiboRNApeak") {
treatment <- sub("_.*", "", gene_type)
regulation <- sub(".*_", "", gene_type)
if (regulation == "UP") {
treatment <- "DMSO"
}
target <- paste0("TDD_", treatment)
tdd_filt <- tdd_full %>% filter(group == gene_type) # nolint
tdd_filt <- tdd_filt %>% mutate(group = ifelse(!!as.symbol(target),
paste0(group, "_TDD"), group
))
data_col <- "peak"
res_col <- paste0("mean_", data_col)
has_col <- paste0("has_", data_col)
has_res_col <- paste0("mean_", has_col)
tdd_filt <- tdd_filt %>% mutate(
!!as.symbol(has_col) :=
ifelse(!!as.symbol(data_col) > 0, 1, 0)
)
tdd_fig <- tdd_filt %>%
select(!!as.symbol(data_col), !!as.symbol(has_col), group) %>%
group_by(group) %>%
summarise(
!!as.symbol(res_col) := mean(!!as.symbol(data_col)),
!!as.symbol(has_res_col) := mean(!!as.symbol(has_col))
)
pval <- glm_nb_pvalue(tdd_filt, data_col, output_folder)
p1 <- ggplot(tdd_fig,
mapping = aes_string(x = "group", y = res_col, fill = "group")
) +
geom_bar(stat = "identity") +
ggtitle(paste0(
"Average proportion of ", treatment, " peaks in gene", regulation,
"-regulated by BRAF (p = ", pval, ")"
))
pval <- glm_log_pvalue(tdd_filt, has_col, output_folder)
p2 <- ggplot(tdd_fig, mapping = aes_string(
x = "group", y = has_res_col, fill = "group"
)) +
geom_bar(stat = "identity") +
ggtitle(paste0(
"Average proportion of ", regulation,
"-regulated gene by BRAF having at least a ", treatment,
" peak (p = ", pval, ")"
))
pdf(paste0(output_folder, "/barplot_pic_analysis", gene_type, "_",
peak_type, ".pdf"),
width = 12, height = 12
)
grid.arrange(p1, p2, ncol = 1) #nolint
dev.off()
}
#' function that calcultaed if genes are TDD (more in BRAF or DMSO condition)
#' and if they have peaks
#'
#' @param peak_file A file containing peaks in a bed format
#' @param mean_gene_count The minimum gene count of a gene to be analysed
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
get_tdd_pic_table <- function(peak_file, mean_gene_count = 10, ercc = F,
output_folder) {
peak_list <- read.table(peak_file)$V1
peak_table <- tibble(
gene = names(table(peak_list)),
peak = as.vector(table(peak_list))
)
tdd_full <- get_final_tdd_table(mean_gene_count, ercc)
tdd_full <- tdd_full[,
c("gene", grep("X...", colnames(tdd_full), value = T))]
tdd_braf_braf_down <- read_tsv(paste0(output_folder,
"/TDD_BRAF_BRAF_DOWN.txt"))
tdd_dmso_braf_up <- read_tsv(paste0(output_folder,
"/TDD_DMSO_BRAF_UP.txt"))
tdd_full <- tdd_full %>%
mutate(
TDD_BRAF = gene %in% tdd_braf_braf_down$gene,
TDD_DMSO = gene %in% tdd_dmso_braf_up$gene
) %>%
select(gene, TDD_BRAF, TDD_DMSO) #nolint
tdd_full <- tdd_full %>% left_join(peak_table) #nolint
tdd_full <- tdd_full %>% replace_na(list(peak = 0)) #nolint
return(tdd_full)
}
#' Create all peaks fig
#'
#' @param peak_file A file containing peaks
#' @param mean_gene_count The minimum mean count of gene for its tdd to
#' be computed
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param gene_type The selected group of gene on which we want to perform the
#' analysis
#' @param peak_type The type of peak inside the figure
build_peaks_fig <- function(peak_file, mean_gene_count = 10, ercc = F,
gene_type = "BRAF_DOWN",
output_folder = "./results/TDD_analysis",
peak_type = "RiboRNApeak") {
tdd_table <- get_tdd_pic_table(peak_file, mean_gene_count, ercc, output_folder)
tdd_table <- get_group_columns(tdd_table)
create_pic_figs(tdd_table, gene_type, output_folder, peak_type = peak_type)
}
create_figures(mean_gene_count = 0, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 10, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 20, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 50, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 0, ercc = T, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 10, ercc = T, output_folder = "./results/TDD_analysis")
produce_tdd_cor_figure()
dir <- "./data/gene_lists/"
files <- c("BRAF_vs_DMSO_RepMean.merged.bed", "DMSO_vs_BRAF_RepMean.merged.bed",
"BRAF_sorted_26-40.merged.bed", "DMSO_sorted_26-40.merged.bed")
gene_types <- c("BRAF_DOWN", "BRAF_UP", "BRAF_DOWN", "BRAF_UP")
peak_types <- c("RiboRNAPeak", "RiboRNAPeak", "RiboPeak", "RiboPeak")
for (i in seq_len(length(files))) {
peak_file <- paste0(dir, files[i])
build_peaks_fig(peak_file = peak_file, gene_type = gene_types[i],
peak_type = peak_types[i])
}