diff --git a/src/06_compute_tdd_index.R b/src/06_compute_tdd_index.R index 48105c230f7968c30b1bf6d2a72a5aff41196e49..efad0c6d8a529db4a96cd87f6f09f2dba9074004 100644 --- a/src/06_compute_tdd_index.R +++ b/src/06_compute_tdd_index.R @@ -9,6 +9,9 @@ library(gridExtra) library(DHARMa) library(emmeans) + +source("src/common_functions.R") + #' compute the TDD of every genes for the experiment treatment #' #' @param treatment The treatment for which we want to compute the TDD @@ -99,10 +102,12 @@ create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") { #' @param tdd_tibble A tablle containing the TDD of each replicate #' @param treatment The treatment to which the tdd_tibblerefers #' @param output_folder The folder where the figure will be created +#' @param ercc a bolean indicating if the count data was normalized using ercc #' @import tidyverse #' @import gridExtra create_density_figs <- function(tdd_tibble, treatment = "BRAF", - output_folder = "./results/TDD_analysis") { + output_folder = "./results/TDD_analysis", + ercc = F) { dir.create(output_folder, showWarnings = F) rep <- tdd_tibble %>% select(-gene) %>% @@ -117,7 +122,11 @@ create_density_figs <- function(tdd_tibble, treatment = "BRAF", my_plots[[length(my_plots) + 1]] <- create_density_rep( "mean", tdd_tibble, treatment ) - pdf(paste0(output_folder, "/", treatment, "TDD_density.pdf"), + ercc_name <- "" + if (ercc == T) { + ercc_name = "_ercc" + } + pdf(paste0(output_folder, "/", treatment, ercc_name, "TDD_density.pdf"), height = 8, width = 12 ) print(do.call("grid.arrange", c(my_plots, ncol = 2))) @@ -143,7 +152,7 @@ get_tdd_table <- function(treatment, output_folder = "./results/TDD_analysis", ercc = F) { tdd_table <- compute_tdd(treatment, mean_gene_count, ercc) - create_density_figs(tdd_table, treatment, output_folder) + create_density_figs(tdd_table, treatment, output_folder, ercc) write_tsv( tdd_table, paste0(output_folder, "/", treatment, "_TDD_table.txt") @@ -356,7 +365,7 @@ create_figures <- function(mean_gene_count = 10, ercc = F, t_test_func <- function(row) { x1 <- as.numeric(row[1:3]) x2 <- as.numeric(row[4:6]) - test <- t.test(x1, x2) + test <- t.test(x1, x2, paired = T) return(test$p.value) # nolint } @@ -389,21 +398,6 @@ get_cor_value <- function(tdd_full, my_group = "BRAF_DOWN") { } -#' Add a column group to a table containing a gene column -#' -#' @param tdd_full a table of tdd with a gene columne -#' @return The table with a column indicating if the genes are up, down or not -#' regulated by BRAF -get_group_columns <- function(tdd_full) { - down_braf <- read.table(paste0(dir, "BRAF_DOWN_1.5_name.txt"))$V1 - up_braf <- read.table(paste0(dir, "BRAF_UP_1.5_name.txt"))$V1 - tdd_full$group <- rep("CTRL", times = nrow(tdd_full)) - tdd_full[tdd_full$gene %in% down_braf, "group"] <- "BRAF_DOWN" - tdd_full[tdd_full$gene %in% up_braf, "group"] <- "BRAF_UP" - return(tdd_full) -} - - #' Prepare a dataframe used to compute the correlation between BRAF and DMSO TDD #' #' @param mean_gene_count The minimum mean gene counts to compute their TDD @@ -439,6 +433,14 @@ prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F, filter(group != "CTRL") %>% arrange(kind) write_tsv(tdd_full, paste0(output_folder, "/TDD_correlation_table.txt")) # nolint + tdd_full %>% + filter(kind == "BRAF_DOWN_sig", mean_BRAF_TDD > mean_DMSO_TDD) %>% + select(gene) %>% + write_tsv(paste0(output_folder, "/TDD_BRAF_BRAF_DOWN.txt")) + tdd_full %>% + filter(kind == "BRAF_UP_sig", mean_DMSO_TDD > mean_BRAF_TDD ) %>% + select(gene) %>% + write_tsv(paste0(output_folder, "/TDD_DMSO_BRAF_UP.txt")) return(tdd_full) } @@ -479,11 +481,14 @@ produce_tdd_cor_figure <- function(mean_gene_count = 10, ercc = F, )) p_down <- ggplot(tdd_full %>% filter(group %in% c("BRAF_DOWN", "BRAF_DOWN_sig")), - mapping = aes(x = mean_DMSO_TDD, y = mean_BRAF_TDD, color = kind) + mapping = aes(x = mean_DMSO_TDD, y = mean_BRAF_TDD, color = kind, label = gene) ) + geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) + scale_fill_manual(values = c("black", "red")) + coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + + geom_label_repel(data = tdd_full %>% + filter(kind == "BRAF_DOWN_sig"), size = 1, + mapping = aes(mean_DMSO_TDD, mean_BRAF_TDD, label = gene)) + geom_abline( intercept = downv$intercept, slope = downv$slope, color = "#4169e1" @@ -502,6 +507,9 @@ produce_tdd_cor_figure <- function(mean_gene_count = 10, ercc = F, geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) + scale_fill_manual(values = c("black", "red")) + coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + + geom_label_repel(data = tdd_full %>% + filter(kind == "BRAF_UP_sig"), size = 1, + mapping = aes(mean_DMSO_TDD, mean_BRAF_TDD, label = gene)) + geom_abline( intercept = downv$intercept, slope = downv$slope, color = "#CD5C5C" @@ -637,7 +645,8 @@ create_pic_figs <- function(tdd_full, gene_type = "BRAF_DOWN", #' @param mean_gene_count The minimum gene count of a gene to be analysed #' @param ercc Indicate if the normalisation with ercc should be used to compute #' the TDD index -get_tdd_pic_table <- function(peak_file, mean_gene_count = 10, ercc = F) { +get_tdd_pic_table <- function(peak_file, mean_gene_count = 10, ercc = F, + output_folder) { peak_list <- read.table(peak_file)$V1 peak_table <- tibble( gene = names(table(peak_list)), @@ -646,14 +655,14 @@ get_tdd_pic_table <- function(peak_file, mean_gene_count = 10, ercc = F) { tdd_full <- get_final_tdd_table(mean_gene_count, ercc) tdd_full <- tdd_full[, c("gene", grep("X...", colnames(tdd_full), value = T))] + tdd_braf_braf_down <- read_tsv(paste0(output_folder, + "/TDD_BRAF_BRAF_DOWN.txt")) + tdd_dmso_braf_up <- read_tsv(paste0(output_folder, + "/TDD_DMSO_BRAF_UP.txt")) tdd_full <- tdd_full %>% mutate( - TDD_BRAF = X276_BRAF_TDD - X276_DMSO_TDD > 0.2 & - X277_BRAF_TDD - X277_DMSO_TDD > 0.2 & - X278_BRAF_TDD - X278_DMSO_TDD > 0.2, - TDD_DMSO = X276_DMSO_TDD - X276_BRAF_TDD > 0.2 & - X277_DMSO_TDD - X277_BRAF_TDD > 0.2 & - X278_DMSO_TDD - X278_BRAF_TDD > 0.2 + TDD_BRAF = gene %in% tdd_braf_braf_down$gene, + TDD_DMSO = gene %in% tdd_dmso_braf_up$gene ) %>% select(gene, TDD_BRAF, TDD_DMSO) #nolint tdd_full <- tdd_full %>% left_join(peak_table) #nolint @@ -675,7 +684,7 @@ build_peaks_fig <- function(peak_file, mean_gene_count = 10, ercc = F, gene_type = "BRAF_DOWN", output_folder = "./results/TDD_analysis", peak_type = "RiboRNApeak") { - tdd_table <- get_tdd_pic_table(peak_file, mean_gene_count, ercc) + tdd_table <- get_tdd_pic_table(peak_file, mean_gene_count, ercc, output_folder) tdd_table <- get_group_columns(tdd_table) create_pic_figs(tdd_table, gene_type, output_folder, peak_type = peak_type) } @@ -685,8 +694,8 @@ create_figures(mean_gene_count = 10, ercc = F, output_folder = "./results/TDD_an create_figures(mean_gene_count = 20, ercc = F, output_folder = "./results/TDD_analysis") create_figures(mean_gene_count = 50, ercc = F, output_folder = "./results/TDD_analysis") create_figures(mean_gene_count = 0, ercc = T, output_folder = "./results/TDD_analysis") +create_figures(mean_gene_count = 10, ercc = T, output_folder = "./results/TDD_analysis") produce_tdd_cor_figure() -build_peaks_fig() dir <- "./data/gene_lists/" files <- c("BRAF_vs_DMSO_RepMean.merged.bed", "DMSO_vs_BRAF_RepMean.merged.bed", "BRAF_sorted_26-40.merged.bed", "DMSO_sorted_26-40.merged.bed") diff --git a/src/common_functions.R b/src/common_functions.R new file mode 100644 index 0000000000000000000000000000000000000000..ceb36ca87028365c1f4e0a086667e9f9a97426c1 --- /dev/null +++ b/src/common_functions.R @@ -0,0 +1,20 @@ +#!/bin/Rscript + + +royalblue <- "#4169e1" +indianred <- "#CD5C5C" + + +#' Add a column group to a table containing a gene column +#' +#' @param tdd_full a table of tdd with a gene columne +#' @return The table with a column indicating if the genes are up, down or not +#' regulated by BRAF +get_group_columns <- function(tdd_full, dir = "./data/gene_lists/") { + down_braf <- read.table(paste0(dir, "BRAF_DOWN_1.5_name.txt"))$V1 + up_braf <- read.table(paste0(dir, "BRAF_UP_1.5_name.txt"))$V1 + tdd_full$group <- rep("CTRL", times = nrow(tdd_full)) + tdd_full[tdd_full$gene %in% down_braf, "group"] <- "BRAF_DOWN" + tdd_full[tdd_full$gene %in% up_braf, "group"] <- "BRAF_UP" + return(tdd_full) +}