From 3989a38161761beda50d0fefb84f643d751317de Mon Sep 17 00:00:00 2001 From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr> Date: Tue, 21 Jun 2022 12:03:08 +0200 Subject: [PATCH] src/06_compute_tdd_index.R: script to compute TDD index --- src/06_compute_tdd_index.R | 219 +++++++++++++++++++++++++++++++++++++ 1 file changed, 219 insertions(+) create mode 100644 src/06_compute_tdd_index.R diff --git a/src/06_compute_tdd_index.R b/src/06_compute_tdd_index.R new file mode 100644 index 0000000..7b155cd --- /dev/null +++ b/src/06_compute_tdd_index.R @@ -0,0 +1,219 @@ +#!/bin/Rscript + +# The goal of this script is to compute TDD index and TID index of every genes + +library(tidyverse) +library(genefilter) +library(gridExtra) +library(DHARMa) +library(emmeans) + +compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10) { + norm_df <- read_tsv( + paste0( + "./results/deseq2_normalisation/", treatment, + "_norm_stable_gene.txt" + ) + ) + if (mean_gene_count > 0) { + norm_df$rmean <- norm_df %>% + select(-gene) %>% + rowMeans() + norm_df <- norm_df %>% filter(rmean >= mean_gene_count) + norm_df <- norm_df %>% select(-rmean) + } + norm_df$rmin <- norm_df %>% + select(-gene) %>% + apply(1, min) + norm_df <- norm_df %>% filter(rmin > 0) + norm_df <- norm_df %>% select(-rmin) + + rep <- norm_df %>% + select(-gene) %>% + colnames() %>% + str_extract("X...") %>% + unique() + temp <- norm_df + colnames(temp) <- str_replace(colnames(temp), "DMSO_0", "T_0") + for (r in rep) { + bc <- paste0(r, "_", treatment) + temp <- temp %>% mutate(!!paste0(r, "_", treatment, "_TDD") := ( + (!!as.symbol(paste0(bc, "_TCHX_5")) - !!as.symbol(paste0(bc, "_T_5")) + ) / !!as.symbol(paste0(bc, "_T_0")))) + } + + tdd_cols <- str_replace(rep, "(X...)", paste0("\\1_", treatment, "_TDD")) + temp <- temp %>% + mutate( + !!paste0("mean_", treatment, "_TDD") := rowMeans(.[, tdd_cols]), + !!paste0("sd_", treatment, "_TDD") := rowSds(.[, tdd_cols]) + ) + return(temp) +} + + +create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") { + col <- paste0(rep, "_", treatment, "_TDD") + tot <- tdd_tibble[, col] %>% nrow() + good <- sum(tdd_tibble[, col] >= 0 & tdd_tibble[, col] <= 1) + perc <- round((good / tot) * 100, 1) + tit <- paste("TDD density rep", rep, "( 0<=TDD<=1 :", good, "/", tot, "-", perc, "%)") + p <- ggplot(tdd_tibble, mapping = aes_string(x = col)) + + geom_density(bw = 0.2) + + coord_cartesian(xlim = c(-1, 2)) + + ggtitle(tit) + return(p) +} + +create_density_figs <- function(tdd_tibble, treatment = "BRAF", + output_folder = "./results/TDD_analysis") { + dir.create(output_folder) + rep <- tdd_tibble %>% + select(-gene) %>% + colnames() %>% + str_extract("X...") %>% + unique() + rep <- rep[!is.na(rep)] + my_plots <- lapply(rep, create_density_rep, + tdd_tibble = tdd_tibble, + treatment = treatment + ) + my_plots[[length(my_plots) + 1]] <- create_density_rep( + "mean", tdd_tibble, treatment + ) + pdf(paste0(output_folder, "/", treatment, "TDD_density.pdf"), + height = 8, width = 12 + ) + print(do.call("grid.arrange", c(my_plots, ncol = 2))) + dev.off() +} + + +get_tdd_table <- function(treatment, output_folder = "./results/TDD_analysis") { + tdd_table <- compute_tdd(treatment) + create_density_figs(tdd_table, treatment, output_folder) + write_tsv( + tdd_table, + paste0(output_folder, "/", treatment, "_TDD_table.txt") + ) + return(tdd_table) +} + +get_final_tdd_table <- function(output_folder = "./results/TDD_analysis") { + braf_tdd <- get_tdd_table("BRAF", output_folder) + dmso_tdd <- get_tdd_table("DMSO", output_folder) + braf_tdd <- braf_tdd[, grep("gene|mean|sd", colnames(braf_tdd))] + dmso_tdd <- dmso_tdd[, grep("gene|mean|sd", colnames(dmso_tdd))] + tdd_full <- dmso_tdd %>% inner_join(braf_tdd, by = "gene") + tdd_full <- tdd_full %>% mutate(diff_TDD = mean_BRAF_TDD - mean_DMSO_TDD) + write_tsv( + tdd_full, + paste0(output_folder, "/full_TDD_table.txt") + ) + return(tdd_full) +} + + +compute_stat <- function(tdd_table, output_folder = "./results/TDD_analysis", + name = "diag") { + mod <- lm(TDD ~ group, data = tdd_table) + output_folderd <- paste0(output_folder, "/diagnostics") + dir.create(output_folderd) + simulationOutput <- simulateResiduals(fittedModel = mod, n = 2000) + pdf(paste0(output_folderd, "/", name, "_diag.pdf"), width = 12, height = 8) + print(plot(simulationOutput)) + dev.off() + comp <- emmeans(mod, ~ group) + tab <- as.data.frame(pairs(comp)) + tab <- tab[, c('contrast', 'estimate', 'SE', 't.ratio', 'p.value')] + grp1_mean <- NULL + grp2_mean <- NULL + groups <- str_split(tab$contrast, " - ") + for (i in seq_len(length(groups))) { + grp1 <- groups[[i]][1] + grp2 <- groups[[i]][2] + mean_grp1 <- mean(tdd_table[tdd_table$group %in% grp1, ][["TDD"]]) + mean_grp2 <- mean(tdd_table[tdd_table$group %in% grp2, ][["TDD"]]) + grp1_mean[i] <- mean_grp1 + grp2_mean[i] <- mean_grp2 + } + tab$grp1_mean <- grp1_mean + tab$grp2_mean <- grp2_mean + colnames(tab) <- c("grp1_grp2", "Estimate", "Std.Error", + "t.ratio", "p.value", "grp1_mean", "grp2_mean") + write_tsv(tab, file = paste0(output_folder, "/", name, "_stat.txt")) +} + + +create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names, + target_columns, outfile, title = "groups", + output_folder = "./results/TDD_analysis") { + output_folder <- paste0(output_folder, "/boxplot_figures") + dir.create(output_folder) + if (length(gene_files) != length(gene_names) | + length(gene_files) != length(target_columns)) { + message("The inputs should have the same lengths") + return(1) + } + new_tdd <- data.frame(TDD = c(), group = c(), gene = c(), col = c()) + for (i in seq_len(length(gene_files))) { + gene_selected <- read_tsv(gene_files[i], col_names = F)$X1 # nolint + tdd_val <- tdd_df[tdd_df$gene %in% gene_selected, ][[target_columns[i]]] + genes <- tdd_df[tdd_df$gene %in% gene_selected, ][["gene"]] + groups <- rep(gene_names[i], times = length(genes)) + cols <- rep(target_columns[i], times = length(genes)) + tmp_tdd <- data.frame( + TDD = tdd_val, group = groups, + gene = genes, col = cols + ) + new_tdd <- rbind(new_tdd, tmp_tdd) + } + new_tdd <- new_tdd %>% distinct(gene, .keep_all = T) # nolint + compute_stat(new_tdd, output_folder, outfile) + p <- ggplot(new_tdd, mapping = aes(x = group, y = TDD)) + + geom_violin(mapping = aes(fill = group)) + + geom_boxplot(width = 0.05) + + ggtitle(paste("TDD index comparison between ", title)) + pdf(paste0(output_folder, "/", outfile, ".pdf"), height = 8, width = 12) + print(p) + dev.off() + return(new_tdd) +} + + +tdd_full <- get_final_tdd_table(output_folder = "./results/TDD_analysis") +dir <- "./data/gene_lists/" +r <- create_tdd_fig_on_lists( + tdd_df = tdd_full, + gene_files = c( + paste0(dir, "mRNADOWN_riboRNApeakBRAF.txt"), + paste0(dir, "BRAF_DOWN_1.5_name.txt") + ), + gene_names = c("BRAF_down_riboRNApeak", "BRAF_down"), + target_columns = c("mean_BRAF_TDD", "mean_BRAF_TDD"), + outfile = "TDD_BRAF_down-BRAFpeak", + title = "down-reglated mRNA without and with ribosome peaks" +) +r <- create_tdd_fig_on_lists( + tdd_df = tdd_full, + gene_files = c( + paste0(dir, "mRNAUP_riboRNApeakDMSO.txt"), + paste0(dir, "BRAF_UP_1.5_name.txt") + ), + gene_names = c("BRAF_up_riboRNApeakDMSO", "BRAF_up"), + target_columns = c("mean_DMSO_TDD", "mean_DMSO_TDD"), + outfile = "TDD_BRAF_up-BRAFpeakDMSO", + title = "down-reglated mRNA without and with ribosome peaks" +) +dir <- "./results/stable_genes/" +r <- create_tdd_fig_on_lists( + tdd_df = tdd_full, + gene_files = c( + paste0(dir, "braf_stable_genes.txt"), + paste0(dir, "dmso_stable_genes.txt") + ), + gene_names = c("BRAF_stable", "DMSO_stable"), + target_columns = c("mean_BRAF_TDD", "mean_DMSO_TDD"), + outfile = "BRAF_DMSO_stable", + title = "BRAF and DMSO stable genes" +) -- GitLab