From 3989a38161761beda50d0fefb84f643d751317de Mon Sep 17 00:00:00 2001
From: Fontrodona Nicolas <nicolas.fontrodona@ens-lyon.fr>
Date: Tue, 21 Jun 2022 12:03:08 +0200
Subject: [PATCH] src/06_compute_tdd_index.R: script to compute TDD index

---
 src/06_compute_tdd_index.R | 219 +++++++++++++++++++++++++++++++++++++
 1 file changed, 219 insertions(+)
 create mode 100644 src/06_compute_tdd_index.R

diff --git a/src/06_compute_tdd_index.R b/src/06_compute_tdd_index.R
new file mode 100644
index 0000000..7b155cd
--- /dev/null
+++ b/src/06_compute_tdd_index.R
@@ -0,0 +1,219 @@
+#!/bin/Rscript
+
+# The goal of this script is to compute TDD index and TID index of every genes
+
+library(tidyverse)
+library(genefilter)
+library(gridExtra)
+library(DHARMa)
+library(emmeans)
+
+compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10) {
+    norm_df <- read_tsv(
+        paste0(
+            "./results/deseq2_normalisation/", treatment,
+            "_norm_stable_gene.txt"
+        )
+    )
+    if (mean_gene_count > 0) {
+        norm_df$rmean <- norm_df %>%
+            select(-gene) %>%
+            rowMeans()
+        norm_df <- norm_df %>% filter(rmean >= mean_gene_count)
+        norm_df <- norm_df %>% select(-rmean)
+    }
+    norm_df$rmin <- norm_df %>%
+        select(-gene) %>%
+        apply(1, min)
+    norm_df <- norm_df %>% filter(rmin > 0)
+    norm_df <- norm_df %>% select(-rmin)
+
+    rep <- norm_df %>%
+        select(-gene) %>%
+        colnames() %>%
+        str_extract("X...") %>%
+        unique()
+    temp <- norm_df
+    colnames(temp) <- str_replace(colnames(temp), "DMSO_0", "T_0")
+    for (r in rep) {
+        bc <- paste0(r, "_", treatment)
+        temp <- temp %>% mutate(!!paste0(r, "_", treatment, "_TDD") := (
+            (!!as.symbol(paste0(bc, "_TCHX_5")) - !!as.symbol(paste0(bc, "_T_5"))
+            ) / !!as.symbol(paste0(bc, "_T_0"))))
+    }
+
+    tdd_cols <- str_replace(rep, "(X...)", paste0("\\1_", treatment, "_TDD"))
+    temp <- temp %>%
+        mutate(
+            !!paste0("mean_", treatment, "_TDD") := rowMeans(.[, tdd_cols]),
+            !!paste0("sd_", treatment, "_TDD") := rowSds(.[, tdd_cols])
+        )
+    return(temp)
+}
+
+
+create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") {
+    col <- paste0(rep, "_", treatment, "_TDD")
+    tot <- tdd_tibble[, col] %>% nrow()
+    good <- sum(tdd_tibble[, col] >= 0 & tdd_tibble[, col] <= 1)
+    perc <- round((good / tot) * 100, 1)
+    tit <- paste("TDD density rep", rep, "( 0<=TDD<=1 :", good, "/", tot, "-", perc, "%)")
+    p <- ggplot(tdd_tibble, mapping = aes_string(x = col)) +
+        geom_density(bw = 0.2) +
+        coord_cartesian(xlim = c(-1, 2)) +
+        ggtitle(tit)
+    return(p)
+}
+
+create_density_figs <- function(tdd_tibble, treatment = "BRAF",
+                                output_folder = "./results/TDD_analysis") {
+    dir.create(output_folder)
+    rep <- tdd_tibble %>%
+        select(-gene) %>%
+        colnames() %>%
+        str_extract("X...") %>%
+        unique()
+    rep <- rep[!is.na(rep)]
+    my_plots <- lapply(rep, create_density_rep,
+        tdd_tibble = tdd_tibble,
+        treatment = treatment
+    )
+    my_plots[[length(my_plots) + 1]] <- create_density_rep(
+        "mean", tdd_tibble, treatment
+    )
+    pdf(paste0(output_folder, "/", treatment, "TDD_density.pdf"),
+        height = 8, width = 12
+    )
+    print(do.call("grid.arrange", c(my_plots, ncol = 2)))
+    dev.off()
+}
+
+
+get_tdd_table <- function(treatment, output_folder = "./results/TDD_analysis") {
+    tdd_table <- compute_tdd(treatment)
+    create_density_figs(tdd_table, treatment, output_folder)
+    write_tsv(
+        tdd_table,
+        paste0(output_folder, "/", treatment, "_TDD_table.txt")
+    )
+    return(tdd_table)
+}
+
+get_final_tdd_table <- function(output_folder = "./results/TDD_analysis") {
+    braf_tdd <- get_tdd_table("BRAF", output_folder)
+    dmso_tdd <- get_tdd_table("DMSO", output_folder)
+    braf_tdd <- braf_tdd[, grep("gene|mean|sd", colnames(braf_tdd))]
+    dmso_tdd <- dmso_tdd[, grep("gene|mean|sd", colnames(dmso_tdd))]
+    tdd_full <- dmso_tdd %>% inner_join(braf_tdd, by = "gene")
+    tdd_full <- tdd_full %>% mutate(diff_TDD = mean_BRAF_TDD - mean_DMSO_TDD)
+    write_tsv(
+        tdd_full,
+        paste0(output_folder, "/full_TDD_table.txt")
+    )
+    return(tdd_full)
+}
+
+
+compute_stat <- function(tdd_table, output_folder = "./results/TDD_analysis",
+                         name = "diag") {
+    mod <- lm(TDD ~ group, data = tdd_table)
+    output_folderd <- paste0(output_folder, "/diagnostics")
+    dir.create(output_folderd)
+    simulationOutput <- simulateResiduals(fittedModel = mod, n = 2000)
+    pdf(paste0(output_folderd, "/", name, "_diag.pdf"), width = 12, height = 8)
+        print(plot(simulationOutput))
+    dev.off()
+    comp <- emmeans(mod, ~ group)
+    tab <- as.data.frame(pairs(comp))
+    tab <- tab[, c('contrast', 'estimate', 'SE', 't.ratio', 'p.value')]
+    grp1_mean <- NULL
+    grp2_mean <- NULL
+    groups <- str_split(tab$contrast, " - ")
+    for (i in seq_len(length(groups))) {
+        grp1 <- groups[[i]][1]
+        grp2 <- groups[[i]][2]
+        mean_grp1 <- mean(tdd_table[tdd_table$group %in% grp1, ][["TDD"]])
+        mean_grp2 <- mean(tdd_table[tdd_table$group %in% grp2, ][["TDD"]])
+        grp1_mean[i] <- mean_grp1
+        grp2_mean[i] <- mean_grp2
+    }
+    tab$grp1_mean <- grp1_mean
+    tab$grp2_mean <- grp2_mean
+    colnames(tab) <- c("grp1_grp2", "Estimate", "Std.Error", 
+                        "t.ratio", "p.value", "grp1_mean", "grp2_mean")
+    write_tsv(tab, file = paste0(output_folder, "/", name, "_stat.txt"))
+}
+
+
+create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
+                                    target_columns, outfile, title = "groups",
+                                    output_folder = "./results/TDD_analysis") {
+    output_folder <- paste0(output_folder, "/boxplot_figures")
+    dir.create(output_folder)
+    if (length(gene_files) != length(gene_names) |
+        length(gene_files) != length(target_columns)) {
+        message("The inputs should have the same lengths")
+        return(1)
+    }
+    new_tdd <- data.frame(TDD = c(), group = c(), gene = c(), col = c())
+    for (i in seq_len(length(gene_files))) {
+        gene_selected <- read_tsv(gene_files[i], col_names = F)$X1 # nolint
+        tdd_val <- tdd_df[tdd_df$gene %in% gene_selected, ][[target_columns[i]]]
+        genes <- tdd_df[tdd_df$gene %in% gene_selected, ][["gene"]]
+        groups <- rep(gene_names[i], times = length(genes))
+        cols <- rep(target_columns[i], times = length(genes))
+        tmp_tdd <- data.frame(
+            TDD = tdd_val, group = groups,
+            gene = genes, col = cols
+        )
+        new_tdd <- rbind(new_tdd, tmp_tdd)
+    }
+    new_tdd <- new_tdd %>% distinct(gene, .keep_all = T) # nolint
+    compute_stat(new_tdd, output_folder, outfile)
+    p <- ggplot(new_tdd, mapping = aes(x = group, y = TDD)) +
+        geom_violin(mapping = aes(fill = group)) +
+        geom_boxplot(width = 0.05) +
+        ggtitle(paste("TDD index comparison between ", title))
+    pdf(paste0(output_folder, "/", outfile, ".pdf"), height = 8, width = 12)
+    print(p)
+    dev.off()
+    return(new_tdd)
+}
+
+
+tdd_full <- get_final_tdd_table(output_folder = "./results/TDD_analysis")
+dir <- "./data/gene_lists/"
+r <- create_tdd_fig_on_lists(
+    tdd_df = tdd_full,
+    gene_files = c(
+        paste0(dir, "mRNADOWN_riboRNApeakBRAF.txt"),
+        paste0(dir, "BRAF_DOWN_1.5_name.txt")
+    ),
+    gene_names = c("BRAF_down_riboRNApeak", "BRAF_down"),
+    target_columns = c("mean_BRAF_TDD", "mean_BRAF_TDD"),
+    outfile = "TDD_BRAF_down-BRAFpeak",
+    title = "down-reglated mRNA without and with ribosome peaks"
+)
+r <- create_tdd_fig_on_lists(
+    tdd_df = tdd_full,
+    gene_files = c(
+        paste0(dir, "mRNAUP_riboRNApeakDMSO.txt"),
+        paste0(dir, "BRAF_UP_1.5_name.txt")
+    ),
+    gene_names = c("BRAF_up_riboRNApeakDMSO", "BRAF_up"),
+    target_columns = c("mean_DMSO_TDD", "mean_DMSO_TDD"),
+    outfile = "TDD_BRAF_up-BRAFpeakDMSO",
+    title = "down-reglated mRNA without and with ribosome peaks"
+)
+dir <- "./results/stable_genes/"
+r <- create_tdd_fig_on_lists(
+    tdd_df = tdd_full,
+    gene_files = c(
+        paste0(dir, "braf_stable_genes.txt"),
+        paste0(dir, "dmso_stable_genes.txt")
+    ),
+    gene_names = c("BRAF_stable", "DMSO_stable"),
+    target_columns = c("mean_BRAF_TDD", "mean_DMSO_TDD"),
+    outfile = "BRAF_DMSO_stable",
+    title = "BRAF and DMSO stable genes"
+)
-- 
GitLab