Skip to content
Snippets Groups Projects
Select Git revision
  • fced9078b6c6125058252a7de02d3e56983e9a33
  • master default protected
2 results

06_compute_tdd_index.R

Blame
  • 06_compute_tdd_index.R 27.02 KiB
    #!/bin/Rscript
    
    # The goal of this script is to compute TDD index and TID index of every genes
    
    library(MASS)
    library(tidyverse)
    library(genefilter)
    library(gridExtra)
    library(DHARMa)
    library(emmeans)
    
    
    source("src/common_functions.R")
    
    #' compute the TDD of every genes for the experiment treatment
    #'
    #' @param treatment The treatment for which we want to compute the TDD
    #' @param mean_gene_count The minimum mean count a gene must have to compute
    #' its TDDindex
    #' @param ercc A boolean indicating whether to load errc noramlized expression
    #' or not.
    #' @return The count tablme with the tdd for each replicate and mean TDD and sd
    #' @import tidyverse
    #' @import genefilter
    compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10, ercc = F) {
        ercc_name <- ""
        if (ercc) {
            ercc_name <- "_ercc"
        }
        norm_df <- read_tsv(
            paste0(
                "./results/deseq2_normalisation/", treatment, ercc_name,
                "_norm_stable_gene.txt"
            )
        )
        if (mean_gene_count > 0) {
            norm_df$rmean <- norm_df %>%
                select(-gene) %>%
                rowMeans()
            norm_df <- norm_df %>% filter(rmean >= mean_gene_count) # nolint
            norm_df <- norm_df %>% select(-rmean) # nolint
        }
        norm_df$rmin <- norm_df %>%
            select(-gene) %>%
            apply(1, min)
        norm_df <- norm_df %>% filter(rmin > 0) # nolint
        norm_df <- norm_df %>% select(-rmin) # nolint
    
        rep <- norm_df %>%
            select(-gene) %>%
            colnames() %>%
            str_extract("X...") %>%
            unique()
        temp <- norm_df
        colnames(temp) <- str_replace(colnames(temp), "DMSO_0", "T_0") # nolint
        for (r in rep) {
            bc <- paste0(r, "_", treatment)
            temp <- temp %>% mutate(!!paste0(r, "_", treatment, "_TDD") := (
                (!!as.symbol(paste0(bc, "_TCHX_5")) -
                    !!as.symbol(paste0(bc, "_T_5")) # nolint
                ) / !!as.symbol(paste0(bc, "_T_0"))))
        }
    
        tdd_cols <- str_replace(
            rep, "(X...)",
            paste0("\\1_", treatment, "_TDD")
        ) # nolint
        temp <- temp %>%
            mutate(
                !!paste0("mean_", treatment, "_TDD") := rowMeans(.[, tdd_cols]),
                !!paste0("sd_", treatment, "_TDD") := rowSds(.[, tdd_cols])
            )
        return(temp)
    }
    
    
    #' Create a density figure of TDD index for a given replicate
    #'
    #' @param rep The replicate name
    #' @param tdd_tible The tablle containing TDD
    #' @param treatment The treatment used to compute the tDD
    #' @import tidyverse
    create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") {
        col <- paste0(rep, "_", treatment, "_TDD")
        tot <- tdd_tibble[, col] %>% nrow() # nolint
        good <- sum(tdd_tibble[, col] >= 0 & tdd_tibble[, col] <= 1)
        perc <- round((good / tot) * 100, 1)
        tit <- paste(
            "TDD density rep", rep, "( 0<=TDD<=1 :",
            good, "/", tot, "-", perc, "%)"
        )
        p <- ggplot(tdd_tibble, mapping = aes_string(x = col)) +
            geom_freqpoly(binwidth = 0.11) +
            coord_cartesian(xlim = c(-1, 2)) +
            ggtitle(tit)
        return(p)
    }
    
    
    #' Create density figure for every replicate and the mean of replicate
    #'
    #' @param tdd_tibble A tablle containing the TDD of each replicate
    #' @param treatment The treatment to which the tdd_tibblerefers
    #' @param output_folder The folder where the figure will be created
    #' @param ercc a bolean indicating if the count data was normalized using ercc
    #' @import tidyverse
    #' @import gridExtra
    create_density_figs <- function(tdd_tibble, treatment = "BRAF",
                                    output_folder = "./results/TDD_analysis",
                                    ercc = F) {
        dir.create(output_folder, showWarnings = F)
        rep <- tdd_tibble %>%
            select(-gene) %>%
            colnames() %>%
            str_extract("X...") %>%
            unique()
        rep <- rep[!is.na(rep)]
        my_plots <- lapply(rep, create_density_rep,
            tdd_tibble = tdd_tibble,
            treatment = treatment
        )
        my_plots[[length(my_plots) + 1]] <- create_density_rep(
            "mean", tdd_tibble, treatment
        )
        ercc_name <- ""
        if (ercc == T) {
            ercc_name = "_ercc"
        }
        pdf(paste0(output_folder, "/", treatment, ercc_name, "TDD_density.pdf"),
            height = 8, width = 12
        )
        print(do.call("grid.arrange", c(my_plots, ncol = 2)))
        dev.off()
    }
    
    
    #' Create the TDD tables and density figures with a treatment given in input
    #'
    #' @details Compute the tdd table for the sample treated with `treatment`,
    #' Then creates the density figures of TDD for each replicates and saves the
    #' TDD table
    #'
    #' @param treatment The treatment of samples for which we want to compute
    #'  the TDD
    #' @param output_folder folder where the TDD table and figure will be created
    #' @param ercc A boolean indicating whether to load errc noramlized expression
    #' or not.
    #' @param mean_gene_count The minimum gene count to compute TDD for them
    #' @import tidyverse
    get_tdd_table <- function(treatment,
                              mean_gene_count = 10,
                              output_folder = "./results/TDD_analysis",
                              ercc = F) {
        tdd_table <- compute_tdd(treatment, mean_gene_count, ercc)
        create_density_figs(tdd_table, treatment, output_folder, ercc)
        write_tsv(
            tdd_table,
            paste0(output_folder, "/", treatment, "_TDD_table.txt")
        )
        return(tdd_table)
    }
    
    
    #' Create the TDD table for DMSO and BRAF treatment
    #'
    #' @details Compute the tdd table for the sample treated with `DMSO` and `BRAF`,
    #' Then creates the density figures of TDD for each replicates and saves the
    #' TDD table
    #'
    #' @param output_folder Folder where the figures and TDD tables  will be created
    #' @param ercc A boolean indicating whether to load errc noramlized expression
    #' or not.
    #' @param mean_gene_count The minimum gene count to compute TDD for them
    #' @import tidyverse
    get_final_tdd_table <- function(mean_gene_count = 10,
                                    ercc = F,
                                    output_folder = "./results/TDD_analysis") {
        braf_tdd <- get_tdd_table("BRAF", mean_gene_count, output_folder, ercc)
        dmso_tdd <- get_tdd_table("DMSO", mean_gene_count, output_folder, ercc)
        braf_tdd <- braf_tdd[, grep("gene|mean|TDD|sd", colnames(braf_tdd))]
        dmso_tdd <- dmso_tdd[, grep("gene|mean|TDD|sd", colnames(dmso_tdd))]
        tdd_full <- dmso_tdd %>% inner_join(braf_tdd, by = "gene") # nolint
        tdd_full <- tdd_full %>%
            mutate(diff_TDD = mean_BRAF_TDD - mean_DMSO_TDD) # nolint
        ercc_name <- ""
        if (ercc) {
            ercc_name <- "_erccnorm"
        }
        write_tsv(
            tdd_full,
            paste0(
                output_folder, "/", ercc_name, "_", mean_gene_count,
                "_full_TDD_table.txt"
            )
        )
        return(tdd_full)
    }
    
    
    #' Compute lm statistical analysis
    #'
    #' @details Compute statistical analysis with a linar model based on TDD of
    #' different groups of genes
    #'
    #' @param tdd_tabble a table containing TDD values of genes belowing to
    #' different groups. It must have the following columns: TDD, groups
    #' @param output_folder folder where the stat table will be ccreated
    #' @param  name The name of the stat table
    #' @import tidyverse
    #' @import DHARMa
    #' @import emmeans
    compute_stat <- function(tdd_table, output_folder = "./results/TDD_analysis",
                             name = "diag") {
        mod <- lm(TDD ~ group, data = tdd_table)
        output_folderd <- paste0(output_folder, "/diagnostics")
        dir.create(output_folderd, showWarnings = F)
        simulationOutput <- simulateResiduals(fittedModel = mod, n = 2000) # nolint
        pdf(paste0(output_folderd, "/", name, "_diag.pdf"), width = 12, height = 8)
        print(plot(simulationOutput))
        dev.off()
        comp <- emmeans(mod, ~group) # nolint
        tab <- as.data.frame(pairs(comp))
        tab <- tab[, c("contrast", "estimate", "SE", "t.ratio", "p.value")]
        grp1_mean <- NULL
        grp2_mean <- NULL
        groups <- str_split(tab$contrast, " - ") # nolint
        for (i in seq_len(length(groups))) {
            grp1 <- groups[[i]][1]
            grp2 <- groups[[i]][2]
            mean_grp1 <- mean(tdd_table[tdd_table$group %in% grp1, ][["TDD"]])
            mean_grp2 <- mean(tdd_table[tdd_table$group %in% grp2, ][["TDD"]])
            grp1_mean[i] <- mean_grp1
            grp2_mean[i] <- mean_grp2
        }
        tab$grp1_mean <- grp1_mean
        tab$grp2_mean <- grp2_mean
        colnames(tab) <- c(
            "grp1_grp2", "Estimate", "Std.Error",
            "t.ratio", "p.value", "grp1_mean", "grp2_mean"
        )
        write_tsv(tab, file = paste0(output_folder, "/", name, "_stat.txt")) # nolint
    }
    
    
    #' Create a boxplot figure to compare TDD values of different sets of genes
    #'
    #' @param tdd_df A TDD table for every treatment of interest
    #' @param gene_files A list of files containing gene names
    #' @param gene_names A list of names for each gene files
    #' @param target_columns A list of string corresponding to the column to
    #' to which we need to get TDD values for those genes (i.e The treatment of
    #' interest)
    #' @param outfile The name of the figure (and stats table) to produce
    #' @param title The end of the figure title
    #' @param output_folder The figure to create
    create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
                                        target_columns, outfile, title = "groups",
                                        output_folder = "./results/TDD_analysis") {
        output_folder <- paste0(output_folder, "/boxplot_figures")
        dir.create(output_folder, showWarnings = F)
        if (length(gene_files) != length(gene_names) |
            length(gene_files) != length(target_columns)) {
            message("The inputs should have the same lengths")
            return(1)
        }
        new_tdd <- data.frame(TDD = c(), group = c(), gene = c(), col = c())
        for (i in seq_len(length(gene_files))) {
            gene_selected <- read_tsv(gene_files[i], col_names = F)$X1 # nolint
            tdd_val <- tdd_df[tdd_df$gene %in% gene_selected, ][[target_columns[i]]]
            genes <- tdd_df[tdd_df$gene %in% gene_selected, ][["gene"]]
            groups <- rep(gene_names[i], times = length(genes))
            cols <- rep(target_columns[i], times = length(genes))
            tmp_tdd <- data.frame(
                TDD = tdd_val, group = groups,
                gene = genes, col = cols
            )
            new_tdd <- rbind(new_tdd, tmp_tdd)
        }
        if (!grepl("ercc", outfile, fixed = T)) {
            new_tdd <- new_tdd %>% distinct(gene, .keep_all = T) # nolint
        }
        compute_stat(new_tdd, output_folder, outfile)
        p <- ggplot(new_tdd, mapping = aes(x = group, y = TDD)) +
            geom_violin(mapping = aes(fill = group)) +
            geom_boxplot(width = 0.05) +
            ggtitle(paste("TDD index comparison between ", title))
        pdf(paste0(output_folder, "/", outfile, ".pdf"), height = 8, width = 12)
        print(p)
        dev.off()
        return(new_tdd)
    }
    
    #' Create a boxplot figure to compare TDD values of 3 different sets of genes
    #'
    #' @details Create 3 boxplot figures:
    #' 1. Create a boxplot displaying the TDD of down-regulated genes by BRAF with
    #' a BRAF peak
    #' 2. Create a boxplot displaying the TDD of up-regulated genes by BRAF with
    #' a DMSO peak
    #' 3. Create a boxplot displaying the TDD of stable genes in DMSO and BRAF
    #' conditions
    #'
    #' @param mean_gene_count The minimum gene count to compute TDD for them
    #' @param ercc a bolean indicating if we want to use ercc or stable gene
    #' normalisation
    #' @param output_folder Folder where the figures will be created
    create_figures <- function(mean_gene_count = 10, ercc = F,
                               output_folder = "./results/TDD_analysis") {
        tdd_full <- get_final_tdd_table(mean_gene_count, ercc, output_folder)
        dir <- "./data/gene_lists/"
        ercc_name <- ""
        if (ercc) {
            ercc_name <- "_erccnorm"
        }
        gn <- paste0("_geneMean>", mean_gene_count)
        r <- create_tdd_fig_on_lists(
            tdd_df = tdd_full,
            gene_files = c(
                paste0(dir, "mRNADOWN_riboRNApeakBRAF.txt"),
                paste0(dir, "BRAF_DOWN_1.5_name.txt")
            ),
            gene_names = c("BRAF_down_riboRNApeak", "BRAF_down"),
            target_columns = c("mean_BRAF_TDD", "mean_BRAF_TDD"),
            outfile = paste0("TDD", ercc_name, "_BRAF_down-BRAFpeak", gn),
            title = "down-reglated mRNA without and with ribosome peaks"
        )
        r <- create_tdd_fig_on_lists(
            tdd_df = tdd_full,
            gene_files = c(
                paste0(dir, "mRNAUP_riboRNApeakDMSO.txt"),
                paste0(dir, "BRAF_UP_1.5_name.txt")
            ),
            gene_names = c("BRAF_up_riboRNApeakDMSO", "BRAF_up"),
            target_columns = c("mean_DMSO_TDD", "mean_DMSO_TDD"),
            outfile = paste0("TDD", ercc_name, "_BRAF_up_BRAFpeakDMSO", gn),
            title = "down-reglated mRNA without and with ribosome peaks"
        )
        dir <- "./results/stable_genes/"
        if (!ercc) {
            stable_files <- c(
                paste0(dir, "braf_stable_genes.txt"),
                paste0(dir, "dmso_stable_genes.txt")
            )
        } else {
            stable_files <- c(
                paste0(dir, "ercc.txt"),
                paste0(dir, "ercc.txt")
            )
        }
        r <- create_tdd_fig_on_lists(
            tdd_df = tdd_full,
            gene_files = stable_files,
            gene_names = c("BRAF_stable", "DMSO_stable"),
            target_columns = c("mean_BRAF_TDD", "mean_DMSO_TDD"),
            outfile = paste0("BRAF_DMSO_stable", ercc_name, gn),
            title = "BRAF and DMSO stable genes"
        )
    }
    
    
    #' perform a t.test on TDD values replicate
    #'
    #' @param row a row of a TDD dataframe
    #' @return The t.test pvalue
    t_test_func <- function(row) {
        x1 <- as.numeric(row[1:3])
        x2 <- as.numeric(row[4:6])
        test <- t.test(x1, x2, paired = T)
        return(test$p.value) # nolint
    }
    
    
    #' Get correlation value between BRAF and DMSO TDD of genes
    #'
    #' @details Get, the correlation values, the slope and the intercept of the
    #' best linear model between the TDD of a given set of
    #' genes in BRAF conditions or in DMSO conditions
    #'
    #' @param tdd_full A table with a column with the mean TDD of DMSO
    #' and BRAF condition + a group column indicating if the gene is up
    #' of down regulated by BRAF
    #' @param my_group The group of gene of interest to select
    #' @return a list with the correlation value of TDD, its intercept and
    #' slope
    get_cor_value <- function(tdd_full, my_group = "BRAF_DOWN") {
        tdd <- tdd_full %>% filter(group == my_group) ## nolint
        c <- cor(
            tdd %>% filter(group == my_group) %>% # nolint
                select(mean_DMSO_TDD) %>% unlist(), # nolint
            tdd %>%
                filter(group == my_group) %>% # nolint
                select(mean_BRAF_TDD) %>%
                unlist() # nolint
        )
        res <- coef(lm(mean_BRAF_TDD ~ mean_DMSO_TDD, tdd %>%
            filter(group == my_group)))
        return(list(cor = round(c, 3), intercept = res[1], slope = res[2]))
    }
    
    
    #' Prepare a dataframe used to compute the correlation between BRAF and DMSO TDD
    #'
    #' @param mean_gene_count The minimum mean gene counts to compute their TDD
    #' @param ercc Indicate if the normalisation with ercc should be used to compute
    #' the TDD index
    #' @param output_folder The folder where the results will be created
    #' @return the dataframe ready to be used for correlation figures
    prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F,
                                   output_folder = "./results/TDD_analysis") {
        tdd_full <- get_final_tdd_table(mean_gene_count = 10, ercc = F)
        tdd_full <- tdd_full[
            ,
            c("gene", grep("X...|mean", colnames(tdd_full), value = T))
        ]
        tdd_full <- tdd_full %>%
            mutate(p_value = apply(
                tdd_full %>% select(starts_with("X")), 1,
                function(x) {
                    t_test_func(x)
                }
            )) # nolint
        tdd_full <- tdd_full %>% arrange(-p_value) # nolint
        dir <- "./data/gene_lists/"
        down_braf <- read.table(paste0(dir, "BRAF_DOWN_1.5_name.txt"))$V1
        up_braf <- read.table(paste0(dir, "BRAF_UP_1.5_name.txt"))$V1
        tdd_full$group <- rep("CTRL", times = nrow(tdd_full))
        tdd_full[tdd_full$gene %in% down_braf, "group"] <- "BRAF_DOWN"
        tdd_full[tdd_full$gene %in% up_braf, "group"] <- "BRAF_UP"
        tdd_full$sig <- rep("", times = nrow(tdd_full))
        tdd_full[tdd_full$p_value <= 0.05, "sig"] <- "_sig"
        tdd_full <- tdd_full %>% mutate(kind = paste0(group, sig)) # nolint
        tdd_full <- tdd_full %>%
            filter(group != "CTRL") %>%
            arrange(kind)
        write_tsv(tdd_full, paste0(output_folder, "/TDD_correlation_table.txt")) # nolint
        tdd_full %>% 
            filter(kind == "BRAF_DOWN_sig", mean_BRAF_TDD > mean_DMSO_TDD) %>%
            select(gene) %>% 
            write_tsv(paste0(output_folder, "/TDD_BRAF_BRAF_DOWN.txt"))
        tdd_full %>% 
            filter(kind == "BRAF_UP_sig", mean_DMSO_TDD > mean_BRAF_TDD ) %>%
            select(gene) %>% 
            write_tsv(paste0(output_folder, "/TDD_DMSO_BRAF_UP.txt"))
        return(tdd_full)
    }
    
    
    #' Produce the TDD correlation figures
    #'
    #' @param mean_gene_count The minimum mean gene counts to compute their TDD
    #' @param ercc Indicate if the normalisation with ercc should be used to compute
    #' the TDD index
    #' @param output_folder The folder where the results will be created
    produce_tdd_cor_figure <- function(mean_gene_count = 10, ercc = F,
                                       output_folder = "./results/TDD_analysis") {
        tdd_full <- prepare_df_for_cor(mean_gene_count, ercc, output_folder)
        downv <- get_cor_value(tdd_full, my_group = "BRAF_DOWN")
        upv <- get_cor_value(tdd_full, my_group = "BRAF_UP")
        p <- ggplot(tdd_full, mapping = aes(
            x = mean_DMSO_TDD,
            y = mean_BRAF_TDD, color = kind
        )) +
            geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
            scale_fill_manual(values = c(
                "#4169e1", "darkblue",
                "#CD5C5C", "darkred"
            )) +
            coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
            geom_abline(
                intercept = downv$intercept, slope = downv$slope,
                color = "#4169e1"
            ) +
            geom_abline(
                intercept = upv$intercept, slope = upv$slope,
                color = "#CD5C5C"
            ) +
            ggtitle(paste0(
                "TDD of genes with BRAF or DMSO treatment\n",
                "(R gene BRAF_down = ", downv$cor, ")(R BRAF_up = ",
                upv$cor, ")"
            ))
        p_down <- ggplot(tdd_full %>%
            filter(group %in% c("BRAF_DOWN", "BRAF_DOWN_sig")),
        mapping = aes(x = mean_DMSO_TDD, y = mean_BRAF_TDD, color = kind, label = gene)
        ) +
            geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
            scale_fill_manual(values = c("black", "red")) +
            coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
            geom_label_repel(data = tdd_full %>%
            filter(kind == "BRAF_DOWN_sig"), size = 1,
                mapping = aes(mean_DMSO_TDD, mean_BRAF_TDD, label = gene)) +
            geom_abline(
                intercept = downv$intercept, slope = downv$slope,
                color = "#4169e1"
            ) +
            geom_abline(slope = 1, color = "black") +
            ggtitle(paste0(
                "TDD of genes BRAF down-regulated with BRAF or DMSO",
                "treatment\n(R gene BRAF_down = ", downv$cor, ")"
            ))
        p_up <- ggplot(tdd_full %>% filter(group %in% c("BRAF_UP", "BRAF_UP_sig")),
            mapping = aes(
                x = mean_DMSO_TDD, y = mean_BRAF_TDD,
                color = kind
            )
        ) +
            geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
            scale_fill_manual(values = c("black", "red")) +
            coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
            geom_label_repel(data = tdd_full %>%
            filter(kind == "BRAF_UP_sig"), size = 1,
                mapping = aes(mean_DMSO_TDD, mean_BRAF_TDD, label = gene)) +
            geom_abline(
                intercept = downv$intercept,
                slope = downv$slope, color = "#CD5C5C"
            ) +
            geom_abline(slope = 1, color = "black") +
            ggtitle(paste0(
                "TDD of genes BRAF up-regulated with BRAF or DMSO",
                "treatment\n(R gene BRAF_up = ", upv$cor, ")"
            ))
        pdf(paste0(output_folder, "/TDD_correlation_figures.pdf"),
            width = 12,
            height = 17
        )
        grid.arrange(p, p_down, p_up, ncol = 1) # nolint
        dev.off()
    }
    
    
    #' Get the negative binomial regression p-value
    #'
    #' @details glm negative binomial test to check if the number of peaks
    #' between groups of genes is different
    #'
    #' @param tab a table containing the number of ribosome peaks in genes and
    #' their groups
    #' @param output_folder folder where the diagnotics will be created
    #' @return the p-value of the test
    glm_nb_pvalue <- function(tab, pic_col,
                              output_folder = "./results/TDD_analysis") {
        mod <- glm.nb(paste0(pic_col, "~ group"), data = tab) # nolint
        output_diag <- paste0(output_folder, "/diagnotics")
        dir.create(output_diag, showWarnings = F)
        output_fig <- paste0(output_diag, "/glm_nb_", pic_col, ".pdf")
        pdf(output_fig, width = 12, height = 8)
        plot(simulateResiduals(mod)) # nolint
        dev.off()
        return(summary(mod)$coeff[2, 4])
    }
    
    #' Get the negative binomial regression p-value
    #'
    #' @details glm logistic test to check if the presence of peaks
    #' between groups of genes is different
    #'
    #' @param tab a table containing the number of ribosome peaks in genes and
    #' their groups
    #' @param output_folder folder where the diagnotics will be created
    #' @return the p-value of the test
    glm_log_pvalue <- function(tab, pic_col,
                               output_folder = "./results/TDD_analysis") {
        mod <- glm(paste0(pic_col, "~ group"),
            data = tab,
            family = binomial(link = "logit")
        )
        output_diag <- paste0(output_folder, "/diagnotics")
        dir.create(output_diag, showWarnings = F)
        output_fig <- paste0(output_diag, "/glm_nb_", pic_col, ".pdf")
        pdf(output_fig, width = 12, height = 8)
        plot(simulateResiduals(mod)) # nolint
        dev.off()
        return(summary(mod)$coeff[2, 4])
    }
    
    
    #' Create barplots showing if different groups of genes have a different number
    #' of ribosome peaks
    #'
    #' @param tdd_full A table with TDD and number of peaks
    #' @param gene_type The selected group of gene on which we want to perform the
    #' analysis
    #' @param output_folder Folder where the results will be created
    #' @param peak_type The type of the peak
    create_pic_figs <- function(tdd_full, gene_type = "BRAF_DOWN",
                                output_folder = "./results/TDD_analysis",
                                peak_type = "RiboRNApeak") {
        treatment <- sub("_.*", "", gene_type)
        regulation <- sub(".*_", "", gene_type)
        if (regulation == "UP") {
            treatment <- "DMSO"
        }
        target <- paste0("TDD_", treatment)
        tdd_filt <- tdd_full %>% filter(group == gene_type) # nolint
        tdd_filt <- tdd_filt %>% mutate(group = ifelse(!!as.symbol(target),
            paste0(group, "_TDD"), group
        ))
        data_col <- "peak"
        res_col <- paste0("mean_", data_col)
        has_col <- paste0("has_", data_col)
        has_res_col <- paste0("mean_", has_col)
        tdd_filt <- tdd_filt %>% mutate(
            !!as.symbol(has_col) :=
                ifelse(!!as.symbol(data_col) > 0, 1, 0)
        )
        tdd_fig <- tdd_filt %>%
            select(!!as.symbol(data_col), !!as.symbol(has_col), group) %>%
            group_by(group) %>%
            summarise(
                !!as.symbol(res_col) := mean(!!as.symbol(data_col)),
                !!as.symbol(has_res_col) := mean(!!as.symbol(has_col))
            )
        pval <- glm_nb_pvalue(tdd_filt, data_col, output_folder)
        p1 <- ggplot(tdd_fig,
            mapping = aes_string(x = "group", y = res_col, fill = "group")
        ) +
            geom_bar(stat = "identity") +
            ggtitle(paste0(
                "Average proportion of ", treatment, " peaks in gene", regulation,
                "-regulated by BRAF (p = ", pval, ")"
            ))
        pval <- glm_log_pvalue(tdd_filt, has_col, output_folder)
        p2 <- ggplot(tdd_fig, mapping = aes_string(
            x = "group", y = has_res_col, fill = "group"
        )) +
            geom_bar(stat = "identity") +
            ggtitle(paste0(
                "Average proportion of ", regulation,
                "-regulated gene by BRAF having at least a ", treatment, 
                " peak (p = ", pval, ")"
            ))
        pdf(paste0(output_folder, "/barplot_pic_analysis", gene_type, "_",
                   peak_type, ".pdf"),
            width = 12, height = 12
        )
        grid.arrange(p1, p2, ncol = 1) #nolint
        dev.off()
    }
    
    
    #' function that calcultaed if genes are TDD (more in BRAF or DMSO condition)
    #' and if they have peaks
    #'
    #' @param peak_file A file containing peaks in a bed format
    #' @param mean_gene_count The minimum gene count of a gene to be analysed
    #' @param ercc Indicate if the normalisation with ercc should be used to compute
    #' the TDD index
    get_tdd_pic_table <- function(peak_file, mean_gene_count = 10, ercc = F,
                                  output_folder) {
        peak_list <- read.table(peak_file)$V1
        peak_table <- tibble(
            gene = names(table(peak_list)),
            peak = as.vector(table(peak_list))
        )
        tdd_full <- get_final_tdd_table(mean_gene_count, ercc)
        tdd_full <- tdd_full[, 
            c("gene", grep("X...", colnames(tdd_full), value = T))]
        tdd_braf_braf_down <- read_tsv(paste0(output_folder, 
            "/TDD_BRAF_BRAF_DOWN.txt"))
        tdd_dmso_braf_up <- read_tsv(paste0(output_folder, 
            "/TDD_DMSO_BRAF_UP.txt"))
        tdd_full <- tdd_full %>%
            mutate(
                TDD_BRAF = gene %in% tdd_braf_braf_down$gene,
                TDD_DMSO = gene %in% tdd_dmso_braf_up$gene
            ) %>%
            select(gene, TDD_BRAF, TDD_DMSO) #nolint
        tdd_full <- tdd_full %>% left_join(peak_table) #nolint
        tdd_full <- tdd_full %>% replace_na(list(peak = 0)) #nolint
        return(tdd_full)
    }
    
    #' Create all peaks fig
    #' 
    #' @param peak_file A file containing peaks
    #' @param mean_gene_count The minimum mean count of gene for its tdd to
    #' be computed
    #' @param ercc Indicate if the normalisation with ercc should be used to compute
    #' the TDD index
    #' @param gene_type The selected group of gene on which we want to perform the
    #' analysis
    #' @param peak_type The type of peak inside the figure
    build_peaks_fig <- function(peak_file, mean_gene_count = 10, ercc = F,
                                gene_type = "BRAF_DOWN",
                                output_folder = "./results/TDD_analysis",
                                peak_type = "RiboRNApeak") {
        tdd_table <- get_tdd_pic_table(peak_file, mean_gene_count, ercc, output_folder)
        tdd_table <- get_group_columns(tdd_table)
        create_pic_figs(tdd_table, gene_type, output_folder, peak_type = peak_type)
    }
    
    create_figures(mean_gene_count = 0, ercc = F, output_folder = "./results/TDD_analysis")
    create_figures(mean_gene_count = 10, ercc = F, output_folder = "./results/TDD_analysis")
    create_figures(mean_gene_count = 20, ercc = F, output_folder = "./results/TDD_analysis")
    create_figures(mean_gene_count = 50, ercc = F, output_folder = "./results/TDD_analysis")
    create_figures(mean_gene_count = 0, ercc = T, output_folder = "./results/TDD_analysis")
    create_figures(mean_gene_count = 10, ercc = T, output_folder = "./results/TDD_analysis")
    produce_tdd_cor_figure()
    dir <- "./data/gene_lists/"
    files <- c("BRAF_vs_DMSO_RepMean.merged.bed", "DMSO_vs_BRAF_RepMean.merged.bed",
               "BRAF_sorted_26-40.merged.bed", "DMSO_sorted_26-40.merged.bed")
    gene_types <- c("BRAF_DOWN", "BRAF_UP", "BRAF_DOWN", "BRAF_UP")
    peak_types <- c("RiboRNAPeak", "RiboRNAPeak", "RiboPeak", "RiboPeak")
    for (i in seq_len(length(files))) {
        peak_file <- paste0(dir, files[i])
        build_peaks_fig(peak_file = peak_file, gene_type = gene_types[i], 
                        peak_type = peak_types[i])
    }