06_compute_tdd_index.R

#!/bin/Rscript

# The goal of this script is to compute TDD index and TID index of every genes

library(MASS)
library(tidyverse)
library(genefilter)
library(gridExtra)
library(DHARMa)
library(emmeans)


source("src/common_functions.R")

#' compute the TDD of every genes for the experiment treatment
#'
#' @param treatment The treatment for which we want to compute the TDD
#' @param mean_gene_count The minimum mean count a gene must have to compute
#' its TDDindex
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @return The count tablme with the tdd for each replicate and mean TDD and sd
#' @import tidyverse
#' @import genefilter
compute_tdd <- function(treatment = "BRAF", mean_gene_count = 10, ercc = F) {
    ercc_name <- ""
    if (ercc) {
        ercc_name <- "_ercc"
    }
    norm_df <- read_tsv(
        paste0(
            "./results/deseq2_normalisation/", treatment, ercc_name,
            "_norm_stable_gene.txt"
        )
    )
    if (mean_gene_count > 0) {
        norm_df$rmean <- norm_df %>%
            select(-gene) %>%
            rowMeans()
        norm_df <- norm_df %>% filter(rmean >= mean_gene_count) # nolint
        norm_df <- norm_df %>% select(-rmean) # nolint
    }
    norm_df$rmin <- norm_df %>%
        select(-gene) %>%
        apply(1, min)
    norm_df <- norm_df %>% filter(rmin > 0) # nolint
    norm_df <- norm_df %>% select(-rmin) # nolint

    rep <- norm_df %>%
        select(-gene) %>%
        colnames() %>%
        str_extract("X...") %>%
        unique()
    temp <- norm_df
    colnames(temp) <- str_replace(colnames(temp), "DMSO_0", "T_0") # nolint
    for (r in rep) {
        bc <- paste0(r, "_", treatment)
        temp <- temp %>% mutate(!!paste0(r, "_", treatment, "_TDD") := (
            (!!as.symbol(paste0(bc, "_TCHX_5")) -
                !!as.symbol(paste0(bc, "_T_5")) # nolint
            ) / !!as.symbol(paste0(bc, "_T_0"))))
    }

    tdd_cols <- str_replace(
        rep, "(X...)",
        paste0("\\1_", treatment, "_TDD")
    ) # nolint
    temp <- temp %>%
        mutate(
            !!paste0("mean_", treatment, "_TDD") := rowMeans(.[, tdd_cols]),
            !!paste0("sd_", treatment, "_TDD") := rowSds(.[, tdd_cols])
        )
    return(temp)
}


#' Create a density figure of TDD index for a given replicate
#'
#' @param rep The replicate name
#' @param tdd_tible The tablle containing TDD
#' @param treatment The treatment used to compute the tDD
#' @import tidyverse
create_density_rep <- function(rep, tdd_tibble, treatment = "BRAF") {
    col <- paste0(rep, "_", treatment, "_TDD")
    tot <- tdd_tibble[, col] %>% nrow() # nolint
    good <- sum(tdd_tibble[, col] >= 0 & tdd_tibble[, col] <= 1)
    perc <- round((good / tot) * 100, 1)
    tit <- paste(
        "TDD density rep", rep, "( 0<=TDD<=1 :",
        good, "/", tot, "-", perc, "%)"
    )
    p <- ggplot(tdd_tibble, mapping = aes_string(x = col)) +
        geom_freqpoly(binwidth = 0.11) +
        coord_cartesian(xlim = c(-1, 2)) +
        ggtitle(tit)
    return(p)
}


#' Create density figure for every replicate and the mean of replicate
#'
#' @param tdd_tibble A tablle containing the TDD of each replicate
#' @param treatment The treatment to which the tdd_tibblerefers
#' @param output_folder The folder where the figure will be created
#' @param ercc a bolean indicating if the count data was normalized using ercc
#' @import tidyverse
#' @import gridExtra
create_density_figs <- function(tdd_tibble, treatment = "BRAF",
                                output_folder = "./results/TDD_analysis",
                                ercc = F) {
    dir.create(output_folder, showWarnings = F)
    rep <- tdd_tibble %>%
        select(-gene) %>%
        colnames() %>%
        str_extract("X...") %>%
        unique()
    rep <- rep[!is.na(rep)]
    my_plots <- lapply(rep, create_density_rep,
        tdd_tibble = tdd_tibble,
        treatment = treatment
    )
    my_plots[[length(my_plots) + 1]] <- create_density_rep(
        "mean", tdd_tibble, treatment
    )
    ercc_name <- ""
    if (ercc == T) {
        ercc_name = "_ercc"
    }
    pdf(paste0(output_folder, "/", treatment, ercc_name, "TDD_density.pdf"),
        height = 8, width = 12
    )
    print(do.call("grid.arrange", c(my_plots, ncol = 2)))
    dev.off()
}


#' Create the TDD tables and density figures with a treatment given in input
#'
#' @details Compute the tdd table for the sample treated with `treatment`,
#' Then creates the density figures of TDD for each replicates and saves the
#' TDD table
#'
#' @param treatment The treatment of samples for which we want to compute
#'  the TDD
#' @param output_folder folder where the TDD table and figure will be created
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @import tidyverse
get_tdd_table <- function(treatment,
                          mean_gene_count = 10,
                          output_folder = "./results/TDD_analysis",
                          ercc = F) {
    tdd_table <- compute_tdd(treatment, mean_gene_count, ercc)
    create_density_figs(tdd_table, treatment, output_folder, ercc)
    write_tsv(
        tdd_table,
        paste0(output_folder, "/", treatment, "_TDD_table.txt")
    )
    return(tdd_table)
}


#' Create the TDD table for DMSO and BRAF treatment
#'
#' @details Compute the tdd table for the sample treated with `DMSO` and `BRAF`,
#' Then creates the density figures of TDD for each replicates and saves the
#' TDD table
#'
#' @param output_folder Folder where the figures and TDD tables  will be created
#' @param ercc A boolean indicating whether to load errc noramlized expression
#' or not.
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @import tidyverse
get_final_tdd_table <- function(mean_gene_count = 10,
                                ercc = F,
                                output_folder = "./results/TDD_analysis") {
    braf_tdd <- get_tdd_table("BRAF", mean_gene_count, output_folder, ercc)
    dmso_tdd <- get_tdd_table("DMSO", mean_gene_count, output_folder, ercc)
    braf_tdd <- braf_tdd[, grep("gene|mean|TDD|sd", colnames(braf_tdd))]
    dmso_tdd <- dmso_tdd[, grep("gene|mean|TDD|sd", colnames(dmso_tdd))]
    tdd_full <- dmso_tdd %>% inner_join(braf_tdd, by = "gene") # nolint
    tdd_full <- tdd_full %>%
        mutate(diff_TDD = mean_BRAF_TDD - mean_DMSO_TDD) # nolint
    ercc_name <- ""
    if (ercc) {
        ercc_name <- "_erccnorm"
    }
    write_tsv(
        tdd_full,
        paste0(
            output_folder, "/", ercc_name, "_", mean_gene_count,
            "_full_TDD_table.txt"
        )
    )
    return(tdd_full)
}


#' Compute lm statistical analysis
#'
#' @details Compute statistical analysis with a linar model based on TDD of
#' different groups of genes
#'
#' @param tdd_tabble a table containing TDD values of genes belowing to
#' different groups. It must have the following columns: TDD, groups
#' @param output_folder folder where the stat table will be ccreated
#' @param  name The name of the stat table
#' @import tidyverse
#' @import DHARMa
#' @import emmeans
compute_stat <- function(tdd_table, output_folder = "./results/TDD_analysis",
                         name = "diag") {
    mod <- lm(TDD ~ group, data = tdd_table)
    output_folderd <- paste0(output_folder, "/diagnostics")
    dir.create(output_folderd, showWarnings = F)
    simulationOutput <- simulateResiduals(fittedModel = mod, n = 2000) # nolint
    pdf(paste0(output_folderd, "/", name, "_diag.pdf"), width = 12, height = 8)
    print(plot(simulationOutput))
    dev.off()
    comp <- emmeans(mod, ~group) # nolint
    tab <- as.data.frame(pairs(comp))
    tab <- tab[, c("contrast", "estimate", "SE", "t.ratio", "p.value")]
    grp1_mean <- NULL
    grp2_mean <- NULL
    groups <- str_split(tab$contrast, " - ") # nolint
    for (i in seq_len(length(groups))) {
        grp1 <- groups[[i]][1]
        grp2 <- groups[[i]][2]
        mean_grp1 <- mean(tdd_table[tdd_table$group %in% grp1, ][["TDD"]])
        mean_grp2 <- mean(tdd_table[tdd_table$group %in% grp2, ][["TDD"]])
        grp1_mean[i] <- mean_grp1
        grp2_mean[i] <- mean_grp2
    }
    tab$grp1_mean <- grp1_mean
    tab$grp2_mean <- grp2_mean
    colnames(tab) <- c(
        "grp1_grp2", "Estimate", "Std.Error",
        "t.ratio", "p.value", "grp1_mean", "grp2_mean"
    )
    write_tsv(tab, file = paste0(output_folder, "/", name, "_stat.txt")) # nolint
}


#' Create a boxplot figure to compare TDD values of different sets of genes
#'
#' @param tdd_df A TDD table for every treatment of interest
#' @param gene_files A list of files containing gene names
#' @param gene_names A list of names for each gene files
#' @param target_columns A list of string corresponding to the column to
#' to which we need to get TDD values for those genes (i.e The treatment of
#' interest)
#' @param outfile The name of the figure (and stats table) to produce
#' @param title The end of the figure title
#' @param output_folder The figure to create
create_tdd_fig_on_lists <- function(tdd_df, gene_files, gene_names,
                                    target_columns, outfile, title = "groups",
                                    output_folder = "./results/TDD_analysis") {
    output_folder <- paste0(output_folder, "/boxplot_figures")
    dir.create(output_folder, showWarnings = F)
    if (length(gene_files) != length(gene_names) |
        length(gene_files) != length(target_columns)) {
        message("The inputs should have the same lengths")
        return(1)
    }
    new_tdd <- data.frame(TDD = c(), group = c(), gene = c(), col = c())
    for (i in seq_len(length(gene_files))) {
        gene_selected <- read_tsv(gene_files[i], col_names = F)$X1 # nolint
        tdd_val <- tdd_df[tdd_df$gene %in% gene_selected, ][[target_columns[i]]]
        genes <- tdd_df[tdd_df$gene %in% gene_selected, ][["gene"]]
        groups <- rep(gene_names[i], times = length(genes))
        cols <- rep(target_columns[i], times = length(genes))
        tmp_tdd <- data.frame(
            TDD = tdd_val, group = groups,
            gene = genes, col = cols
        )
        new_tdd <- rbind(new_tdd, tmp_tdd)
    }
    if (!grepl("ercc", outfile, fixed = T)) {
        new_tdd <- new_tdd %>% distinct(gene, .keep_all = T) # nolint
    }
    compute_stat(new_tdd, output_folder, outfile)
    p <- ggplot(new_tdd, mapping = aes(x = group, y = TDD)) +
        geom_violin(mapping = aes(fill = group)) +
        geom_boxplot(width = 0.05) +
        ggtitle(paste("TDD index comparison between ", title))
    pdf(paste0(output_folder, "/", outfile, ".pdf"), height = 8, width = 12)
    print(p)
    dev.off()
    return(new_tdd)
}

#' Create a boxplot figure to compare TDD values of 3 different sets of genes
#'
#' @details Create 3 boxplot figures:
#' 1. Create a boxplot displaying the TDD of down-regulated genes by BRAF with
#' a BRAF peak
#' 2. Create a boxplot displaying the TDD of up-regulated genes by BRAF with
#' a DMSO peak
#' 3. Create a boxplot displaying the TDD of stable genes in DMSO and BRAF
#' conditions
#'
#' @param mean_gene_count The minimum gene count to compute TDD for them
#' @param ercc a bolean indicating if we want to use ercc or stable gene
#' normalisation
#' @param output_folder Folder where the figures will be created
create_figures <- function(mean_gene_count = 10, ercc = F,
                           output_folder = "./results/TDD_analysis") {
    tdd_full <- get_final_tdd_table(mean_gene_count, ercc, output_folder)
    dir <- "./data/gene_lists/"
    ercc_name <- ""
    if (ercc) {
        ercc_name <- "_erccnorm"
    }
    gn <- paste0("_geneMean>", mean_gene_count)
    r <- create_tdd_fig_on_lists(
        tdd_df = tdd_full,
        gene_files = c(
            paste0(dir, "mRNADOWN_riboRNApeakBRAF.txt"),
            paste0(dir, "BRAF_DOWN_1.5_name.txt")
        ),
        gene_names = c("BRAF_down_riboRNApeak", "BRAF_down"),
        target_columns = c("mean_BRAF_TDD", "mean_BRAF_TDD"),
        outfile = paste0("TDD", ercc_name, "_BRAF_down-BRAFpeak", gn),
        title = "down-reglated mRNA without and with ribosome peaks"
    )
    r <- create_tdd_fig_on_lists(
        tdd_df = tdd_full,
        gene_files = c(
            paste0(dir, "mRNAUP_riboRNApeakDMSO.txt"),
            paste0(dir, "BRAF_UP_1.5_name.txt")
        ),
        gene_names = c("BRAF_up_riboRNApeakDMSO", "BRAF_up"),
        target_columns = c("mean_DMSO_TDD", "mean_DMSO_TDD"),
        outfile = paste0("TDD", ercc_name, "_BRAF_up_BRAFpeakDMSO", gn),
        title = "down-reglated mRNA without and with ribosome peaks"
    )
    dir <- "./results/stable_genes/"
    if (!ercc) {
        stable_files <- c(
            paste0(dir, "braf_stable_genes.txt"),
            paste0(dir, "dmso_stable_genes.txt")
        )
    } else {
        stable_files <- c(
            paste0(dir, "ercc.txt"),
            paste0(dir, "ercc.txt")
        )
    }
    r <- create_tdd_fig_on_lists(
        tdd_df = tdd_full,
        gene_files = stable_files,
        gene_names = c("BRAF_stable", "DMSO_stable"),
        target_columns = c("mean_BRAF_TDD", "mean_DMSO_TDD"),
        outfile = paste0("BRAF_DMSO_stable", ercc_name, gn),
        title = "BRAF and DMSO stable genes"
    )
}


#' perform a t.test on TDD values replicate
#'
#' @param row a row of a TDD dataframe
#' @return The t.test pvalue
t_test_func <- function(row) {
    x1 <- as.numeric(row[1:3])
    x2 <- as.numeric(row[4:6])
    test <- t.test(x1, x2, paired = T)
    return(test$p.value) # nolint
}


#' Get correlation value between BRAF and DMSO TDD of genes
#'
#' @details Get, the correlation values, the slope and the intercept of the
#' best linear model between the TDD of a given set of
#' genes in BRAF conditions or in DMSO conditions
#'
#' @param tdd_full A table with a column with the mean TDD of DMSO
#' and BRAF condition + a group column indicating if the gene is up
#' of down regulated by BRAF
#' @param my_group The group of gene of interest to select
#' @return a list with the correlation value of TDD, its intercept and
#' slope
get_cor_value <- function(tdd_full, my_group = "BRAF_DOWN") {
    tdd <- tdd_full %>% filter(group == my_group) ## nolint
    c <- cor(
        tdd %>% filter(group == my_group) %>% # nolint
            select(mean_DMSO_TDD) %>% unlist(), # nolint
        tdd %>%
            filter(group == my_group) %>% # nolint
            select(mean_BRAF_TDD) %>%
            unlist() # nolint
    )
    res <- coef(lm(mean_BRAF_TDD ~ mean_DMSO_TDD, tdd %>%
        filter(group == my_group)))
    return(list(cor = round(c, 3), intercept = res[1], slope = res[2]))
}


#' Prepare a dataframe used to compute the correlation between BRAF and DMSO TDD
#'
#' @param mean_gene_count The minimum mean gene counts to compute their TDD
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param output_folder The folder where the results will be created
#' @return the dataframe ready to be used for correlation figures
prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F,
                               output_folder = "./results/TDD_analysis") {
    tdd_full <- get_final_tdd_table(mean_gene_count = 10, ercc = F)
    tdd_full <- tdd_full[
        ,
        c("gene", grep("X...|mean", colnames(tdd_full), value = T))
    ]
    tdd_full <- tdd_full %>%
        mutate(p_value = apply(
            tdd_full %>% select(starts_with("X")), 1,
            function(x) {
                t_test_func(x)
            }
        )) # nolint
    tdd_full <- tdd_full %>% arrange(-p_value) # nolint
    dir <- "./data/gene_lists/"
    down_braf <- read.table(paste0(dir, "BRAF_DOWN_1.5_name.txt"))$V1
    up_braf <- read.table(paste0(dir, "BRAF_UP_1.5_name.txt"))$V1
    tdd_full$group <- rep("CTRL", times = nrow(tdd_full))
    tdd_full[tdd_full$gene %in% down_braf, "group"] <- "BRAF_DOWN"
    tdd_full[tdd_full$gene %in% up_braf, "group"] <- "BRAF_UP"
    tdd_full$sig <- rep("", times = nrow(tdd_full))
    tdd_full[tdd_full$p_value <= 0.05, "sig"] <- "_sig"
    tdd_full <- tdd_full %>% mutate(kind = paste0(group, sig)) # nolint
    tdd_full <- tdd_full %>%
        filter(group != "CTRL") %>%
        arrange(kind)
    write_tsv(tdd_full, paste0(output_folder, "/TDD_correlation_table.txt")) # nolint
    tdd_full %>% 
        filter(kind == "BRAF_DOWN_sig", mean_BRAF_TDD > mean_DMSO_TDD) %>%
        select(gene) %>% 
        write_tsv(paste0(output_folder, "/TDD_BRAF_BRAF_DOWN.txt"))
    tdd_full %>% 
        filter(kind == "BRAF_UP_sig", mean_DMSO_TDD > mean_BRAF_TDD ) %>%
        select(gene) %>% 
        write_tsv(paste0(output_folder, "/TDD_DMSO_BRAF_UP.txt"))
    return(tdd_full)
}


#' Produce the TDD correlation figures
#'
#' @param mean_gene_count The minimum mean gene counts to compute their TDD
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param output_folder The folder where the results will be created
produce_tdd_cor_figure <- function(mean_gene_count = 10, ercc = F,
                                   output_folder = "./results/TDD_analysis") {
    tdd_full <- prepare_df_for_cor(mean_gene_count, ercc, output_folder)
    downv <- get_cor_value(tdd_full, my_group = "BRAF_DOWN")
    upv <- get_cor_value(tdd_full, my_group = "BRAF_UP")
    p <- ggplot(tdd_full, mapping = aes(
        x = mean_DMSO_TDD,
        y = mean_BRAF_TDD, color = kind
    )) +
        geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
        scale_fill_manual(values = c(
            "#4169e1", "darkblue",
            "#CD5C5C", "darkred"
        )) +
        coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
        geom_abline(
            intercept = downv$intercept, slope = downv$slope,
            color = "#4169e1"
        ) +
        geom_abline(
            intercept = upv$intercept, slope = upv$slope,
            color = "#CD5C5C"
        ) +
        ggtitle(paste0(
            "TDD of genes with BRAF or DMSO treatment\n",
            "(R gene BRAF_down = ", downv$cor, ")(R BRAF_up = ",
            upv$cor, ")"
        ))
    p_down <- ggplot(tdd_full %>%
        filter(group %in% c("BRAF_DOWN", "BRAF_DOWN_sig")),
    mapping = aes(x = mean_DMSO_TDD, y = mean_BRAF_TDD, color = kind, label = gene)
    ) +
        geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
        scale_fill_manual(values = c("black", "red")) +
        coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
        geom_label_repel(data = tdd_full %>%
        filter(kind == "BRAF_DOWN_sig"), size = 1,
            mapping = aes(mean_DMSO_TDD, mean_BRAF_TDD, label = gene)) +
        geom_abline(
            intercept = downv$intercept, slope = downv$slope,
            color = "#4169e1"
        ) +
        geom_abline(slope = 1, color = "black") +
        ggtitle(paste0(
            "TDD of genes BRAF down-regulated with BRAF or DMSO",
            "treatment\n(R gene BRAF_down = ", downv$cor, ")"
        ))
    p_up <- ggplot(tdd_full %>% filter(group %in% c("BRAF_UP", "BRAF_UP_sig")),
        mapping = aes(
            x = mean_DMSO_TDD, y = mean_BRAF_TDD,
            color = kind
        )
    ) +
        geom_point(aes(fill = kind), colour = "white", pch = 21, size = 3) +
        scale_fill_manual(values = c("black", "red")) +
        coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
        geom_label_repel(data = tdd_full %>%
        filter(kind == "BRAF_UP_sig"), size = 1,
            mapping = aes(mean_DMSO_TDD, mean_BRAF_TDD, label = gene)) +
        geom_abline(
            intercept = downv$intercept,
            slope = downv$slope, color = "#CD5C5C"
        ) +
        geom_abline(slope = 1, color = "black") +
        ggtitle(paste0(
            "TDD of genes BRAF up-regulated with BRAF or DMSO",
            "treatment\n(R gene BRAF_up = ", upv$cor, ")"
        ))
    pdf(paste0(output_folder, "/TDD_correlation_figures.pdf"),
        width = 12,
        height = 17
    )
    grid.arrange(p, p_down, p_up, ncol = 1) # nolint
    dev.off()
}


#' Get the negative binomial regression p-value
#'
#' @details glm negative binomial test to check if the number of peaks
#' between groups of genes is different
#'
#' @param tab a table containing the number of ribosome peaks in genes and
#' their groups
#' @param output_folder folder where the diagnotics will be created
#' @return the p-value of the test
glm_nb_pvalue <- function(tab, pic_col,
                          output_folder = "./results/TDD_analysis") {
    mod <- glm.nb(paste0(pic_col, "~ group"), data = tab) # nolint
    output_diag <- paste0(output_folder, "/diagnotics")
    dir.create(output_diag, showWarnings = F)
    output_fig <- paste0(output_diag, "/glm_nb_", pic_col, ".pdf")
    pdf(output_fig, width = 12, height = 8)
    plot(simulateResiduals(mod)) # nolint
    dev.off()
    return(summary(mod)$coeff[2, 4])
}

#' Get the negative binomial regression p-value
#'
#' @details glm logistic test to check if the presence of peaks
#' between groups of genes is different
#'
#' @param tab a table containing the number of ribosome peaks in genes and
#' their groups
#' @param output_folder folder where the diagnotics will be created
#' @return the p-value of the test
glm_log_pvalue <- function(tab, pic_col,
                           output_folder = "./results/TDD_analysis") {
    mod <- glm(paste0(pic_col, "~ group"),
        data = tab,
        family = binomial(link = "logit")
    )
    output_diag <- paste0(output_folder, "/diagnotics")
    dir.create(output_diag, showWarnings = F)
    output_fig <- paste0(output_diag, "/glm_nb_", pic_col, ".pdf")
    pdf(output_fig, width = 12, height = 8)
    plot(simulateResiduals(mod)) # nolint
    dev.off()
    return(summary(mod)$coeff[2, 4])
}


#' Create barplots showing if different groups of genes have a different number
#' of ribosome peaks
#'
#' @param tdd_full A table with TDD and number of peaks
#' @param gene_type The selected group of gene on which we want to perform the
#' analysis
#' @param output_folder Folder where the results will be created
#' @param peak_type The type of the peak
create_pic_figs <- function(tdd_full, gene_type = "BRAF_DOWN",
                            output_folder = "./results/TDD_analysis",
                            peak_type = "RiboRNApeak") {
    treatment <- sub("_.*", "", gene_type)
    regulation <- sub(".*_", "", gene_type)
    if (regulation == "UP") {
        treatment <- "DMSO"
    }
    target <- paste0("TDD_", treatment)
    tdd_filt <- tdd_full %>% filter(group == gene_type) # nolint
    tdd_filt <- tdd_filt %>% mutate(group = ifelse(!!as.symbol(target),
        paste0(group, "_TDD"), group
    ))
    data_col <- "peak"
    res_col <- paste0("mean_", data_col)
    has_col <- paste0("has_", data_col)
    has_res_col <- paste0("mean_", has_col)
    tdd_filt <- tdd_filt %>% mutate(
        !!as.symbol(has_col) :=
            ifelse(!!as.symbol(data_col) > 0, 1, 0)
    )
    tdd_fig <- tdd_filt %>%
        select(!!as.symbol(data_col), !!as.symbol(has_col), group) %>%
        group_by(group) %>%
        summarise(
            !!as.symbol(res_col) := mean(!!as.symbol(data_col)),
            !!as.symbol(has_res_col) := mean(!!as.symbol(has_col))
        )
    pval <- glm_nb_pvalue(tdd_filt, data_col, output_folder)
    p1 <- ggplot(tdd_fig,
        mapping = aes_string(x = "group", y = res_col, fill = "group")
    ) +
        geom_bar(stat = "identity") +
        ggtitle(paste0(
            "Average proportion of ", treatment, " peaks in gene", regulation,
            "-regulated by BRAF (p = ", pval, ")"
        ))
    pval <- glm_log_pvalue(tdd_filt, has_col, output_folder)
    p2 <- ggplot(tdd_fig, mapping = aes_string(
        x = "group", y = has_res_col, fill = "group"
    )) +
        geom_bar(stat = "identity") +
        ggtitle(paste0(
            "Average proportion of ", regulation,
            "-regulated gene by BRAF having at least a ", treatment, 
            " peak (p = ", pval, ")"
        ))
    pdf(paste0(output_folder, "/barplot_pic_analysis", gene_type, "_",
               peak_type, ".pdf"),
        width = 12, height = 12
    )
    grid.arrange(p1, p2, ncol = 1) #nolint
    dev.off()
}


#' function that calcultaed if genes are TDD (more in BRAF or DMSO condition)
#' and if they have peaks
#'
#' @param peak_file A file containing peaks in a bed format
#' @param mean_gene_count The minimum gene count of a gene to be analysed
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
get_tdd_pic_table <- function(peak_file, mean_gene_count = 10, ercc = F,
                              output_folder) {
    peak_list <- read.table(peak_file)$V1
    peak_table <- tibble(
        gene = names(table(peak_list)),
        peak = as.vector(table(peak_list))
    )
    tdd_full <- get_final_tdd_table(mean_gene_count, ercc)
    tdd_full <- tdd_full[, 
        c("gene", grep("X...", colnames(tdd_full), value = T))]
    tdd_braf_braf_down <- read_tsv(paste0(output_folder, 
        "/TDD_BRAF_BRAF_DOWN.txt"))
    tdd_dmso_braf_up <- read_tsv(paste0(output_folder, 
        "/TDD_DMSO_BRAF_UP.txt"))
    tdd_full <- tdd_full %>%
        mutate(
            TDD_BRAF = gene %in% tdd_braf_braf_down$gene,
            TDD_DMSO = gene %in% tdd_dmso_braf_up$gene
        ) %>%
        select(gene, TDD_BRAF, TDD_DMSO) #nolint
    tdd_full <- tdd_full %>% left_join(peak_table) #nolint
    tdd_full <- tdd_full %>% replace_na(list(peak = 0)) #nolint
    return(tdd_full)
}

#' Create all peaks fig
#' 
#' @param peak_file A file containing peaks
#' @param mean_gene_count The minimum mean count of gene for its tdd to
#' be computed
#' @param ercc Indicate if the normalisation with ercc should be used to compute
#' the TDD index
#' @param gene_type The selected group of gene on which we want to perform the
#' analysis
#' @param peak_type The type of peak inside the figure
build_peaks_fig <- function(peak_file, mean_gene_count = 10, ercc = F,
                            gene_type = "BRAF_DOWN",
                            output_folder = "./results/TDD_analysis",
                            peak_type = "RiboRNApeak") {
    tdd_table <- get_tdd_pic_table(peak_file, mean_gene_count, ercc, output_folder)
    tdd_table <- get_group_columns(tdd_table)
    create_pic_figs(tdd_table, gene_type, output_folder, peak_type = peak_type)
}

create_figures(mean_gene_count = 0, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 10, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 20, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 50, ercc = F, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 0, ercc = T, output_folder = "./results/TDD_analysis")
create_figures(mean_gene_count = 10, ercc = T, output_folder = "./results/TDD_analysis")
produce_tdd_cor_figure()
dir <- "./data/gene_lists/"
files <- c("BRAF_vs_DMSO_RepMean.merged.bed", "DMSO_vs_BRAF_RepMean.merged.bed",
           "BRAF_sorted_26-40.merged.bed", "DMSO_sorted_26-40.merged.bed")
gene_types <- c("BRAF_DOWN", "BRAF_UP", "BRAF_DOWN", "BRAF_UP")
peak_types <- c("RiboRNAPeak", "RiboRNAPeak", "RiboPeak", "RiboPeak")
for (i in seq_len(length(files))) {
    peak_file <- paste0(dir, files[i])
    build_peaks_fig(peak_file = peak_file, gene_type = gene_types[i], 
                    peak_type = peak_types[i])
}