diff --git a/src/06_compute_tdd_index.R b/src/06_compute_tdd_index.R index 2f9a82adfe5157a9576bddeccdb9895c857a899d..622ee1a3da6492ff30919cfac833e30818ad3886 100644 --- a/src/06_compute_tdd_index.R +++ b/src/06_compute_tdd_index.R @@ -513,14 +513,16 @@ prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F, tdd_full[tdd_full$gene %in% cc_peaks, "peak"] <- "UP_CC_peaks" tdd_full$sig <- rep("", times = nrow(tdd_full)) tdd_full[tdd_full$p_value <= 0.05, "sig"] <- "_sig" - tdd_full <- tdd_full %>% mutate(kind = paste0(peak, sig), - gkind = paste0(group, sig)) # nolint + tdd_full <- tdd_full %>% mutate( + kind = paste0(peak, sig), + gkind = paste0(group, sig) + ) # nolint tdd_full <- tdd_full %>% arrange(kind) write_tsv(tdd_full, paste0(output_folder, "/TDD_correlation_table.txt")) # nolint mean_tdd <- tdd_full %>% select(-time_point) %>% - group_by(gene, group, peak, sig, kind) %>% + group_by(gene, group, peak, sig, kind, gkind) %>% summarize_all(mean) %>% ungroup() write_tsv(mean_tdd, paste0(output_folder, "/TDD_correlation_table_avg.txt")) # nolint @@ -555,12 +557,12 @@ prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F, filter_on_tp <- function(tdd_table, tp = 5) { if (tp == "3_5") { tmp3 <- tdd_table %>% filter(time_point == 3) %>% # nolint - dplyr::select(gene, mean_DMSO_TDD, mean_BRAF_TDD, group, kind, peak) + dplyr::select(gene, mean_DMSO_TDD, mean_BRAF_TDD, group, kind, peak, gkind) tmp5 <- tdd_table %>% filter(time_point == 5) %>% # nolint - dplyr::select(gene, mean_DMSO_TDD, mean_BRAF_TDD, group, kind, peak) + dplyr::select(gene, mean_DMSO_TDD, mean_BRAF_TDD, group, kind, peak, gkind) tdd_full <- left_join( tmp3, tmp5, - by = c("gene", "group", "kind", "peak"), + by = c("gene", "group", "kind", "peak", "gkind"), suffix = c("_3", "_5") ) %>% rowwise() %>% @@ -583,69 +585,43 @@ filter_on_tp <- function(tdd_table, tp = 5) { produce_tdd_cor_figure <- function(mean_gene_count = 10, ercc = F, output_folder = "./results/TDD_analysis/correlation") { tdd_complete <- prepare_df_for_cor(mean_gene_count, ercc, output_folder) - for (tp in c(3, 5, "3_5")) { - tdd_full <- filter_on_tp(tdd_complete, tp) - downv <- get_cor_value(tdd_full, my_group = "BRAF_DOWN", my_peak = "DOWN_TC_peaks") - upv <- get_cor_value(tdd_full, my_group = "BRAF_UP", my_peak = "UP_CC_peaks") - p_down <- ggplot(tdd_full %>% - filter(peak == "DOWN_TC_peaks"), - mapping = aes(x = mean_DMSO_TDD, y = mean_BRAF_TDD, color = kind) - ) + - geom_point(aes(fill = kind), colour = "white", pch = 21, size = 5) + - scale_fill_manual(values = c("grey", "black")) + - coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + - # geom_abline( - # intercept = downv$intercept, slope = downv$slope, - # color = "#4169e1" - # ) + - theme_classic() + - theme( - text = element_text(size = 26), - panel.grid.major.x = element_line(), - panel.grid.major.y = element_line(), - ) + - geom_abline(slope = 1, color = "black") + - ggtitle(paste0( - "TDD of genes down-regulated w/ TC peaks by BRAF", - "\n(R gene BRAF_down = ", downv$cor, ")" - )) - p_up <- ggplot(tdd_full %>% filter(peak == "UP_CC_peaks"), - mapping = aes( - x = mean_DMSO_TDD, y = mean_BRAF_TDD, - color = kind - ) - ) + - geom_point(aes(fill = kind), colour = "white", pch = 21, size = 5) + + tdd_full <- filter_on_tp(tdd_complete, "3_5") + tmp_df <- tibble( + col = c("group", "group", "peak", "peak"), + names = c( + "BRAF_DOWN", "BRAF_UP", + "DOWN_TC_peaks", "UP_CC_peaks" + ), + color_col = c("gkind", "gkind", "kind", "kind") + ) + for (i in seq_len(nrow(tmp_df))) { + cnames <- tmp_df[i, ]$names + ccol <- tmp_df[i, ]$color_col + tdd_tmp <- tdd_full %>% + filter(!!as.symbol(tmp_df[i, ]$col) == cnames) + p <- ggplot(tdd_tmp, mapping = aes_string( + x = "mean_DMSO_TDD", + y = "mean_BRAF_TDD", color = ccol + )) + + geom_point(mapping = aes_string(fill = ccol), colour = "white", pch = 21, size = 5) + scale_fill_manual(values = c("grey", "black")) + coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) + - theme_classic() + + theme_classic() + theme( text = element_text(size = 26), panel.grid.major.x = element_line(), panel.grid.major.y = element_line(), ) + - # geom_abline( - # intercept = upv$intercept, - # slope = upv$slope, color = "#CD5C5C" - # ) + geom_abline(slope = 1, color = "black") + - ggtitle(paste0( - "TDD of genes up-regulated by BRAF w/ CC peaks", - "\n(R gene BRAF_up = ", upv$cor, ")" - )) - pdf(paste0(output_folder, - "/TDD_correlation_figures_downtc_", tp, "h.pdf"), - width = 15, - height = 10 - ) - print(p_down) - dev.off() - pdf(paste0(output_folder, - "/TDD_correlation_figures_upcc_", tp, "h.pdf"), - width = 15, - height = 10 + ggtitle(paste("TDD of", cnames, "genes")) + pdf(paste0( + output_folder, + "/TDD_correlation_figures_", cnames, "_3_5_h.pdf" + ), + width = 15, + height = 10 ) - print(p_up) + print(p) dev.off() } } @@ -667,13 +643,17 @@ glm_nb_pvalue <- function(tab, pic_col, ptype_name <- tab$ptype[1] output_fig <- paste0(output_diag, "/glm_nb_", pic_col, "_", ptype_name, ".pdf") if (pic_col == "peak") { - mod <- glmmTMB(peak ~ group, ziformula = ~1, data = tab, - family="nbinom2") # nolint - pval <- sprintf(summary(mod)$coeff$cond[2, 4], fmt = '%#.2e') + mod <- glmmTMB(peak ~ group, + ziformula = ~1, data = tab, + family = "nbinom2" + ) # nolint + pval <- sprintf(summary(mod)$coeff$cond[2, 4], fmt = "%#.2e") } else { - mod <- glmmTMB(log1p(peak_surface) ~ group, data = tab, ziformula = ~1, - family = "gaussian") # nolint - pval <- sprintf(summary(mod)$coeff$cond[2, 4], fmt = '%#.2e') + mod <- glmmTMB(log1p(peak_surface) ~ group, + data = tab, ziformula = ~1, + family = "gaussian" + ) # nolint + pval <- sprintf(summary(mod)$coeff$cond[2, 4], fmt = "%#.2e") } pdf(output_fig, width = 12, height = 8) plot(simulateResiduals(mod)) # nolint @@ -699,12 +679,14 @@ glm_log_pvalue <- function(tab, pic_col, output_diag <- paste0(output_folder, "/diagnotics") dir.create(output_diag, showWarnings = F) ptype_name <- tab$ptype[1] - output_fig <- paste0(output_diag, "/glm_nb_", pic_col, "_", - ptype_name, ".pdf") + output_fig <- paste0( + output_diag, "/glm_nb_", pic_col, "_", + ptype_name, ".pdf" + ) pdf(output_fig, width = 12, height = 8) plot(simulateResiduals(mod)) # nolint dev.off() - return(sprintf(summary(mod)$coeff[2, 4], fmt = '%#.2e')) + return(sprintf(summary(mod)$coeff[2, 4], fmt = "%#.2e")) } @@ -753,7 +735,7 @@ create_peaks_barplots <- function(tdd_filt, pval, output_fig, ) } if (gene_type == "TDD_BRAF") { - valcol <- c("dimgrey", blue_tdd) + valcol <- c("dimgrey", blue_tdd) } else { valcol <- c("dimgrey", red_tdd) } @@ -765,10 +747,14 @@ create_peaks_barplots <- function(tdd_filt, pval, output_fig, ggtitle(title) + labs(x = "peak type") if (data_col == "peak") { - pval_log <- list(pbraf = glm_log_pvalue( - tdd_filt %>% filter(ptype == "BRAF"), has_col, dirname(output_fig)), + pval_log <- list( + pbraf = glm_log_pvalue( + tdd_filt %>% filter(ptype == "BRAF"), has_col, dirname(output_fig) + ), pdmso = glm_log_pvalue( - tdd_filt %>% filter(ptype == "DMSO"), has_col, dirname(output_fig))) + tdd_filt %>% filter(ptype == "DMSO"), has_col, dirname(output_fig) + ) + ) ntitle <- paste0( "Average proportion of genes ", gene_type, " and control genes habing at least one BRAF/DMSO peaks\n", @@ -780,8 +766,7 @@ create_peaks_barplots <- function(tdd_filt, pval, output_fig, )) + geom_bar(stat = "identity", position = "dodge") + scale_fill_manual(values = valcol) + - ggtitle(ntitle) + - labs(x = "peak type") + as_ pdf(output_fig, width = 12, height = 12) grid.arrange(p1, p2, ncol = 1) # nolint dev.off() @@ -815,7 +800,7 @@ create_peaks_density <- function(tdd_filt, pval, outfile, data_col, xcol <- paste0("log1p(", data_col, ")") } if (gene_type == "TDD_BRAF") { - valcol <- c("dimgrey", blue_tdd) # nolint + valcol <- c("dimgrey", blue_tdd) # nolint } else { valcol <- c("dimgrey", red_tdd) # nolint } @@ -853,7 +838,8 @@ create_peaks_density <- function(tdd_filt, pval, outfile, data_col, "(p_BRAF_peak = ", pval$peak_braf, ")" ) p1 <- ggplot(tdd_filt %>% filter(ptype == "BRAF"), - mapping = aes_string(x = xcol, fill = "group")) + + mapping = aes_string(x = xcol, fill = "group") + ) + geom_density(mapping = aes(fill = group), bw = bw, alpha = 0.5) + scale_fill_manual(values = valcol) + ggtitle(title) + @@ -864,7 +850,8 @@ create_peaks_density <- function(tdd_filt, pval, outfile, data_col, "(p_DMSO_peak = ", pval$peak_dmso, ")" ) p2 <- ggplot(tdd_filt %>% filter(ptype == "DMSO"), - mapping = aes_string(x = xcol, fill = "group")) + + mapping = aes_string(x = xcol, fill = "group") + ) + geom_density(mapping = aes(fill = group), bw = bw, alpha = 0.5) + scale_fill_manual(values = valcol) + ggtitle(title) + @@ -887,15 +874,20 @@ create_peaks_density <- function(tdd_filt, pval, outfile, data_col, create_pic_figs <- function(tdd_full, gene_type = "TDD_BRAF", output_folder = "./results/TDD_analysis", kind = "peak") { - data_col <- kind res_col <- paste0("mean_", data_col) has_col <- paste0("has_", data_col) has_res_col <- paste0("mean_", has_col) - pval <- list(peak_braf = glm_nb_pvalue(tdd_full %>% filter(ptype == "BRAF"), - data_col, output_folder), - peak_dmso = glm_nb_pvalue(tdd_full %>% filter(ptype == "DMSO"), - data_col, output_folder)) + pval <- list( + peak_braf = glm_nb_pvalue( + tdd_full %>% filter(ptype == "BRAF"), + data_col, output_folder + ), + peak_dmso = glm_nb_pvalue( + tdd_full %>% filter(ptype == "DMSO"), + data_col, output_folder + ) + ) output_fig <- paste0( output_folder, "/barplot_peak_analysis_", gene_type, "_", kind, ".pdf" @@ -926,7 +918,8 @@ get_peak_df <- function(kind = "peak", table_gene) { } pfiles <- c( "./data/gene_lists/BRAF_vs_DMSO.merged.bed", - "./data/gene_lists/DMSO_vs_BRAF.merged.bed") + "./data/gene_lists/DMSO_vs_BRAF.merged.bed" + ) if (kind == "peak") { peak_table <- NULL @@ -935,10 +928,12 @@ get_peak_df <- function(kind = "peak", table_gene) { cname <- str_replace(str_extract(peak_file, "(BRAF|DMSO)_"), "_", "") tpeak_table <- tibble( gene = names(table(peak_list)), - peak = as.vector(table(peak_list))) + peak = as.vector(table(peak_list)) + ) tpeak_table <- tpeak_table %>% mutate(ptype = cname) - tpeak_table <- table_gene %>% left_join(tpeak_table, by = "gene" - ) %>% replace_na(list(peak = 0, ptype = cname)) + tpeak_table <- table_gene %>% + left_join(tpeak_table, by = "gene") %>% + replace_na(list(peak = 0, ptype = cname)) peak_table <- rbind(peak_table, tpeak_table) } } else { @@ -954,8 +949,9 @@ get_peak_df <- function(kind = "peak", table_gene) { group_by(gene) %>% summarise(peak_surface = sum(size)) tpeak_table <- tpeak_table %>% mutate(ptype = cname) - tpeak_table <- table_gene %>% left_join(tpeak_table, by = "gene" - ) %>% replace_na(list(peak_surface = 0, ptype = cname)) + tpeak_table <- table_gene %>% + left_join(tpeak_table, by = "gene") %>% + replace_na(list(peak_surface = 0, ptype = cname)) peak_table <- rbind(peak_table, tpeak_table) } } @@ -1016,7 +1012,8 @@ build_peaks_fig <- function(mean_gene_count = 10, ercc = F, } #' Create TDD violin figure -create_tdd_violin <- function(kind = "UP_CC_peaks", +create_tdd_violin <- function(mkind = "UP_CC_peaks", + col = "peak", tdd_file = paste0( "./results/TDD_analysis/correlation/", "TDD_correlation_table.txt" @@ -1024,34 +1021,45 @@ create_tdd_violin <- function(kind = "UP_CC_peaks", output = "./results/TDD_analysis/tdd_violin") { tdd_table <- read_tsv(tdd_file) # nolint tdd_table <- filter_on_tp(tdd_table, "3_5") - tdd_table[tdd_table$peak != kind, "peak"] <- "CTRL" + if (col == "peak") { + tdd_table[tdd_table$peak != mkind, "peak"] <- "CTRL" + tdd_table$peak <- relevel(as.factor(tdd_table$peak), "CTRL") + tdd_table <- tdd_table %>% arrange(peak) + } else { + tdd_table[tdd_table$group != mkind, "group"] <- "CTRL" + tdd_table$group <- relevel(as.factor(tdd_table$group), "CTRL") + tdd_table <- tdd_table %>% arrange(group) + } tdd_table <- tdd_table %>% # nolint dplyr::select( gene, mean_DMSO_TDD, mean_BRAF_TDD, - mean_BRAF_TDD, peak + mean_BRAF_TDD, peak, group ) %>% rename(DMSO = mean_DMSO_TDD, BRAF = mean_BRAF_TDD) %>% - pivot_longer(c(-gene, -peak), + pivot_longer(c(-gene, -peak, -group), names_to = "condition", values_to = "TDD" ) - if (kind == "UP_CC_peaks") { + if (mkind == "UP_CC_peaks" | mkind == "BRAF_UP") { + test_color <- "#fb7b7b" tdd_table <- tdd_table %>% filter(condition == "DMSO") - title <- paste0("Average TDD index comparisons (at 3 and 5h) in DMSO condition\n", - "between TCpeak-UP genes in DMSO conditions and CTRL genes" + title <- paste0( + "Average TDD index comparisons (at 3 and 5h) in DMSO condition\n", + "between TCpeak-UP genes in DMSO conditions and CTRL genes" ) } else { + test_color <- "#7b7bfb" tdd_table <- tdd_table %>% filter(condition == "BRAF") - title <- paste0("Average TDD index comparisons (at 3 and 5h) in BRAF condition\n", - "between TCpeak-DOWN genes in BRAF conditions and CTRL genes" + title <- paste0( + "Average TDD index comparisons (at 3 and 5h) in BRAF condition\n", + "between TCpeak-DOWN genes in BRAF conditions and CTRL genes" ) } - tdd_table$peak <- factor(tdd_table$peak, level=c(kind, "CTRL")) p <- ggplot( data = tdd_table, - mapping = aes(x = peak, y = TDD, fill = peak) + mapping = aes_string(x = col, y = "TDD", fill = col) ) + - scale_fill_manual(values = c("grey", "white")) + + scale_fill_manual(values = c("white", test_color)) + geom_violin(position = position_dodge(width = 0.8)) + geom_boxplot(width = 0.05, position = position_dodge(width = 0.8)) + theme_classic() + @@ -1063,11 +1071,15 @@ create_tdd_violin <- function(kind = "UP_CC_peaks", coord_cartesian(ylim = c(-0.5, 1.5)) + ggtitle(title) dir.create(output, showWarnings = F) - outname <- paste0("Peak_", kind, "_3_5h") + outname <- paste0("Peak_", mkind, "_3_5h") pdf(paste0(output, "/", outname, ".pdf"), height = 10, width = 17) print(p) dev.off() - fit <- lm(TDD ^ 0.5 ~ peak, data = tdd_table) + if (col == "peak") { + fit <- lm(TDD^0.5 ~ peak, data = tdd_table) + } else { + fit <- lm(TDD^0.5 ~ group, data = tdd_table) + } simulationOutput <- simulateResiduals(fit, n = 250, plot = F) # nolint dir.create(paste0(output, "/diag"), showWarnings = F) pdf(paste0(output, "/diag/", outname, ".pdf"), width = 17, height = 10) @@ -1080,8 +1092,10 @@ create_tdd_violin <- function(kind = "UP_CC_peaks", #' Launch create_tdd_violins for all possible values create_tdd_violins <- function() { - create_tdd_violin(kind = "UP_CC_peaks") - create_tdd_violin(kind = "DOWN_TC_peaks") + create_tdd_violin(mkind = "UP_CC_peaks", col = "peak") + create_tdd_violin(mkind = "DOWN_TC_peaks", col = "peak") + create_tdd_violin(mkind = "BRAF_DOWN", col = "group") + create_tdd_violin(mkind = "BRAF_UP", col = "group") }