Skip to content
Snippets Groups Projects
Verified Commit 771b8eaa authored by nfontrod's avatar nfontrod
Browse files

src/06_compute_tdd_index.R: changes in correlations table + tdd density figures

parent 723a99a9
No related branches found
No related tags found
No related merge requests found
......@@ -513,14 +513,16 @@ prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F,
tdd_full[tdd_full$gene %in% cc_peaks, "peak"] <- "UP_CC_peaks"
tdd_full$sig <- rep("", times = nrow(tdd_full))
tdd_full[tdd_full$p_value <= 0.05, "sig"] <- "_sig"
tdd_full <- tdd_full %>% mutate(kind = paste0(peak, sig),
gkind = paste0(group, sig)) # nolint
tdd_full <- tdd_full %>% mutate(
kind = paste0(peak, sig),
gkind = paste0(group, sig)
) # nolint
tdd_full <- tdd_full %>%
arrange(kind)
write_tsv(tdd_full, paste0(output_folder, "/TDD_correlation_table.txt")) # nolint
mean_tdd <- tdd_full %>%
select(-time_point) %>%
group_by(gene, group, peak, sig, kind) %>%
group_by(gene, group, peak, sig, kind, gkind) %>%
summarize_all(mean) %>%
ungroup()
write_tsv(mean_tdd, paste0(output_folder, "/TDD_correlation_table_avg.txt")) # nolint
......@@ -555,12 +557,12 @@ prepare_df_for_cor <- function(mean_gene_count = 10, ercc = F,
filter_on_tp <- function(tdd_table, tp = 5) {
if (tp == "3_5") {
tmp3 <- tdd_table %>% filter(time_point == 3) %>% # nolint
dplyr::select(gene, mean_DMSO_TDD, mean_BRAF_TDD, group, kind, peak)
dplyr::select(gene, mean_DMSO_TDD, mean_BRAF_TDD, group, kind, peak, gkind)
tmp5 <- tdd_table %>% filter(time_point == 5) %>% # nolint
dplyr::select(gene, mean_DMSO_TDD, mean_BRAF_TDD, group, kind, peak)
dplyr::select(gene, mean_DMSO_TDD, mean_BRAF_TDD, group, kind, peak, gkind)
tdd_full <- left_join(
tmp3, tmp5,
by = c("gene", "group", "kind", "peak"),
by = c("gene", "group", "kind", "peak", "gkind"),
suffix = c("_3", "_5")
) %>%
rowwise() %>%
......@@ -583,69 +585,43 @@ filter_on_tp <- function(tdd_table, tp = 5) {
produce_tdd_cor_figure <- function(mean_gene_count = 10, ercc = F,
output_folder = "./results/TDD_analysis/correlation") {
tdd_complete <- prepare_df_for_cor(mean_gene_count, ercc, output_folder)
for (tp in c(3, 5, "3_5")) {
tdd_full <- filter_on_tp(tdd_complete, tp)
downv <- get_cor_value(tdd_full, my_group = "BRAF_DOWN", my_peak = "DOWN_TC_peaks")
upv <- get_cor_value(tdd_full, my_group = "BRAF_UP", my_peak = "UP_CC_peaks")
p_down <- ggplot(tdd_full %>%
filter(peak == "DOWN_TC_peaks"),
mapping = aes(x = mean_DMSO_TDD, y = mean_BRAF_TDD, color = kind)
) +
geom_point(aes(fill = kind), colour = "white", pch = 21, size = 5) +
scale_fill_manual(values = c("grey", "black")) +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
# geom_abline(
# intercept = downv$intercept, slope = downv$slope,
# color = "#4169e1"
# ) +
theme_classic() +
theme(
text = element_text(size = 26),
panel.grid.major.x = element_line(),
panel.grid.major.y = element_line(),
) +
geom_abline(slope = 1, color = "black") +
ggtitle(paste0(
"TDD of genes down-regulated w/ TC peaks by BRAF",
"\n(R gene BRAF_down = ", downv$cor, ")"
))
p_up <- ggplot(tdd_full %>% filter(peak == "UP_CC_peaks"),
mapping = aes(
x = mean_DMSO_TDD, y = mean_BRAF_TDD,
color = kind
)
) +
geom_point(aes(fill = kind), colour = "white", pch = 21, size = 5) +
tdd_full <- filter_on_tp(tdd_complete, "3_5")
tmp_df <- tibble(
col = c("group", "group", "peak", "peak"),
names = c(
"BRAF_DOWN", "BRAF_UP",
"DOWN_TC_peaks", "UP_CC_peaks"
),
color_col = c("gkind", "gkind", "kind", "kind")
)
for (i in seq_len(nrow(tmp_df))) {
cnames <- tmp_df[i, ]$names
ccol <- tmp_df[i, ]$color_col
tdd_tmp <- tdd_full %>%
filter(!!as.symbol(tmp_df[i, ]$col) == cnames)
p <- ggplot(tdd_tmp, mapping = aes_string(
x = "mean_DMSO_TDD",
y = "mean_BRAF_TDD", color = ccol
)) +
geom_point(mapping = aes_string(fill = ccol), colour = "white", pch = 21, size = 5) +
scale_fill_manual(values = c("grey", "black")) +
coord_cartesian(xlim = c(0, 1), ylim = c(0, 1)) +
theme_classic() +
theme_classic() +
theme(
text = element_text(size = 26),
panel.grid.major.x = element_line(),
panel.grid.major.y = element_line(),
) +
# geom_abline(
# intercept = upv$intercept,
# slope = upv$slope, color = "#CD5C5C"
# ) +
geom_abline(slope = 1, color = "black") +
ggtitle(paste0(
"TDD of genes up-regulated by BRAF w/ CC peaks",
"\n(R gene BRAF_up = ", upv$cor, ")"
))
pdf(paste0(output_folder,
"/TDD_correlation_figures_downtc_", tp, "h.pdf"),
width = 15,
height = 10
)
print(p_down)
dev.off()
pdf(paste0(output_folder,
"/TDD_correlation_figures_upcc_", tp, "h.pdf"),
width = 15,
height = 10
ggtitle(paste("TDD of", cnames, "genes"))
pdf(paste0(
output_folder,
"/TDD_correlation_figures_", cnames, "_3_5_h.pdf"
),
width = 15,
height = 10
)
print(p_up)
print(p)
dev.off()
}
}
......@@ -667,13 +643,17 @@ glm_nb_pvalue <- function(tab, pic_col,
ptype_name <- tab$ptype[1]
output_fig <- paste0(output_diag, "/glm_nb_", pic_col, "_", ptype_name, ".pdf")
if (pic_col == "peak") {
mod <- glmmTMB(peak ~ group, ziformula = ~1, data = tab,
family="nbinom2") # nolint
pval <- sprintf(summary(mod)$coeff$cond[2, 4], fmt = '%#.2e')
mod <- glmmTMB(peak ~ group,
ziformula = ~1, data = tab,
family = "nbinom2"
) # nolint
pval <- sprintf(summary(mod)$coeff$cond[2, 4], fmt = "%#.2e")
} else {
mod <- glmmTMB(log1p(peak_surface) ~ group, data = tab, ziformula = ~1,
family = "gaussian") # nolint
pval <- sprintf(summary(mod)$coeff$cond[2, 4], fmt = '%#.2e')
mod <- glmmTMB(log1p(peak_surface) ~ group,
data = tab, ziformula = ~1,
family = "gaussian"
) # nolint
pval <- sprintf(summary(mod)$coeff$cond[2, 4], fmt = "%#.2e")
}
pdf(output_fig, width = 12, height = 8)
plot(simulateResiduals(mod)) # nolint
......@@ -699,12 +679,14 @@ glm_log_pvalue <- function(tab, pic_col,
output_diag <- paste0(output_folder, "/diagnotics")
dir.create(output_diag, showWarnings = F)
ptype_name <- tab$ptype[1]
output_fig <- paste0(output_diag, "/glm_nb_", pic_col, "_",
ptype_name, ".pdf")
output_fig <- paste0(
output_diag, "/glm_nb_", pic_col, "_",
ptype_name, ".pdf"
)
pdf(output_fig, width = 12, height = 8)
plot(simulateResiduals(mod)) # nolint
dev.off()
return(sprintf(summary(mod)$coeff[2, 4], fmt = '%#.2e'))
return(sprintf(summary(mod)$coeff[2, 4], fmt = "%#.2e"))
}
......@@ -753,7 +735,7 @@ create_peaks_barplots <- function(tdd_filt, pval, output_fig,
)
}
if (gene_type == "TDD_BRAF") {
valcol <- c("dimgrey", blue_tdd)
valcol <- c("dimgrey", blue_tdd)
} else {
valcol <- c("dimgrey", red_tdd)
}
......@@ -765,10 +747,14 @@ create_peaks_barplots <- function(tdd_filt, pval, output_fig,
ggtitle(title) +
labs(x = "peak type")
if (data_col == "peak") {
pval_log <- list(pbraf = glm_log_pvalue(
tdd_filt %>% filter(ptype == "BRAF"), has_col, dirname(output_fig)),
pval_log <- list(
pbraf = glm_log_pvalue(
tdd_filt %>% filter(ptype == "BRAF"), has_col, dirname(output_fig)
),
pdmso = glm_log_pvalue(
tdd_filt %>% filter(ptype == "DMSO"), has_col, dirname(output_fig)))
tdd_filt %>% filter(ptype == "DMSO"), has_col, dirname(output_fig)
)
)
ntitle <- paste0(
"Average proportion of genes ", gene_type,
" and control genes habing at least one BRAF/DMSO peaks\n",
......@@ -780,8 +766,7 @@ create_peaks_barplots <- function(tdd_filt, pval, output_fig,
)) +
geom_bar(stat = "identity", position = "dodge") +
scale_fill_manual(values = valcol) +
ggtitle(ntitle) +
labs(x = "peak type")
as_
pdf(output_fig, width = 12, height = 12)
grid.arrange(p1, p2, ncol = 1) # nolint
dev.off()
......@@ -815,7 +800,7 @@ create_peaks_density <- function(tdd_filt, pval, outfile, data_col,
xcol <- paste0("log1p(", data_col, ")")
}
if (gene_type == "TDD_BRAF") {
valcol <- c("dimgrey", blue_tdd) # nolint
valcol <- c("dimgrey", blue_tdd) # nolint
} else {
valcol <- c("dimgrey", red_tdd) # nolint
}
......@@ -853,7 +838,8 @@ create_peaks_density <- function(tdd_filt, pval, outfile, data_col,
"(p_BRAF_peak = ", pval$peak_braf, ")"
)
p1 <- ggplot(tdd_filt %>% filter(ptype == "BRAF"),
mapping = aes_string(x = xcol, fill = "group")) +
mapping = aes_string(x = xcol, fill = "group")
) +
geom_density(mapping = aes(fill = group), bw = bw, alpha = 0.5) +
scale_fill_manual(values = valcol) +
ggtitle(title) +
......@@ -864,7 +850,8 @@ create_peaks_density <- function(tdd_filt, pval, outfile, data_col,
"(p_DMSO_peak = ", pval$peak_dmso, ")"
)
p2 <- ggplot(tdd_filt %>% filter(ptype == "DMSO"),
mapping = aes_string(x = xcol, fill = "group")) +
mapping = aes_string(x = xcol, fill = "group")
) +
geom_density(mapping = aes(fill = group), bw = bw, alpha = 0.5) +
scale_fill_manual(values = valcol) +
ggtitle(title) +
......@@ -887,15 +874,20 @@ create_peaks_density <- function(tdd_filt, pval, outfile, data_col,
create_pic_figs <- function(tdd_full, gene_type = "TDD_BRAF",
output_folder = "./results/TDD_analysis",
kind = "peak") {
data_col <- kind
res_col <- paste0("mean_", data_col)
has_col <- paste0("has_", data_col)
has_res_col <- paste0("mean_", has_col)
pval <- list(peak_braf = glm_nb_pvalue(tdd_full %>% filter(ptype == "BRAF"),
data_col, output_folder),
peak_dmso = glm_nb_pvalue(tdd_full %>% filter(ptype == "DMSO"),
data_col, output_folder))
pval <- list(
peak_braf = glm_nb_pvalue(
tdd_full %>% filter(ptype == "BRAF"),
data_col, output_folder
),
peak_dmso = glm_nb_pvalue(
tdd_full %>% filter(ptype == "DMSO"),
data_col, output_folder
)
)
output_fig <- paste0(
output_folder, "/barplot_peak_analysis_", gene_type, "_",
kind, ".pdf"
......@@ -926,7 +918,8 @@ get_peak_df <- function(kind = "peak", table_gene) {
}
pfiles <- c(
"./data/gene_lists/BRAF_vs_DMSO.merged.bed",
"./data/gene_lists/DMSO_vs_BRAF.merged.bed")
"./data/gene_lists/DMSO_vs_BRAF.merged.bed"
)
if (kind == "peak") {
peak_table <- NULL
......@@ -935,10 +928,12 @@ get_peak_df <- function(kind = "peak", table_gene) {
cname <- str_replace(str_extract(peak_file, "(BRAF|DMSO)_"), "_", "")
tpeak_table <- tibble(
gene = names(table(peak_list)),
peak = as.vector(table(peak_list)))
peak = as.vector(table(peak_list))
)
tpeak_table <- tpeak_table %>% mutate(ptype = cname)
tpeak_table <- table_gene %>% left_join(tpeak_table, by = "gene"
) %>% replace_na(list(peak = 0, ptype = cname))
tpeak_table <- table_gene %>%
left_join(tpeak_table, by = "gene") %>%
replace_na(list(peak = 0, ptype = cname))
peak_table <- rbind(peak_table, tpeak_table)
}
} else {
......@@ -954,8 +949,9 @@ get_peak_df <- function(kind = "peak", table_gene) {
group_by(gene) %>%
summarise(peak_surface = sum(size))
tpeak_table <- tpeak_table %>% mutate(ptype = cname)
tpeak_table <- table_gene %>% left_join(tpeak_table, by = "gene"
) %>% replace_na(list(peak_surface = 0, ptype = cname))
tpeak_table <- table_gene %>%
left_join(tpeak_table, by = "gene") %>%
replace_na(list(peak_surface = 0, ptype = cname))
peak_table <- rbind(peak_table, tpeak_table)
}
}
......@@ -1016,7 +1012,8 @@ build_peaks_fig <- function(mean_gene_count = 10, ercc = F,
}
#' Create TDD violin figure
create_tdd_violin <- function(kind = "UP_CC_peaks",
create_tdd_violin <- function(mkind = "UP_CC_peaks",
col = "peak",
tdd_file = paste0(
"./results/TDD_analysis/correlation/",
"TDD_correlation_table.txt"
......@@ -1024,34 +1021,45 @@ create_tdd_violin <- function(kind = "UP_CC_peaks",
output = "./results/TDD_analysis/tdd_violin") {
tdd_table <- read_tsv(tdd_file) # nolint
tdd_table <- filter_on_tp(tdd_table, "3_5")
tdd_table[tdd_table$peak != kind, "peak"] <- "CTRL"
if (col == "peak") {
tdd_table[tdd_table$peak != mkind, "peak"] <- "CTRL"
tdd_table$peak <- relevel(as.factor(tdd_table$peak), "CTRL")
tdd_table <- tdd_table %>% arrange(peak)
} else {
tdd_table[tdd_table$group != mkind, "group"] <- "CTRL"
tdd_table$group <- relevel(as.factor(tdd_table$group), "CTRL")
tdd_table <- tdd_table %>% arrange(group)
}
tdd_table <- tdd_table %>% # nolint
dplyr::select(
gene, mean_DMSO_TDD, mean_BRAF_TDD,
mean_BRAF_TDD, peak
mean_BRAF_TDD, peak, group
) %>%
rename(DMSO = mean_DMSO_TDD, BRAF = mean_BRAF_TDD) %>%
pivot_longer(c(-gene, -peak),
pivot_longer(c(-gene, -peak, -group),
names_to = "condition",
values_to = "TDD"
)
if (kind == "UP_CC_peaks") {
if (mkind == "UP_CC_peaks" | mkind == "BRAF_UP") {
test_color <- "#fb7b7b"
tdd_table <- tdd_table %>% filter(condition == "DMSO")
title <- paste0("Average TDD index comparisons (at 3 and 5h) in DMSO condition\n",
"between TCpeak-UP genes in DMSO conditions and CTRL genes"
title <- paste0(
"Average TDD index comparisons (at 3 and 5h) in DMSO condition\n",
"between TCpeak-UP genes in DMSO conditions and CTRL genes"
)
} else {
test_color <- "#7b7bfb"
tdd_table <- tdd_table %>% filter(condition == "BRAF")
title <- paste0("Average TDD index comparisons (at 3 and 5h) in BRAF condition\n",
"between TCpeak-DOWN genes in BRAF conditions and CTRL genes"
title <- paste0(
"Average TDD index comparisons (at 3 and 5h) in BRAF condition\n",
"between TCpeak-DOWN genes in BRAF conditions and CTRL genes"
)
}
tdd_table$peak <- factor(tdd_table$peak, level=c(kind, "CTRL"))
p <- ggplot(
data = tdd_table,
mapping = aes(x = peak, y = TDD, fill = peak)
mapping = aes_string(x = col, y = "TDD", fill = col)
) +
scale_fill_manual(values = c("grey", "white")) +
scale_fill_manual(values = c("white", test_color)) +
geom_violin(position = position_dodge(width = 0.8)) +
geom_boxplot(width = 0.05, position = position_dodge(width = 0.8)) +
theme_classic() +
......@@ -1063,11 +1071,15 @@ create_tdd_violin <- function(kind = "UP_CC_peaks",
coord_cartesian(ylim = c(-0.5, 1.5)) +
ggtitle(title)
dir.create(output, showWarnings = F)
outname <- paste0("Peak_", kind, "_3_5h")
outname <- paste0("Peak_", mkind, "_3_5h")
pdf(paste0(output, "/", outname, ".pdf"), height = 10, width = 17)
print(p)
dev.off()
fit <- lm(TDD ^ 0.5 ~ peak, data = tdd_table)
if (col == "peak") {
fit <- lm(TDD^0.5 ~ peak, data = tdd_table)
} else {
fit <- lm(TDD^0.5 ~ group, data = tdd_table)
}
simulationOutput <- simulateResiduals(fit, n = 250, plot = F) # nolint
dir.create(paste0(output, "/diag"), showWarnings = F)
pdf(paste0(output, "/diag/", outname, ".pdf"), width = 17, height = 10)
......@@ -1080,8 +1092,10 @@ create_tdd_violin <- function(kind = "UP_CC_peaks",
#' Launch create_tdd_violins for all possible values
create_tdd_violins <- function() {
create_tdd_violin(kind = "UP_CC_peaks")
create_tdd_violin(kind = "DOWN_TC_peaks")
create_tdd_violin(mkind = "UP_CC_peaks", col = "peak")
create_tdd_violin(mkind = "DOWN_TC_peaks", col = "peak")
create_tdd_violin(mkind = "BRAF_DOWN", col = "group")
create_tdd_violin(mkind = "BRAF_UP", col = "group")
}
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment