diff --git a/src/05_DESEQ2_normalisation.R b/src/05_DESEQ2_normalisation.R index 6167d7ae8c4086e67964db998e31d4c9294a0680..d36e150d3029bd222413d9be63cbb490d772f82b 100644 --- a/src/05_DESEQ2_normalisation.R +++ b/src/05_DESEQ2_normalisation.R @@ -28,6 +28,10 @@ load_n_filter_data <- function(sample_threshold = 5e6, gene_threshold = 10, c(1, grep(pattern, colnames(full_data), value = F, perl = T)) ] } + coding_gene <- read_tsv( + "results/coding_genes/coding_gene.txt" + )$gene_id # nolint + full_data <- full_data[full_data$gene %in% coding_gene, ] if (sample_threshold > 0) { # filter by read number sample_size <- full_data %>% @@ -43,17 +47,15 @@ load_n_filter_data <- function(sample_threshold = 5e6, gene_threshold = 10, "./results/matrice_count/ercc_matrices.txt" ) # nolint ercc_data <- ercc_data[, colnames(full_data)] + if (gene_threshold > 0) { + ercc_data$rmean <- ercc_data %>% + select(-gene) %>% + apply(1, mean) # nolint + ercc_data <- ercc_data %>% filter(rmean >= gene_threshold) # nolint + ercc_data <- ercc_data %>% select(-rmean) # nolint + } full_data <- rbind(full_data, ercc_data) - } - if (gene_threshold > 0) { - full_data$rmean <- full_data %>% - select(-gene) %>% - apply(1, mean) # nolint - full_data <- full_data %>% filter(rmean >= gene_threshold) # nolint - full_data <- full_data %>% select(-rmean) # nolint - } - if (load_ercc) { - ercc_gene <- full_data %>% + ercc_gene <- ercc_data %>% filter(startsWith(gene, "ERCC-")) %>% select(gene) %>% unlist() %>% @@ -111,8 +113,9 @@ normalise_matrix <- function(stable_vector, count_tibble) { #' @param output_f Folder where the figures will be created #' @param output_file The name of the file that will be created #' @param color_col The column to use to color the dots +#' @param time_step The time step of interest create_correlation <- function(norm_table, treatment, output_f, output_file, - color_col = "stable") { + color_col = "stable", time_step = 5) { dir.create(output_f, showWarnings = F) if (color_col == "stable") { my_colors <- c("dimgrey", "red") @@ -127,15 +130,16 @@ create_correlation <- function(norm_table, treatment, output_f, output_file, ) } sg <- sum(norm_table$stable) + coly <- paste0("T_", time_step) p <- ggplot(norm_table, mapping = aes( - x = log10(`T_0`), y = log10(`T_5`), + x = log10(`T_0`), y = log10(!!as.symbol(coly)), color = !!as.symbol(color_col) )) + scale_color_manual(values = my_colors) + geom_abline(slope = 1, color = "blue", size = 2) + geom_point() + ggtitle(paste0( - "Correlation between 0h vs 5h of cells", + "Correlation between 0h vs", time_step, "h of cells", "treated with triptolite and ", treatment, " (", sg, " stable genes)" )) @@ -157,7 +161,6 @@ create_correlation <- function(norm_table, treatment, output_f, output_file, #' @param colors The colors of groups of genes create_expression_boxplot <- function(norm_table, treatment, output_f, output_file, my_colors) { - message(my_colors) new_table <- norm_table %>% pivot_longer(c(-gene, -stable, -group), names_to = "condition", @@ -188,9 +191,11 @@ create_expression_boxplot <- function(norm_table, treatment, output_f, #' @param output_file File that will contain the correlation figure #' @param stable_file A file containing stable genes #' @param output_f The folder were the results will be created +#' @param time_step The time step of interest #' @import tidyverse -get_mean_correleation_figure <- function(norm_matrix, output_file, - stable_file, treatment, output_f) { +get_mean_correlation_figure <- function(norm_matrix, output_file, + stable_file, treatment, output_f, + time_step) { res <- norm_matrix %>% as_tibble() %>% pivot_longer(-gene, values_to = "counts", names_to = "sample") %>% @@ -208,8 +213,14 @@ get_mean_correleation_figure <- function(norm_matrix, output_file, mutate(stable = gene %in% stable_gene) %>% arrange(stable) # nolint res <- get_group_columns(res) # nolint - create_correlation(res, treatment, output_f, output_file, "stable") - create_correlation(res, treatment, output_f, output_file, "group") + create_correlation( + res, treatment, output_f, output_file, "stable", + time_step + ) + create_correlation( + res, treatment, output_f, output_file, "group", + time_step + ) } @@ -228,13 +239,16 @@ normalize_on_stable_gene <- function(count_threshold = 5e6, "results/deseq2_normalisation") { my_pattern <- paste0( treatment, "_T*_0|", - treatment, "_T*_5|", treatment, "_DMSO_0|", - treatment, "_TCHX_5" + treatment, "_T*_5|", + treatment, "_DMSO_0|", + treatment, "_TCHX_5|", + treatment, "_T*_3|", + treatment, "_TCHX_3" ) full_data <- load_n_filter_data( sample_threshold = count_threshold, pattern = my_pattern, - gene_threshold = 0, + gene_threshold = 10, load_ercc = use_ercc ) if (treatment == "BRAF") { @@ -255,19 +269,22 @@ normalize_on_stable_gene <- function(count_threshold = 5e6, if (use_ercc) { name_ercc <- "_ercc" } - fig_name <- paste0(treatment, name_ercc, "_T0_vs_T5_mean") - norm_table <- data.frame(gene = rownames(norm_counts), norm_counts) - get_mean_correleation_figure( - norm_table, - fig_name, stable_file, treatment, - output_f = output_folder - ) - write_tsv(norm_table, - file = paste0( - output_folder, "/", treatment, name_ercc, - "_norm_stable_gene.txt" + for (ts in c("3", "5")) { + fig_name <- paste0(treatment, name_ercc, "_T0_vs_T", ts, "_mean") + norm_table <- data.frame(gene = rownames(norm_counts), norm_counts) + get_mean_correlation_figure( + norm_table, + fig_name, stable_file, treatment, + output_f = output_folder, + time_step = ts ) - ) + write_tsv(norm_table, + file = paste0( + output_folder, "/", treatment, name_ercc, + "_norm_stable_gene.txt" + ) + ) + } } @@ -276,20 +293,20 @@ normalize_on_stable_gene <- function(count_threshold = 5e6, #' @param output_folder Folder where the results will be created grep_normalise_conditions <- function(output_folder = "results/deseq2_normalisation") { normalize_on_stable_gene( - count_threshold = 5e6, treatment = "DMSO", + count_threshold = 3e6, treatment = "DMSO", output_folder = output_folder ) normalize_on_stable_gene( - count_threshold = 4.8e6, treatment = "BRAF", + count_threshold = 3e6, treatment = "BRAF", output_folder = output_folder ) normalize_on_stable_gene( - count_threshold = 5e6, treatment = "DMSO", + count_threshold = 3e6, treatment = "DMSO", use_ercc = T, output_folder = output_folder ) normalize_on_stable_gene( - count_threshold = 4.8e6, treatment = "BRAF", + count_threshold = 3e6, treatment = "BRAF", use_ercc = T, output_folder = output_folder )