Skip to content
Snippets Groups Projects
Commit a44d47ca authored by nfontrod's avatar nfontrod
Browse files

src/05_DESEQ2_normalisation.R: create correlation figure for t3

parent dc8893c8
Branches
No related tags found
No related merge requests found
...@@ -28,6 +28,10 @@ load_n_filter_data <- function(sample_threshold = 5e6, gene_threshold = 10, ...@@ -28,6 +28,10 @@ load_n_filter_data <- function(sample_threshold = 5e6, gene_threshold = 10,
c(1, grep(pattern, colnames(full_data), value = F, perl = T)) c(1, grep(pattern, colnames(full_data), value = F, perl = T))
] ]
} }
coding_gene <- read_tsv(
"results/coding_genes/coding_gene.txt"
)$gene_id # nolint
full_data <- full_data[full_data$gene %in% coding_gene, ]
if (sample_threshold > 0) { if (sample_threshold > 0) {
# filter by read number # filter by read number
sample_size <- full_data %>% sample_size <- full_data %>%
...@@ -43,17 +47,15 @@ load_n_filter_data <- function(sample_threshold = 5e6, gene_threshold = 10, ...@@ -43,17 +47,15 @@ load_n_filter_data <- function(sample_threshold = 5e6, gene_threshold = 10,
"./results/matrice_count/ercc_matrices.txt" "./results/matrice_count/ercc_matrices.txt"
) # nolint ) # nolint
ercc_data <- ercc_data[, colnames(full_data)] ercc_data <- ercc_data[, colnames(full_data)]
full_data <- rbind(full_data, ercc_data)
}
if (gene_threshold > 0) { if (gene_threshold > 0) {
full_data$rmean <- full_data %>% ercc_data$rmean <- ercc_data %>%
select(-gene) %>% select(-gene) %>%
apply(1, mean) # nolint apply(1, mean) # nolint
full_data <- full_data %>% filter(rmean >= gene_threshold) # nolint ercc_data <- ercc_data %>% filter(rmean >= gene_threshold) # nolint
full_data <- full_data %>% select(-rmean) # nolint ercc_data <- ercc_data %>% select(-rmean) # nolint
} }
if (load_ercc) { full_data <- rbind(full_data, ercc_data)
ercc_gene <- full_data %>% ercc_gene <- ercc_data %>%
filter(startsWith(gene, "ERCC-")) %>% filter(startsWith(gene, "ERCC-")) %>%
select(gene) %>% select(gene) %>%
unlist() %>% unlist() %>%
...@@ -111,8 +113,9 @@ normalise_matrix <- function(stable_vector, count_tibble) { ...@@ -111,8 +113,9 @@ normalise_matrix <- function(stable_vector, count_tibble) {
#' @param output_f Folder where the figures will be created #' @param output_f Folder where the figures will be created
#' @param output_file The name of the file that will be created #' @param output_file The name of the file that will be created
#' @param color_col The column to use to color the dots #' @param color_col The column to use to color the dots
#' @param time_step The time step of interest
create_correlation <- function(norm_table, treatment, output_f, output_file, create_correlation <- function(norm_table, treatment, output_f, output_file,
color_col = "stable") { color_col = "stable", time_step = 5) {
dir.create(output_f, showWarnings = F) dir.create(output_f, showWarnings = F)
if (color_col == "stable") { if (color_col == "stable") {
my_colors <- c("dimgrey", "red") my_colors <- c("dimgrey", "red")
...@@ -127,15 +130,16 @@ create_correlation <- function(norm_table, treatment, output_f, output_file, ...@@ -127,15 +130,16 @@ create_correlation <- function(norm_table, treatment, output_f, output_file,
) )
} }
sg <- sum(norm_table$stable) sg <- sum(norm_table$stable)
coly <- paste0("T_", time_step)
p <- ggplot(norm_table, mapping = aes( p <- ggplot(norm_table, mapping = aes(
x = log10(`T_0`), y = log10(`T_5`), x = log10(`T_0`), y = log10(!!as.symbol(coly)),
color = !!as.symbol(color_col) color = !!as.symbol(color_col)
)) + )) +
scale_color_manual(values = my_colors) + scale_color_manual(values = my_colors) +
geom_abline(slope = 1, color = "blue", size = 2) + geom_abline(slope = 1, color = "blue", size = 2) +
geom_point() + geom_point() +
ggtitle(paste0( ggtitle(paste0(
"Correlation between 0h vs 5h of cells", "Correlation between 0h vs", time_step, "h of cells",
"treated with triptolite and ", treatment, " (", sg, "treated with triptolite and ", treatment, " (", sg,
" stable genes)" " stable genes)"
)) ))
...@@ -157,7 +161,6 @@ create_correlation <- function(norm_table, treatment, output_f, output_file, ...@@ -157,7 +161,6 @@ create_correlation <- function(norm_table, treatment, output_f, output_file,
#' @param colors The colors of groups of genes #' @param colors The colors of groups of genes
create_expression_boxplot <- function(norm_table, treatment, output_f, create_expression_boxplot <- function(norm_table, treatment, output_f,
output_file, my_colors) { output_file, my_colors) {
message(my_colors)
new_table <- norm_table %>% new_table <- norm_table %>%
pivot_longer(c(-gene, -stable, -group), pivot_longer(c(-gene, -stable, -group),
names_to = "condition", names_to = "condition",
...@@ -188,9 +191,11 @@ create_expression_boxplot <- function(norm_table, treatment, output_f, ...@@ -188,9 +191,11 @@ create_expression_boxplot <- function(norm_table, treatment, output_f,
#' @param output_file File that will contain the correlation figure #' @param output_file File that will contain the correlation figure
#' @param stable_file A file containing stable genes #' @param stable_file A file containing stable genes
#' @param output_f The folder were the results will be created #' @param output_f The folder were the results will be created
#' @param time_step The time step of interest
#' @import tidyverse #' @import tidyverse
get_mean_correleation_figure <- function(norm_matrix, output_file, get_mean_correlation_figure <- function(norm_matrix, output_file,
stable_file, treatment, output_f) { stable_file, treatment, output_f,
time_step) {
res <- norm_matrix %>% res <- norm_matrix %>%
as_tibble() %>% as_tibble() %>%
pivot_longer(-gene, values_to = "counts", names_to = "sample") %>% pivot_longer(-gene, values_to = "counts", names_to = "sample") %>%
...@@ -208,8 +213,14 @@ get_mean_correleation_figure <- function(norm_matrix, output_file, ...@@ -208,8 +213,14 @@ get_mean_correleation_figure <- function(norm_matrix, output_file,
mutate(stable = gene %in% stable_gene) %>% mutate(stable = gene %in% stable_gene) %>%
arrange(stable) # nolint arrange(stable) # nolint
res <- get_group_columns(res) # nolint res <- get_group_columns(res) # nolint
create_correlation(res, treatment, output_f, output_file, "stable") create_correlation(
create_correlation(res, treatment, output_f, output_file, "group") res, treatment, output_f, output_file, "stable",
time_step
)
create_correlation(
res, treatment, output_f, output_file, "group",
time_step
)
} }
...@@ -228,13 +239,16 @@ normalize_on_stable_gene <- function(count_threshold = 5e6, ...@@ -228,13 +239,16 @@ normalize_on_stable_gene <- function(count_threshold = 5e6,
"results/deseq2_normalisation") { "results/deseq2_normalisation") {
my_pattern <- paste0( my_pattern <- paste0(
treatment, "_T*_0|", treatment, "_T*_0|",
treatment, "_T*_5|", treatment, "_DMSO_0|", treatment, "_T*_5|",
treatment, "_TCHX_5" treatment, "_DMSO_0|",
treatment, "_TCHX_5|",
treatment, "_T*_3|",
treatment, "_TCHX_3"
) )
full_data <- load_n_filter_data( full_data <- load_n_filter_data(
sample_threshold = count_threshold, sample_threshold = count_threshold,
pattern = my_pattern, pattern = my_pattern,
gene_threshold = 0, gene_threshold = 10,
load_ercc = use_ercc load_ercc = use_ercc
) )
if (treatment == "BRAF") { if (treatment == "BRAF") {
...@@ -255,12 +269,14 @@ normalize_on_stable_gene <- function(count_threshold = 5e6, ...@@ -255,12 +269,14 @@ normalize_on_stable_gene <- function(count_threshold = 5e6,
if (use_ercc) { if (use_ercc) {
name_ercc <- "_ercc" name_ercc <- "_ercc"
} }
fig_name <- paste0(treatment, name_ercc, "_T0_vs_T5_mean") for (ts in c("3", "5")) {
fig_name <- paste0(treatment, name_ercc, "_T0_vs_T", ts, "_mean")
norm_table <- data.frame(gene = rownames(norm_counts), norm_counts) norm_table <- data.frame(gene = rownames(norm_counts), norm_counts)
get_mean_correleation_figure( get_mean_correlation_figure(
norm_table, norm_table,
fig_name, stable_file, treatment, fig_name, stable_file, treatment,
output_f = output_folder output_f = output_folder,
time_step = ts
) )
write_tsv(norm_table, write_tsv(norm_table,
file = paste0( file = paste0(
...@@ -269,6 +285,7 @@ normalize_on_stable_gene <- function(count_threshold = 5e6, ...@@ -269,6 +285,7 @@ normalize_on_stable_gene <- function(count_threshold = 5e6,
) )
) )
} }
}
#' Normalise every conditions separatly #' Normalise every conditions separatly
...@@ -276,20 +293,20 @@ normalize_on_stable_gene <- function(count_threshold = 5e6, ...@@ -276,20 +293,20 @@ normalize_on_stable_gene <- function(count_threshold = 5e6,
#' @param output_folder Folder where the results will be created #' @param output_folder Folder where the results will be created
grep_normalise_conditions <- function(output_folder = "results/deseq2_normalisation") { grep_normalise_conditions <- function(output_folder = "results/deseq2_normalisation") {
normalize_on_stable_gene( normalize_on_stable_gene(
count_threshold = 5e6, treatment = "DMSO", count_threshold = 3e6, treatment = "DMSO",
output_folder = output_folder output_folder = output_folder
) )
normalize_on_stable_gene( normalize_on_stable_gene(
count_threshold = 4.8e6, treatment = "BRAF", count_threshold = 3e6, treatment = "BRAF",
output_folder = output_folder output_folder = output_folder
) )
normalize_on_stable_gene( normalize_on_stable_gene(
count_threshold = 5e6, treatment = "DMSO", count_threshold = 3e6, treatment = "DMSO",
use_ercc = T, use_ercc = T,
output_folder = output_folder output_folder = output_folder
) )
normalize_on_stable_gene( normalize_on_stable_gene(
count_threshold = 4.8e6, treatment = "BRAF", count_threshold = 3e6, treatment = "BRAF",
use_ercc = T, use_ercc = T,
output_folder = output_folder output_folder = output_folder
) )
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment