Skip to content
Snippets Groups Projects
Commit a44d47ca authored by nfontrod's avatar nfontrod
Browse files

src/05_DESEQ2_normalisation.R: create correlation figure for t3

parent dc8893c8
No related branches found
No related tags found
No related merge requests found
......@@ -28,6 +28,10 @@ load_n_filter_data <- function(sample_threshold = 5e6, gene_threshold = 10,
c(1, grep(pattern, colnames(full_data), value = F, perl = T))
]
}
coding_gene <- read_tsv(
"results/coding_genes/coding_gene.txt"
)$gene_id # nolint
full_data <- full_data[full_data$gene %in% coding_gene, ]
if (sample_threshold > 0) {
# filter by read number
sample_size <- full_data %>%
......@@ -43,17 +47,15 @@ load_n_filter_data <- function(sample_threshold = 5e6, gene_threshold = 10,
"./results/matrice_count/ercc_matrices.txt"
) # nolint
ercc_data <- ercc_data[, colnames(full_data)]
if (gene_threshold > 0) {
ercc_data$rmean <- ercc_data %>%
select(-gene) %>%
apply(1, mean) # nolint
ercc_data <- ercc_data %>% filter(rmean >= gene_threshold) # nolint
ercc_data <- ercc_data %>% select(-rmean) # nolint
}
full_data <- rbind(full_data, ercc_data)
}
if (gene_threshold > 0) {
full_data$rmean <- full_data %>%
select(-gene) %>%
apply(1, mean) # nolint
full_data <- full_data %>% filter(rmean >= gene_threshold) # nolint
full_data <- full_data %>% select(-rmean) # nolint
}
if (load_ercc) {
ercc_gene <- full_data %>%
ercc_gene <- ercc_data %>%
filter(startsWith(gene, "ERCC-")) %>%
select(gene) %>%
unlist() %>%
......@@ -111,8 +113,9 @@ normalise_matrix <- function(stable_vector, count_tibble) {
#' @param output_f Folder where the figures will be created
#' @param output_file The name of the file that will be created
#' @param color_col The column to use to color the dots
#' @param time_step The time step of interest
create_correlation <- function(norm_table, treatment, output_f, output_file,
color_col = "stable") {
color_col = "stable", time_step = 5) {
dir.create(output_f, showWarnings = F)
if (color_col == "stable") {
my_colors <- c("dimgrey", "red")
......@@ -127,15 +130,16 @@ create_correlation <- function(norm_table, treatment, output_f, output_file,
)
}
sg <- sum(norm_table$stable)
coly <- paste0("T_", time_step)
p <- ggplot(norm_table, mapping = aes(
x = log10(`T_0`), y = log10(`T_5`),
x = log10(`T_0`), y = log10(!!as.symbol(coly)),
color = !!as.symbol(color_col)
)) +
scale_color_manual(values = my_colors) +
geom_abline(slope = 1, color = "blue", size = 2) +
geom_point() +
ggtitle(paste0(
"Correlation between 0h vs 5h of cells",
"Correlation between 0h vs", time_step, "h of cells",
"treated with triptolite and ", treatment, " (", sg,
" stable genes)"
))
......@@ -157,7 +161,6 @@ create_correlation <- function(norm_table, treatment, output_f, output_file,
#' @param colors The colors of groups of genes
create_expression_boxplot <- function(norm_table, treatment, output_f,
output_file, my_colors) {
message(my_colors)
new_table <- norm_table %>%
pivot_longer(c(-gene, -stable, -group),
names_to = "condition",
......@@ -188,9 +191,11 @@ create_expression_boxplot <- function(norm_table, treatment, output_f,
#' @param output_file File that will contain the correlation figure
#' @param stable_file A file containing stable genes
#' @param output_f The folder were the results will be created
#' @param time_step The time step of interest
#' @import tidyverse
get_mean_correleation_figure <- function(norm_matrix, output_file,
stable_file, treatment, output_f) {
get_mean_correlation_figure <- function(norm_matrix, output_file,
stable_file, treatment, output_f,
time_step) {
res <- norm_matrix %>%
as_tibble() %>%
pivot_longer(-gene, values_to = "counts", names_to = "sample") %>%
......@@ -208,8 +213,14 @@ get_mean_correleation_figure <- function(norm_matrix, output_file,
mutate(stable = gene %in% stable_gene) %>%
arrange(stable) # nolint
res <- get_group_columns(res) # nolint
create_correlation(res, treatment, output_f, output_file, "stable")
create_correlation(res, treatment, output_f, output_file, "group")
create_correlation(
res, treatment, output_f, output_file, "stable",
time_step
)
create_correlation(
res, treatment, output_f, output_file, "group",
time_step
)
}
......@@ -228,13 +239,16 @@ normalize_on_stable_gene <- function(count_threshold = 5e6,
"results/deseq2_normalisation") {
my_pattern <- paste0(
treatment, "_T*_0|",
treatment, "_T*_5|", treatment, "_DMSO_0|",
treatment, "_TCHX_5"
treatment, "_T*_5|",
treatment, "_DMSO_0|",
treatment, "_TCHX_5|",
treatment, "_T*_3|",
treatment, "_TCHX_3"
)
full_data <- load_n_filter_data(
sample_threshold = count_threshold,
pattern = my_pattern,
gene_threshold = 0,
gene_threshold = 10,
load_ercc = use_ercc
)
if (treatment == "BRAF") {
......@@ -255,19 +269,22 @@ normalize_on_stable_gene <- function(count_threshold = 5e6,
if (use_ercc) {
name_ercc <- "_ercc"
}
fig_name <- paste0(treatment, name_ercc, "_T0_vs_T5_mean")
norm_table <- data.frame(gene = rownames(norm_counts), norm_counts)
get_mean_correleation_figure(
norm_table,
fig_name, stable_file, treatment,
output_f = output_folder
)
write_tsv(norm_table,
file = paste0(
output_folder, "/", treatment, name_ercc,
"_norm_stable_gene.txt"
for (ts in c("3", "5")) {
fig_name <- paste0(treatment, name_ercc, "_T0_vs_T", ts, "_mean")
norm_table <- data.frame(gene = rownames(norm_counts), norm_counts)
get_mean_correlation_figure(
norm_table,
fig_name, stable_file, treatment,
output_f = output_folder,
time_step = ts
)
)
write_tsv(norm_table,
file = paste0(
output_folder, "/", treatment, name_ercc,
"_norm_stable_gene.txt"
)
)
}
}
......@@ -276,20 +293,20 @@ normalize_on_stable_gene <- function(count_threshold = 5e6,
#' @param output_folder Folder where the results will be created
grep_normalise_conditions <- function(output_folder = "results/deseq2_normalisation") {
normalize_on_stable_gene(
count_threshold = 5e6, treatment = "DMSO",
count_threshold = 3e6, treatment = "DMSO",
output_folder = output_folder
)
normalize_on_stable_gene(
count_threshold = 4.8e6, treatment = "BRAF",
count_threshold = 3e6, treatment = "BRAF",
output_folder = output_folder
)
normalize_on_stable_gene(
count_threshold = 5e6, treatment = "DMSO",
count_threshold = 3e6, treatment = "DMSO",
use_ercc = T,
output_folder = output_folder
)
normalize_on_stable_gene(
count_threshold = 4.8e6, treatment = "BRAF",
count_threshold = 3e6, treatment = "BRAF",
use_ercc = T,
output_folder = output_folder
)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment