Skip to content
Snippets Groups Projects
Commit a748a853 authored by nfontrod's avatar nfontrod
Browse files

src/03_ERCC_analysis_code.R: contains function dedicated to analyse ERCC in dmso/braf experiment

parent 6bdd2882
No related branches found
No related tags found
No related merge requests found
#!/bin/Rscript
# This script aims to compute the relative abundance of ERCC spike-in
source("src/03_ERCC_analysis_function.R")
# create matrices with all samples
data_count_matrix <- get_count_matrix(
"data/09_htseq_count", "BRAF|DMSO",
"", "*_no_spike-in.tsv",
0, T
)
ercc_count_matrix <- get_count_matrix(
"data/spike-in/05_htseq_count", "BRAF|DMSO",
"", "*.tsv",
0
)[, colnames(data_count_matrix)]
data_count_matrix <- data.frame(gene = rownames(data_count_matrix), data_count_matrix)
dir.create("./results/matrice_count/")
write.table(data_count_matrix,
file = "./results/matrice_count/coding_gene_matrices.txt",
quote = F,
sep = "\t",
row.names = F
)
ercc_count_matrix_new <- data.frame(gene = rownames(ercc_count_matrix), ercc_count_matrix)
write.table(ercc_count_matrix_new,
file = "./results/matrice_count/ercc_matrices.txt",
quote = F,
sep = "\t",
row.names = F
)
# Ercc correlation with all reads
ercc_real_concentration <- read_tsv("./data/ercc_concentration.txt") %>%
select(`ERCC ID`, `concentration in Mix 1 (attomoles/ul)`) %>%
rename(`ERCC ID` = "gene", `concentration in Mix 1 (attomoles/ul)` = "concentration")
c_sup0 <- create_correlation_figures(ercc_count_matrix, 0)
c_sup2 <- create_correlation_figures(ercc_count_matrix, 2)
c_sup5 <- create_correlation_figures(ercc_count_matrix, 5)
c_sup10 <- create_correlation_figures(ercc_count_matrix, 10)
c_sup20 <- create_correlation_figures(ercc_count_matrix, 20)
corr_values <- tibble(
correlation = c(c_sup0, c_sup2, c_sup5, c_sup10, c_sup20),
group = c(
rep("mean(ERCC_count) > 0", times = length(c_sup0)),
rep("mean(ERCC_count) > 02", times = length(c_sup2)),
rep("mean(ERCC_count) > 05", times = length(c_sup5)),
rep("mean(ERCC_count) > 10", times = length(c_sup10)),
rep("mean(ERCC_count) > 20", times = length(c_sup20))
)
)
pdf("./results/ERCC_analysis/Correlation_recap.pdf", width = 12, height = 100)
ggplot(corr_values %>% arrange(group),
mapping = aes(x = group, y = correlation)
) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
dev.off()
########################
# ERCC relative abundance
########################
data_count_matrix <- get_count_matrix(
"data/09_htseq_count/with_unnmmaped", "BRAF|DMSO",
c("DMSO_0", "_T_"), "*_no_spike-in.tsv",
0, F
)
ercc_count_matrix <- get_count_matrix(
"data/spike-in/05_htseq_count", "BRAF|DMSO",
c("DMSO_0", "_T_"), "*.tsv",
0
)[, colnames(data_count_matrix)]
# No normalisation
data_sample <- get_sample_count(data_count_matrix)
ercc_sample <- get_sample_count(ercc_count_matrix)
relative_counts <- get_relative_count(data_sample, ercc_sample)
relative_counts <- relative_counts %>% arrange(name)
ggplot(relative_counts, mapping = aes(x = name, y = relative_count)) +
geom_point() +
theme(axis.text.x = element_text(angle = 90))
# Normalisation
full_matrix <- rbind(data_count_matrix, ercc_count_matrix)
coldata <- data.frame(condition = relevel(as.factor(str_extract(colnames(data_count_matrix), "BRAF|DMSO")), "DMSO"))
rownames(coldata) <- colnames(data_count_matrix)
dds <- DESeqDataSetFromMatrix(
countData = full_matrix, colData = coldata,
design = ~condition
)
dds <- DESeq(dds)
cts_full <- counts(dds, norm = T)
cts_norm_ercc <- cts_full[grep("ERCC-", rownames(cts_full)), ]
cts_norm_data <- cts_full[grep("ERCC-", rownames(cts_full), invert = T), ]
data_sample <- get_sample_count(cts_norm_data)
ercc_sample <- get_sample_count(cts_norm_ercc)
relative_counts <- get_relative_count(data_sample, ercc_sample)
relative_counts <- relative_counts %>% arrange(name)
ggplot(relative_counts, mapping = aes(x = name, y = relative_count)) +
geom_boxplot() +
theme(axis.text.x = element_text(angle = 90))
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment