Skip to content
Snippets Groups Projects
Commit a41cccac authored by nfontrod's avatar nfontrod
Browse files

src/02_differential_expression_between_BRAF_and_DMSO.R

parent 8f4936e6
Branches
No related tags found
No related merge requests found
#!/bin/Rscript
# The goal of this script is to get the differentially epxressed gene
# in BRAF vs DMSO experiment
library(tximport)
library(tidyverse)
library(DESeq2)
library(RColorBrewer)
library(gplots)
library(pheatmap)
library(viridis)
# loading datasets
directory <- "data/09_htseq_count"
# load files
count_files <- list.files(
path = directory,
pattern = ".tsv",
full.names = TRUE
)
selected_files <- c(
grep("DMSO_DMSO_0", count_files, value = TRUE),
grep("BRAF_DMSO_0", count_files, value = TRUE)
)
sample_name <- sub(
paste0(directory, "/"), "",
sub("*_no_spike-in.tsv", "", selected_files)
)
condition <- as.factor(str_extract(sample_name, "BRAF|DMSO"))
condition <- relevel(condition, "DMSO")
# Build the count matrix
sample_table <- data.frame(
sampleName = sample_name,
fileName = selected_files,
condition = condition
)
dds_input <- DESeqDataSetFromHTSeqCount(
sampleTable = sample_table,
directory = ".",
design = ~condition
)
dds <- DESeq(dds_input)
cts <- counts(dds, norm = F)
# filter by read number
sample_size <- cts %>%
as_tibble() %>%
summarise_if(is.numeric, sum)
cts <- cts[, colnames(cts)[(sample_size > 5e6)]]
# filter on coding genes
coding_gene <- read_tsv("results/coding_genes/coding_gene.txt")$gene_id
cts_0 <- cts[which(rownames(cts) %in% coding_gene), ]
# filter on expressed genes
cts_1 <- data.frame(cts_0, moy = as.numeric(as.vector(rowMeans(cts_0))))
cts_2 <- cts_1[which(cts_1$moy > 2), ] # 11355 expressed genes
cts_filtered <- cts_2[, seq_len(ncol(cts_0))]
# cts_filtered <- cts_0
# creation of coldata
coldata <- as.data.frame(matrix(nc = 1, nr = ncol(cts_filtered)))
colnames(coldata) <- "condition"
rownames(coldata) <- colnames(cts_filtered)
coldata$condition <- condition
dds <- DESeqDataSetFromMatrix(
countData = cts_filtered, colData = coldata,
design = ~condition
)
dds <- DESeq(dds)
####################################
## Comptages bruts et Normalisés ##
####################################
counts_raw <- counts(dds, norm = F)
dir.create("./results/deseq_DMSO_vs_BRAF")
write.table(counts_raw,
file = "./results/deseq_DMSO_vs_BRAF/readcounts_raw_11355genes.csv",
sep = " \t"
)
#### Norm
counts_normalise <- counts(dds, norm = T)
write.table(counts_normalise,
file = "./results/deseq_DMSO_vs_BRAF/readcounts_norm_11355genes.csv",
sep = "\t", dec = ","
)
################
### General ###
################
rld <- rlogTransformation(dds, blind = TRUE)
vsd <- varianceStabilizingTransformation(dds, blind = TRUE)
vstMat <- assay(vsd)
pdf("./results/deseq_DMSO_vs_BRAF/plots_general_view.pdf")
condcols <- brewer.pal(n = length(unique(coldata$condition)), name = "Paired")[1:length(unique(coldata$condition))]
names(condcols) <- unique(coldata$condition)
barplot(colSums(counts(dds, normalized = F)),
col = condcols[as.factor(coldata$condition)],
las = 2, cex.names = 0.4,
main = "Pre Normalised Counts"
)
barplot(colSums(counts(dds, normalized = T)),
col = condcols[as.factor(coldata$condition)],
las = 2, cex.names = 0.4,
main = "Post Normalised Counts"
)
pcaData <- plotPCA(rld, intgroup = c("condition"), returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color = condition)) +
geom_point(size = 3) +
xlab(paste0("PC1: ", percentVar[1], "% variance")) +
ylab(paste0("PC2: ", percentVar[2], "% variance")) +
coord_fixed()
sampleDists <- dist(t(assay(rld)))
sampleDistMatrix <- as.matrix(sampleDists)
colours <- colorRampPalette(rev(brewer.pal(9, "Blues")))(255)
heatmap.2(sampleDistMatrix, trace = "none", col = colours, margins = c(15, 15), cexRow = 0.5, cexCol = 0.5)
cts.norm.df <- as.data.frame(counts_normalise)
annotation_col <- data.frame(condition = coldata$condition)
rownames(annotation_col) <- rownames(coldata)
mat_colors <- list(
condition = c("mediumpurple", "hotpink4")
)
names(mat_colors$condition) <- unique(coldata$condition)
pheatmap(log10(cts.norm.df + 1),
cluster_rows = TRUE, show_rownames = FALSE, color = viridis(250),
cluster_cols = TRUE, annotation_col = annotation_col, # fontsize = 12,
annotation_colors = mat_colors, angle_col = "45"
)
plotDispEsts(dds)
dev.off()
###########################
### Counts observations ###
###########################
n.counts.pivot <- as.data.frame(counts_normalise) %>%
pivot_longer(cols = c(1:ncol(counts_normalise)), names_to = "samples", values_to = "reads")
counts_raw.2 <- counts_raw[, c(2:ncol(counts_raw))]
n.counts.pivot.raw <- as.data.frame(counts_raw.2) %>%
pivot_longer(cols = c(1:ncol(counts_raw.2)), names_to = "samples", values_to = "reads")
png("./results/deseq_DMSO_vs_BRAF/couverture_de_reads_histogramme_RAW.png", width = 1000)
ggplot(data = n.counts.pivot.raw, aes(x = (reads + 1), color = samples, group = samples)) +
geom_density(alpha = .2, size = 1) +
scale_x_continuous(trans = "log10") +
theme_bw() +
xlab("Number of reads") +
ylab("Density") +
ggtitle("Profile of counting distributions - Quantseq")
dev.off()
png("./results/deseq_DMSO_vs_BRAF/couverture_de_reads_histogramme_NORM.png", width = 1000)
ggplot(data = n.counts.pivot, aes(x = (reads + 1), color = samples, group = samples)) +
geom_density(alpha = .2, size = 1) +
scale_x_continuous(trans = "log10") +
theme_bw() +
xlab("Number of reads") +
ylab("Density") +
ggtitle("Profile of counting distributions - Quantseq")
dev.off()
########################
### BRAF vs DMSO, ######
########################
# old method
resGA <- results(dds,
contrast = c("condition", "BRAF", "DMSO")
)
resGA <- as.data.frame(resGA)
col <- colnames(resGA)
resGA$gene <- rownames(resGA)
resGA <- resGA[, c("gene", col)]
write.table(resGA, file = "./results/deseq_DMSO_vs_BRAF/old_method_results_differential_expression_BRAF_DMSO.txt", row.names = F, quote= F, sep="\t")
sig_res_tmp <- resGA[which(resGA$padj <= 0.05), ]
sig_res <- sig_res_tmp[which(abs(sig_res_tmp$log2FoldChange) >= 0.585), ]
sig_res_final <- sig_res[which(abs(sig_res$baseMean) >= 10), ]
dim(sig_res_final)
write.table(sig_res_final, file = "./results/deseq_DMSO_vs_BRAF/old_method_results_differential_expression_BRAF_DMSO_sig.txt", row.names = F, quote = F, sep="\t")
# new_method
resGA <- results(dds,
contrast = c("condition", "BRAF", "DMSO"), lfcThreshold = 0.585, altHypothesis = "greaterAbs"
)
resGA <- as.data.frame(resGA)
col <- colnames(resGA)
resGA$gene <- rownames(resGA)
resGA <- resGA[, c("gene", col)]
write.table(resGA, file = "./results/deseq_DMSO_vs_BRAF/new_method_results_differential_expression_BRAF_DMSO.txt", row.names = F, quote= F, sep="\t")
sig_res_tmp <- resGA[which(resGA$padj <= 0.05), ]
sig_res_final <- sig_res_tmp[which(abs(sig_res_tmp$baseMean) >= 10), ]
dim(sig_res_final)
write.table(sig_res_final, file = "./results/deseq_DMSO_vs_BRAF/new_method_results_differential_expression_BRAF_DMSO_sig.txt", row.names = F, quote = F, sep="\t")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment