Skip to content
Snippets Groups Projects
Commit ac8f615b authored by nfontrod's avatar nfontrod
Browse files

src/03_ERCC_analysis_function.R: linter changes

parent 5fff23d3
No related branches found
No related tags found
No related merge requests found
......@@ -56,7 +56,9 @@ get_count_matrix <- function(directory, condition_pattern, selection_vec = "",
dds <- DESeq(dds_input) # nolint
cts <- counts(dds, norm = F) # nolint
if (filtering_transcript) {
coding_gene <- read_tsv("results/coding_genes/coding_gene.txt")$gene_id
coding_gene <- read_tsv(
"results/coding_genes/coding_gene.txt"
)$gene_id # nolint
cts <- cts[which(rownames(cts) %in% coding_gene), ]
}
if (read_number_filter > 0) {
......@@ -64,7 +66,7 @@ get_count_matrix <- function(directory, condition_pattern, selection_vec = "",
sample_size <- cts %>%
as_tibble() %>%
summarise_if(is.numeric, sum)
cts <- cts[, colnames(cts)[(sample_size > 5e6)]]
cts <- cts[, colnames(cts)[(sample_size > read_number_filter)]]
}
return(cts)
}
......@@ -89,15 +91,19 @@ get_relative_count <- function(data_count, ercc_count) {
by = c("name", "samples"),
suffix = c("", "_ercc")
)
res <- res %>% mutate(relative_count = counts_ercc / counts)
res <- res %>% mutate(
relative_count = counts_ercc / counts
) # nolint
return(res)
}
#' get ERCC correlation plot for one sample
#'
#' @param ercc_raw a dataframe containing raw ERCC count
#' @param col The selected sample for which we want to display the correlation figure
#' @param size_list a named vector containing the number of reads in coding gene for each samples
#' @param col The selected sample for which we want to display
#' the correlation figure
#' @param size_list a named vector containing the number of reads
#' in coding gene for each samples
create_ercc_correlation_plots <- function(ercc_raw, col, size_list) {
selected <- which(ercc_raw[[col]] > 0 & ercc_raw$concentration > 0)
size <- size_list[col]
......@@ -105,9 +111,15 @@ create_ercc_correlation_plots <- function(ercc_raw, col, size_list) {
colc <- ercc_raw$concentration[selected]
cor_val <- cor.test(log2(colc), log2(coli))$estimate
ercc <- nrow(ercc_raw)
p <- ggplot(ercc_raw, mapping = aes(x = log2(concentration), y = log2(!!as.symbol(col)))) +
p <- ggplot(ercc_raw, mapping = aes(
x = log2(concentration),
y = log2(!!as.symbol(col))
)) +
geom_point() +
ggtitle(paste("R = ", round(cor_val, 3), "- n =", ercc, "- s = ", round(size / 1e6, 1), "M")) +
ggtitle(paste(
"R = ", round(cor_val, 3), "- n =", ercc,
"- s = ", round(size / 1e6, 1), "M"
)) +
stat_smooth(method = "lm", se = FALSE, color = "red")
p$my_cor <- cor_val
return(p)
......@@ -116,29 +128,40 @@ create_ercc_correlation_plots <- function(ercc_raw, col, size_list) {
#' Create ERCC correlation figure for all samples
#'
#' @param ercc_count_matrix The ercc count matrix
#' @param count_threshold The average count threshold an ercc must have to be displayed in the figure
#' @param count_threshold The average count threshold an ercc must have
#' to be displayed in the figure
#' @import tidyverse
create_correlation_figures <- function(ercc_count_matrix, count_threshold) {
ercc_raw <- ercc_count_matrix
ercc_raw <- as_tibble(data.frame(gene = rownames(ercc_count_matrix), ercc_raw))
ercc_raw <- ercc_raw %>% left_join(ercc_real_concentration, by = "gene")
ercc_raw <- as_tibble(data.frame(
gene = rownames(ercc_count_matrix), ercc_raw
)) # nolint
ercc_raw <- ercc_raw %>%
left_join(ercc_real_concentration, by = "gene") # nolint
ercc_raw2 <- ercc_raw %>%
select(-concentration) %>%
pivot_longer(-gene, names_to = "sample", values_to = "counts") %>%
group_by(gene) %>%
summarise(min_count = mean(counts))
ercc_raw <- left_join(ercc_raw, ercc_raw2, by = "gene") %>% filter(min_count > count_threshold)
ercc_raw <- left_join(ercc_raw, ercc_raw2, by = "gene") %>% filter(min_count > count_threshold) # nolint
sample_size <- apply(data_count_matrix %>% as_tibble() %>% select(-gene), 2, sum)
sample_size <- apply(data_count_matrix %>%
as_tibble() %>% select(-gene), 2, sum) # nolint
names_c <- ercc_raw %>%
select(-gene, -concentration, -min_count) %>%
colnames()
sum(names(sample_size) == names_c) # 96 they are in the same order
my_plots <- lapply(names_c, create_ercc_correlation_plots, ercc_raw = ercc_raw, size_list = sample_size)
my_plots <- lapply(names_c, create_ercc_correlation_plots,
ercc_raw = ercc_raw, size_list = sample_size
) # nolint
correlation_sup20 <- sapply(my_plots, function(x) {
return(x$my_cor)
})
dir.create("./results/ERCC_analysis")
pdf(paste0("./results/ERCC_analysis/correlations_ERCC-meancount>", count_threshold, "_quant_vs_concentration.pdf"), width = 12, height = 100)
pdf(paste0(
"./results/ERCC_analysis/correlations_ERCC-meancount>",
count_threshold, "_quant_vs_concentration.pdf"
), width = 12, height = 100)
print(do.call("grid.arrange", c(my_plots, ncol = 3)))
dev.off()
return(correlation_sup20)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please to comment