Skip to content
Snippets Groups Projects
Verified Commit f86c33b1 authored by nfontrod's avatar nfontrod
Browse files

Add norm to be able to use CPM normalisation: It's NOT advised to do CPM normalisation

parent cfcbc0be
No related branches found
No related tags found
No related merge requests found
...@@ -26,7 +26,7 @@ find_file <- function(prefix, path) { ...@@ -26,7 +26,7 @@ find_file <- function(prefix, path) {
#' condition. Other columns corresponding to cofactors can be present #' condition. Other columns corresponding to cofactors can be present
#' @param path The path were the count file are stored #' @param path The path were the count file are stored
#' @param my_formula The formula to use in DESeq2 #' @param my_formula The formula to use in DESeq2
#'
#' @return A DESeqDataset object filtered #' @return A DESeqDataset object filtered
#' @import DESeq2 #' @import DESeq2
#' @import dplyr #' @import dplyr
...@@ -77,17 +77,18 @@ load_matrix <- function(design_table, path, my_formula) { ...@@ -77,17 +77,18 @@ load_matrix <- function(design_table, path, my_formula) {
#' samples to keep the gene #' samples to keep the gene
#' @param addition_col Additional design columns #' @param addition_col Additional design columns
#' @param my_formula The formula to use in DESeq2 #' @param my_formula The formula to use in DESeq2
#' @param norm_kind Add a parameter used to change deseq2 normalisationto cpm
#' #'
#' @return A DESeqDataset object filtered #' @return A DESeqDataset object filtered
#' @import DESeq2 #' @import DESeq2
filter_dds <- function(dds, filter_list, min_expression, addition_col, filter_dds <- function(dds, filter_list, min_expression, addition_col,
my_formula) { my_formula, norm_kind) {
columns <- unique(str_split( columns <- unique(str_split(
str_replace_all(my_formula, "[~\\s]", ""), str_replace_all(my_formula, "[~\\s]", ""),
"([\\+\\*\\:\\-\\^]|%in%)" "([\\+\\*\\:\\-\\^]|%in%)"
)[[1]]) )[[1]])
if (is.null(filter_list) || filter_list == "") { if (is.null(filter_list) || filter_list == "") {
if (min_expression == 0) { if (min_expression == 0 && norm_kind != "CPM") {
return(dds) return(dds)
} else { } else {
cts <- DESeq2::counts(dds, norm = F) cts <- DESeq2::counts(dds, norm = F)
...@@ -112,6 +113,11 @@ filter_dds <- function(dds, filter_list, min_expression, addition_col, ...@@ -112,6 +113,11 @@ filter_dds <- function(dds, filter_list, min_expression, addition_col,
countData = cts_2, colData = coldata, countData = cts_2, colData = coldata,
design = as.formula(my_formula) design = as.formula(my_formula)
) )
if (norm_kind == "CPM") {
csum <- colSums(cts_2)
fac <- colSums(cts_2) / 1e6
sizeFactors(dds_input) <- fac
}
dds <- DESeq2::DESeq(dds_input) dds <- DESeq2::DESeq(dds_input)
return(dds) return(dds)
} }
...@@ -160,16 +166,18 @@ get_matrices <- function(dds, output_folder, gene_names) { ...@@ -160,16 +166,18 @@ get_matrices <- function(dds, output_folder, gene_names) {
#' @param output_folder Folder were the matrices will be produced #' @param output_folder Folder were the matrices will be produced
#' @param gene_names A dataframe with the columns gene_id and gene_name, #' @param gene_names A dataframe with the columns gene_id and gene_name,
#' (default NULL). #' (default NULL).
#' @param norm_kind Add a parameter used to change deseq2 normalisationto cpm
#' #'
#' @return The dds object corresponding to a DESeqDatase object obtained after #' @return The dds object corresponding to a DESeqDatase object obtained after
#' running the DESeq function. #' running the DESeq function.
#' @export #' @export
build_count_matrices <- function(design_table, path, my_formula, filter_list, build_count_matrices <- function(design_table, path, my_formula, filter_list,
min_expression, output_folder, gene_names) { min_expression, output_folder, gene_names,
norm_kind) {
res <- load_matrix(design_table, path, my_formula) res <- load_matrix(design_table, path, my_formula)
dds <- filter_dds( dds <- filter_dds(
res$dds, filter_list, min_expression, res$acol, res$dds, filter_list, min_expression, res$acol,
my_formula my_formula, norm_kind
) )
get_matrices(dds, output_folder, gene_names) get_matrices(dds, output_folder, gene_names)
return(dds) return(dds)
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#' (default 0) #' (default 0)
#' @param gene_names A dataframe with the columns gene_id and gene_name, #' @param gene_names A dataframe with the columns gene_id and gene_name,
#' (default NULL). #' (default NULL).
#' @param norm Add a parameter used to change deseq2 normalisationto cpm
#' #'
#' @return A list containing for each comparison given in my_contrasts #' @return A list containing for each comparison given in my_contrasts
#' (keys of the list) a sublist containing: #' (keys of the list) a sublist containing:
...@@ -40,14 +41,14 @@ run_deseq2 <- function(design_table, path, my_contrasts, ...@@ -40,14 +41,14 @@ run_deseq2 <- function(design_table, path, my_contrasts,
my_formula = "~ condition", filter_list = NULL, my_formula = "~ condition", filter_list = NULL,
min_expression = 2, output_folder = ".", min_expression = 2, output_folder = ".",
basemean_threshold = 0, lfc_threshold = 0, basemean_threshold = 0, lfc_threshold = 0,
gene_names = NULL) { gene_names = NULL, norm = "default") {
if (!dir.exists(output_folder)) { if (!dir.exists(output_folder)) {
stop(paste0("The output folder ", output_folder, " does not exit")) stop(paste0("The output folder ", output_folder, " does not exit"))
} }
dds <- build_count_matrices( dds <- build_count_matrices(
design_table, path, my_formula, filter_list, design_table, path, my_formula, filter_list,
min_expression, output_folder, min_expression, output_folder,
gene_names gene_names, norm
) )
create_plots(dds, output_folder) create_plots(dds, output_folder)
results <- de_analyzes( results <- de_analyzes(
...@@ -106,6 +107,6 @@ cli_run_deseq2 <- function() { ...@@ -106,6 +107,6 @@ cli_run_deseq2 <- function() {
design_table, cli$path, my_contrasts, cli$formula, filter_list, design_table, cli$path, my_contrasts, cli$formula, filter_list,
cli$gene_expression_threshold, cli$output, cli$gene_expression_threshold, cli$output,
cli$basemean_threshold, cli$lfc_threshold, cli$basemean_threshold, cli$lfc_threshold,
gene_names gene_names, cli$norm
) )
} }
...@@ -66,6 +66,13 @@ cli_function <- function() { ...@@ -66,6 +66,13 @@ cli_function <- function() {
"The second colomn contains corresponding gene names and the header is 'gene_name"), "The second colomn contains corresponding gene names and the header is 'gene_name"),
default = "" default = ""
) )
p <- add_argument(p, "--norm",
short = "-n", type = "character",
help = paste0("The kind of norm to use, CPM or default",
"to use the default normalisation"),
default = "default")
# Parse de command line arguments # Parse de command line arguments
argv <- argparser::parse_args(p) argv <- argparser::parse_args(p)
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment