diff --git a/R/load_matrix.R b/R/load_matrix.R index 9681ee68f0b5ed639e060ff0fabea9cd11bfe501..adef723f321220f247b407ab44c0b49fff7aead7 100644 --- a/R/load_matrix.R +++ b/R/load_matrix.R @@ -26,7 +26,7 @@ find_file <- function(prefix, path) { #' condition. Other columns corresponding to cofactors can be present #' @param path The path were the count file are stored #' @param my_formula The formula to use in DESeq2 -#' + #' @return A DESeqDataset object filtered #' @import DESeq2 #' @import dplyr @@ -77,17 +77,18 @@ load_matrix <- function(design_table, path, my_formula) { #' samples to keep the gene #' @param addition_col Additional design columns #' @param my_formula The formula to use in DESeq2 +#' @param norm_kind Add a parameter used to change deseq2 normalisationto cpm #' #' @return A DESeqDataset object filtered #' @import DESeq2 filter_dds <- function(dds, filter_list, min_expression, addition_col, - my_formula) { + my_formula, norm_kind) { columns <- unique(str_split( str_replace_all(my_formula, "[~\\s]", ""), "([\\+\\*\\:\\-\\^]|%in%)" )[[1]]) if (is.null(filter_list) || filter_list == "") { - if (min_expression == 0) { + if (min_expression == 0 && norm_kind != "CPM") { return(dds) } else { cts <- DESeq2::counts(dds, norm = F) @@ -112,6 +113,11 @@ filter_dds <- function(dds, filter_list, min_expression, addition_col, countData = cts_2, colData = coldata, design = as.formula(my_formula) ) + if (norm_kind == "CPM") { + csum <- colSums(cts_2) + fac <- colSums(cts_2) / 1e6 + sizeFactors(dds_input) <- fac + } dds <- DESeq2::DESeq(dds_input) return(dds) } @@ -160,16 +166,18 @@ get_matrices <- function(dds, output_folder, gene_names) { #' @param output_folder Folder were the matrices will be produced #' @param gene_names A dataframe with the columns gene_id and gene_name, #' (default NULL). +#' @param norm_kind Add a parameter used to change deseq2 normalisationto cpm #' #' @return The dds object corresponding to a DESeqDatase object obtained after #' running the DESeq function. #' @export build_count_matrices <- function(design_table, path, my_formula, filter_list, - min_expression, output_folder, gene_names) { + min_expression, output_folder, gene_names, + norm_kind) { res <- load_matrix(design_table, path, my_formula) dds <- filter_dds( res$dds, filter_list, min_expression, res$acol, - my_formula + my_formula, norm_kind ) get_matrices(dds, output_folder, gene_names) return(dds) diff --git a/R/main.R b/R/main.R index 13752bbe7d0f7f29e3ffe5eb5751b75afbb8b7a6..87a7c53350cb6cdce3fb50e91c67e17d012ccfc1 100644 --- a/R/main.R +++ b/R/main.R @@ -23,6 +23,7 @@ #' (default 0) #' @param gene_names A dataframe with the columns gene_id and gene_name, #' (default NULL). +#' @param norm Add a parameter used to change deseq2 normalisationto cpm #' #' @return A list containing for each comparison given in my_contrasts #' (keys of the list) a sublist containing: @@ -40,14 +41,14 @@ run_deseq2 <- function(design_table, path, my_contrasts, my_formula = "~ condition", filter_list = NULL, min_expression = 2, output_folder = ".", basemean_threshold = 0, lfc_threshold = 0, - gene_names = NULL) { + gene_names = NULL, norm = "default") { if (!dir.exists(output_folder)) { stop(paste0("The output folder ", output_folder, " does not exit")) } dds <- build_count_matrices( design_table, path, my_formula, filter_list, min_expression, output_folder, - gene_names + gene_names, norm ) create_plots(dds, output_folder) results <- de_analyzes( @@ -106,6 +107,6 @@ cli_run_deseq2 <- function() { design_table, cli$path, my_contrasts, cli$formula, filter_list, cli$gene_expression_threshold, cli$output, cli$basemean_threshold, cli$lfc_threshold, - gene_names + gene_names, cli$norm ) } diff --git a/R/parser.R b/R/parser.R index 830f48c6688be788a7df0def2d11b2800a9c65f5..09036f6bdb5feceff935de71d9f81e83bb6d3481 100644 --- a/R/parser.R +++ b/R/parser.R @@ -66,6 +66,13 @@ cli_function <- function() { "The second colomn contains corresponding gene names and the header is 'gene_name"), default = "" ) + p <- add_argument(p, "--norm", + short = "-n", type = "character", + help = paste0("The kind of norm to use, CPM or default", + "to use the default normalisation"), + default = "default") + + # Parse de command line arguments argv <- argparser::parse_args(p)