Add norm to be able to use CPM normalisation: It's NOT advised to do CPM normalisation

f86c33b1 · nfontrod · cfcbc0be · f86c33b1 · f86c33b1 · f86c33b1
Verified Commit f86c33b1 authored 1 year ago by nfontrod
--- a/R/load_matrix.R
+++ b/R/load_matrix.R
@@ -26,7 +26,7 @@ find_file <- function(prefix, path) {
 #' condition. Other columns corresponding to cofactors can be present
 #' @param path The path were the count file are stored
 #' @param my_formula The formula to use in DESeq2
-#'
 #' @return A DESeqDataset object filtered
 #' @import DESeq2
 #' @import dplyr
@@ -77,17 +77,18 @@ load_matrix <- function(design_table, path, my_formula) {
 #' samples to keep the gene
 #' @param addition_col Additional design columns
 #' @param my_formula The formula to use in DESeq2
+#' @param norm_kind Add a parameter used to change deseq2 normalisationto cpm
 #'
 #' @return A DESeqDataset object filtered
 #' @import DESeq2
 filter_dds <- function(dds, filter_list, min_expression, addition_col,
-                       my_formula) {
+                       my_formula, norm_kind) {
    columns <- unique(str_split(
        str_replace_all(my_formula, "[~\\s]", ""),
        "([\\+\\*\\:\\-\\^]|%in%)"
    )[[1]])
    if (is.null(filter_list) || filter_list == "") {
-        if (min_expression == 0) {
+        if (min_expression == 0 && norm_kind != "CPM") {
            return(dds)
        } else {
            cts <- DESeq2::counts(dds, norm = F)
@@ -112,6 +113,11 @@ filter_dds <- function(dds, filter_list, min_expression, addition_col,
        countData = cts_2, colData = coldata,
        design = as.formula(my_formula)
    )
+    if (norm_kind == "CPM") {
+        csum <- colSums(cts_2)
+        fac <- colSums(cts_2) / 1e6
+        sizeFactors(dds_input) <- fac
+    }
    dds <- DESeq2::DESeq(dds_input)
    return(dds)
 }
@@ -160,16 +166,18 @@ get_matrices <- function(dds, output_folder, gene_names) {
 #' @param output_folder Folder were the matrices will be produced
 #' @param gene_names A dataframe with the columns gene_id and gene_name,
 #' (default NULL).
+#' @param norm_kind Add a parameter used to change deseq2 normalisationto cpm
 #'
 #' @return The dds object corresponding to a DESeqDatase object obtained after
 #' running the DESeq function.
 #' @export
 build_count_matrices <- function(design_table, path, my_formula, filter_list,
-                                 min_expression, output_folder, gene_names) {
+                                 min_expression, output_folder, gene_names,
+                                 norm_kind) {
    res <- load_matrix(design_table, path, my_formula)
    dds <- filter_dds(
        res$dds, filter_list, min_expression, res$acol,
-        my_formula
+        my_formula, norm_kind
    )
    get_matrices(dds, output_folder, gene_names)
    return(dds)

--- a/R/main.R
+++ b/R/main.R
@@ -23,6 +23,7 @@
 #' (default 0)
 #' @param gene_names A dataframe with the columns gene_id and gene_name,
 #' (default NULL).
+#' @param norm Add a parameter used to change deseq2 normalisationto cpm
 #'
 #' @return A list containing for each comparison given in my_contrasts
 #' (keys of the list) a sublist containing:
@@ -40,14 +41,14 @@ run_deseq2 <- function(design_table, path, my_contrasts,
                       my_formula = "~ condition", filter_list = NULL,
                       min_expression = 2, output_folder = ".",
                       basemean_threshold = 0, lfc_threshold = 0,
-                       gene_names = NULL) {
+                       gene_names = NULL, norm = "default") {
    if (!dir.exists(output_folder)) {
        stop(paste0("The output folder ", output_folder, " does not exit"))
    }
    dds <- build_count_matrices(
        design_table, path, my_formula, filter_list,
        min_expression, output_folder,
-        gene_names
+        gene_names, norm
    )
    create_plots(dds, output_folder)
    results <- de_analyzes(
@@ -106,6 +107,6 @@ cli_run_deseq2 <- function() {
        design_table, cli$path, my_contrasts, cli$formula, filter_list,
        cli$gene_expression_threshold, cli$output,
        cli$basemean_threshold, cli$lfc_threshold,
-        gene_names
+        gene_names, cli$norm
    )
 }
--- a/R/parser.R
+++ b/R/parser.R
@@ -66,6 +66,13 @@ cli_function <- function() {
                                    "The second colomn contains corresponding gene names and the header is 'gene_name"),
                      default = ""
    )
+    p <- add_argument(p, "--norm",
+                      short = "-n", type = "character",
+                      help = paste0("The kind of norm to use, CPM or default",
+                                    "to use the default normalisation"),
+                        default = "default")
    # Parse de command line arguments
    argv <- argparser::parse_args(p)