Skip to content
Snippets Groups Projects
Verified Commit f27b7bde authored by nfontrod's avatar nfontrod
Browse files

add --norm parameter

parent f86c33b1
No related branches found
No related tags found
No related merge requests found
......@@ -77,7 +77,8 @@ load_matrix <- function(design_table, path, my_formula) {
#' samples to keep the gene
#' @param addition_col Additional design columns
#' @param my_formula The formula to use in DESeq2
#' @param norm_kind Add a parameter used to change deseq2 normalisationto cpm
#' @param norm_kind A file containing the size factors to apply to each sample
#' or a "" value
#'
#' @return A DESeqDataset object filtered
#' @import DESeq2
......@@ -88,7 +89,7 @@ filter_dds <- function(dds, filter_list, min_expression, addition_col,
"([\\+\\*\\:\\-\\^]|%in%)"
)[[1]])
if (is.null(filter_list) || filter_list == "") {
if (min_expression == 0 && norm_kind != "CPM") {
if (min_expression == 0 && norm_kind == "") {
return(dds)
} else {
cts <- DESeq2::counts(dds, norm = F)
......@@ -113,10 +114,19 @@ filter_dds <- function(dds, filter_list, min_expression, addition_col,
countData = cts_2, colData = coldata,
design = as.formula(my_formula)
)
if (norm_kind == "CPM") {
csum <- colSums(cts_2)
fac <- colSums(cts_2) / 1e6
sizeFactors(dds_input) <- fac
if (norm_kind != "") {
df <- read.table(norm_kind, h=T, sep="\t")
vec <- df$size_factor
names(vec) <- df$sample
if(!all(colnames(cts_2) %in% names(vec))) {
bad <- colnames(cts_2)[!(colnames(cts_2) %in% names(vec))]
bad <- paste(bad, collapse = " ")
stop(paste("Some samples (", bad ,") are not defined in", norm,
"but are",
"present in design file !"))
}
vec <- vec[colnames(cts_2)]
sizeFactors(dds_input) <- vec
}
dds <- DESeq2::DESeq(dds_input)
return(dds)
......@@ -166,18 +176,19 @@ get_matrices <- function(dds, output_folder, gene_names) {
#' @param output_folder Folder were the matrices will be produced
#' @param gene_names A dataframe with the columns gene_id and gene_name,
#' (default NULL).
#' @param norm_kind Add a parameter used to change deseq2 normalisationto cpm
#' @param norm A file containing the size factors to apply to each sample
#' or a NULL value
#'
#' @return The dds object corresponding to a DESeqDatase object obtained after
#' running the DESeq function.
#' @export
build_count_matrices <- function(design_table, path, my_formula, filter_list,
min_expression, output_folder, gene_names,
norm_kind) {
norm) {
res <- load_matrix(design_table, path, my_formula)
dds <- filter_dds(
res$dds, filter_list, min_expression, res$acol,
my_formula, norm_kind
my_formula, norm
)
get_matrices(dds, output_folder, gene_names)
return(dds)
......
......@@ -23,7 +23,8 @@
#' (default 0)
#' @param gene_names A dataframe with the columns gene_id and gene_name,
#' (default NULL).
#' @param norm Add a parameter used to change deseq2 normalisationto cpm
#' @param norm A file containing the size factors to apply to each sample
#' or a "" value
#'
#' @return A list containing for each comparison given in my_contrasts
#' (keys of the list) a sublist containing:
......@@ -41,7 +42,7 @@ run_deseq2 <- function(design_table, path, my_contrasts,
my_formula = "~ condition", filter_list = NULL,
min_expression = 2, output_folder = ".",
basemean_threshold = 0, lfc_threshold = 0,
gene_names = NULL, norm = "default") {
gene_names = NULL, norm = "") {
if (!dir.exists(output_folder)) {
stop(paste0("The output folder ", output_folder, " does not exit"))
}
......@@ -102,7 +103,6 @@ cli_run_deseq2 <- function() {
} else {
filter_list <- read.table(cli$filter, h = F)$V1
}
tmp <- run_deseq2(
design_table, cli$path, my_contrasts, cli$formula, filter_list,
cli$gene_expression_threshold, cli$output,
......
......@@ -68,9 +68,13 @@ cli_function <- function() {
)
p <- add_argument(p, "--norm",
short = "-n", type = "character",
help = paste0("The kind of norm to use, CPM or default",
"to use the default normalisation"),
default = "default")
help = paste0("A two column dataframe containing the",
"columns sample and size_factor. Sample",
" column must correspond to the samples",
" name given in design file. Size_factor",
"correspond to the scaling value to apply",
"to count data in deseq2"),
default = "")
......
......@@ -127,6 +127,13 @@ optional arguments:
second colomn contains corresponding
gene names and the header is
'gene_name [default: ]
-n, --norm A two column dataframe containing
thecolumns sample and size_factor.
Sample column must correspond to the
samples name given in design file.
Size_factorcorrespond to the scaling
value to applyto count data in
deseq2 [default: ]
```
The design parameter must point to a file with the following structure:
......@@ -163,6 +170,20 @@ The gene_names parameter must have the following structure:
1. The `gene`columns must contain the ENSEMBL gene_IDs.
2. The `gene_name` column must contain the name of the gene corresponding to ENSEMBL gene_ID.
Finally the `norm` parameter is an **optional** parameter that can be provided to use another normalization than the DESeq2 method. In most cases you won't need it. This parameter takes a 2 columns file with the following format:
```
sample size_factor
276_DMSO_DMSO_0 1
277_DMSO_DMSO_0 2
```
Note that the header must be provided.
- The `sample` column must corresponds to samples also defined in the *sample* column of the design file.
- The `size_factor` column correspond to a factor used to normalize sample in DESeq2. They will be used in the sizeFactors function of DESeq2.
## With the function run_deseq2
You can directly use the function run_deseq2 with R:
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment