diff --git a/src/00_DEA.R b/src/00_DEA.R new file mode 100755 index 0000000000000000000000000000000000000000..4b41041dbca04b7e47e0fd3a4d2a512d1984f96a --- /dev/null +++ b/src/00_DEA.R @@ -0,0 +1,88 @@ +#!Rscript +source("src/00_functions.R") +# /src/00_DEA.R --counts_matrix results/test_counts_matrix.csv --cells_matrix results/test_cells_matrix.csv --output results/test_DEA.csv --formula "p_PLS_DEA_cell_type + clonality" --test "p_PLS_DEA_cell_type" --cpus 6 +option_list = list( + make_option( + c("-x", "--counts_matrix"), + type = "character", + default = NULL, + help = "csv files of the counts (genes are row and cells as columns)", + metavar = "character" + ), + make_option( + c("-c", "--cells_matrix"), + type = "character", + default = NULL, + help = "csv files of the cells (cells are row and cell infos as columns)", + metavar = "character" + ), + make_option( + c("-f", "--formula"), + type = "character", + default = NULL, + help = "formula to model the --cells_matrix columns (e.g: \"DEA_cell_type + clonality\")", + metavar = "character" + ), + make_option( + c("-t", "--test"), + type = "character", + default = NULL, + help = "name of the variable to test for in the formula (e.g: \"DEA_cell_type\")", + metavar = "character" + ), + make_option( + c("-o", "--output"), + type = "character", + default = NULL, + help = "name of csv file to write for the results", + metavar = "character" + ), + make_option( + c("-m", "--cpus"), + type = "character", + default = NULL, + help = "number of cpus to use for the computation", + metavar = "character" + ) +) +opt_parser = OptionParser(option_list = option_list) +opt = parse_args(opt_parser) + +print(opt) + +sce <- SingleCellExperiment::SingleCellExperiment( + assays = list( + counts = data.table::fread(opt$counts_matrix) %>% + as.matrix() %>% + Matrix::Matrix(sparse = T) + ), + colData = read.csv(opt$cells_matrix), + rowData = data.frame( + id = data.table::fread(opt$counts_matrix) %>% rownames() + ) +) +colnames(sce) <- colData(sce) %>% rownames() +rownames(sce) <- rowData(sce)$id + + +DEA_results <- DEA( + sce = sce, + test = opt$test, + formula = str_c("count ~ 1 + ", opt$formula), + assay_name = "counts", + cpus = as.numeric(opt$cpus) +) + + +rowData(sce)$pval_DEA <- NA +rowData(sce)$pval_DEA <- + get_genes_pval(DEA_results, sce) + +rowData(sce)$pval_DEA_adj <- p.adjust( + rowData(sce)$pval_DEA, + method = "BH" +) + +rowData(sce) %>% + as.data.frame() %>% + write_csv(, path = opt$output) \ No newline at end of file diff --git a/src/00_functions.R b/src/00_functions.R index 3611f122a6e3486dd45eccd523c8cf071a750b38..41ad581e4725350353fc8b4c12016a37988feb73 100644 --- a/src/00_functions.R +++ b/src/00_functions.R @@ -1,36 +1,46 @@ -# if (!requireNamespace("BiocManager", quietly = TRUE)) -# install.packages("BiocManager") -# BiocManager::install(version = "3.11") -# BiocManager::install(c( -# "tidyverse", -# "SingleCellExperiment", -# "SummarizedExperiment", -# "sctransform", -# "broom", -# "broom.mixed", -# "DHARMa", -# "glmmTMB", -# "parallel", -# "pbmcapply", -# "plsgenomics", -# "SparseM", -# "DropletUtils", -# "Rtsne", -# "scater")) -library(tidyverse) -library(SingleCellExperiment) -library(SummarizedExperiment) -library(sctransform) -library(broom) -library(broom.mixed) -library(DHARMa) -library(glmmTMB) -library(lmtest) -library(parallel) -library(pbmcapply) -library(plsgenomics) -library(scater) -library(plyr) +needed_packages <- c( + "optparse", + "tidyverse", + "SingleCellExperiment", + "SummarizedExperiment", + "sctransform", + "broom", + "broom.mixed", + "DHARMa", + "glmmTMB", + "lmtest", + "parallel", + "pbmcapply", + "plsgenomics", + "scater", + "plyr" +) + +install_if_needed <- function(packages){ + if (!requireNamespace("BiocManager", quietly = TRUE)) + install.packages("BiocManager") + if (!requireNamespace("devtools", quietly = TRUE)) + install.packages("devtools") + sapply( + packages, + function(x) { + if (!requireNamespace(x, quietly = TRUE)) + tryCatch({ + BiocManager::install(x, update = F, ask = F) + }, error = function(e) { + devtools::install_github(x, upgrade = "never") + }) + if (!stringr::str_detect(x, "/")){ + library(x, character.only = TRUE) + } else { + library(stringr::str_replace(x, ".*/(.*)", "\\1"), + character.only = TRUE) + } + } + ) + return(str_c(str_c(packages, collapse = ", "), " installed and loaded.")) +} +install_if_needed(needed_packages) #' compute anscombe transform #' compute the anscombe transform of x (sqrt(x + 3/8)) @@ -286,7 +296,8 @@ DEA <- function(sce, zi_formula = zi_formula, zi_threshold = zi_threshold, mc.preschedule = F, - mc.cores = cpus + mc.cores = cpus, + ignore.interactive = T ) names(results) <- rownames(sce) return(results) diff --git a/src/05_large_clones.R b/src/05_large_clones.R index 06b50c3940f3e99d5d65b5102d3525cec9e4b56d..5964740d562a40fc4e5e65d213852217ee5e0804 100644 --- a/src/05_large_clones.R +++ b/src/05_large_clones.R @@ -1,6 +1,19 @@ source("src/00_functions.R") load(file = "results/sce_DEA_DEA_cell_type.Rdata") +sce_test <- sce[, + sce$id %in% big_M_clone & sce$male_invivo + ] +assays(sce_test)$counts_vst %>% + as.matrix() %>% + as.data.frame() %>% + head(, 10) %>% + write_csv("results/test_counts_matrix.csv") + +colData(sce_test) %>% + as.data.frame() %>% + write_csv("results/test_cells_matrix.csv") + large_clone <- readxl::read_xlsx("data/2020_05_12_In_Vivo_Clones_DEA_Test_LM_May2020.xlsx") %>% rename(c(