From 05bcdf3d6515c27c9ab5f71d62e203153a0f2011 Mon Sep 17 00:00:00 2001 From: Laurent Modolo <laurent@modolo.fr> Date: Wed, 10 Jun 2020 15:47:12 +0200 Subject: [PATCH] 00_function.R: add somme function documentation --- src/00_functions.R | 132 ++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 125 insertions(+), 7 deletions(-) diff --git a/src/00_functions.R b/src/00_functions.R index 6b14f1d..f6c121a 100644 --- a/src/00_functions.R +++ b/src/00_functions.R @@ -13,6 +13,9 @@ # "parallel", # "pbmcapply", # "plsgenomics", +# "SparseM", +# "DropletUtils", +# "Rtsne", # "scater")) library(tidyverse) library(SingleCellExperiment) @@ -26,11 +29,30 @@ library(parallel) library(pbmcapply) library(plsgenomics) library(scater) +library(plyr) +#' compute anscombe transform +#' compute the anscombe transform of x (sqrt(x + 3/8)) +#' @param x a vector of numeric +#' @return x a vector of numeric +#' @examples +#' \dontrun { +#' anscombe(1:10) +#' } +#' @export anscombe anscombe <- function(x){ 2.0 * sqrt(x + 3.0 / 8.0) } +#' compute anscombe transform shifted to zero +#' compute the anscombe transform of x (anscombe(x) + anscombe(0) +#' @param x a vector of numeric +#' @return x a vector of numeric +#' @examples +#' \dontrun { +#' anscombe(1:10) +#' } +#' @export ascb ascb <- function(x){ anscombe(x) - anscombe(0.0) } @@ -61,6 +83,15 @@ cell_type_palette <- function(cell_type){ return(cell_types_color) } +#' compute anscombe transform shifted to zero +#' compute the anscombe transform of x (anscombe(x) + anscombe(0) +#' @param x a vector of numeric +#' @return x a vector of numeric +#' @examples +#' \dontrun { +#' anscombe(1:10) +#' } +#' @export ascb QC_sample <- function(is_good) { c( base::sample( @@ -71,6 +102,16 @@ QC_sample <- function(is_good) { which(!is_good) ) } + +#' compute anscombe transform shifted to zero +#' compute the anscombe transform of x (anscombe(x) + anscombe(0) +#' @param x a vector of numeric +#' @return x a vector of numeric +#' @examples +#' \dontrun { +#' anscombe(1:10) +#' } +#' @export ascb QC_fit <- function(sce, assay_name = "logcounts", ncell_name = "cell_number") { is_good <- colData(sce)[[ncell_name]] > 0 select_sample <- QC_sample(is_good) @@ -84,6 +125,15 @@ QC_fit <- function(sce, assay_name = "logcounts", ncell_name = "cell_number") { scale = TRUE )) } +#' compute anscombe transform shifted to zero +#' compute the anscombe transform of x (anscombe(x) + anscombe(0) +#' @param x a vector of numeric +#' @return x a vector of numeric +#' @examples +#' \dontrun { +#' anscombe(1:10) +#' } +#' @export ascb QC_predict <- function(fit, sce, assay_name = "logcounts") { stats::predict( fit, @@ -243,6 +293,20 @@ get_genes_pval <- function(results, sce) { pull(pval) } +#' scaling for for scRNASeq data +#' Helper function that scale the genes expression matrix for the PLS function +#' scale a scRNA genes counts vector by dividing by the estimate of a glmmTMB +#' nbinom2 variance estimate and if a zero inflation is detected by multipling +#' by the proportion of counts outside the dirac in zero +#' @param data a data.frame with a *count* column +#' @return a data.frame with a *count* column +#' @examples +#' \dontrun { +#'scaled_counts <- scale_zi_nb( +#' data +#') +#' } +#' @import tidyverse boot scale_zi_nb <- function(data) { model <- fit_zi_nb( data = data, @@ -262,6 +326,24 @@ scale_zi_nb <- function(data) { return(data) } +#' scaling for for scRNASeq data +#' Helper function that scale the genes expression matrix for the PLS function +#' apply the function scale_zi_nb on a list of genes +#' @param sce SingleCellExperiment object +#' @param genes vector of gene name to scale (rownames of sce) +#' @param assay_name (default: "counts") name of the assay of genes counts to use +#' @param cpus (default: 4) number of cpus to use +#' @return a matrix with the result of the scaling +#' @examples +#' \dontrun { +#'scaled_counts <- scale_zi_nb_sce( +#' sce = sce, +#' genes = rownames(sce), +#' assay_name = "counts", +#' cpus = 10 +#') +#' } +#' @import plsgenomics pbmcappl SingleCellExperiment Matrix tidyverse scale_zi_nb_sce <- function(sce, genes = rownames(sce), assay_name = "counts", @@ -282,6 +364,10 @@ scale_zi_nb_sce <- function(sce, # classification function +#' classification for scRNASeq data +#' Helper function that scale the genes expression matrix for the PLS function +#' @examples +#' @import plsgenomics pbmcappl SingleCellExperiment Matrix tidyverse PLS_scaling <- function(sce, genes, features = NULL, assay_name = "counts", cpus = 4) { if (length(colnames(colData(sce)[features])) == 0) { scale_zi_nb_sce( @@ -311,6 +397,10 @@ PLS_scaling <- function(sce, genes, features = NULL, assay_name = "counts", cpus SingleCellExperiment::SingleCellExperiment(assays = list(counts = .)) } +#' classification for scRNASeq data +#' Helper function that compute the X and Xtest matrix for the PLS function +#' @examples +#' @import plsgenomics pbmcappl SingleCellExperiment Matrix tidyverse PLS_filter <- function(sce, group_by, genes , features = NULL, assay_name = "counts", altExp_name = "PLS_scaled", @@ -320,7 +410,6 @@ PLS_filter <- function(sce, group_by, genes , features = NULL, colData(altExp(sce, altExp_name))$group_predict <- NA colData(altExp(sce, altExp_name))$group_predict <- !apply(assay(altExp(sce, altExp_name), "counts"), 2, anyNA) - assay(altExp(sce, altExp_name), "counts") %>% t() %>% summary() %>% print() return(sce) } altExp(sce, altExp_name) <- PLS_scaling( @@ -339,6 +428,12 @@ PLS_filter <- function(sce, group_by, genes , features = NULL, return(sce) } +#' classification for scRNASeq data +#' copy of the plsgenomics:logit.spls.stab function using pbmcapply to display +#' a progress bar +#' @examples +#' @import plsgenomics pbmcappl +#' @export plsgenomics_logit_spls_stab plsgenomics_logit_spls_stab <- function (X, Y, lambda.ridge.range, lambda.l1.range, ncomp.range, adapt = TRUE, maxIter = 100, svd.decompose = TRUE, ncores = 1, nresamp = 100, center.X = TRUE, scale.X = FALSE, weighted.center = TRUE, @@ -495,17 +590,42 @@ plsgenomics_logit_spls_stab <- function (X, Y, lambda.ridge.range, lambda.l1.ran tmp_qLambda <- data.frame(tmp_qLambda) colnames(tmp_qLambda) <- c("lambdaL1", "lambdaL2", "ncomp", "id.samp", "qLambda") - qLambda <- ddply(tmp_qLambda, c("lambdaL1", "lambdaL2", "ncomp"), + qLambda <- plyr::ddply(tmp_qLambda, c("lambdaL1", "lambdaL2", "ncomp"), function(x) colMeans(x[c("qLambda")], na.rm = TRUE)) o.qLambda <- order(qLambda$qLambda) qLambda <- qLambda[o.qLambda, ] - probs_lambda <- ddply(grid.resampling, c("lambdaL1", "lambdaL2", + probs_lambda <- plyr::ddply(grid.resampling, c("lambdaL1", "lambdaL2", "ncomp"), function(x) colMeans(x[cnames], na.rm = TRUE)) probs_lambda <- probs_lambda[o.qLambda, ] return(list(q.Lambda = qLambda, probs.lambda = probs_lambda, p = p)) } +#' classification for scRNASeq data +#' +#' @param sce SingleCellExperiment object +#' @param group_by factor definifing the training group, with NA for the cells to classify +#' @param genes genes names to use +#' @param features colData() of sce to use +#' @param assay_name (default: "counts") name of the assay of genes counts to use +#' @param altExp_name (default: "PLS_scaled") name of the altExp to store the scaled data for the PLS +#' @param force vector of genes names to force the usage of for the classification +#' @param cpus (default: 4) number of cpus to use +#' @return a list() with the result of the classification +#' @examples +#' \dontrun { +#'model <- PLS_fit( +#' sce = sce, +#' group_by = sce$manual_cell_type, +#' genes = genes_PLS %>% pull(genes) %>% na.omit(), +#' features = genes_PLS %>% pull(proteins) %>% na.omit() %>% unique(), +#' assay_name = "counts_vst", +#' altExp_name = "PLS_surface_cell_type", +#' cpus = 10 +#') +#' } +#' @import plsgenomics tidyverse SingleCellExperiment Matrix +#' @export PLS_fit PLS_fit <- function(sce, group_by, genes, @@ -563,8 +683,6 @@ PLS_fit <- function(sce, stability.selection(fit_sparse)$selected.predictors, force ) - rowData(altExp(sce, altExp_name))$predictor %>% table() %>% print() - rowData(altExp(sce, altExp_name))$predictor_force %>% table() %>% print() print(rownames(altExp(sce, altExp_name))[ rowData(altExp(sce, altExp_name))$predictor ]) @@ -612,8 +730,8 @@ PLS_fit <- function(sce, #' @examples #' \dontrun { #'model <- PLS_predict( -#' sce = fit$sce, #' fit = fit, +#' sce = fit$sce, #' group_by = sce$manual_cell_type, #' genes = genes_PLS %>% pull(genes) %>% na.omit(), #' features = genes_PLS %>% pull(proteins) %>% na.omit() %>% unique(), @@ -622,7 +740,7 @@ PLS_fit <- function(sce, #' cpus = 10 #') #' } -#' @import plsgenomics +#' @import plsgenomics tidyverse SingleCellExperiment Matrix #' @export PLS_predict PLS_predict <- function(fit, sce, group_by, genes, features, assay_name = "counts", -- GitLab