Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
Y
yellow_fever
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
GitLab community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
Laurent Modolo
yellow_fever
Commits
05bcdf3d
Unverified
Commit
05bcdf3d
authored
Jun 10, 2020
by
Laurent Modolo
Browse files
Options
Downloads
Patches
Plain Diff
00_function.R: add somme function documentation
parent
a5846974
No related branches found
No related tags found
No related merge requests found
Changes
1
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
src/00_functions.R
+125
-7
125 additions, 7 deletions
src/00_functions.R
with
125 additions
and
7 deletions
src/00_functions.R
+
125
−
7
View file @
05bcdf3d
...
@@ -13,6 +13,9 @@
...
@@ -13,6 +13,9 @@
# "parallel",
# "parallel",
# "pbmcapply",
# "pbmcapply",
# "plsgenomics",
# "plsgenomics",
# "SparseM",
# "DropletUtils",
# "Rtsne",
# "scater"))
# "scater"))
library
(
tidyverse
)
library
(
tidyverse
)
library
(
SingleCellExperiment
)
library
(
SingleCellExperiment
)
...
@@ -26,11 +29,30 @@ library(parallel)
...
@@ -26,11 +29,30 @@ library(parallel)
library
(
pbmcapply
)
library
(
pbmcapply
)
library
(
plsgenomics
)
library
(
plsgenomics
)
library
(
scater
)
library
(
scater
)
library
(
plyr
)
#' compute anscombe transform
#' compute the anscombe transform of x (sqrt(x + 3/8))
#' @param x a vector of numeric
#' @return x a vector of numeric
#' @examples
#' \dontrun {
#' anscombe(1:10)
#' }
#' @export anscombe
anscombe
<-
function
(
x
){
anscombe
<-
function
(
x
){
2.0
*
sqrt
(
x
+
3.0
/
8.0
)
2.0
*
sqrt
(
x
+
3.0
/
8.0
)
}
}
#' compute anscombe transform shifted to zero
#' compute the anscombe transform of x (anscombe(x) + anscombe(0)
#' @param x a vector of numeric
#' @return x a vector of numeric
#' @examples
#' \dontrun {
#' anscombe(1:10)
#' }
#' @export ascb
ascb
<-
function
(
x
){
ascb
<-
function
(
x
){
anscombe
(
x
)
-
anscombe
(
0.0
)
anscombe
(
x
)
-
anscombe
(
0.0
)
}
}
...
@@ -61,6 +83,15 @@ cell_type_palette <- function(cell_type){
...
@@ -61,6 +83,15 @@ cell_type_palette <- function(cell_type){
return
(
cell_types_color
)
return
(
cell_types_color
)
}
}
#' compute anscombe transform shifted to zero
#' compute the anscombe transform of x (anscombe(x) + anscombe(0)
#' @param x a vector of numeric
#' @return x a vector of numeric
#' @examples
#' \dontrun {
#' anscombe(1:10)
#' }
#' @export ascb
QC_sample
<-
function
(
is_good
)
{
QC_sample
<-
function
(
is_good
)
{
c
(
c
(
base
::
sample
(
base
::
sample
(
...
@@ -71,6 +102,16 @@ QC_sample <- function(is_good) {
...
@@ -71,6 +102,16 @@ QC_sample <- function(is_good) {
which
(
!
is_good
)
which
(
!
is_good
)
)
)
}
}
#' compute anscombe transform shifted to zero
#' compute the anscombe transform of x (anscombe(x) + anscombe(0)
#' @param x a vector of numeric
#' @return x a vector of numeric
#' @examples
#' \dontrun {
#' anscombe(1:10)
#' }
#' @export ascb
QC_fit
<-
function
(
sce
,
assay_name
=
"logcounts"
,
ncell_name
=
"cell_number"
)
{
QC_fit
<-
function
(
sce
,
assay_name
=
"logcounts"
,
ncell_name
=
"cell_number"
)
{
is_good
<-
colData
(
sce
)[[
ncell_name
]]
>
0
is_good
<-
colData
(
sce
)[[
ncell_name
]]
>
0
select_sample
<-
QC_sample
(
is_good
)
select_sample
<-
QC_sample
(
is_good
)
...
@@ -84,6 +125,15 @@ QC_fit <- function(sce, assay_name = "logcounts", ncell_name = "cell_number") {
...
@@ -84,6 +125,15 @@ QC_fit <- function(sce, assay_name = "logcounts", ncell_name = "cell_number") {
scale
=
TRUE
scale
=
TRUE
))
))
}
}
#' compute anscombe transform shifted to zero
#' compute the anscombe transform of x (anscombe(x) + anscombe(0)
#' @param x a vector of numeric
#' @return x a vector of numeric
#' @examples
#' \dontrun {
#' anscombe(1:10)
#' }
#' @export ascb
QC_predict
<-
function
(
fit
,
sce
,
assay_name
=
"logcounts"
)
{
QC_predict
<-
function
(
fit
,
sce
,
assay_name
=
"logcounts"
)
{
stats
::
predict
(
stats
::
predict
(
fit
,
fit
,
...
@@ -243,6 +293,20 @@ get_genes_pval <- function(results, sce) {
...
@@ -243,6 +293,20 @@ get_genes_pval <- function(results, sce) {
pull
(
pval
)
pull
(
pval
)
}
}
#' scaling for for scRNASeq data
#' Helper function that scale the genes expression matrix for the PLS function
#' scale a scRNA genes counts vector by dividing by the estimate of a glmmTMB
#' nbinom2 variance estimate and if a zero inflation is detected by multipling
#' by the proportion of counts outside the dirac in zero
#' @param data a data.frame with a *count* column
#' @return a data.frame with a *count* column
#' @examples
#' \dontrun {
#'scaled_counts <- scale_zi_nb(
#' data
#')
#' }
#' @import tidyverse boot
scale_zi_nb
<-
function
(
data
)
{
scale_zi_nb
<-
function
(
data
)
{
model
<-
fit_zi_nb
(
model
<-
fit_zi_nb
(
data
=
data
,
data
=
data
,
...
@@ -262,6 +326,24 @@ scale_zi_nb <- function(data) {
...
@@ -262,6 +326,24 @@ scale_zi_nb <- function(data) {
return
(
data
)
return
(
data
)
}
}
#' scaling for for scRNASeq data
#' Helper function that scale the genes expression matrix for the PLS function
#' apply the function scale_zi_nb on a list of genes
#' @param sce SingleCellExperiment object
#' @param genes vector of gene name to scale (rownames of sce)
#' @param assay_name (default: "counts") name of the assay of genes counts to use
#' @param cpus (default: 4) number of cpus to use
#' @return a matrix with the result of the scaling
#' @examples
#' \dontrun {
#'scaled_counts <- scale_zi_nb_sce(
#' sce = sce,
#' genes = rownames(sce),
#' assay_name = "counts",
#' cpus = 10
#')
#' }
#' @import plsgenomics pbmcappl SingleCellExperiment Matrix tidyverse
scale_zi_nb_sce
<-
function
(
sce
,
scale_zi_nb_sce
<-
function
(
sce
,
genes
=
rownames
(
sce
),
genes
=
rownames
(
sce
),
assay_name
=
"counts"
,
assay_name
=
"counts"
,
...
@@ -282,6 +364,10 @@ scale_zi_nb_sce <- function(sce,
...
@@ -282,6 +364,10 @@ scale_zi_nb_sce <- function(sce,
# classification function
# classification function
#' classification for scRNASeq data
#' Helper function that scale the genes expression matrix for the PLS function
#' @examples
#' @import plsgenomics pbmcappl SingleCellExperiment Matrix tidyverse
PLS_scaling
<-
function
(
sce
,
genes
,
features
=
NULL
,
assay_name
=
"counts"
,
cpus
=
4
)
{
PLS_scaling
<-
function
(
sce
,
genes
,
features
=
NULL
,
assay_name
=
"counts"
,
cpus
=
4
)
{
if
(
length
(
colnames
(
colData
(
sce
)[
features
]))
==
0
)
{
if
(
length
(
colnames
(
colData
(
sce
)[
features
]))
==
0
)
{
scale_zi_nb_sce
(
scale_zi_nb_sce
(
...
@@ -311,6 +397,10 @@ PLS_scaling <- function(sce, genes, features = NULL, assay_name = "counts", cpus
...
@@ -311,6 +397,10 @@ PLS_scaling <- function(sce, genes, features = NULL, assay_name = "counts", cpus
SingleCellExperiment
::
SingleCellExperiment
(
assays
=
list
(
counts
=
.
))
SingleCellExperiment
::
SingleCellExperiment
(
assays
=
list
(
counts
=
.
))
}
}
#' classification for scRNASeq data
#' Helper function that compute the X and Xtest matrix for the PLS function
#' @examples
#' @import plsgenomics pbmcappl SingleCellExperiment Matrix tidyverse
PLS_filter
<-
function
(
sce
,
group_by
,
genes
,
features
=
NULL
,
PLS_filter
<-
function
(
sce
,
group_by
,
genes
,
features
=
NULL
,
assay_name
=
"counts"
,
assay_name
=
"counts"
,
altExp_name
=
"PLS_scaled"
,
altExp_name
=
"PLS_scaled"
,
...
@@ -320,7 +410,6 @@ PLS_filter <- function(sce, group_by, genes , features = NULL,
...
@@ -320,7 +410,6 @@ PLS_filter <- function(sce, group_by, genes , features = NULL,
colData
(
altExp
(
sce
,
altExp_name
))
$
group_predict
<-
NA
colData
(
altExp
(
sce
,
altExp_name
))
$
group_predict
<-
NA
colData
(
altExp
(
sce
,
altExp_name
))
$
group_predict
<-
colData
(
altExp
(
sce
,
altExp_name
))
$
group_predict
<-
!
apply
(
assay
(
altExp
(
sce
,
altExp_name
),
"counts"
),
2
,
anyNA
)
!
apply
(
assay
(
altExp
(
sce
,
altExp_name
),
"counts"
),
2
,
anyNA
)
assay
(
altExp
(
sce
,
altExp_name
),
"counts"
)
%>%
t
()
%>%
summary
()
%>%
print
()
return
(
sce
)
return
(
sce
)
}
}
altExp
(
sce
,
altExp_name
)
<-
PLS_scaling
(
altExp
(
sce
,
altExp_name
)
<-
PLS_scaling
(
...
@@ -339,6 +428,12 @@ PLS_filter <- function(sce, group_by, genes , features = NULL,
...
@@ -339,6 +428,12 @@ PLS_filter <- function(sce, group_by, genes , features = NULL,
return
(
sce
)
return
(
sce
)
}
}
#' classification for scRNASeq data
#' copy of the plsgenomics:logit.spls.stab function using pbmcapply to display
#' a progress bar
#' @examples
#' @import plsgenomics pbmcappl
#' @export plsgenomics_logit_spls_stab
plsgenomics_logit_spls_stab
<-
function
(
X
,
Y
,
lambda.ridge.range
,
lambda.l1.range
,
ncomp.range
,
plsgenomics_logit_spls_stab
<-
function
(
X
,
Y
,
lambda.ridge.range
,
lambda.l1.range
,
ncomp.range
,
adapt
=
TRUE
,
maxIter
=
100
,
svd.decompose
=
TRUE
,
ncores
=
1
,
adapt
=
TRUE
,
maxIter
=
100
,
svd.decompose
=
TRUE
,
ncores
=
1
,
nresamp
=
100
,
center.X
=
TRUE
,
scale.X
=
FALSE
,
weighted.center
=
TRUE
,
nresamp
=
100
,
center.X
=
TRUE
,
scale.X
=
FALSE
,
weighted.center
=
TRUE
,
...
@@ -495,17 +590,42 @@ plsgenomics_logit_spls_stab <- function (X, Y, lambda.ridge.range, lambda.l1.ran
...
@@ -495,17 +590,42 @@ plsgenomics_logit_spls_stab <- function (X, Y, lambda.ridge.range, lambda.l1.ran
tmp_qLambda
<-
data.frame
(
tmp_qLambda
)
tmp_qLambda
<-
data.frame
(
tmp_qLambda
)
colnames
(
tmp_qLambda
)
<-
c
(
"lambdaL1"
,
"lambdaL2"
,
"ncomp"
,
colnames
(
tmp_qLambda
)
<-
c
(
"lambdaL1"
,
"lambdaL2"
,
"ncomp"
,
"id.samp"
,
"qLambda"
)
"id.samp"
,
"qLambda"
)
qLambda
<-
ddply
(
tmp_qLambda
,
c
(
"lambdaL1"
,
"lambdaL2"
,
"ncomp"
),
qLambda
<-
plyr
::
ddply
(
tmp_qLambda
,
c
(
"lambdaL1"
,
"lambdaL2"
,
"ncomp"
),
function
(
x
)
colMeans
(
x
[
c
(
"qLambda"
)],
na.rm
=
TRUE
))
function
(
x
)
colMeans
(
x
[
c
(
"qLambda"
)],
na.rm
=
TRUE
))
o.qLambda
<-
order
(
qLambda
$
qLambda
)
o.qLambda
<-
order
(
qLambda
$
qLambda
)
qLambda
<-
qLambda
[
o.qLambda
,
]
qLambda
<-
qLambda
[
o.qLambda
,
]
probs_lambda
<-
ddply
(
grid.resampling
,
c
(
"lambdaL1"
,
"lambdaL2"
,
probs_lambda
<-
plyr
::
ddply
(
grid.resampling
,
c
(
"lambdaL1"
,
"lambdaL2"
,
"ncomp"
),
function
(
x
)
colMeans
(
x
[
cnames
],
na.rm
=
TRUE
))
"ncomp"
),
function
(
x
)
colMeans
(
x
[
cnames
],
na.rm
=
TRUE
))
probs_lambda
<-
probs_lambda
[
o.qLambda
,
]
probs_lambda
<-
probs_lambda
[
o.qLambda
,
]
return
(
list
(
q.Lambda
=
qLambda
,
probs.lambda
=
probs_lambda
,
return
(
list
(
q.Lambda
=
qLambda
,
probs.lambda
=
probs_lambda
,
p
=
p
))
p
=
p
))
}
}
#' classification for scRNASeq data
#'
#' @param sce SingleCellExperiment object
#' @param group_by factor definifing the training group, with NA for the cells to classify
#' @param genes genes names to use
#' @param features colData() of sce to use
#' @param assay_name (default: "counts") name of the assay of genes counts to use
#' @param altExp_name (default: "PLS_scaled") name of the altExp to store the scaled data for the PLS
#' @param force vector of genes names to force the usage of for the classification
#' @param cpus (default: 4) number of cpus to use
#' @return a list() with the result of the classification
#' @examples
#' \dontrun {
#'model <- PLS_fit(
#' sce = sce,
#' group_by = sce$manual_cell_type,
#' genes = genes_PLS %>% pull(genes) %>% na.omit(),
#' features = genes_PLS %>% pull(proteins) %>% na.omit() %>% unique(),
#' assay_name = "counts_vst",
#' altExp_name = "PLS_surface_cell_type",
#' cpus = 10
#')
#' }
#' @import plsgenomics tidyverse SingleCellExperiment Matrix
#' @export PLS_fit
PLS_fit
<-
function
(
sce
,
PLS_fit
<-
function
(
sce
,
group_by
,
group_by
,
genes
,
genes
,
...
@@ -563,8 +683,6 @@ PLS_fit <- function(sce,
...
@@ -563,8 +683,6 @@ PLS_fit <- function(sce,
stability.selection
(
fit_sparse
)
$
selected.predictors
,
stability.selection
(
fit_sparse
)
$
selected.predictors
,
force
force
)
)
rowData
(
altExp
(
sce
,
altExp_name
))
$
predictor
%>%
table
()
%>%
print
()
rowData
(
altExp
(
sce
,
altExp_name
))
$
predictor_force
%>%
table
()
%>%
print
()
print
(
rownames
(
altExp
(
sce
,
altExp_name
))[
print
(
rownames
(
altExp
(
sce
,
altExp_name
))[
rowData
(
altExp
(
sce
,
altExp_name
))
$
predictor
rowData
(
altExp
(
sce
,
altExp_name
))
$
predictor
])
])
...
@@ -612,8 +730,8 @@ PLS_fit <- function(sce,
...
@@ -612,8 +730,8 @@ PLS_fit <- function(sce,
#' @examples
#' @examples
#' \dontrun {
#' \dontrun {
#'model <- PLS_predict(
#'model <- PLS_predict(
#' sce = fit$sce,
#' fit = fit,
#' fit = fit,
#' sce = fit$sce,
#' group_by = sce$manual_cell_type,
#' group_by = sce$manual_cell_type,
#' genes = genes_PLS %>% pull(genes) %>% na.omit(),
#' genes = genes_PLS %>% pull(genes) %>% na.omit(),
#' features = genes_PLS %>% pull(proteins) %>% na.omit() %>% unique(),
#' features = genes_PLS %>% pull(proteins) %>% na.omit() %>% unique(),
...
@@ -622,7 +740,7 @@ PLS_fit <- function(sce,
...
@@ -622,7 +740,7 @@ PLS_fit <- function(sce,
#' cpus = 10
#' cpus = 10
#')
#')
#' }
#' }
#' @import plsgenomics
#' @import plsgenomics
tidyverse SingleCellExperiment Matrix
#' @export PLS_predict
#' @export PLS_predict
PLS_predict
<-
function
(
fit
,
sce
,
group_by
,
genes
,
features
,
PLS_predict
<-
function
(
fit
,
sce
,
group_by
,
genes
,
features
,
assay_name
=
"counts"
,
assay_name
=
"counts"
,
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
sign in
to comment