Skip to content
Snippets Groups Projects
Commit 8949693c authored by Arnaud Duvermy's avatar Arnaud Duvermy
Browse files

add entire workflow

parent 199dc333
Branches
Tags
No related merge requests found
......@@ -10,7 +10,14 @@ RoxygenNote: 7.2.2
Depends:
tidyverse
Imports:
base,
DESeq2,
dplyr,
MASS,
purrr,
Rfast,
S4Vectors,
stats,
stringr,
tidyr
tidyr,
utils
......@@ -9,8 +9,10 @@
#'
#' @examples
getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, mvrnorm.fit){
message( "Get actuals beta for each genes ...")
##### Sampling from mvnorm ########
beta.matrix <- MASS::mvrnorm(n = n_genes*(n_genotypes),
n_samplings = n_genes*n_genotypes
beta.matrix <- MASS::mvrnorm(n = n_samplings,
mu = mvrnorm.fit$mu,
Sigma = mvrnorm.fit$sigma )
......@@ -100,9 +102,11 @@ getMu_ij <- function( log_qij.dtf, size_factor ){
#'
#' @examples
getGenesDispersions <- function( n_genes, sample_id_list,
dispersion.vec , dispUniform_btweenCondition = T ) {
dispersion.vec , dispUniform_btweenCondition = T ) {
if (dispUniform_btweenCondition == T ) {
message( "Get dispersion for each genes ...\n")
gene_dispersion.dtf = base::sample( dispersion.vec ,
replace = T,
size = n_genes) %>% base::data.frame()
......@@ -113,6 +117,8 @@ getGenesDispersions <- function( n_genes, sample_id_list,
}
else {
message( "Get dispersion for each genes within each conditions ...\n")
replication_table = sample_ids %>%
stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table()
gene_dispersion.dtf = replication_table %>%
......@@ -140,6 +146,7 @@ getGenesDispersions <- function( n_genes, sample_id_list,
#' @examples
get_kij <- function(mu_ij.matx, dispersion.matx, n_genes,
sample_id_list, idx_replicat) {
n_sples = length(sample_id_list)
alpha_gene = 1/dispersion.matx
k_ij = stats::rnbinom( length(mu_ij.matx),
......@@ -174,6 +181,8 @@ getCountTable <- function( mu_ij.matx, dispersion.matx,
replication.matx ) {
### Iterate on each replicates
message( "Get k_ij :")
message( "k_ij ~ NegativBinomial( mu_ij, dispersion_ij )\n")
kij.simu.list = purrr::map(.x = 1:max_n_replicates,
.f = ~get_kij(mu_ij.matx[ ,replication.matx[.x, ]],
dispersion.matx[ ,replication.matx[.x, ]],
......
......@@ -3,15 +3,16 @@
#' @param tabl_cnts table containing counts per genes & samples
#' @param bioDesign table describing bioDesgin of input
#' @import DESeq2
#' @import dplyr
#' @return DESEQ2 object
#' @export
#'
#' @examples
run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment){
dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model )
dds <- DESeq2::DESeq(dds)
message( "Run DESEQ2 ...\n")
tabl_cnts = tabl_cnts %>% dplyr::mutate(across(where(is.double), as.integer))
dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model )
dds <- DESeq2::DESeq(dds, quiet = TRUE)
return(dds)
}
......@@ -27,7 +28,7 @@ run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + g
#' @export
#'
#' @examples
extractionDDS <- function(dds_obj){
ddsExtraction <- function(dds_obj){
## Beta
dds.mcols = S4Vectors::mcols(dds_obj, use.names=TRUE)
beta0 <- dds.mcols$Intercept
......@@ -49,3 +50,37 @@ extractionDDS <- function(dds_obj){
gene_disp.mean = mean(gene_disp, na.rm = T) , gene_disp.sd = sd(gene_disp, na.rm = T)))
}
#' Extract beta distribution from DESEQ2 object
#'
#' @import dplyr
#' @import utils
#' @return output of dds extraction
#' @export
#'
#' @examples
publicDDS_extraction <- function(){
## Import & reshape public counts table
fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRsim")
tabl_cnts <- utils::read.table(file = fn, header = TRUE)
rownames(tabl_cnts) <- tabl_cnts$gene_id
tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
tabl_cnts = tabl_cnts[ order(tabl_cnts %>% rownames()), ]
## DESIGN
fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRsim")
bioDesign <- utils::read.table(file = fn, header = T, sep = ';')
## defining reference
bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") )
tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5"))
## Launch DESEQ2
dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
## Extract
dds.extraction = ddsExtraction(dds_obj = dds)
return(dds.extraction)
}
\ No newline at end of file
#' Fit mvnorm
#' @import Rfast
#' @return output of Rfast::mvnorm.mle
#' @export
#'
#' @examples
mvnormFitting <- function(beta.dtf) {
beta.matx = beta.dtf %>% as.matrix()
fit.mvnorm <- Rfast::mvnorm.mle(beta.matx)
return(fit.mvnorm)
}
#' get fit mvnorm from public dataset
#'
#' @return output of Rfast::mvnorm.mle
#' @export
#'
#' @examples
getPublicMvnormFit <- function(){
##### Range of observed value #########
dds.extraction = publicDDS_extraction()
beta_observed.dtf = dds.extraction$beta
fit.mvnorm = mvnormFitting(beta_observed.dtf)
return( fit.mvnorm )
}
\ No newline at end of file
#' perform all workflow
#'
#' @param n_genes number of genes
#' @param n_genotypes number of genotypes
#' @param n_environments number of env
#' @param fit.mvnorm output of Rfast::mvnorm.mle
#' @param max_n_replicates max number of replicates
#' @param uniformNumberOfReplicates boolean
#' @param uniformDispersion boolean
#' @return counts table
#' @export
#'
#' @examples
counTableBuilder <- function(fit.mvnorm = getPublicMvnormFit(),
n_genes,
n_genotypes,
n_environments = 2,
max_n_replicates,
uniformNumberOfReplicates = T,
uniformDispersion = T ) {
##### Ground truth ######
beta.actual = getBetaforSimulation( n_genes,
n_genotypes,
fit.mvnorm )
##### build input for simulation ####
model.matx = getModelMatrix()
log_qij = getLog_qij( beta.actual, model.matx )
mu_ij = getMu_ij(log_qij.dtf = log_qij, 1)
sample_ids = colnames(mu_ij)
gene_dispersion.vec = dds.extraction$gene_dispersion
dispersion.matrix = getGenesDispersions(n_genes,
sample_ids,
dispersion.vec = gene_dispersion.vec,
uniformDispersion )
##### Design replicates ######
designReplication.matx = getReplicationDesign( max_n_replicates,
n_genotypes,
n_environments,
uniformNumberOfReplicates)
##### build counts table ####
countTable = getCountTable( mu_ij, dispersion.matrix,
n_genes, n_genotypes ,
sample_id_list = sample_ids,
replication.matx = designReplication.matx)
return( countTable )
}
......@@ -4,3 +4,12 @@ usethis::use_package('stats')
usethis::use_package('tidyr')
usethis::use_package("stringr")
usethis::use_package("S4Vectors")
usethis::use_package("DESeq2")
usethis::use_package("MASS")
usethis::use_package("purrr")
usethis::use_package("base")
usethis::use_package("dplyr")
usethis::use_package("utils")
usethis::use_package("Rfast")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment