Skip to content
Snippets Groups Projects
Commit 8949693c authored by Arnaud Duvermy's avatar Arnaud Duvermy
Browse files

add entire workflow

parent 199dc333
No related branches found
No related tags found
No related merge requests found
......@@ -10,7 +10,14 @@ RoxygenNote: 7.2.2
Depends:
tidyverse
Imports:
base,
DESeq2,
dplyr,
MASS,
purrr,
Rfast,
S4Vectors,
stats,
stringr,
tidyr
tidyr,
utils
......@@ -9,8 +9,10 @@
#'
#' @examples
getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, mvrnorm.fit){
message( "Get actuals beta for each genes ...")
##### Sampling from mvnorm ########
beta.matrix <- MASS::mvrnorm(n = n_genes*(n_genotypes),
n_samplings = n_genes*n_genotypes
beta.matrix <- MASS::mvrnorm(n = n_samplings,
mu = mvrnorm.fit$mu,
Sigma = mvrnorm.fit$sigma )
......@@ -100,9 +102,11 @@ getMu_ij <- function( log_qij.dtf, size_factor ){
#'
#' @examples
getGenesDispersions <- function( n_genes, sample_id_list,
dispersion.vec , dispUniform_btweenCondition = T ) {
dispersion.vec , dispUniform_btweenCondition = T ) {
if (dispUniform_btweenCondition == T ) {
message( "Get dispersion for each genes ...\n")
gene_dispersion.dtf = base::sample( dispersion.vec ,
replace = T,
size = n_genes) %>% base::data.frame()
......@@ -113,6 +117,8 @@ getGenesDispersions <- function( n_genes, sample_id_list,
}
else {
message( "Get dispersion for each genes within each conditions ...\n")
replication_table = sample_ids %>%
stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table()
gene_dispersion.dtf = replication_table %>%
......@@ -140,6 +146,7 @@ getGenesDispersions <- function( n_genes, sample_id_list,
#' @examples
get_kij <- function(mu_ij.matx, dispersion.matx, n_genes,
sample_id_list, idx_replicat) {
n_sples = length(sample_id_list)
alpha_gene = 1/dispersion.matx
k_ij = stats::rnbinom( length(mu_ij.matx),
......@@ -174,6 +181,8 @@ getCountTable <- function( mu_ij.matx, dispersion.matx,
replication.matx ) {
### Iterate on each replicates
message( "Get k_ij :")
message( "k_ij ~ NegativBinomial( mu_ij, dispersion_ij )\n")
kij.simu.list = purrr::map(.x = 1:max_n_replicates,
.f = ~get_kij(mu_ij.matx[ ,replication.matx[.x, ]],
dispersion.matx[ ,replication.matx[.x, ]],
......
......@@ -3,15 +3,16 @@
#' @param tabl_cnts table containing counts per genes & samples
#' @param bioDesign table describing bioDesgin of input
#' @import DESeq2
#' @import dplyr
#' @return DESEQ2 object
#' @export
#'
#' @examples
run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment){
dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model )
dds <- DESeq2::DESeq(dds)
message( "Run DESEQ2 ...\n")
tabl_cnts = tabl_cnts %>% dplyr::mutate(across(where(is.double), as.integer))
dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model )
dds <- DESeq2::DESeq(dds, quiet = TRUE)
return(dds)
}
......@@ -27,7 +28,7 @@ run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + g
#' @export
#'
#' @examples
extractionDDS <- function(dds_obj){
ddsExtraction <- function(dds_obj){
## Beta
dds.mcols = S4Vectors::mcols(dds_obj, use.names=TRUE)
beta0 <- dds.mcols$Intercept
......@@ -49,3 +50,37 @@ extractionDDS <- function(dds_obj){
gene_disp.mean = mean(gene_disp, na.rm = T) , gene_disp.sd = sd(gene_disp, na.rm = T)))
}
#' Extract beta distribution from DESEQ2 object
#'
#' @import dplyr
#' @import utils
#' @return output of dds extraction
#' @export
#'
#' @examples
publicDDS_extraction <- function(){
## Import & reshape public counts table
fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRsim")
tabl_cnts <- utils::read.table(file = fn, header = TRUE)
rownames(tabl_cnts) <- tabl_cnts$gene_id
tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
tabl_cnts = tabl_cnts[ order(tabl_cnts %>% rownames()), ]
## DESIGN
fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRsim")
bioDesign <- utils::read.table(file = fn, header = T, sep = ';')
## defining reference
bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") )
tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5"))
## Launch DESEQ2
dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
## Extract
dds.extraction = ddsExtraction(dds_obj = dds)
return(dds.extraction)
}
\ No newline at end of file
#' Fit mvnorm
#' @import Rfast
#' @return output of Rfast::mvnorm.mle
#' @export
#'
#' @examples
mvnormFitting <- function(beta.dtf) {
beta.matx = beta.dtf %>% as.matrix()
fit.mvnorm <- Rfast::mvnorm.mle(beta.matx)
return(fit.mvnorm)
}
#' get fit mvnorm from public dataset
#'
#' @return output of Rfast::mvnorm.mle
#' @export
#'
#' @examples
getPublicMvnormFit <- function(){
##### Range of observed value #########
dds.extraction = publicDDS_extraction()
beta_observed.dtf = dds.extraction$beta
fit.mvnorm = mvnormFitting(beta_observed.dtf)
return( fit.mvnorm )
}
\ No newline at end of file
#' perform all workflow
#'
#' @param n_genes number of genes
#' @param n_genotypes number of genotypes
#' @param n_environments number of env
#' @param fit.mvnorm output of Rfast::mvnorm.mle
#' @param max_n_replicates max number of replicates
#' @param uniformNumberOfReplicates boolean
#' @param uniformDispersion boolean
#' @return counts table
#' @export
#'
#' @examples
counTableBuilder <- function(fit.mvnorm = getPublicMvnormFit(),
n_genes,
n_genotypes,
n_environments = 2,
max_n_replicates,
uniformNumberOfReplicates = T,
uniformDispersion = T ) {
##### Ground truth ######
beta.actual = getBetaforSimulation( n_genes,
n_genotypes,
fit.mvnorm )
##### build input for simulation ####
model.matx = getModelMatrix()
log_qij = getLog_qij( beta.actual, model.matx )
mu_ij = getMu_ij(log_qij.dtf = log_qij, 1)
sample_ids = colnames(mu_ij)
gene_dispersion.vec = dds.extraction$gene_dispersion
dispersion.matrix = getGenesDispersions(n_genes,
sample_ids,
dispersion.vec = gene_dispersion.vec,
uniformDispersion )
##### Design replicates ######
designReplication.matx = getReplicationDesign( max_n_replicates,
n_genotypes,
n_environments,
uniformNumberOfReplicates)
##### build counts table ####
countTable = getCountTable( mu_ij, dispersion.matrix,
n_genes, n_genotypes ,
sample_id_list = sample_ids,
replication.matx = designReplication.matx)
return( countTable )
}
......@@ -4,3 +4,12 @@ usethis::use_package('stats')
usethis::use_package('tidyr')
usethis::use_package("stringr")
usethis::use_package("S4Vectors")
usethis::use_package("DESeq2")
usethis::use_package("MASS")
usethis::use_package("purrr")
usethis::use_package("base")
usethis::use_package("dplyr")
usethis::use_package("utils")
usethis::use_package("Rfast")
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment