diff --git a/src/v3/HTRsim/DESCRIPTION b/src/v3/HTRsim/DESCRIPTION index e4a92617690e90acc00d6554ee51551028447548..5a3903a93cd822f12f3c062cd2af4b239d958bf1 100644 --- a/src/v3/HTRsim/DESCRIPTION +++ b/src/v3/HTRsim/DESCRIPTION @@ -10,7 +10,14 @@ RoxygenNote: 7.2.2 Depends: tidyverse Imports: + base, + DESeq2, + dplyr, + MASS, + purrr, + Rfast, S4Vectors, stats, stringr, - tidyr + tidyr, + utils diff --git a/src/v3/HTRsim/R/countsGenerator.R b/src/v3/HTRsim/R/countsGenerator.R index ac8ae6ceada3e3250c4b3b73386f42a51f596664..a44e31e9e098a4c26a3adf9a5397a02bae57d734 100644 --- a/src/v3/HTRsim/R/countsGenerator.R +++ b/src/v3/HTRsim/R/countsGenerator.R @@ -9,8 +9,10 @@ #' #' @examples getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, mvrnorm.fit){ + message( "Get actuals beta for each genes ...") ##### Sampling from mvnorm ######## - beta.matrix <- MASS::mvrnorm(n = n_genes*(n_genotypes), + n_samplings = n_genes*n_genotypes + beta.matrix <- MASS::mvrnorm(n = n_samplings, mu = mvrnorm.fit$mu, Sigma = mvrnorm.fit$sigma ) @@ -100,9 +102,11 @@ getMu_ij <- function( log_qij.dtf, size_factor ){ #' #' @examples getGenesDispersions <- function( n_genes, sample_id_list, - dispersion.vec , dispUniform_btweenCondition = T ) { - + dispersion.vec , dispUniform_btweenCondition = T ) { + if (dispUniform_btweenCondition == T ) { + message( "Get dispersion for each genes ...\n") + gene_dispersion.dtf = base::sample( dispersion.vec , replace = T, size = n_genes) %>% base::data.frame() @@ -113,6 +117,8 @@ getGenesDispersions <- function( n_genes, sample_id_list, } else { + message( "Get dispersion for each genes within each conditions ...\n") + replication_table = sample_ids %>% stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table() gene_dispersion.dtf = replication_table %>% @@ -140,6 +146,7 @@ getGenesDispersions <- function( n_genes, sample_id_list, #' @examples get_kij <- function(mu_ij.matx, dispersion.matx, n_genes, sample_id_list, idx_replicat) { + n_sples = length(sample_id_list) alpha_gene = 1/dispersion.matx k_ij = stats::rnbinom( length(mu_ij.matx), @@ -174,6 +181,8 @@ getCountTable <- function( mu_ij.matx, dispersion.matx, replication.matx ) { ### Iterate on each replicates + message( "Get k_ij :") + message( "k_ij ~ NegativBinomial( mu_ij, dispersion_ij )\n") kij.simu.list = purrr::map(.x = 1:max_n_replicates, .f = ~get_kij(mu_ij.matx[ ,replication.matx[.x, ]], dispersion.matx[ ,replication.matx[.x, ]], diff --git a/src/v3/HTRsim/R/manipulationsDDS_obj.R b/src/v3/HTRsim/R/manipulationsDDS_obj.R index 0a4cdbc7525bc5fd0cdd59293616842bb5272bd0..d36fa152e0839ce71ce0bccb81557460949b53ce 100644 --- a/src/v3/HTRsim/R/manipulationsDDS_obj.R +++ b/src/v3/HTRsim/R/manipulationsDDS_obj.R @@ -3,15 +3,16 @@ #' @param tabl_cnts table containing counts per genes & samples #' @param bioDesign table describing bioDesgin of input #' @import DESeq2 +#' @import dplyr #' @return DESEQ2 object #' @export #' #' @examples run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment){ - - - dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model ) - dds <- DESeq2::DESeq(dds) + message( "Run DESEQ2 ...\n") + tabl_cnts = tabl_cnts %>% dplyr::mutate(across(where(is.double), as.integer)) + dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model ) + dds <- DESeq2::DESeq(dds, quiet = TRUE) return(dds) } @@ -27,7 +28,7 @@ run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + g #' @export #' #' @examples -extractionDDS <- function(dds_obj){ +ddsExtraction <- function(dds_obj){ ## Beta dds.mcols = S4Vectors::mcols(dds_obj, use.names=TRUE) beta0 <- dds.mcols$Intercept @@ -49,3 +50,37 @@ extractionDDS <- function(dds_obj){ gene_disp.mean = mean(gene_disp, na.rm = T) , gene_disp.sd = sd(gene_disp, na.rm = T))) } + +#' Extract beta distribution from DESEQ2 object +#' +#' @import dplyr +#' @import utils +#' @return output of dds extraction +#' @export +#' +#' @examples +publicDDS_extraction <- function(){ + ## Import & reshape public counts table + fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRsim") + tabl_cnts <- utils::read.table(file = fn, header = TRUE) + rownames(tabl_cnts) <- tabl_cnts$gene_id + tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID + tabl_cnts = tabl_cnts[ order(tabl_cnts %>% rownames()), ] + ## DESIGN + fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRsim") + bioDesign <- utils::read.table(file = fn, header = T, sep = ';') + + ## defining reference + bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11")) + bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated")) + bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) + tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) + + ## Launch DESEQ2 + dds = run.deseq(tabl_cnts, bioDesign = bioDesign) + + ## Extract + dds.extraction = ddsExtraction(dds_obj = dds) + + return(dds.extraction) +} \ No newline at end of file diff --git a/src/v3/HTRsim/R/manipulations_multinormDistrib.R b/src/v3/HTRsim/R/manipulations_multinormDistrib.R new file mode 100644 index 0000000000000000000000000000000000000000..729210207eadfebaf05549531534ce3f703c2411 --- /dev/null +++ b/src/v3/HTRsim/R/manipulations_multinormDistrib.R @@ -0,0 +1,25 @@ +#' Fit mvnorm +#' @import Rfast +#' @return output of Rfast::mvnorm.mle +#' @export +#' +#' @examples +mvnormFitting <- function(beta.dtf) { + beta.matx = beta.dtf %>% as.matrix() + fit.mvnorm <- Rfast::mvnorm.mle(beta.matx) + return(fit.mvnorm) +} + +#' get fit mvnorm from public dataset +#' +#' @return output of Rfast::mvnorm.mle +#' @export +#' +#' @examples +getPublicMvnormFit <- function(){ + ##### Range of observed value ######### + dds.extraction = publicDDS_extraction() + beta_observed.dtf = dds.extraction$beta + fit.mvnorm = mvnormFitting(beta_observed.dtf) + return( fit.mvnorm ) +} \ No newline at end of file diff --git a/src/v3/HTRsim/R/workflow.R b/src/v3/HTRsim/R/workflow.R new file mode 100644 index 0000000000000000000000000000000000000000..41f52db0dca30024f6cba4bc099a4ab60e60f8f7 --- /dev/null +++ b/src/v3/HTRsim/R/workflow.R @@ -0,0 +1,54 @@ + +#' perform all workflow +#' +#' @param n_genes number of genes +#' @param n_genotypes number of genotypes +#' @param n_environments number of env +#' @param fit.mvnorm output of Rfast::mvnorm.mle +#' @param max_n_replicates max number of replicates +#' @param uniformNumberOfReplicates boolean +#' @param uniformDispersion boolean +#' @return counts table +#' @export +#' +#' @examples +counTableBuilder <- function(fit.mvnorm = getPublicMvnormFit(), + n_genes, + n_genotypes, + n_environments = 2, + max_n_replicates, + uniformNumberOfReplicates = T, + uniformDispersion = T ) { + + ##### Ground truth ###### + beta.actual = getBetaforSimulation( n_genes, + n_genotypes, + fit.mvnorm ) + + ##### build input for simulation #### + model.matx = getModelMatrix() + log_qij = getLog_qij( beta.actual, model.matx ) + mu_ij = getMu_ij(log_qij.dtf = log_qij, 1) + sample_ids = colnames(mu_ij) + gene_dispersion.vec = dds.extraction$gene_dispersion + dispersion.matrix = getGenesDispersions(n_genes, + sample_ids, + dispersion.vec = gene_dispersion.vec, + uniformDispersion ) + + ##### Design replicates ###### + designReplication.matx = getReplicationDesign( max_n_replicates, + n_genotypes, + n_environments, + uniformNumberOfReplicates) + + ##### build counts table #### + countTable = getCountTable( mu_ij, dispersion.matrix, + n_genes, n_genotypes , + sample_id_list = sample_ids, + replication.matx = designReplication.matx) + + return( countTable ) + + +} diff --git a/src/v3/HTRsim/devtools_history.R b/src/v3/HTRsim/devtools_history.R index b82eb3190879fe35cc3d2cad3ecdf9665ab2971f..aee52fac6c104d5d505eec6f956e088d401233fa 100644 --- a/src/v3/HTRsim/devtools_history.R +++ b/src/v3/HTRsim/devtools_history.R @@ -4,3 +4,12 @@ usethis::use_package('stats') usethis::use_package('tidyr') usethis::use_package("stringr") usethis::use_package("S4Vectors") +usethis::use_package("DESeq2") +usethis::use_package("MASS") +usethis::use_package("purrr") +usethis::use_package("base") +usethis::use_package("dplyr") +usethis::use_package("utils") +usethis::use_package("Rfast") + +