From 3c9de622ab7ac6084e3c6ba8a62a11b7832741bb Mon Sep 17 00:00:00 2001
From: aduvermy <arnaud.duvermy@ens-lyon.fr>
Date: Wed, 21 Sep 2022 09:45:42 +0200
Subject: [PATCH] add v2

---
 src/v2/HTRSIM/.Rbuildignore                   |   3 +
 src/v2/HTRSIM/DESCRIPTION                     |  24 +++
 src/v2/HTRSIM/HTRSIM.Rproj                    |  22 +++
 src/v2/HTRSIM/NAMESPACE                       |  24 +++
 src/v2/HTRSIM/R/countsGenerator.R             | 172 ++++++++++++++++++
 src/v2/HTRSIM/R/designSimulationBuilder.R     |  51 ++++++
 src/v2/HTRSIM/R/evaluation.R                  |  78 ++++++++
 src/v2/HTRSIM/R/modelFitting.R                |  88 +++++++++
 src/v2/HTRSIM/R/ublicsTableCountsExtractor.R  |  30 +++
 src/v2/HTRSIM/R/visualization.R               |  65 +++++++
 src/v2/HTRSIM/devtools_history.R              |   8 +
 src/v2/HTRSIM/man/buildDesign2simulate.Rd     |  31 ++++
 src/v2/HTRSIM/man/ddsExtraction.viz.Rd        |  17 ++
 .../HTRSIM/man/extractDistributionFromDDS.Rd  |  17 ++
 src/v2/HTRSIM/man/getBetaforSimulation.Rd     |  33 ++++
 src/v2/HTRSIM/man/getDfComparison.Rd          |  21 +++
 .../man/getGenesDispersionsForSimulation.Rd   |  34 ++++
 src/v2/HTRSIM/man/getK_ij.Rd                  |  19 ++
 src/v2/HTRSIM/man/getLog_qij.Rd               |  19 ++
 src/v2/HTRSIM/man/getMu_ij.Rd                 |  19 ++
 src/v2/HTRSIM/man/rnorm.distrib.beta.Rd       |  23 +++
 src/v2/HTRSIM/man/run.deseq.Rd                |  23 +++
 src/v2/HTRSIM/tests/testthat.R                |   4 +
 23 files changed, 825 insertions(+)
 create mode 100644 src/v2/HTRSIM/.Rbuildignore
 create mode 100644 src/v2/HTRSIM/DESCRIPTION
 create mode 100644 src/v2/HTRSIM/HTRSIM.Rproj
 create mode 100644 src/v2/HTRSIM/NAMESPACE
 create mode 100644 src/v2/HTRSIM/R/countsGenerator.R
 create mode 100644 src/v2/HTRSIM/R/designSimulationBuilder.R
 create mode 100644 src/v2/HTRSIM/R/evaluation.R
 create mode 100644 src/v2/HTRSIM/R/modelFitting.R
 create mode 100644 src/v2/HTRSIM/R/ublicsTableCountsExtractor.R
 create mode 100644 src/v2/HTRSIM/R/visualization.R
 create mode 100644 src/v2/HTRSIM/devtools_history.R
 create mode 100644 src/v2/HTRSIM/man/buildDesign2simulate.Rd
 create mode 100644 src/v2/HTRSIM/man/ddsExtraction.viz.Rd
 create mode 100644 src/v2/HTRSIM/man/extractDistributionFromDDS.Rd
 create mode 100644 src/v2/HTRSIM/man/getBetaforSimulation.Rd
 create mode 100644 src/v2/HTRSIM/man/getDfComparison.Rd
 create mode 100644 src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd
 create mode 100644 src/v2/HTRSIM/man/getK_ij.Rd
 create mode 100644 src/v2/HTRSIM/man/getLog_qij.Rd
 create mode 100644 src/v2/HTRSIM/man/getMu_ij.Rd
 create mode 100644 src/v2/HTRSIM/man/rnorm.distrib.beta.Rd
 create mode 100644 src/v2/HTRSIM/man/run.deseq.Rd
 create mode 100644 src/v2/HTRSIM/tests/testthat.R

diff --git a/src/v2/HTRSIM/.Rbuildignore b/src/v2/HTRSIM/.Rbuildignore
new file mode 100644
index 0000000..86257dd
--- /dev/null
+++ b/src/v2/HTRSIM/.Rbuildignore
@@ -0,0 +1,3 @@
+^HTRSIM\.Rproj$
+^\.Rproj\.user$
+^devtools_history\.R$
diff --git a/src/v2/HTRSIM/DESCRIPTION b/src/v2/HTRSIM/DESCRIPTION
new file mode 100644
index 0000000..3191056
--- /dev/null
+++ b/src/v2/HTRSIM/DESCRIPTION
@@ -0,0 +1,24 @@
+Package: HTRSIM
+Title: RNAseq counts simulation
+Version: 0.0.0.9000
+Authors@R: 
+    person("Arnaud", "Duvermy", , "first.last@example.com", role = c("aut", "cre"),
+           comment = c(ORCID = "YOUR-ORCID-ID"))
+Description: RNAseq counts simulation.
+License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
+    license
+Encoding: UTF-8
+Roxygen: list(markdown = TRUE)
+RoxygenNote: 7.1.2
+Suggests: 
+    testthat (>= 3.0.0)
+Config/testthat/edition: 3
+Depends: 
+    tidyverse
+Imports: 
+    DESeq2,
+    furrr,
+    MASS,
+    S4Vectors,
+    stats,
+    stringr
diff --git a/src/v2/HTRSIM/HTRSIM.Rproj b/src/v2/HTRSIM/HTRSIM.Rproj
new file mode 100644
index 0000000..69fafd4
--- /dev/null
+++ b/src/v2/HTRSIM/HTRSIM.Rproj
@@ -0,0 +1,22 @@
+Version: 1.0
+
+RestoreWorkspace: No
+SaveWorkspace: No
+AlwaysSaveHistory: Default
+
+EnableCodeIndexing: Yes
+UseSpacesForTab: Yes
+NumSpacesForTab: 2
+Encoding: UTF-8
+
+RnwWeave: Sweave
+LaTeX: pdfLaTeX
+
+AutoAppendNewline: Yes
+StripTrailingWhitespace: Yes
+LineEndingConversion: Posix
+
+BuildType: Package
+PackageUseDevtools: Yes
+PackageInstallArgs: --no-multiarch --with-keep.source
+PackageRoxygenize: rd,collate,namespace
diff --git a/src/v2/HTRSIM/NAMESPACE b/src/v2/HTRSIM/NAMESPACE
new file mode 100644
index 0000000..2bf9a20
--- /dev/null
+++ b/src/v2/HTRSIM/NAMESPACE
@@ -0,0 +1,24 @@
+# Generated by roxygen2: do not edit by hand
+
+export(ddsExtraction.viz)
+export(extractDistributionFromDDS)
+export(getBetaforSimulation)
+export(getDfComparison)
+export(getGenesDispersionsForSimulation)
+export(buildDesign2simulate)
+export(getK_ij)
+export(getLog_qij)
+export(getMu_ij)
+export(rnorm.distrib.beta)
+export(run.deseq)
+export(run.glm)
+export(reshapeGlmRes)
+import(DESeq2)
+import(dplyr)
+import(ggplot2)
+import(reshape2)
+import(stats)
+import(stringr)
+import(tidyverse)
+import(MASS)
+
diff --git a/src/v2/HTRSIM/R/countsGenerator.R b/src/v2/HTRSIM/R/countsGenerator.R
new file mode 100644
index 0000000..fb17cfc
--- /dev/null
+++ b/src/v2/HTRSIM/R/countsGenerator.R
@@ -0,0 +1,172 @@
+
+
+
+
+#' Get beta_ij
+#'
+#' @param n_genes  an integer
+#' @param n_genotype A int.
+#' @param n_environment A int.
+#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE.
+#' @param model_matrix an output of stat::model.matrix()
+#' @param theta a float to control the noise introduce around betaG, betaE, betaGxE
+#' @import stringr
+#' @import base
+#' @import dplyr
+#' @import Rfast
+#' @import MASS
+#' @import purrr
+#' @return a dataframe with the gene dispersion for each samples
+#' @export
+#'
+#' @examples
+getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, n_environments = 2, beta.dtf, theta = 10 ){
+
+  x = beta.dtf %>% as.matrix()
+  fit.mvrnorm <- Rfast::mvnorm.mle(x)
+  x <- NULL
+  beta.matrix.tmp <- MASS::mvrnorm(n = n_genes,
+                                 mu = fit.mvrnorm$mu,
+                                 Sigma = fit.mvrnorm$sigma )
+
+  replicate_beta <- function(beta_vec, n, theta){
+    beta_vec.rep = rep(beta_vec, n)
+    beta_vec.rep + rnorm(length(beta_vec.rep), mean = 0, sd = abs(beta_vec/theta))
+  }
+
+  beta0 = beta.matrix.tmp[,1]
+  beta.matrix.tmp = purrr::map2(.x = c(2,3,4), .y =  c(n_genotypes-1,
+                                              n_environments-1,
+                                              (n_genotypes-1)*(n_environments-1)),
+                            ~ replicate_beta(beta.matrix.tmp[,.x], .y, theta) %>% matrix(ncol = .y)) %>%
+                    do.call(cbind, .)
+
+
+
+  beta.matrix = cbind(beta0, beta.matrix.tmp)
+  betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "")
+  betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "")
+  betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
+  matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
+
+  colnames(beta.matrix) = matrix.colnames
+  rownames(beta.matrix) = base::paste("gene", 1:(n_genes), sep = "")
+
+  return(beta.matrix)
+}
+
+
+
+#' Get log(q_ij)
+#'
+#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE.
+#' @param model_matrix an output of stat::model.matrix()
+#' @import stringr
+#' @import base
+#' @import dplyr
+#' @import
+#' @return a dataframe with the gene dispersion for each samples
+#' @export
+#'
+#' @examples
+getLog_qij <- function( beta.matrix.input , model_matrix){
+
+    log_qij = beta.matrix.input %*% t(model_matrix) ## j samples (n_genotypes * n_environments), i genes
+
+    return(log_qij )
+
+}
+
+
+
+#' Get mu_ij
+#'
+#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE.
+#' @param model_matrix an output of stat::model.matrix()
+#' @import stringr
+#' @import base
+#' @import dplyr
+#' @import
+#' @return a dataframe with the gene dispersion for each samples
+#' @export
+#'
+#' @examples
+getMu_ij <- function( log_qij.matrix, size_factor ){
+
+  mu_ij = size_factor * 2^log_qij.matrix ## size factor * log(qij)
+
+  return(mu_ij )
+
+}
+
+#' Get genes dispersion
+#'
+#' @param n_genes  an integer
+#' @param n_genotype A int.
+#' @param n_environment A int.
+#' @param dispersion.vec A vector of observed dispersion.
+#' @param dispUniform_btweenCondition logical
+#' @param model_matrix an output of stat::model.matrix()
+#' @import stringr
+#' @import base
+#' @import dplyr
+#' @import
+#' @return a dataframe with the gene dispersion for each samples
+#' @export
+#'
+#' @examples
+getGenesDispersionsForSimulation <- function( n_genes = 100, n_genotypes, n_environments, dispersion.vec ,dispUniform_btweenCondition = T, model_matrix ){
+
+
+   if (dispUniform_btweenCondition == T ) {
+          gene_dispersion.dtf = base::sample(  dds.extraction$gene_dispersion, replace = T, size = n_genes) %>% base::data.frame()
+          n_rep =  length(rownames(model_matrix))
+          gene_dispersion.dtf = gene_dispersion.dtf[,base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)]
+          rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
+          colnames(gene_dispersion.dtf) = rownames(model_matrix)
+
+  }
+
+  else {
+
+          replication_table = rownames(model_matrix) %>% stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table()
+          gene_dispersion.dtf = replication_table %>% purrr::map(., ~sample(  dispersion.vec, replace = T, size = n_genes) ) %>% data.frame()
+          gene_dispersion.dtf = gene_dispersion.dtf[,rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())]
+          colnames(gene_dispersion.dtf) = rownames(model_matrix)
+          rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
+
+  }
+
+  return(gene_dispersion.dtf %>% as.matrix)
+
+}
+
+
+
+
+
+#' Get K_ij : gene counts
+#'
+#' @param log_qij.matrix  a matrix of log_qij
+#' @param gene_disp.matrix a matrix of gene dispersion
+#' @import stats
+#' @return a dataframe with the gene dispersion for each samples
+#' @export
+#'
+#' @examples
+getK_ij <- function( mu_ij.matrix ,  gene_disp.matrix ){
+
+  n_genes =  nrow(mu_ij.matrix)
+  n_sples = ncol(mu_ij.matrix)
+  alpha_gene = 1/gene_disp.matrix
+  k_ij = stats::rnbinom(length(mu_ij.matrix), size = alpha_gene  , mu = mu_ij.matrix) %>% matrix(. , nrow = n_genes, ncol = n_sples )
+  k_ij[is.na(k_ij)] = 0
+
+  colnames(k_ij) = colnames(mu_ij.matrix)
+  rownames(k_ij) = rownames(mu_ij.matrix)
+
+  return(k_ij)
+
+}
+
+
diff --git a/src/v2/HTRSIM/R/designSimulationBuilder.R b/src/v2/HTRSIM/R/designSimulationBuilder.R
new file mode 100644
index 0000000..f360907
--- /dev/null
+++ b/src/v2/HTRSIM/R/designSimulationBuilder.R
@@ -0,0 +1,51 @@
+
+
+#' Build a design dataframe
+#'
+#' @param n_genotype A int.
+#' @param n_environment A int.
+#' @param n_replicate A int.
+#' @param uniform_nb_rep logical
+#' @examples
+#' buildDesign2simulate(1000, 2, 30)
+#
+#' @return dataframe with n_genotype rows and 3 columns (sample_id, genotype, environment)
+#' @import tidyverse
+#' @import stats
+#'
+buildDesign2simulate <- function(n_genotype , n_environment, n_replicate , uniform_nb_rep = T ){
+
+
+    genotypes = base::paste("G", 0:(n_genotype-1), sep = "")
+    environments = base::paste("E", 0:(n_environment-1), sep = "")
+    sample_ids = as.vector(outer(genotypes, environments, paste, sep="_")) %>% sort()
+
+
+    sample_id.split =  sample_ids %>% str_split(., "_", simplify = T)
+    design_without_rep = base::list(sample_id = sample_ids ,
+                                    environment = sample_id.split[,2],
+                                    genotype =  sample_id.split[,1] ) %>% data.frame()
+
+
+    rows = c(1:nrow(design_without_rep))
+    if (uniform_nb_rep == T) times = rep(n_replicate, base::nrow(design_without_rep))
+    else { times = sample(1:n_replicate, base::nrow(design_without_rep), replace=T) ; message("uniform_nb_rep = FALSE \nn_replicate consider as a maximum number of replicates possible") }
+
+    design = design_without_rep %>%
+      dplyr::mutate(rep = times) %>%
+      dplyr::group_by(sample_id, genotype, environment) %>%
+      tidyr::expand(rep = seq(1:rep)) %>%
+      tidyr::unite(sample_id, sample_id, rep) %>%
+      dplyr::ungroup()
+
+    if (n_genotype > 1) design$genotype <- factor(x = design$genotype,levels = c("G0", unique(design$genotype)[-1]))
+    if (n_environment  > 1) design$environment <- factor(x = design$environment, levels = c( "E0", unique(design$environment)[-1]))
+
+    model_matrix = stats::model.matrix(~ genotype + environment + genotype:environment, design)
+    rownames(model_matrix) = design$sample_id
+
+    return( base::list(model_matrix = model_matrix,
+                    design2simulate = design) )
+}
+
+
diff --git a/src/v2/HTRSIM/R/evaluation.R b/src/v2/HTRSIM/R/evaluation.R
new file mode 100644
index 0000000..8e96094
--- /dev/null
+++ b/src/v2/HTRSIM/R/evaluation.R
@@ -0,0 +1,78 @@
+
+#' Get getDfComparison
+#'
+#' @param dds_simu.mcols  dds object obtain on simulation
+#' @param model_matrix a stats::model.matrix output
+#' @param  beta.actual.matrix.matrix a matrix of beta used as input for simulation
+#' @param threads
+#' @import stringr
+#' @import base
+#' @import dplyr
+#' @import reshape2
+#' @import DESeq2
+#' @import furrr
+#' @return a dataframe for beta comparison
+#' @export
+#'
+#' @examples
+getDfComparison <- function(dds_simu , model_matrix, beta.actual.matrix, threads = 4 ){
+
+
+    listBeta = DESeq2::resultsNames(dds_simu)
+    plan(multisession, workers = 4)
+    res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x)) %>% data.frame() %>% .$padj)
+    padj.matrix = do.call("cbind", res)
+
+
+
+    dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
+
+    dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
+                                        stringr::str_replace(., "_vs_G0", "") %>%
+                                        stringr::str_replace(., "_vs_E0", "") %>%
+                                        stringr::str_replace_all(., "_", "") %>%
+                                        stringr::str_replace(., "\\.", ":")
+
+    columnOfInterest = model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "")
+    #dds_simu.mcols[,columnOfInterest]
+
+    ## Get only column of interest
+    idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped)
+    beta.infered = dds_simu.mcols[,idx_cols]
+
+    ## homogeneize column names & rownames
+    idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames())
+    beta.actual.matrix = beta.actual.matrix[,idx_cols]
+    colnames(beta.infered) = base::colnames(beta.actual.matrix)
+    colnames(padj.matrix) = base::colnames(beta.actual.matrix)
+    rownames(padj.matrix) = base::rownames(beta.actual.matrix)
+
+    beta.infer.long = beta.infered %>% data.frame() %>%
+                              tibble::rownames_to_column(., var = "gene_id") %>%
+                              dplyr::mutate(origin = "Inference") %>%
+                              reshape2::melt(., value.name = "value", variable.name= "beta")
+    beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>%
+                              tibble::rownames_to_column(., var = "gene_id") %>%
+                              dplyr::mutate(origin = "Actual") %>%
+                              reshape2::melt(., value.name = "value", variable.name= "beta")
+    padj.matrix.long = padj.matrix  %>% data.frame() %>%
+                              tibble::rownames_to_column(., var = "gene_id") %>%
+                              dplyr::mutate(origin = "padj") %>%
+                              reshape2::melt(., value.name = "value", variable.name= "beta")
+
+    beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long)
+    #beta.merged.long$beta %>% unique()
+
+    beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when(
+      str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
+      str_detect(beta, "genotypeG\\d+$") ~ "G",
+      str_detect(beta, "environmentE\\d+$") ~ "E",
+      str_detect(beta, "Intercept$") ~ "Intercept")
+    )
+
+
+    beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(.,  gene_id + beta + type ~ origin)
+    beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE"))
+
+    return(beta.merged.long.reshape2)
+}
diff --git a/src/v2/HTRSIM/R/modelFitting.R b/src/v2/HTRSIM/R/modelFitting.R
new file mode 100644
index 0000000..7cbf226
--- /dev/null
+++ b/src/v2/HTRSIM/R/modelFitting.R
@@ -0,0 +1,88 @@
+#' Launch MASS:GLM.NB
+#'
+#' @param gene_count a row of kij simulated table
+#' @param i index of the gene
+#' @import dplyr
+#' @import stringr
+#' @import stats
+#' @return a dtf
+#' @export
+#'
+#' @examples
+reshapeGlmRes <- function(fit, i, error_bool = F){
+    if (error_bool == T){ ## error while fiting model
+      res = list(Inference = NA, pval = NA,
+                 beta = NA, gene_id = paste("gene", i, sep = ""), type = NA, deviance = NA) %>%
+            data.frame()
+    }
+    else{ ## success to fit model
+      res = coef(summary(fit))[,c(1,4)] %>%
+              data.frame() %>%
+              dplyr::rename(., pval = "Pr...z.." , Inference = "Estimate") %>%
+              dplyr::mutate(beta = stringr::str_remove_all(rownames(.), "[//(//)]")) %>%
+              dplyr::mutate(beta = stringr::str_replace(beta, ":", "."))
+      rownames(res) <- NULL
+      res = res  %>%
+              dplyr::mutate(gene_id = paste("gene", i, sep = "") ) %>%
+              dplyr::mutate(type = dplyr::case_when(
+                stringr::str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
+                stringr::str_detect(beta, "genotypeG\\d+$") ~ "G",
+                stringr::str_detect(beta, "environmentE\\d+$") ~ "E",
+                stringr::str_detect(beta, "Intercept$") ~ "Intercept")) %>%
+              dplyr::mutate(dispersion = fit$theta)
+      #stats::summary.glm(fit)$dispersion
+    }
+    return(res)
+}
+
+
+
+
+#' Launch MASS:GLM.NB
+#'
+#' @param gene_count a row of kij simulated table
+#' @param i index of the gene
+#' @param  design.dtf a dtf of the design simulated
+#' @import MASS
+#' @return a dtf
+#' @export
+#'
+#' @examples
+run.glm <- function(gene_count, i, design.dtf) {
+  y = gene_count
+  genotype = design2simulate$design2simulate$genotype
+  environment = design2simulate$design2simulate$environment
+  df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame()
+  rownames(df_gene_i) <- NULL
+  print(i)
+  tryCatch({
+
+    fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = df_gene_i, link = log)
+    return(reshapeGlmRes(fit, i))
+  },
+
+  error = function(cnd){
+    return(reshapeGlmRes(NULL, i, error_bool = T))
+}
+)
+}
+
+
+
+#' Launch Deseq
+#'
+#' @param tabl_cnts table containing counts per genes & samples
+#' @param bioDesign table describing bioDesgin of input
+#' @import DESeq2
+#' @return DESEQ2 object
+#' @export
+#'
+#' @examples
+run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment){
+
+
+  dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model  )
+  dds <- DESeq2::DESeq(dds)
+  return(dds)
+
+}
diff --git a/src/v2/HTRSIM/R/ublicsTableCountsExtractor.R b/src/v2/HTRSIM/R/ublicsTableCountsExtractor.R
new file mode 100644
index 0000000..cc68262
--- /dev/null
+++ b/src/v2/HTRSIM/R/ublicsTableCountsExtractor.R
@@ -0,0 +1,30 @@
+#' Extract beta distribution from DESEQ2 object
+#'
+#' @param dds_obj a DESEQ2 object
+#' @import S4vectors
+#' @return a list containing 1- mean and sd of BetaG 2- mean and sd of BetaE 3- mean and sd of BetaGE 5- mean and sd of gene dispersion
+#' @export
+#'
+#' @examples
+extractDistributionFromDDS <-  function(dds_obj){
+  ## Beta
+  dds.mcols = S4Vectors::mcols(dds, use.names=TRUE)
+  beta0 <- dds.mcols$Intercept
+  betaG <- dds.mcols$genotype_RM11_vs_GSY147
+  betaE <- dds.mcols$environment_treated_vs_untreated
+  betaGE <- dds.mcols$genotypeRM11.environmenttreated
+  beta.dtf = cbind(beta0,betaG,betaE,betaGE) %>% as.data.frame() %>% drop_na()
+
+  ## Dispersion
+  gene_disp = dds.mcols$dispersion %>% na.omit()
+
+
+  return(list(beta = beta.dtf,
+              gene_dispersion = gene_disp,
+              beta0.mean = mean(beta.dtf$beta0,  na.rm = T), beta0.sd = sd(beta.dtf$beta0,  na.rm = T) ,
+              betaG.mean = mean(beta.dtf$betaG,  na.rm = T), betaG.sd = sd(beta.dtf$betaG,  na.rm = T) ,
+              betaE.mean = mean(beta.dtf$betaE, na.rm = T)  , betaE.sd = sd(beta.dtf$betaE,  na.rm = T) ,
+              betaGE.mean =  mean(beta.dtf$betaGE, na.rm = T)  , betaGE.sd = sd(beta.dtf$betaGE,  na.rm = T),
+              gene_disp.mean = mean(gene_disp, na.rm = T) ,  gene_disp.sd = sd(gene_disp, na.rm = T)))
+
+}
diff --git a/src/v2/HTRSIM/R/visualization.R b/src/v2/HTRSIM/R/visualization.R
new file mode 100644
index 0000000..47c1e44
--- /dev/null
+++ b/src/v2/HTRSIM/R/visualization.R
@@ -0,0 +1,65 @@
+
+
+
+#' Sampling value from a known Gaussian distribution
+#'
+#' @param beta_type  a string (beta0, betaG, betaE, betaGE)
+#' @param mean.distrib  the mean of the distribution (dbl)
+#' @param sd.distrib  the standard deviation of the distribution (dbl)
+#' @param n an integer defining the number of value sampling
+#' @import dplyr
+#' @import base
+#' @return a dataframe
+#' @export
+#'
+#' @examples
+rnorm.distrib.beta <- function(parameter, mean.distrib , sd.distrib , n = 1000 ){
+
+  rnorm.distrib.dtf = rnorm(1000, mean = mean.distrib, sd = sd.distrib) %>%
+    base::data.frame() %>%
+    dplyr::mutate( parameter = parameter) %>%
+    dplyr::rename(., value = ".") %>%
+    dplyr::mutate(sampling_from = paste("rnorm(mean = ", signif(mean.distrib, 3), ", sd=" , signif(sd.distrib, 3), ")", sep = ""))
+
+
+  return(rnorm.distrib.dtf)
+}
+
+
+#' Vizualize the output of extractDistributionFromDDS
+#'
+#' @param dds.extraction  output of extractDistributionFromDDS
+#' @import ggplot2
+#' @import base
+#' @import dplyr
+#' @return a plot
+#' @export
+#'
+#' @examples
+ddsExtraction.viz <- function(dds.extraction){
+    beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter")
+
+    alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
+                          dplyr::rename(., value = ".") %>%
+                          dplyr::mutate(parameter = "dispersion")
+
+    dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long)
+
+    rnorm.distrib.beta0 = rnorm.distrib.beta(parameter = "beta0", mean.distrib = dds.extraction$beta0.mean, sd = dds.extraction$beta0.sd )
+    rnorm.distrib.betaG = rnorm.distrib.beta(parameter = "betaG", mean.distrib = dds.extraction$betaG.mean, sd = dds.extraction$betaG.sd )
+    rnorm.distrib.betaE = rnorm.distrib.beta(parameter = "betaE", mean.distrib = dds.extraction$betaE.mean, sd = dds.extraction$betaE.sd )
+    rnorm.distrib.betaGE = rnorm.distrib.beta(parameter = "betaGE", mean.distrib = dds.extraction$betaGE.mean, sd = dds.extraction$betaGE.sd )
+    rnorm.distrib.alpha = rnorm.distrib.beta(parameter = "dispersion", mean.distrib = dds.extraction$gene_disp.mean, sd = dds.extraction$gene_disp.sd )
+
+    dtf.rnorm.param = base::rbind(rnorm.distrib.beta0, rnorm.distrib.betaG, rnorm.distrib.betaE, rnorm.distrib.betaGE, rnorm.distrib.alpha)
+
+    p = ggplot(dtf.params_obs, aes(x= value)) +
+      geom_histogram(aes(y=..density..), colour="black", fill="white")+
+      geom_density(data = dtf.rnorm.param, aes(x =  value,  fill= sampling_from), alpha=.4) + facet_grid(~parameter)
+
+
+
+
+    return(p)
+
+}
diff --git a/src/v2/HTRSIM/devtools_history.R b/src/v2/HTRSIM/devtools_history.R
new file mode 100644
index 0000000..4b905d3
--- /dev/null
+++ b/src/v2/HTRSIM/devtools_history.R
@@ -0,0 +1,8 @@
+usethis::use_build_ignore("devtools_history.R")
+usethis::use_package('tidyverse', type = "depends")
+usethis::use_package('stats')
+usethis::use_package("stringr")
+usethis::use_package("MASS")
+usethis::use_package("DESeq2")
+usethis::use_package("furrr")
+
diff --git a/src/v2/HTRSIM/man/buildDesign2simulate.Rd b/src/v2/HTRSIM/man/buildDesign2simulate.Rd
new file mode 100644
index 0000000..e3c699c
--- /dev/null
+++ b/src/v2/HTRSIM/man/buildDesign2simulate.Rd
@@ -0,0 +1,31 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/designSimulationBuilder.R
+\name{buildDesign2simulate}
+\alias{buildDesign2simulate}
+\title{Build a design dataframe}
+\usage{
+buildDesign2simulate(
+  n_genotype,
+  n_environment,
+  n_replicate,
+  uniform_nb_rep = T
+)
+}
+\arguments{
+\item{n_genotype}{A int.}
+
+\item{n_environment}{A int.}
+
+\item{n_replicate}{A int.}
+
+\item{uniform_nb_rep}{logical}
+}
+\value{
+dataframe with n_genotype rows and 3 columns (sample_id, genotype, environment)
+}
+\description{
+Build a design dataframe
+}
+\examples{
+buildDesign2simulate(1000, 2, 30)
+}
diff --git a/src/v2/HTRSIM/man/ddsExtraction.viz.Rd b/src/v2/HTRSIM/man/ddsExtraction.viz.Rd
new file mode 100644
index 0000000..02be5ed
--- /dev/null
+++ b/src/v2/HTRSIM/man/ddsExtraction.viz.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/visualization.R
+\name{ddsExtraction.viz}
+\alias{ddsExtraction.viz}
+\title{Vizualize the output of extractDistributionFromDDS}
+\usage{
+ddsExtraction.viz(dds.extraction)
+}
+\arguments{
+\item{dds.extraction}{output of extractDistributionFromDDS}
+}
+\value{
+a plot
+}
+\description{
+Vizualize the output of extractDistributionFromDDS
+}
diff --git a/src/v2/HTRSIM/man/extractDistributionFromDDS.Rd b/src/v2/HTRSIM/man/extractDistributionFromDDS.Rd
new file mode 100644
index 0000000..fcfc012
--- /dev/null
+++ b/src/v2/HTRSIM/man/extractDistributionFromDDS.Rd
@@ -0,0 +1,17 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ublicsTableCountsExtractor.R
+\name{extractDistributionFromDDS}
+\alias{extractDistributionFromDDS}
+\title{Extract beta distribution from DESEQ2 object}
+\usage{
+extractDistributionFromDDS(dds_obj)
+}
+\arguments{
+\item{dds_obj}{a DESEQ2 object}
+}
+\value{
+a list containing 1- mean and sd of BetaG 2- mean and sd of BetaE 3- mean and sd of BetaGE 5- mean and sd of gene dispersion
+}
+\description{
+Extract beta distribution from DESEQ2 object
+}
diff --git a/src/v2/HTRSIM/man/getBetaforSimulation.Rd b/src/v2/HTRSIM/man/getBetaforSimulation.Rd
new file mode 100644
index 0000000..1fdf6fc
--- /dev/null
+++ b/src/v2/HTRSIM/man/getBetaforSimulation.Rd
@@ -0,0 +1,33 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/countsGenerator.R
+\name{getBetaforSimulation}
+\alias{getBetaforSimulation}
+\title{Get beta_ij}
+\usage{
+getBetaforSimulation(
+  n_genes = 100,
+  n_genotypes = 20,
+  n_environments = 2,
+  beta.dtf,
+  threshold = 15
+)
+}
+\arguments{
+\item{n_genes}{an integer}
+
+\item{beta.dtf}{a dtf of beta0,betaG, betaE, betaGE.}
+
+\item{threshold}{an integer)}
+
+\item{n_genotype}{A int.}
+
+\item{n_environment}{A int.}
+
+\item{model_matrix}{an output of stat::model.matrix()}
+}
+\value{
+a dataframe with the gene dispersion for each samples
+}
+\description{
+Get beta_ij
+}
diff --git a/src/v2/HTRSIM/man/getDfComparison.Rd b/src/v2/HTRSIM/man/getDfComparison.Rd
new file mode 100644
index 0000000..7f80baa
--- /dev/null
+++ b/src/v2/HTRSIM/man/getDfComparison.Rd
@@ -0,0 +1,21 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/evaluation.R
+\name{getDfComparison}
+\alias{getDfComparison}
+\title{Get getDfComparison}
+\usage{
+getDfComparison(dds_simu, model_matrix, beta.actual.matrix)
+}
+\arguments{
+\item{model_matrix}{a stats::model.matrix output}
+
+\item{dds_simu.mcols}{dds object obtain on simulation}
+
+\item{beta.actual.matrix.matrix}{a matrix of beta used as input for simulation}
+}
+\value{
+a dataframe for beta comparison
+}
+\description{
+Get getDfComparison
+}
diff --git a/src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd b/src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd
new file mode 100644
index 0000000..e2b8cff
--- /dev/null
+++ b/src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd
@@ -0,0 +1,34 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/countsGenerator.R
+\name{getGenesDispersionsForSimulation}
+\alias{getGenesDispersionsForSimulation}
+\title{Get genes dispersion}
+\usage{
+getGenesDispersionsForSimulation(
+  n_genes = 100,
+  n_genotypes,
+  n_environments,
+  dispersion.vec,
+  dispUniform_btweenCondition = T,
+  model_matrix
+)
+}
+\arguments{
+\item{n_genes}{an integer}
+
+\item{dispersion.vec}{A vector of observed dispersion.}
+
+\item{dispUniform_btweenCondition}{logical}
+
+\item{model_matrix}{an output of stat::model.matrix()}
+
+\item{n_genotype}{A int.}
+
+\item{n_environment}{A int.}
+}
+\value{
+a dataframe with the gene dispersion for each samples
+}
+\description{
+Get genes dispersion
+}
diff --git a/src/v2/HTRSIM/man/getK_ij.Rd b/src/v2/HTRSIM/man/getK_ij.Rd
new file mode 100644
index 0000000..08c5b70
--- /dev/null
+++ b/src/v2/HTRSIM/man/getK_ij.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/countsGenerator.R
+\name{getK_ij}
+\alias{getK_ij}
+\title{Get K_ij : gene counts}
+\usage{
+getK_ij(mu_ij.matrix, gene_disp.matrix)
+}
+\arguments{
+\item{gene_disp.matrix}{a matrix of gene dispersion}
+
+\item{log_qij.matrix}{a matrix of log_qij}
+}
+\value{
+a dataframe with the gene dispersion for each samples
+}
+\description{
+Get K_ij : gene counts
+}
diff --git a/src/v2/HTRSIM/man/getLog_qij.Rd b/src/v2/HTRSIM/man/getLog_qij.Rd
new file mode 100644
index 0000000..9bbc605
--- /dev/null
+++ b/src/v2/HTRSIM/man/getLog_qij.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/countsGenerator.R
+\name{getLog_qij}
+\alias{getLog_qij}
+\title{Get log(q_ij)}
+\usage{
+getLog_qij(beta.matrix.input, model_matrix)
+}
+\arguments{
+\item{model_matrix}{an output of stat::model.matrix()}
+
+\item{beta.dtf}{a dtf of beta0,betaG, betaE, betaGE.}
+}
+\value{
+a dataframe with the gene dispersion for each samples
+}
+\description{
+Get log(q_ij)
+}
diff --git a/src/v2/HTRSIM/man/getMu_ij.Rd b/src/v2/HTRSIM/man/getMu_ij.Rd
new file mode 100644
index 0000000..e4b7949
--- /dev/null
+++ b/src/v2/HTRSIM/man/getMu_ij.Rd
@@ -0,0 +1,19 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/countsGenerator.R
+\name{getMu_ij}
+\alias{getMu_ij}
+\title{Get mu_ij}
+\usage{
+getMu_ij(log_qij.matrix, size_factor)
+}
+\arguments{
+\item{beta.dtf}{a dtf of beta0,betaG, betaE, betaGE.}
+
+\item{model_matrix}{an output of stat::model.matrix()}
+}
+\value{
+a dataframe with the gene dispersion for each samples
+}
+\description{
+Get mu_ij
+}
diff --git a/src/v2/HTRSIM/man/rnorm.distrib.beta.Rd b/src/v2/HTRSIM/man/rnorm.distrib.beta.Rd
new file mode 100644
index 0000000..63a174c
--- /dev/null
+++ b/src/v2/HTRSIM/man/rnorm.distrib.beta.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/visualization.R
+\name{rnorm.distrib.beta}
+\alias{rnorm.distrib.beta}
+\title{Sampling value from a known Gaussian distribution}
+\usage{
+rnorm.distrib.beta(parameter, mean.distrib, sd.distrib, n = 1000)
+}
+\arguments{
+\item{mean.distrib}{the mean of the distribution (dbl)}
+
+\item{sd.distrib}{the standard deviation of the distribution (dbl)}
+
+\item{n}{an integer defining the number of value sampling}
+
+\item{beta_type}{a string (beta0, betaG, betaE, betaGE)}
+}
+\value{
+a dataframe
+}
+\description{
+Sampling value from a known Gaussian distribution
+}
diff --git a/src/v2/HTRSIM/man/run.deseq.Rd b/src/v2/HTRSIM/man/run.deseq.Rd
new file mode 100644
index 0000000..02d00fc
--- /dev/null
+++ b/src/v2/HTRSIM/man/run.deseq.Rd
@@ -0,0 +1,23 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/ublicsTableCountsExtractor.R
+\name{run.deseq}
+\alias{run.deseq}
+\title{Launch Deseq}
+\usage{
+run.deseq(
+  tabl_cnts,
+  bioDesign,
+  model = ~genotype + environment + genotype:environment
+)
+}
+\arguments{
+\item{tabl_cnts}{table containing counts per genes & samples}
+
+\item{bioDesign}{table describing bioDesgin of input}
+}
+\value{
+DESEQ2 object
+}
+\description{
+Launch Deseq
+}
diff --git a/src/v2/HTRSIM/tests/testthat.R b/src/v2/HTRSIM/tests/testthat.R
new file mode 100644
index 0000000..16b641e
--- /dev/null
+++ b/src/v2/HTRSIM/tests/testthat.R
@@ -0,0 +1,4 @@
+library(testthat)
+library(HTRSIM)
+
+test_check("HTRSIM")
-- 
GitLab