diff --git a/README.md b/README.md
deleted file mode 100644
index 37cb00c09c8901d4933097d502f52e2cf7e2dba6..0000000000000000000000000000000000000000
--- a/README.md
+++ /dev/null
@@ -1,19 +0,0 @@
-# DESEQ2
-
-- [dispersion inference](results/v3/DESEQ2/2022_02_03_dispersion.html)
-- [sequencing depth](results/v3/DESEQ2/2023_02_05_sequencingDepth.html)
-- [replicates](results/v3/DESEQ2/2023_02_06_replicates.html)
-
-
-# GLM
-
-- [dispersion inference](results/v3/GLM/2023_02_04_dispersion.html)
-- [sequencing depth](results/v3/GLM/2023_02_05_sequencingDepth.html)
-- [replicates](results/v3/GLM/2023_02_06_replicates.html)
--  [random intercept](results/v3/GLM/2023_02_08_randomIntercept.html)
-
-# GLM mixte
-
--  [random intercept](results/v3/GLM_mixte/2023_02_08_randomIntercept.html)
--  [dispersion inference](results/v3/GLM_mixte/2023_02_06_dispersion.html)
--  [Package benchmarking](results/v3/GLM_mixte/2023_02_08_benchmarkingPackages.html)
diff --git a/img/schema_loop.jpg b/img/schema_loop.jpg
deleted file mode 100644
index ba0ed39b7abcad42425b415fcf0c1c88ed21511a..0000000000000000000000000000000000000000
Binary files a/img/schema_loop.jpg and /dev/null differ
diff --git a/reports/2022-12-09_report.Rmd b/reports/2022-12-09_report.Rmd
deleted file mode 100644
index ca9895e4a60ead395763fc047219800229a50696..0000000000000000000000000000000000000000
--- a/reports/2022-12-09_report.Rmd
+++ /dev/null
@@ -1,799 +0,0 @@
----
-title: "High throughput RNA-seq"
-output: html_document
-date: "2022-12-09"
-css: 
- - css/air.css
----
-
-```{r setup, message=FALSE, warning=FALSE, include=TRUE, results="hide", include=FALSE}
-library(HTRfit)
-library(HTRsim)
-library(plotROC)
-library(gridExtra)
-library(ggVennDiagram)
-```
-
-## RNA-seq and GLM
-
-Generalized linear models (GLMs) are a classic method for analyzing RNA-seq expression data. In contrast to exact tests, GLMs allow for more general comparisons: they make it possible to analyze complex experiments involving multiple treatment conditions while still taking full account of biological variation. Biological variation between RNA samples is estimated separately from technical variation.
-
-### Negative binomial parameters
-
-```{r message=FALSE, warning=FALSE, message=FALSE, warning=FALSE, include=TRUE, echo = FALSE, fig.width = 4, fig.height = 3, fig.align='center'}
-# Simulate 10,000 negative-binomial draws for every (mu, size) combination so
-# the faceted density plot contrasts the two `size` values at two mean levels.
-# Legend and facet labels are derived from the simulated parameter values;
-# previously the size = 10 draws were mislabelled "size = 0.1".
-n_draws <- 10000
-
-# expand.grid varies `size` fastest, which preserves the original draw order:
-# (size 10, mu 100), (100, 100), (10, 1000), (100, 1000).
-nb_params <- expand.grid(size = c(10, 100), mu = c(100, 1000))
-
-# One data frame of draws for a single (size, mu) pair, labelled for plotting.
-simulate_nb <- function(size, mu) {
-  data.frame(
-    a = rnbinom(n_draws, size = size, mu = mu),
-    mu = paste("mu =", mu),
-    size = paste("size =", size)
-  )
-}
-dtf <- do.call(rbind, Map(simulate_nb, nb_params$size, nb_params$mu))
-
-# Labels become factors so ggplot treats them as discrete groups.
-dtf$size <- factor(dtf$size)
-dtf$mu <- factor(dtf$mu)
-dtf$a <- as.numeric(dtf$a)
-ggplot(dtf) +
-  geom_density(aes(x = a, fill = size)) +
-  facet_wrap(~mu, scales = "free") +
-  theme_minimal() +
-  scale_fill_brewer(palette = "Paired")
-```
-
-
-### False discovery rate
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", include=FALSE}
-dtf.comp.annot.uncorrected = getAnnotation(dtf.comp, threshold = thr, alphaRisk = 0.05, postInferenceSelection = F, pvalCorrection = F)
-p1 = getVennDiagramm(dtf.comp.annot.uncorrected, title = "pvalues")
-```
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", include=FALSE}
-dtf.comp.annot.corrected = getAnnotation(dtf.comp, threshold = thr, alphaRisk = 0.05, postInferenceSelection = F, pvalCorrection = T)
-p2 = getVennDiagramm(dtf.comp.annot.corrected, title = "padj")
-```
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", include=FALSE}
-dtf1 = dtf.comp.annot.corrected %>% mutate(pvalueCorrection = "pvalue adjusted")
-dtf2 = dtf.comp.annot.uncorrected %>% mutate(pvalueCorrection = "pvalue")
-dtf = rbind(dtf1, dtf2)
-
-dtf$annotation <- factor(dtf$annotation, levels = c("TRUE", "FALSE"))
-
-p3 = ggplot(dtf %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, col = annotation), alpha = 0.5) +  geom_abline(intercept = 0, slope = 1) + 
-  geom_vline(xintercept = c(-thr, thr), linetype = "dotted") +
-  facet_grid(beta~pvalueCorrection, scales = "free") + 
-  scale_color_brewer(palette = "Set2")
-
-p4 = ggplot(dtf %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, color = pvalueCorrection)) + 
-  geom_roc(n.cuts = 0, labels = F)
-
-p1p2 = grid.arrange(p1,p2, ncol = 1, nrow = 2)
-p1p2p3 = grid.arrange(p1p2, p4 , ncol = 2, nrow = 1)
-```
-
-```{r}
-
-p1p2p3p4 = grid.arrange(p3, p1p2p3, nrow = 2, ncol = 1)
-```
-
-```{r}
-dtf.comp.annot$annotation <- factor(dtf.comp.annot$annotation, levels = c("TRUE", "FALSE"))
-dtf.comp.annot$from <- factor(dtf.comp.annot$from, levels = c("p(|beta| > 0) > 0.95 & |beta| > T", "p(|beta| > T) > 0.95"))
-
-ggplot(dtf.comp.annot %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, col = annotation), alpha = 0.5) +  geom_abline(intercept = 0, slope = 1) + 
-  geom_vline(xintercept = c(-thr, thr), linetype = "dotted") +
-  facet_wrap(~beta, scales = "free") + 
-  scale_color_brewer(palette = "Set2")
-
-p = ggplot(dtf.comp.annot %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj)) + 
-  geom_roc(n.cuts = 0, labels = F)  + facet_wrap(~from) 
-p
-```
-
-
-#### Performance improves as the number of replicates increases
-
-```{r}
-n_genes = 4
-n_genotypes = 10 
-n_environments = 2
-n_rep_list = c(3, 5, 10, 25)
-sequencing_factor = 2
-uniformNumberOfReplicates = T
-uniformDispersion = T
-dds.extraction = loadEmbedded_ObservedValues()
-thr = 2
-  
-remove(df_final)
-## Fit mvnorm ##
-fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
-##### Ground truth ######
-beta.actual <- getBetaforSimulation(
-n_genes,
-n_genotypes,
-fit.mvnorm, n_clusters = 5
-)
-
-##### build input for simulation ####
-model.matx <- getModelMatrix()
-log_qij <- getLog_qij(beta.actual, model.matx)
-mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-sample_ids <- colnames(mu_ij)
-gene_dispersion.vec <- dds.extraction$gene_dispersion
-dispersion.matrix <- getGenesDispersions(n_genes,
-              sample_ids,
-              dispersion.vec = gene_dispersion.vec,
-              uniformDispersion
-              )
-
-# For each replicate count: simulate a count table, fit it with DESeq2 and with
-# the per-gene GLM, and stack both comparisons (tagged with n_rep) in df_final.
-for (n_rep in n_rep_list){
-  print(n_rep)
-  ##### Design replicates: use the current n_rep (was hard-coded to 3) ######
-  designReplication.matx <- getReplicationDesign(
-    n_rep,
-    n_genotypes,
-    n_environments,
-    uniformNumberOfReplicates
-    )
-  ##### build counts table ####
-  countTable <- getCountTable(mu_ij, dispersion.matrix,
-  n_genes, n_genotypes,
-  sample_id_list = sample_ids,
-  replication.matx = designReplication.matx
-  )
-  design <- summariseDesign(countTable)
-  actualParam <- list(
-  dispersion = dispersion.matrix,
-  beta = beta.actual, mvnorm = fit.mvnorm
-  )
-  mock = list(design = design, countTable = countTable,
-          actualParameters = actualParam)
-  count_table = mock$countTable %>% as.data.frame()
-  bioDesign = mock$design
-  
-  ############### DESEQ: fit, then compare predictions with the ground truth ##################
-  dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
-  deseqFitdtf = getCoefficientsFromDds(dds_simu)
-  prediction = getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05)
-  expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
-  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  comparison.deseq = comparison %>% mutate(from = "Deseq2") %>% mutate(n_rep = n_rep) 
-  
-  ############### GLM: same comparison, reusing `expectation` from above ####################
-  count_data = HTRfit::reshapeCounTable(count_table, bioDesign)
-  l = HTRfit::launch.glm(count_data)
-  fitDtf =listFit2dtf(l)
-  prediction = getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05)
-  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  comparison.glm = comparison %>% mutate(from = "MASS::glm") %>% mutate(n_rep = n_rep)
-  ###################### DF final: create on first pass, append afterwards ###################
-  tmp = rbind(comparison.glm, comparison.deseq)
-  if (exists('df_final')){
-  df_final = rbind(df_final, tmp)
-  }
-  else{
-    df_final = tmp
-  }
-}
-df_final$n_rep %>% unique()
-```
-## ROC
-
-```{r}
-df_final$n_rep <- factor(df_final$n_rep)
-df_final[is.na(df_final$term),]
-p = ggplot(df_final %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, col = n_rep)) + 
-  geom_roc(n.cuts = 0, labels = F)  + facet_wrap(~from) 
-p = p + scale_color_manual(values = c(c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D")))
-
-ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
-  #scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E"))
-  #scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D", "#696969" ))
-
-```
-
-#### Performance improves as sequencing depth increases
-
-```{r}
-n_genes = 6000
-n_genotypes = 3 
-n_environments = 2
-max_n_replicates = 3
-sequencing_factor_list = c(0.01, 0.1, 1, 2)
-uniformNumberOfReplicates = T
-uniformDispersion = T
-dds.extraction = loadObservedValues()
-thr = 2
-  
-remove(df_final)
-## Fit mvnorm ##
-fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
-##### Ground truth ######
-beta.actual <- getBetaforSimulation(
-n_genes,
-n_genotypes,
-fit.mvnorm, n_clusters = 5
-)
-
-##### build input for simulation ####
-model.matx <- getModelMatrix()
-log_qij <- getLog_qij(beta.actual, model.matx)
-
-
-for (sequencing_factor in sequencing_factor_list){
-  print(sequencing_factor)
-mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-sample_ids <- colnames(mu_ij)
-gene_dispersion.vec <- dds.extraction$gene_dispersion
-dispersion.matrix <- getGenesDispersions(n_genes,
-              sample_ids,
-              dispersion.vec = gene_dispersion.vec,
-              uniformDispersion
-              )
-
-
-  ##### Design replicates ######
-  designReplication.matx <- getReplicationDesign(
-    max_n_replicates,
-    n_genotypes,
-    n_environments,
-    uniformNumberOfReplicates
-    )
-  
-  ##### build counts table ####
-  countTable <- getCountTable(mu_ij, dispersion.matrix,
-  n_genes, n_genotypes,
-  sample_id_list = sample_ids,
-  replication.matx = designReplication.matx
-  )
-  design <- summariseDesign(countTable)
-  actualParam <- list(
-  dispersion = dispersion.matrix,
-  beta = beta.actual, mvnorm = fit.mvnorm
-  )
-  mock = list(design = design, countTable = countTable,
-          actualParameters = actualParam)
-  
-  
-  count_table = mock$countTable %>% as.data.frame()
-  count_table %>% dim()
-  reads_counts = count_table %>% colSums() %>% sum() %>% format(., scientific = TRUE, digits=1)
-  bioDesign = mock$design
-  
-  ############### DESEQ ##################
-  dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
-  deseqFitdtf = getCoefficientsFromDds(dds_simu)
-  prediction = getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05)
-  expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
-  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  comparison.deseq = comparison %>% mutate(from = "Deseq2") %>% mutate(sequencingDepth = sequencing_factor) 
-  
-  ############### GLM ####################
-  count_data = HTRfit::reshapeCounTable(count_table, bioDesign)
-  l = HTRfit::launch.glm(count_data)
-  fitDtf =listFit2dtf(l)
-  prediction = getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05)
-  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  comparison.glm = comparison %>% mutate(from = "MASS::glm") %>% mutate(sequencingDepth = sequencing_factor)
-  ###################### DF final ###################
-  tmp = rbind(comparison.glm, comparison.deseq) %>% mutate(reads_sequenced = reads_counts)
-  if (exists('df_final')){
-  df_final = rbind(df_final, tmp)
-  }
-  else{
-    df_final = tmp
-  }
-}
-df_final$reads_sequenced %>% unique() 
-```
-## ROC
-
-```{r}
-# ROC curves per fitting method, coloured by the total reads sequenced.
-df_final$sequencingDepth <- factor(df_final$sequencingDepth)
-# reads_sequenced holds formatted strings; go through numeric so levels sort by depth.
-df_final$reads_sequenced <- factor(as.numeric(df_final$reads_sequenced))
-p <- ggplot(df_final %>% filter(beta != "(Intercept)"),
-            aes(d = actual.label, m = padj, col = reads_sequenced)) +
-  geom_roc(n.cuts = 0, labels = FALSE) + facet_wrap(~from)
-p
-p <- p + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D"))
-ggsave("../img/graph/sequencingDepth_eff.png", p, height = 4, width = 6)  # was replicates_eff.png, clobbering the replicates figure
-
-  #scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D", "#696969" ))
-
-```
-
-## MASS::GLM
-
-```{r}
-# Exploratory check: fit gene1 alone with MASS::glm.nb, inspect it, then run
-# the per-gene GLM pipeline and compare its calls against the ground truth.
-s = HTRfit::reshapeCounTable(count_table, bioDesign)
-fit <- MASS::glm.nb(k_ij ~ genotype + environment + genotype:environment, data = s %>% filter(gene_id == "gene1"), link = log)
-broom.mixed::tidy(fit, component = c("disp"))
-list(estimate = fit$theta, gene_id = "gene1") %>% as.data.frame()
-a  =summary(fit)
-# A summary object has no as.data.frame method; tabulate its coefficient matrix.
-coef(a) %>% as.data.frame()
-fit$fitted.values  # full element name; the dangling `MASS::dis` line was dropped (not a MASS export)
-l = HTRfit::launch.glm(s)
-l[[1]]
-fitDtf =listFit2dtf(l)
-fitDtf$deviance
-
-prediction = getPrediction(fitDtf$inference, threshold = 2, alphaRisk = 0.05, postInferenceSelection = TRUE)
-expectation = getExpectation(mock$actualParameters$beta, threshold = 2)
-comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
-getVennDiagramm(comparison)
-```
-
-## Increasing the number of genotypes
-
-```{r}
-n_genes = 100
-n_genotypes_list = c(10) 
-n_environments = 2
-max_n_replicates = 15
-sequencing_factor = 2
-uniformNumberOfReplicates = T
-uniformDispersion = T
-dds.extraction = loadObservedValues()
-thr = 2
-  
-#remove(df_final)
-## Fit mvnorm ##
-fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
-
-for (x in 1:4){
-  print(x)
-for (n_genotypes in n_genotypes_list){
-  ##### Ground truth ######
-  beta.actual <- getBetaforSimulation(
-  n_genes,
-  n_genotypes,
-  fit.mvnorm, n_clusters = 5
-  )
-  
-  ##### build input for simulation ####
-  model.matx <- getModelMatrix()
-  log_qij <- getLog_qij(beta.actual, model.matx)
-  mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-  sample_ids <- colnames(mu_ij)
-  gene_dispersion.vec <- dds.extraction$gene_dispersion
-  dispersion.matrix <- getGenesDispersions(n_genes,
-                sample_ids,
-                dispersion.vec = gene_dispersion.vec,
-                uniformDispersion
-                )
-  
-  ##### Design replicates ######
-  designReplication.matx <- getReplicationDesign(
-    max_n_replicates,
-    n_genotypes,
-    n_environments,
-    uniformNumberOfReplicates
-    )
-  
-  ##### build counts table ####
-  countTable <- getCountTable(mu_ij, dispersion.matrix,
-  n_genes, n_genotypes,
-  sample_id_list = sample_ids,
-  replication.matx = designReplication.matx
-  )
-  
-  design <- summariseDesign(countTable)
-  actualParam <- list(
-  dispersion = dispersion.matrix,
-  beta = beta.actual, mvnorm = fit.mvnorm
-  )
-  mock = list(design = design, countTable = countTable,
-          actualParameters = actualParam)
-  
-  count_table = mock$countTable %>% as.data.frame()
-  bioDesign = mock$design
-  
-  ############### DESEQ ##################
-  start_time <- Sys.time()
-  dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
-  deseqFitdtf = getCoefficientsFromDds(dds_simu)
-  end_time <- Sys.time()
-  time_process = difftime(end_time, start_time, units = "secs") %>% as.numeric()
-  prediction = getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05)
-  expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
-  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  comparison.deseq = comparison %>% mutate(from = "Deseq2") %>% mutate(n_G = n_genotypes) %>% mutate(timeProcess = time_process)
-  
-  ############### GLM ####################
-  count_data = HTRfit::reshapeCounTable(count_table, bioDesign)
-  start_time <- Sys.time()
-  l = HTRfit::launch.glm(count_data)
-  fitDtf =listFit2dtf(l)
-  end_time <- Sys.time()
-  time_process = difftime(end_time, start_time, units = "secs") %>% as.numeric()
-  prediction = getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05)
-  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  comparison.glm = comparison %>% mutate(from = "MASS::glm") %>% mutate(n_G = n_genotypes) %>% mutate(timeProcess = time_process)
-  ###################### DF final ###################
-  tmp = rbind(comparison.glm, comparison.deseq)
-  #tmp = comparison.glm
-  if (exists('df_final')){
-  df_final = rbind(df_final, tmp)
-  #df_final = comparison.glm
-  }
-  else{
-    df_final = tmp
-  }
-}
-}
-
-df_final$n_G %>% unique()
-write_tsv(df_final, file = "backup_genotypeEffect.tsv")
-
-
-listBeta <- DESeq2::resultsNames(dds_simu)
-library(furrr)
-future::plan(multisession, workers = 2)
-res <- listBeta %>% furrr::future_map(
-        .x = .,
-        ~ DESeq2::results(dds_simu,
-            contrast = list(.x),
-             lfcThreshold = 2, #/!\ statistic & pvalue resestimate later
-            # altHypothesis = altH,
-            tidy = TRUE
-        ) %>%
-            dplyr::select(-baseMean) %>%
-            dplyr::mutate(term = .x) %>%
-            dplyr::rename(
-                estimate = log2FoldChange,
-                std.error = lfcSE,
-                statistic = stat,
-                p.value = pvalue,
-                gene_id = row
-            ),
-        .options = furrr_options(seed = TRUE)
-    )
-deseq_inference <- do.call("rbind", res)
-
-plot(deseq_inference$p.value,prediction$p.value)
-z = prediction %>% 
-        rstatix::adjust_pvalue(p.col = "p.value", method = "BH", output.col = "padj2") %>% mutate()
-
-deseq_inference
-plot(deseq_inference$padj ,z$padj2)
-p.adjust.methods
-```
-
-## ROC
-
-```{r}
-
-p = ggplot(df_final %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, col = from)) + 
-  geom_roc(n.cuts = 0, labels = F)  + facet_wrap(~n_G, nrow = 1) 
-p = p + scale_colour_manual(values = c("#F8CBAD", "#9DC3E6"))
-p
-ggsave("../img/plotROC.png",p, height = 6, width = 8)
-library("Rmisc")
-tgc <- summarySE(df_final, measurevar="timeProcess", groupvars=c("n_G","from"))
-tgc$n_G <- factor(tgc$n_G)
-tgc$n_G = as.numeric(as.character(tgc$n_G))
-p= ggplot(tgc, aes(x = n_G, y = timeProcess, colour = from)) + 
-  geom_line(aes(x = n_G, y= timeProcess+sd)) +
-    geom_line(aes(x = n_G, y= timeProcess-sd)) +
-    geom_point() +  
-      scale_y_log10() + scale_colour_manual(values = c("#F8CBAD", "#9DC3E6"))
-
-p
-bakcup = rbind(bakcup, df_final)
-bakcup$n_G <- as.numeric(bakcup$n_G)
-bakcup$n_G %>% unique()
-```
-## GLMM
-
-```{r}
-
-n_genes = 10
-n_genotypes = 200#c(5, 60, 600, 1000) 
-n_environments = 2
-max_n_replicates = 5
-sequencing_factor = 1
-uniformNumberOfReplicates = T
-uniformDispersion = T
-dds.extraction = loadEmbedded_ObservedValues()
-thr = 2
-fit.mvnorm <- getListMvnormFit(dds.extraction$beta, n_clusters = 5)
-beta.actual <- getBetaforSimulation(
-n_genes,
-n_genotypes,
-fit.mvnorm, fixIntercept = FALSE, fixBetaE = TRUE, n_clusters = 5
-)
-
-##### build input for simulation ####
-model.matx <- getModelMatrix()
-log_qij <- getLog_qij(beta.actual, model.matx)
-mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-sample_ids <- colnames(mu_ij)
-gene_dispersion.vec <- dds.extraction$gene_dispersion
-dispersion.matrix <- getGenesDispersions(n_genes,
-              sample_ids,
-              dispersion.vec = gene_dispersion.vec,
-              uniformDispersion
-              )
-
-##### Design replicates ######
-designReplication.matx <- getReplicationDesign(
-  max_n_replicates,
-  n_genotypes,
-  n_environments,
-  uniformNumberOfReplicates
-  )
-
-##### build counts table ####
-countTable <- getCountTable(mu_ij, dispersion.matrix,
-n_genes, n_genotypes,
-sample_id_list = sample_ids,
-replication.matx = designReplication.matx
-)
-
-design <- summariseDesign(countTable)
-
-actualParam <- list(
-dispersion = dispersion.matrix,
-beta = beta.actual, mvnorm = fit.mvnorm
-)
-mock = list(design = design, countTable = countTable,
-        actualParameters = actualParam)
-
-count_table = mock$countTable %>% as.data.frame()
-bioDesign = mock$design
-
-count_data = HTRfit::reshapeCounTable(count_table, bioDesign)
-iteration_list <- count_data[, "gene_id"] %>%
-        unique() %>%
-        unlist() %>%
-        unname() ## get list gene
-
-library(future)
-plan(multisession, workers = 4)
-library(lme4)
-library(glmmTMB)
-# Fit a negative-binomial GLMM to one gene's counts and return its tidy summary.
-# data2fit: rows for a single gene (needs k_ij, environment, genotype columns);
-# id: gene identifier stamped on both returned tables as `gene_id`.
-# Returns list(inference, fitQuality); on a fitting error both tables are
-# NA-filled placeholders and a warning reports the gene and the cause.
-fit.glmm <- function(data2fit, id) {
-  tryCatch({
-    fit <- lme4::glmer.nb(k_ij ~ environment + (1 + environment | genotype),
-                          data = data2fit, verbose = FALSE)
-    fit.dtf <- tidySummary(fit, "glmm")
-    fit.dtf$inference <- dplyr::mutate(fit.dtf$inference, gene_id = id)
-    fit.dtf$fitQuality <- dplyr::mutate(fit.dtf$fitQuality, gene_id = id)
-    fit.dtf
-  }, error = function(cnd) {
-    warning("GLMM fit failed for ", id, ": ", conditionMessage(cnd), call. = FALSE)
-    list(inference = data.frame(gene_id = id, estimate = NA, std.error = NA, term = NA),
-         fitQuality = data.frame(null.deviance = NA, df.null = NA, logLik = NA, AIC = NA, BIC = NA, deviance = NA, df.residual = NA, nobs = NA, gene_id = id))
-  })
-}
-a = lme4::glmer.nb(k_ij ~ environment + (1 + environment | genotype), data = count_data %>% filter(gene_id == "gene1"), verbose = F)
-as = summary(a)
-
-res = launch.glm_mixte(count_data)
-results = furrr::future_map(.x = iteration_list, .f = 
-                              ~fit.glmm(count_data[which(count_data[,"gene_id"] == .x),], .x) )
-glmm.res = listFit2dtf(res)
-glmm.res$inference
-beta.actual.glmm = beta.actual %>% dplyr::group_by(gene_id) %>% 
-       dplyr::summarise(tmp=mean(`(Intercept)` + betaG ),
-                 environmentE1 = mean(betaE + betaGE),
-                 "sd__(Intercept)" = sd(`(Intercept)` + betaG ),
-                 sd__environmentE1 = sd(betaGE + betaE ),
-                 "cor__(Intercept).environmentE1"= cor((betaGE + betaE),(`(Intercept)`+ betaG))) %>% 
-        dplyr::rename("(Intercept)" = tmp) %>%
-      reshape2::melt(id = "gene_id", value.name = "actual.value", variable.name = 'term') 
-gene_id2fitMvnorm = beta.actual %>% select(gene_id, idx_mvrnom) %>% unique()
-
-actual2join.dtf <- data.table::data.table( beta.actual.glmm, key = c("gene_id", "term"))
-gene_id2fitMvnorm2join <- data.table::data.table( gene_id2fitMvnorm, key = c("gene_id"))
-actual2join.dtf = actual2join.dtf[gene_id2fitMvnorm2join]
-inference2join.dtf <- data.table::data.table(glmm.res$inference, key = c("gene_id", "term"))
-comparison.dtf <- actual2join.dtf[inference2join.dtf]
-
-
-comparison.dtf = comparison.dtf %>% mutate(actual.value = if_else(str_detect(term, "cor_"), actual.value, actual.value*log(2) ))
-comparison.dtf$idx_mvrnom = factor(comparison.dtf$idx_mvrnom)
-comparison.dtf$term = factor(comparison.dtf$term, levels = c("(Intercept)", "environmentE1","cor__(Intercept).environmentE1", "sd__(Intercept)", "sd__environmentE1"))
-p = ggplot(comparison.dtf)  + 
-  geom_point(aes(x = actual.value, y = estimate, col = idx_mvrnom), alpha = 0.5, size = 2) +  geom_abline(intercept = 0, slope = 1) + 
-  facet_wrap(~term, scales = "free") 
-p
-ggsave("../img/graph/poc_glmm2_1000.png", p,) 
-
-
-# Draw 1000 values per gene from N(mean, sd) for the intercept and for the E1
-# effect, once from the GLMM estimates ("prediction") and once from the
-# simulated ground truth ("actual"), to compare the two distributions.
-# The E1 draws are centred on `environmentE1` (previously `sd__(Intercept)`).
-draw_env_values <- function(value.var, label) {
-  wide <- reshape2::dcast(comparison.dtf, gene_id ~ term, value.var = value.var)
-  ids <- rep(wide$gene_id, 1000)  # recycled in step with the per-gene mean/sd
-  rbind(data.frame(value = rnorm(length(ids), mean = wide$`(Intercept)`, sd = wide$`sd__(Intercept)`), Env = "(Intercept)", gene_id = ids),
-        data.frame(value = rnorm(length(ids), mean = wide$environmentE1, sd = wide$sd__environmentE1), Env = "E1", gene_id = ids)) %>%
-    mutate(from = label)
-}
-x = comparison.dtf %>% reshape2::dcast(., gene_id ~ term, value.var = "estimate")  # kept: later code orders genes with it
-dtf = rbind(draw_env_values("estimate", "prediction"), draw_env_values("actual.value", "actual"))
-library("ggridges")
-
-
-dtf$gene_id = factor(dtf$gene_id, level = x$gene_id[order(x$sd__environmentE1)])
-dtf$from = factor(dtf$from, level = c("prediction", 'actual'))
-p = ggplot(dtf) + geom_density_ridges(aes(x = value , y = gene_id, fill = from), alpha = 0.6) + facet_wrap(~Env, scales = 'free_x', ncol = 2) + xlim(c(-15,15)) + scale_fill_manual(values = c("#FFE699", "#1F4E79"))
-p
-ggsave("../img/graph/poc_glmm1000.png", p, height = 6, width = 8) 
-```
-
-```{r}
-
-############ GLMMM#####
-# Fit the same negative-binomial mixed model (no fixed intercept; random
-# intercept and environment effect per genotype) to one gene with three
-# back-ends and stack the fit_extraction() results, tagged by gene and back-end.
-fit_glmm <- function(data2fit, id) {
-  # Stamp an extracted result with the gene id and the back-end that produced it.
-  label_result <- function(model, backend) {
-    fit_extraction(model) %>% mutate(gene_id = id) %>% mutate(from = backend)
-  }
-  print(data2fit)
-
-  print("LME4")
-  lme4_fit <- glmer.nb(k_ij ~ 0 + environment + (1 + environment | genotype), data = data2fit, verbose = FALSE)
-  lme4_rows <- label_result(lme4_fit, "lme4")
-
-  print("glmTMB 1")
-  tmb1_fit <- glmmTMB::glmmTMB(k_ij ~ 0 + environment + (1 + environment | genotype), data = data2fit, family = nbinom1, verbose = FALSE)
-  tmb1_rows <- label_result(tmb1_fit, "glmTMB nbinom1")
-
-  print("glmTMB 2")
-  tmb2_fit <- glmmTMB::glmmTMB(k_ij ~ 0 + environment + (1 + environment | genotype), data = data2fit, family = nbinom2, verbose = FALSE)
-  tmb2_rows <- label_result(tmb2_fit, "glmTMB nbinom2")
-
-  rbind(lme4_rows, tmb1_rows, tmb2_rows)
-}
-```
-
-## fixed or random effect
-
-#### Performance with and without a fixed intercept
-
-```{r}
-n_genes = 1000
-n_genotypes = 5 
-n_environments = 2
-max_n_replicates = 10
-uniformNumberOfReplicates = T
-uniformDispersion = T
-dds.extraction = loadObservedValues()
-thr = 2
-  
-remove(df_final)
-## Fit mvnorm ##
-fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
-##### Ground truth ######
-
-
-
-for (boolean  in c(F,T)){
-beta.actual <- getBetaforSimulation(
-n_genes,
-n_genotypes,
-fit.mvnorm, n_clusters = 5, fixIntercept = boolean
-)
-
-##### build input for simulation ####
-model.matx <- getModelMatrix()
-log_qij <- getLog_qij(beta.actual, model.matx)
-mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-sample_ids <- colnames(mu_ij)
-gene_dispersion.vec <- dds.extraction$gene_dispersion
-dispersion.matrix <- getGenesDispersions(n_genes,
-              sample_ids,
-              dispersion.vec = gene_dispersion.vec,
-              uniformDispersion
-              )
-
-
-  ##### Design replicates ######
-  designReplication.matx <- getReplicationDesign(
-    max_n_replicates,
-    n_genotypes,
-    n_environments,
-    uniformNumberOfReplicates
-    )
-  
-  ##### build counts table ####
-  countTable <- getCountTable(mu_ij, dispersion.matrix,
-  n_genes, n_genotypes,
-  sample_id_list = sample_ids,
-  replication.matx = designReplication.matx
-  )
-  design <- summariseDesign(countTable)
-  actualParam <- list(
-  dispersion = dispersion.matrix,
-  beta = beta.actual, mvnorm = fit.mvnorm
-  )
-  mock = list(design = design, countTable = countTable,
-          actualParameters = actualParam)
-  
-  
-  count_table = mock$countTable %>% as.data.frame()
-  bioDesign = mock$design
-  
-  ############### DESEQ ##################
-  dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
-  deseqFitdtf = getCoefficientsFromDds(dds_simu)
-  prediction = getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05)
-  expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
-  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  comparison.deseq = comparison %>% mutate(from = "Deseq2") %>% mutate(Intercept_fixed = boolean) 
-  
-  ############### GLM ####################
-  count_data = HTRfit::reshapeCounTable(count_table, bioDesign)
-  l = HTRfit::launch.glm(count_data)
-  fitDtf =listFit2dtf(l)
-  prediction = getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05)
-  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  comparison.glm = comparison %>% mutate(from = "MASS::glm") %>% mutate(Intercept_fixed = boolean)
-  ###################### DF final ###################
-  tmp = rbind(comparison.glm, comparison.deseq) 
-  if (exists('df_final')){
-  df_final = rbind(df_final, tmp)
-  }
-  else{
-    df_final = tmp
-  }
-}
-```
-## ROC
-
-```{r}
-
-
-# ROC curves per method, coloured by whether the intercept was fixed in simulation.
-p = ggplot(df_final %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, col = Intercept_fixed)) + 
-  geom_roc(n.cuts = 0, labels = FALSE)  + facet_wrap(~from) 
-p
-p =p + scale_color_manual(values = c("#A9D18E", "#1F4E79"))
-ggsave("../img/graph/ROCfixed_eff.png", p, height = 4, width = 6)
-# Estimate vs actual value per coefficient; rows lacking a `beta` label are listed, then excluded.
-df_final[is.na(df_final$beta),]
-p = ggplot(df_final %>% filter(!is.na(beta)))  + 
-  geom_point(aes(x = actual.value, y = estimate, col = Intercept_fixed), alpha = 0.4, size = 2) +  geom_abline(intercept = 0, slope = 1) + 
-  facet_grid(from~beta, scales = "free") 
-p = p + scale_color_manual(values = c("#A9D18E", "#1F4E79"))
-ggsave("../img/graph/IDfixed_eff.png", p)
-
-```
diff --git a/reports/css/air.css b/reports/css/air.css
deleted file mode 100644
index b8e17479af3ca0940722be4c89d2579eab7014f8..0000000000000000000000000000000000000000
--- a/reports/css/air.css
+++ /dev/null
@@ -1,198 +0,0 @@
-/* Web font: @import must precede every other rule or browsers ignore it. */
-@import url(https://fonts.googleapis.com/css?family=Open+Sans:300italic,300);
-@media print {
-    *,
-    *:before,
-    *:after {
-      background: transparent !important;
-      color: #000 !important;
-      box-shadow: none !important;
-      text-shadow: none !important;
-    }
-  
-    a,
-    a:visited {
-      text-decoration: underline;
-    }
-  
-    a[href]:after {
-      content: " (" attr(href) ")";
-    }
-  
-    abbr[title]:after {
-      content: " (" attr(title) ")";
-    }
-  
-    a[href^="#"]:after,
-    a[href^="javascript:"]:after {
-      content: "";
-    }
-  
-    pre,
-    blockquote {
-      border: 1px solid #999;
-      page-break-inside: avoid;
-    }
-  
-    thead {
-      display: table-header-group;
-    }
-  
-    tr,
-    img {
-      page-break-inside: avoid;
-    }
-  
-    img {
-      max-width: 100% !important;
-    }
-  
-    p,
-    h2,
-    h3 {
-      orphans: 3;
-      widows: 3;
-    }
-  
-    h2,
-    h3 {
-      page-break-after: avoid;
-    }
-  }
-  
-  html {
-    font-size: 12px;
-  }
-  
-  @media screen and (min-width: 32rem) and (max-width: 48rem) {
-    html {
-      font-size: 15px;
-    }
-  }
-  
-  @media screen and (min-width: 48rem) {
-    html {
-      font-size: 16px;
-    }
-  }
-  
-  body {
-    line-height: 1.85;
-  }
-  
-  p,
-  .air-p {
-    font-size: 1rem;
-    margin-bottom: 1.3rem;
-  }
-  
-  h1,
-  .air-h1,
-  h2,
-  .air-h2,
-  h3,
-  .air-h3,
-  h4,
-  .air-h4 {
-    margin: 1.414rem 0 .5rem;
-    font-weight: inherit;
-    line-height: 1.42;
-  }
-  
-  h1,
-  .air-h1 {
-    text-align: center;
-    margin-top: 0;
-    font-size: 3.998rem;
-  }
-  
-  h2,
-  .air-h2 {
-    font-size: 2.827rem;
-  }
-  
-  h3,
-  .air-h3 {
-    font-size: 1.999rem;
-  }
-  
-  h4,
-  .air-h4 {
-    font-size: 1.414rem;
-    text-align: center;
-  }
-  
-  h5,
-  .air-h5 {
-    font-size: 1.121rem;
-  }
-  
-  h6,
-  .air-h6 {
-    font-size: .88rem;
-  }
-  
-  small,
-  .air-small {
-    font-size: .707em;
-  }
-  
-  /* https://github.com/mrmrs/fluidity */
-  
-  img,
-  canvas,
-  iframe,
-  video,
-  svg,
-  select,
-  textarea {
-    max-width: 100%;
-  }
-  
-  body {
-    color: #444;
-    font-family: 'Open Sans', Helvetica, sans-serif;
-    font-weight: 300;
-    margin: 6rem auto 1rem;
-    max-width: 48rem;
-    text-align: justify;
-  }
-  
-  img {
-    border-radius: 20%;
-  }
-  
-  a,
-  a:visited {
-    color: #3498db;
-  }
-  
-  a:hover,
-  a:focus,
-  a:active {
-    color: #2980b9;
-  }
-  
-  pre {
-    background-color: #fafafa;
-    padding: 1rem;
-    text-align: left;
-  }
-  
-  blockquote {
-    margin: 0;
-    border-left: 5px solid #7a7a7a;
-    font-style: italic;
-    padding: 1.33em;
-    text-align: left;
-  }
-  
-  ul,
-  ol,
-  li {
-    text-align: left;
-  }
-  
-  p {
-    color: #777;
-  }
\ No newline at end of file
diff --git a/reports/css/modest.css b/reports/css/modest.css
deleted file mode 100644
index 858fd7433631a98ba2c0f61241c5952a81b4d6e5..0000000000000000000000000000000000000000
--- a/reports/css/modest.css
+++ /dev/null
@@ -1,219 +0,0 @@
-@media print {
-    *,
-    *:before,
-    *:after {
-      background: transparent !important;
-      color: #000 !important;
-      box-shadow: none !important;
-      text-shadow: none !important;
-    }
-  
-    a,
-    a:visited {
-      text-decoration: underline;
-    }
-  
-    a[href]:after {
-      content: " (" attr(href) ")";
-    }
-  
-    abbr[title]:after {
-      content: " (" attr(title) ")";
-    }
-  
-    a[href^="#"]:after,
-    a[href^="javascript:"]:after {
-      content: "";
-    }
-  
-    pre,
-    blockquote {
-      border: 1px solid #999;
-      page-break-inside: avoid;
-    }
-  
-    thead {
-      display: table-header-group;
-    }
-  
-    tr,
-    img {
-      page-break-inside: avoid;
-    }
-  
-    img {
-      max-width: 100% !important;
-    }
-  
-    p,
-    h2,
-    h3 {
-      orphans: 3;
-      widows: 3;
-    }
-  
-    h2,
-    h3 {
-      page-break-after: avoid;
-    }
-  }
-  
-  pre,
-  code {
-    font-family: Menlo, Monaco, "Courier New", monospace;
-  }
-  
-  pre {
-    padding: .5rem;
-    line-height: 1.25;
-    overflow-x: scroll;
-  }
-  
-  a,
-  a:visited {
-    color: #3498db;
-  }
-  
-  a:hover,
-  a:focus,
-  a:active {
-    color: #2980b9;
-  }
-  
-  .modest-no-decoration {
-    text-decoration: none;
-  }
-  
-  html {
-    font-size: 12px;
-  }
-  
-  @media screen and (min-width: 32rem) and (max-width: 48rem) {
-    html {
-      font-size: 15px;
-    }
-  }
-  
-  @media screen and (min-width: 48rem) {
-    html {
-      font-size: 16px;
-    }
-  }
-  
-  body {
-    line-height: 1.85;
-  }
-  
-  p,
-  .modest-p {
-    font-size: 1rem;
-    margin-bottom: 1.3rem;
-  }
-  
-  h1,
-  .modest-h1,
-  h2,
-  .modest-h2,
-  h3,
-  .modest-h3,
-  h4,
-  .modest-h4 {
-    margin: 1.414rem 0 .5rem;
-    font-weight: inherit;
-    line-height: 1.42;
-  }
-  
-  h1,
-  .modest-h1 {
-    margin-top: 0;
-    font-size: 3.998rem;
-  }
-  
-  h2,
-  .modest-h2 {
-    font-size: 2.827rem;
-  }
-  
-  h3,
-  .modest-h3 {
-    font-size: 1.999rem;
-  }
-  
-  h4,
-  .modest-h4 {
-    font-size: 1.414rem;
-  }
-  
-  h5,
-  .modest-h5 {
-    font-size: 1.121rem;
-  }
-  
-  h6,
-  .modest-h6 {
-    font-size: .88rem;
-  }
-  
-  small,
-  .modest-small {
-    font-size: .707em;
-  }
-  
-  /* https://github.com/mrmrs/fluidity */
-  
-  img,
-  canvas,
-  iframe,
-  video,
-  svg,
-  select,
-  textarea {
-    max-width: 100%;
-  }
-  
-  @import url(http://fonts.googleapis.com/css?family=Open+Sans+Condensed:300,300italic,700); /* NOTE(review): this @import follows ordinary style rules, so browsers ignore it; it has to be at the top of the stylesheet for the font to load */
-  
-  @import url(http://fonts.googleapis.com/css?family=Arimo:700,700italic); /* NOTE(review): same issue as the @import just above; must precede all style rules */
-  
-  html {
-    font-size: 18px;
-    max-width: 100%;
-  }
-  
-  body {
-    color: #444;
-    font-family: 'Open Sans Condensed', sans-serif;
-    font-weight: 300;
-    margin: 0 auto;
-    max-width: 48rem;
-    line-height: 1.45;
-    padding: .25rem;
-  }
-  
-  h1,
-  h2,
-  h3,
-  h4,
-  h5,
-  h6 {
-    font-family: Arimo, Helvetica, sans-serif;
-  }
-  
-  h1,
-  h2,
-  h3 {
-    border-bottom: 2px solid #fafafa;
-    margin-bottom: 1.15rem;
-    padding-bottom: .5rem;
-    text-align: center;
-  }
-  
-  blockquote {
-    border-left: 8px solid #fafafa;
-    padding: 1rem;
-  }
-  
-  pre,
-  code {
-    background-color: #fafafa;
-  }
\ No newline at end of file
diff --git a/reports/css/retro.css b/reports/css/retro.css
deleted file mode 100644
index 8a26db723a9e601f4ef432589e95c4d8dd6747f2..0000000000000000000000000000000000000000
--- a/reports/css/retro.css
+++ /dev/null
@@ -1,202 +0,0 @@
-
-
-pre,
-code {
-  font-family: Menlo, Monaco, "Courier New", monospace;
-}
-
-pre {
-  padding: .5rem;
-  line-height: 1.25;
-  overflow-x: scroll;
-}
-
-@media print {
-  *,
-  *:before,
-  *:after {
-    background: transparent !important;
-    color: #000 !important;
-    box-shadow: none !important;
-    text-shadow: none !important;
-  }
-
-  a,
-  a:visited {
-    text-decoration: underline;
-  }
-
-  a[href]:after {
-    content: " (" attr(href) ")";
-  }
-
-  abbr[title]:after {
-    content: " (" attr(title) ")";
-  }
-
-  a[href^="#"]:after,
-  a[href^="javascript:"]:after {
-    content: "";
-  }
-
-  pre,
-  blockquote {
-    border: 1px solid #999;
-    page-break-inside: avoid;
-  }
-
-  thead {
-    display: table-header-group;
-  }
-
-  tr,
-  img {
-    page-break-inside: avoid;
-  }
-
-  img {
-    max-width: 100% !important;
-  }
-
-  p,
-  h2,
-  h3 {
-    orphans: 3;
-    widows: 3;
-  }
-
-  h2,
-  h3 {
-    page-break-after: avoid;
-  }
-}
-
-a,
-a:visited {
-  color: #01ff70;
-}
-
-a:hover,
-a:focus,
-a:active {
-  color: #2ecc40;
-}
-
-.retro-no-decoration {
-  text-decoration: none;
-}
-
-html {
-  font-size: 12px;
-}
-
-@media screen and (min-width: 32rem) and (max-width: 48rem) {
-  html {
-    font-size: 15px;
-  }
-}
-
-@media screen and (min-width: 48rem) {
-  html {
-    font-size: 16px;
-  }
-}
-
-body {
-  line-height: 1.85;
-}
-
-p,
-.retro-p {
-  font-size: 1rem;
-  margin-bottom: 1.3rem;
-}
-
-h1,
-.retro-h1,
-h2,
-.retro-h2,
-h3,
-.retro-h3,
-h4,
-.retro-h4 {
-  margin: 1.414rem 0 .5rem;
-  font-weight: inherit;
-  line-height: 1.42;
-}
-
-h1,
-.retro-h1 {
-  margin-top: 0;
-  font-size: 3.998rem;
-}
-
-h2,
-.retro-h2 {
-  font-size: 2.827rem;
-}
-
-h3,
-.retro-h3 {
-  font-size: 1.999rem;
-}
-
-h4,
-.retro-h4 {
-  font-size: 1.414rem;
-}
-
-h5,
-.retro-h5 {
-  font-size: 1.121rem;
-}
-
-h6,
-.retro-h6 {
-  font-size: .88rem;
-}
-
-small,
-.retro-small {
-  font-size: .707em;
-}
-
-/* https://github.com/mrmrs/fluidity */
-
-img,
-canvas,
-iframe,
-video,
-svg,
-select,
-textarea {
-  max-width: 100%;
-}
-
-html,
-body {
-  background-color: #222;
-  min-height: 100%;
-}
-
-html {
-  font-size: 18px;
-}
-
-body {
-  color: #fafafa;
-  font-family: "Courier New";
-  line-height: 1.45;
-  margin: 6rem auto 1rem;
-  max-width: 48rem;
-  padding: .25rem;
-}
-
-pre {
-  background-color: #333;
-}
-
-blockquote {
-  border-left: 3px solid #01ff70;
-  padding-left: 1rem;
-}
\ No newline at end of file
diff --git a/reports/css/splendor.css b/reports/css/splendor.css
deleted file mode 100644
index 4121b51a2bca4da9b80ea4544b8fc63ce5f59f9e..0000000000000000000000000000000000000000
--- a/reports/css/splendor.css
+++ /dev/null
@@ -1,225 +0,0 @@
-@media print {
-    *,
-    *:before,
-    *:after {
-      background: transparent !important;
-      color: #000 !important;
-      box-shadow: none !important;
-      text-shadow: none !important;
-    }
-  
-    a,
-    a:visited {
-      text-decoration: underline;
-    }
-  
-    a[href]:after {
-      content: " (" attr(href) ")";
-    }
-  
-    abbr[title]:after {
-      content: " (" attr(title) ")";
-    }
-  
-    a[href^="#"]:after,
-    a[href^="javascript:"]:after {
-      content: "";
-    }
-  
-    pre,
-    blockquote {
-      border: 1px solid #999;
-      page-break-inside: avoid;
-    }
-  
-    thead {
-      display: table-header-group;
-    }
-  
-    tr,
-    img {
-      page-break-inside: avoid;
-    }
-  
-    img {
-      max-width: 100% !important;
-    }
-  
-    p,
-    h2,
-    h3 {
-      orphans: 3;
-      widows: 3;
-    }
-  
-    h2,
-    h3 {
-      page-break-after: avoid;
-    }
-  }
-  
-  html {
-    font-size: 12px;
-  }
-  
-  @media screen and (min-width: 32rem) and (max-width: 48rem) {
-    html {
-      font-size: 15px;
-    }
-  }
-  
-  @media screen and (min-width: 48rem) {
-    html {
-      font-size: 16px;
-    }
-  }
-  
-  body {
-    line-height: 1.85;
-  }
-  
-  p,
-  .splendor-p {
-    font-size: 1rem;
-    margin-bottom: 1.3rem;
-  }
-  
-  h1,
-  .splendor-h1,
-  h2,
-  .splendor-h2,
-  h3,
-  .splendor-h3,
-  h4,
-  .splendor-h4 {
-    margin: 1.414rem 0 .5rem;
-    font-weight: inherit;
-    line-height: 1.42;
-  }
-  
-  h1,
-  .splendor-h1 {
-    margin-top: 0;
-    font-size: 3.998rem;
-  }
-  
-  h2,
-  .splendor-h2 {
-    font-size: 2.827rem;
-  }
-  
-  h3,
-  .splendor-h3 {
-    font-size: 1.999rem;
-  }
-  
-  h4,
-  .splendor-h4 {
-    font-size: 1.414rem;
-  }
-  
-  h5,
-  .splendor-h5 {
-    font-size: 1.121rem;
-  }
-  
-  h6,
-  .splendor-h6 {
-    font-size: .88rem;
-  }
-  
-  small,
-  .splendor-small {
-    font-size: .707em;
-  }
-  
-  /* https://github.com/mrmrs/fluidity */
-  
-  img,
-  canvas,
-  iframe,
-  video,
-  svg,
-  select,
-  textarea {
-    max-width: 100%;
-  }
-  
-  @import url(http://fonts.googleapis.com/css?family=Merriweather:300italic,300); /* NOTE(review): this @import follows ordinary style rules, so browsers ignore it; it has to be the first rule of the stylesheet for the font to load */
-  
-  html {
-    font-size: 18px;
-    max-width: 100%;
-  }
-  
-  body {
-    color: #444;
-    font-family: 'Merriweather', Georgia, serif;
-    margin: 0;
-    max-width: 100%;
-  }
-  
-  /* === A bit of a gross hack so we can have bleeding divs/blockquotes. */
-  
-  p,
-  *:not(div):not(img):not(body):not(html):not(li):not(blockquote):not(p) {
-    margin: 1rem auto 1rem;
-    max-width: 36rem;
-    padding: .25rem;
-  }
-  
-  div {
-    width: 100%;
-  }
-  
-  div img {
-    width: 100%;
-  }
-  
-  blockquote p {
-    font-size: 1.5rem;
-    font-style: italic;
-    margin: 1rem auto 1rem;
-    max-width: 48rem;
-  }
-  
-  li {
-    margin-left: 2rem;
-  }
-  
-  /* Counteract the specificity of the gross *:not() chain. */
-  
-  h1 {
-    padding: 4rem 0 !important;
-  }
-  
-  /*  === End gross hack */
-  
-  p {
-    color: #555;
-    height: auto;
-    line-height: 1.45;
-  }
-  
-  pre,
-  code {
-    font-family: Menlo, Monaco, "Courier New", monospace;
-  }
-  
-  pre {
-    background-color: #fafafa;
-    font-size: .8rem;
-    overflow-x: scroll;
-    padding: 1.125em;
-  }
-  
-  a,
-  a:visited {
-    color: #3498db;
-  }
-  
-  a:hover,
-  a:focus,
-  a:active {
-    color: #2980b9;
-  }
\ No newline at end of file
diff --git a/results/v1/2022-04-22_batch_effect.Rmd b/results/v1/2022-04-22_batch_effect.Rmd
deleted file mode 100644
index c596b2c5210f6c148ac97b30ae8ed3f04ef93083..0000000000000000000000000000000000000000
--- a/results/v1/2022-04-22_batch_effect.Rmd
+++ /dev/null
@@ -1,432 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-04-21'
-output:   
-  html_document:
- 
-
-css: 
- - css/template.css
- - css/footer.css
-
----
-
-
-## Introduction
-
-
-In the living world, a phenotype is understood as the combination of a genotype effect, an environment effect and an interaction between genotype and environment (G&E).
-$$Phenotype = Genotype + Environment + Genotype.Environment$$
-The strength of each component (G, E, G&E) can be quantified by a coefficient $\beta$.
-Our expression then becomes:
-$$Phenotype = \beta_{G} * Genotype + \beta_{E}*Environment +  \beta_{G*E} * Genotype.Environment + \epsilon$$
-Notice that $\beta$ is specific to each component. We also introduced $\epsilon$ above: it is the residual of the model and can be seen as the difference between the observed values and the values predicted by the model.
-
-Gene expression can also be considered a phenotype. <br> 
-Accordingly, quantifying $\beta_{G}$, $\beta_{E}$ and  $\beta_{G*E}$ for a given gene in a given condition makes it possible to compare the strength of each effect across conditions.
-
-That's the purpose of Htrsim !
-
-## Htrsim
-
-##### Model
-
-To this end, Htrsim is based on a model. <br> 
-Because of its ease of use, this model is handled by DESEQ2.
-The counts $K_{ij}$ for gene i, sample j are then modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$.
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \alpha_i)
-$$
-$$
-\mu_{ij} = s_jq_{ij}
-$$
-$$
-log_2(q_{ij}) = x_j*\beta_i
-$$
-The fitted mean is composed of a sample-specific size factor $s_j$ and a parameter $q_{ij}$ proportional to the expected true concentration of fragments for sample j.
-The coefficients $\beta_i$ give the log2 fold changes for gene i for each column of the model matrix X. The sample-specific size factors can be replaced by gene-specific normalization factors for each sample using normalizationFactors.
-
-
-
-According to the DESEQ2 GLM and our purpose, we can write: 
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-Using this generalized linear model, we wish to estimate $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$ for a given gene i, in a given condition j. Achieving this would allow us to quantify the strength of each effect (G, E, G&E) for a given gene i, in a given condition j.
-
-
-##### Required
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-```
-
-##### Workflow
-
-Using public libraries (from BioProject PRJNA675209b - chinese paper) and a standard RNA-seq pipeline, we built actual RNA-seq counts per gene for 3 genotypes and 2 environments.<br>
-<br>
-Using htrsim (in particular DESEQ2) and this count table, we are able to estimate $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$.
-
-
-a. Input 
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Import the count table shipped with htrsim and use gene_id as row names
-fn <- system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id) ## drop the gene_id column (now stored as row names)
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ## drop the gene_name column
-
-## Import the experimental design of the BioProject (semicolon-separated file)
-fn <- system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = TRUE, sep = ';')
-bioDesign$batch <- 1:12 ## batch identifiers 1..12 (presumably one per sample; confirm against the design file)
-```
-
-b. Launch DESEQ2
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign, batch = TRUE)
-```
-
-
-DESEQ returns a dds object which contains many, many things ... <br>
-In particular it contains the $\beta$ coefficients. <br>
-<br>
-You can access the beta coefficients using:
-
-```{r}
-dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-```
-
-c. $\mu_{ij}$ 
-
-Following our model, we can estimate $log_2(q_{ij})$ from the $\beta$ coefficients inferred by DESEQ2:
-
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-Then, $\mu_{ij}$ can be estimated:
-
-$$
-\mu_{ij} = s_j * 2^{log_2(q_{ij})}
-$$
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Model matrix per samples
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-
-## Input estimation
-estim_mu = estim.mu(dds, mm)
-mu.input = estim_mu$mu
-```
-
-d. $K_{ij}$
-
-As defined by our model, counts $K_{ij}$ for gene i, sample j are modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$. 
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \alpha_i)
-$$
-The gene-specific dispersion parameter $\alpha_i$ is also stored in the dds object.<br>
-You can access $\alpha_i$ using:
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-alpha.input = estim.alpha(dds)
-```
-
-Knowing $\alpha_i$ and $\mu_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b - chinese paper,
-we are now able to simulate $K_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b.
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# Setup simulation
-input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
-
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-```
-
-
-```{r, error=TRUE}
-sample = htrs %>% select(where(is.numeric)) %>% colnames()
-genotype = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[1]) %>% unlist()
-
-env = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[2]) %>% unlist()
-
-designSimu = cbind(sample, env, genotype) %>% data.frame()
-
-## RESHAPE HTRS
-htrs.reshape = htrs
-rownames(htrs.reshape) = htrs.reshape$gene_id
-htrs.reshape = htrs.reshape %>% select(-gene_id)
-########### LAUNCH DESEQ #############
-## Design model - specify reference
-designSimu$genotype <- factor(x = designSimu$genotype,levels = c('WT','Msn2D', 'Msn4D'))
-designSimu$env <- factor(x = designSimu$env,levels = c('control', 'KCl'))
-
-
-k_ij.simulation = htrs.reshape
-
-## DESEQ standard analysis
-dds_simu = DESeq2::DESeqDataSetFromMatrix( countData = k_ij.simulation , 
-                                           colData = designSimu , 
-                                           design = ~ genotype + env + genotype:env)
-
-max(k_ij.simulation)
-.Machine$integer.max
-
-```
-
-##### Evaluation
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu <- htrs %>% select(-gene_id) %>% flatten() %>% unlist() # simulated counts as one flat vector
-k_ij.actual <- tabl_cnts %>% flatten() %>% unlist() # counts from the input table as one flat vector
-# Stack both vectors in long format; melting the matrix yields a Var2 column holding the source name.
-df <- cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin <- df$Var2
-df <- df %>% select(-Var2)
-max_k_ij.simu <- df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual <- df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-# Density of counts per origin with a vertical line at each maximum; geom_density() has no `bins` argument, so none is passed.
-ggplot(df, aes(x = k_ij, fill = origin)) + geom_density(alpha = 0.5) +
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D") +
-  geom_vline(xintercept = max_k_ij.simu, col = "#00BFC4") +
-  scale_x_log10()
-```
-
-Comment: the simulated $K_{ij}$ are abnormally huge!
-Comment: the simulated $K_{ij}$ are slightly different from the actual $K_{ij}$!
-
-
-## Why so many differences?
-
-b. $\epsilon$
-
-In our model, we define $\epsilon_{ij}$ as follows:
-$$
-\epsilon_{ij} \sim {\sf N}(0 ; deviance_i)
-$$
-
-Let's see the distribution of $deviance_{i}$.
-
-
-```{r warning=FALSE}
-#estim_mu$beta.matrix
-
-deviance_i = estim_mu$deviance.sqrt[!is.na(estim_mu$deviance.sqrt)]^2
-#epsilon_ij <- mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt ))  %>% data.frame() %>% flatten() %>% unlist()
-
-
-# Histogram logarithmic y axis
-ggplot(data.frame(deviance_i), aes(deviance_i)) +               
-  geom_histogram(bins = 100) #+ scale_x_log10()
-
-
-```
-
-
-The deviance is also inferred by DESEQ while fitting its model.
-Most inferred deviance values lie between 100 and 200.
-
-
-
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-```{r warning=FALSE}
-#estim_mu$beta.matrix
-
-deviance_i.sqrt = estim_mu$deviance.sqrt[!is.na(estim_mu$deviance.sqrt)]
-epsilon_ij <- mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt ))  %>% data.frame() %>% flatten() %>% unlist()
-#epsilon_ij <- mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = 0 ))  %>% data.frame() %>% flatten() %>% unlist()
-
-# Histogram logarithmic y axis
-ggplot(data.frame(epsilon_ij), aes(epsilon_ij)) +               
-  geom_histogram(bins = 100) #+ scale_x_log10()
-
-
-```
-
-
-Comment: Some $\epsilon_{ij}$ are huge!
-Recall: $\epsilon$ can be seen as the difference between observed values and values predicted by the model.
-
-A wide range of $\epsilon$ values means that the model does not fit the observed data well.
-
-It means that even if the $\beta$ coefficients are well estimated, $log_2(q_{ij})$ will vary around them over a wide range of values (+/- 40).
-
-
-
-
-```{r warning=FALSE}
-
-beta.dtf =  estim_mu$beta.matrix %>% data.frame()
-beta.dtf$B6 = dds.mcols$batch
-beta.dtf.long = beta.dtf %>% reshape2::melt(., value.name = "beta", variable.name = "origin")
-
-ggplot(beta.dtf.long, aes(x = beta )) +  geom_density(bins = 100, alpha = 0.5, fill = 'grey') + facet_grid(~origin, scales = "free_x")
-
-```
-
-```{r warning=FALSE}
-beta.dtf =  estim_mu$beta.matrix %>% data.frame()
-beta.dtf$B6 = dds.mcols$batch
-beta.dtf.long = beta.dtf %>% reshape2::melt(., value.name = "beta", variable.name = "origin")
-
-
-
-## Standard Error
-B0 = estim_mu$dds.mcols$SE_Intercept
-B1 = estim_mu$dds.mcols$SE_genotype_msn2D_vs_wt
-B2 <- estim_mu$dds.mcols$SE_genotype_msn4D_vs_wt
-B3 <- estim_mu$dds.mcols$SE_env_kcl_vs_control
-B4 <- estim_mu$dds.mcols$SE_genotypemsn2D.envkcl
-B5 <- estim_mu$dds.mcols$SE_genotypemsn4D.envkcl
-B6 <- estim_mu$dds.mcols$SE_batch 
-
-SE_B.dtf <- cbind(B0, B1, B2, B3, B4, B5, B6) %>% data.frame()
-SE_B.dtf.long = SE_B.dtf %>% reshape2::melt(., value.name = "SE_beta", variable.name = "origin")
-
-ggplot(SE_B.dtf.long, aes(x = SE_beta, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + facet_grid(~origin)
-```
-
-```{r warning=FALSE}
-bind_dtf<- cbind(SE_B.dtf.long, beta.dtf.long %>% select(-origin))
-ggplot(bind_dtf, aes(x = beta, y= SE_beta, fill= origin )) +  geom_point(alpha = 0.1) + facet_grid(~origin)
-
-
-#new <- bind_dtf %>% mutate(annot = ifelse(origin == "B4 | B5" && SE_beta > 6 , TRUE, FALSE ))
-#new <- bind_dtf %>% tail
-#new %>% filter(beta == "B4")
-```
-
-
-```{r warning=FALSE}
-#dim(htrs)
-
-#new <- bind_dtf %>% mutate(annot = ifelse(((origin == "B4") | (origin == "B5")) & (SE_beta > 6) , TRUE, FALSE ))
-### WARNING 
-#new %>% dcast(., annot ~ origin)
-
-
-SE_threshold = 6
-SE_B.dtf.annot = SE_B.dtf %>%  mutate(annot = ifelse((B4 > SE_threshold) | (B5 > SE_threshold) , TRUE, FALSE ))
-SE_B.dtf.annot %>% group_by(annot) %>% tally()
-SE_B.dtf.annot.long = SE_B.dtf.annot %>% reshape2::melt(., value.name = "SE_beta", variable.name = "origin")
-
-
-bind_dtf.annot<- cbind(SE_B.dtf.annot.long, beta.dtf.long %>% select(-origin))
-bind_dtf.annot = bind_dtf.annot %>% filter(!is.na(annot))
-ggplot(bind_dtf.annot, aes(x = beta, y= SE_beta, col = annot )) +  geom_point(alpha = 0.1, na.rm = T) + facet_grid(~origin)
-
-
-```
-
-
-## Beta0 vs SE & deviance
-
-
-
-```{r}
-
-B0 <- beta.dtf$B0
-
-# L is the number of columns of SE_B.dtf; it is the repeat count used below.
-L <- SE_B.dtf %>% colnames() %>% length()
-
-# Repeat each vector L times end to end so it lines up with the long table (presumably one value per gene; confirm).
-B0_vector <- replicate(L, B0) %>% as.data.frame() %>% flatten() %>% unlist()
-deviance.sqrt_vec <- replicate(L, estim_mu$deviance.sqrt) %>% as.data.frame() %>% flatten() %>% unlist()
-SE_B.dtf.annot.long$B0 <- B0_vector
-SE_B.dtf.annot.long$deviance.sqrt <- deviance.sqrt_vec # use the expanded vector instead of relying on silent recycling
-# Standard error of every coefficient against the intercept estimate B0, one facet per coefficient.
-ggplot(SE_B.dtf.annot.long, aes(x = B0, y = SE_beta, col = annot)) + geom_point(alpha = 0.1, na.rm = TRUE) + facet_grid(~origin)
-
-# Keep only the rows whose origin is "B0" and whose annotation is not NA.
-SE_B.dtf.annot.long_B0 <- SE_B.dtf.annot.long %>% filter(origin == "B0") %>% filter(!is.na(annot))
-
-ggplot(SE_B.dtf.annot.long_B0, aes(x = B0, y = 2^deviance.sqrt, col = annot)) + geom_point(alpha = 0.1, na.rm = TRUE) + scale_y_log10()
-
-
-```
-
-
-
-
-```{undefined eval=FALSE, include=FALSE}
-hist(dds.mcols$dispFit)
-hist(log_qij, )
-hist(log10(alpha.input$alpha))
-max(dds.mcols$dispersion, na.rm = T)
-hist(log(dds.mcols$dispersion))
-max(dds.mcols$deviance, na.rm = T)
-hist(dds.mcols$deviance)
-max(dds.mcols$dispFit, na.rm = T)
-DESeq2::design(dds)
-
-fitted.common.scale = t(t(dds@assays@data$mu)/dds$sizeFactor)
-
-hist(t(t(dds@assays@data$mu)/dds$sizeFactor))
-hist(residual_deseq)
-max(residual_deseq, na.rm = T)
-residual_deseq = (DESeq2::counts(dds, normalized=TRUE) - fitted.common.scale )
-
-
-w =DESeq2::nbinomWaldTest(dds)
-w@dispersionFunction()
-#S4Vectors::assays(dds)[["mu"]]
-
-
-vst = DESeq2::varianceStabilizingTransformation(dds)
-vst@assays@data$
-```
-
-
-
-https://support.bioconductor.org/p/123305/
-https://support.bioconductor.org/p/60567/
-http://bioconductor.org/packages/release/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.pdf
-https://bioinformatics-core-shared-training.github.io/RNAseq-R/slides/LinearModels.pdf
-
-
-```{r eval=FALSE, include=FALSE}
-#install.packages("RUVSeq")
-#BiocManager::install("RUVSeq")
-library(RUVSeq)
-
-mm
-y <- DGEList(counts=counts(tabl_cnts), group=x)
-y <- calcNormFactors(y, method="upperquartile")
-y <- estimateGLMCommonDisp(y, design)
-y <- estimateGLMTagwiseDisp(y, design)
-fit <- glmFit(y, design)
-res <- residuals(fit, type="deviance")
-
-
-```
-
-
-
-
diff --git a/results/v1/2022-04-22_htrsim.Rmd b/results/v1/2022-04-22_htrsim.Rmd
deleted file mode 100644
index 10be5f9a1d13228d9f56f1ed3482ede05421459a..0000000000000000000000000000000000000000
--- a/results/v1/2022-04-22_htrsim.Rmd
+++ /dev/null
@@ -1,428 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-04-21'
-output:   
-  html_document:
- 
-
-css: 
- - css/template.css
- - css/footer.css
-
----
-
-
-## Introduction
-
-
-In the living world, a phenotype is understood as the combination of a genotype effect, an environment effect and an interaction between genotype and environment (G&E).
-$$Phenotype = Genotype + Environment + Genotype.Environment$$
-The strength of each component (G, E, G&E) can be quantified by a coefficient $\beta$.
-Our expression then becomes:
-$$Phenotype = \beta_{G} * Genotype + \beta_{E}*Environment +  \beta_{G*E} * Genotype.Environment + \epsilon$$
-Notice that $\beta$ is specific to each component. We also introduced $\epsilon$ above: it is the residual of the model and can be seen as the difference between the observed values and the values predicted by the model.
-
-Gene expression can also be considered a phenotype. <br> 
-Accordingly, quantifying $\beta_{G}$, $\beta_{E}$ and  $\beta_{G*E}$ for a given gene in a given condition makes it possible to compare the strength of each effect across conditions.
-
-That's the purpose of Htrsim !
-
-## Htrsim
-
-##### Model
-
-To this end, Htrsim is based on a model. <br> 
-Because of its ease of use, this model is handled by DESEQ2.
-The counts $K_{ij}$ for gene i, sample j are then modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$.
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \alpha_i)
-$$
-$$
-\mu_{ij} = s_jq_{ij}
-$$
-$$
-log_2(q_{ij}) = x_j*\beta_i
-$$
-The fitted mean is composed of a sample-specific size factor $s_j$ and a parameter $q_{ij}$ proportional to the expected true concentration of fragments for sample j.
-The coefficients $\beta_i$ give the log2 fold changes for gene i for each column of the model matrix X. The sample-specific size factors can be replaced by gene-specific normalization factors for each sample using normalizationFactors.
-
-
-
-According to the DESEQ2 GLM and our purpose, we can write: 
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-Using this generalized linear model, we wish to estimate $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$ for a given gene i, in a given condition j. Achieving this would allow us to quantify the strength of each effect (G, E, G&E) for a given gene i, in a given condition j.
-
-
-##### Required
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-```
-
-##### Workflow
-
-Using public libraries (from BioProject PRJNA675209b - chinese paper) and a standard RNA-seq pipeline, we built actual RNA-seq counts per gene for 3 genotypes and 2 environments.<br>
-<br>
-Using htrsim (in particular DESEQ2) and this count table, we are able to estimate $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$.
-
-
-a. Input 
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Import the count table shipped with htrsim and use gene_id as row names
-fn <- system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id) ## drop the gene_id column (now stored as row names)
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ## drop the gene_name column
-
-## Import the experimental design of the BioProject (semicolon-separated file)
-fn <- system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = TRUE, sep = ';')
-
-```
-
-b. Launch DESEQ2
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-```
-
-
-DESEQ returns a dds object which contains many, many things ... <br>
-In particular it contains the $\beta$ coefficients. <br>
-<br>
-You can access the beta coefficients using:
-
-```{r}
-dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-```
-
-c. $\mu_{ij}$ 
-
-Following our model, we can estimate $log_2(q_{ij})$ from the $\beta$ coefficients inferred by DESEQ2:
-
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-Then, $\mu_{ij}$ can be estimated:
-
-$$
-\mu_{ij} = s_j * 2^{log_2(q_{ij})}
-$$
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Model matrix per samples
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-
-## Input estimation
-estim_mu = estim.mu(dds, mm)
-mu.input = estim_mu$mu
-```
-
-d. $K_{ij}$
-
-As defined by our model, counts $K_{ij}$ for gene i, sample j are modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$. 
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \alpha_i)
-$$
-The gene-specific dispersion parameter $\alpha_i$ is also stored in the dds object.<br>
-You can access $\alpha_i$ using:
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-alpha.input = estim.alpha(dds)
-```
-
-Knowing $\alpha_i$ and $\mu_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b - chinese paper,
-we are now able to simulate $K_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b.
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# Setup simulation
-input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
-
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-```
-
-
-```{r, error=TRUE}
-sample = htrs %>% select(where(is.numeric)) %>% colnames()
-genotype = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[1]) %>% unlist()
-
-env = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[2]) %>% unlist()
-
-designSimu = cbind(sample, env, genotype) %>% data.frame()
-
-## RESHAPE HTRS
-htrs.reshape = htrs
-rownames(htrs.reshape) = htrs.reshape$gene_id
-htrs.reshape = htrs.reshape %>% select(-gene_id)
-########### LAUNCH DESEQ #############
-## Design model - specify reference
-designSimu$genotype <- factor(x = designSimu$genotype,levels = c('WT','Msn2D', 'Msn4D'))
-designSimu$env <- factor(x = designSimu$env,levels = c('control', 'KCl'))
-
-
-k_ij.simulation = htrs.reshape
-
-## DESEQ standard analysis
-dds_simu = DESeq2::DESeqDataSetFromMatrix( countData = k_ij.simulation , 
-                                           colData = designSimu , 
-                                           design = ~ genotype + env + genotype:env)
-
-max(k_ij.simulation)
-.Machine$integer.max
-
-```
-
-##### Evaluation
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu <- htrs %>% select(-gene_id) %>% flatten() %>% unlist() # simulated counts as one flat vector
-k_ij.actual <- tabl_cnts %>% flatten() %>% unlist() # counts from the input table as one flat vector
-# Stack both vectors in long format; melting the matrix yields a Var2 column holding the source name.
-df <- cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin <- df$Var2
-df <- df %>% select(-Var2)
-max_k_ij.simu <- df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual <- df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-# Density of counts per origin with a vertical line at each maximum; geom_density() has no `bins` argument, so none is passed.
-ggplot(df, aes(x = k_ij, fill = origin)) + geom_density(alpha = 0.5) +
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D") +
-  geom_vline(xintercept = max_k_ij.simu, col = "#00BFC4") +
-  scale_x_log10()
-```
-
-Comment: the simulated $K_{ij}$ are abnormally huge!
-Comment: the simulated $K_{ij}$ are slightly different from the actual $K_{ij}$!
-
-
-## Why such large differences?
-
-b. $\epsilon$
-
-In our model, $\epsilon_{ij}$ is defined as follows:
-$$
-\epsilon_{ij} \sim {\sf N}(0 ; deviance_i)
-$$
-
-Let's see the distribution of $deviance_{i}$.
-
-
-```{r warning=FALSE}
-#estim_mu$beta.matrix
-
-deviance_i = estim_mu$deviance.sqrt[!is.na(estim_mu$deviance.sqrt)]^2
-#epsilon_ij <- mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt ))  %>% data.frame() %>% flatten() %>% unlist()
-
-
-# Histogram logarithmic y axis
-ggplot(data.frame(deviance_i), aes(deviance_i)) +               
-  geom_histogram(bins = 100) #+ scale_x_log10()
-
-
-```
-
-
-The deviance is also inferred by DESeq2 while fitting its model.
-Most inferred deviance values lie between 100 and 200.  
-
-
-
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-```{r warning=FALSE}
-#estim_mu$beta.matrix
-
-deviance_i.sqrt = estim_mu$deviance.sqrt[!is.na(estim_mu$deviance.sqrt)]
-epsilon_ij <- mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt ))  %>% data.frame() %>% flatten() %>% unlist()
-#epsilon_ij <- mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = 0 ))  %>% data.frame() %>% flatten() %>% unlist()
-
-# Histogram logarithmic y axis
-ggplot(data.frame(epsilon_ij), aes(epsilon_ij)) +               
-  geom_histogram(bins = 100) #+ scale_x_log10()
-
-
-```
-
-
-Comment: Some $\epsilon_{ij}$ are huge!
-Recall: $\epsilon$ can be seen as the difference between the observed values and the values predicted by the model.
-
-A wide range of $\epsilon$ values means that the model does not fit the observed data well.
-
-It means that even if the $\beta$ coefficients are well estimated, $log_2(q_{ij})$ will vary around the value they predict over a wide range (+/- 40).
-
-
-
-
-```{r warning=FALSE}
-beta.dtf = estim_mu$beta.matrix %>% data.frame()
-beta.dtf.long = beta.dtf %>% reshape2::melt(., value.name = "beta", variable.name = "origin")
-
-ggplot(beta.dtf.long, aes(x = beta )) +  geom_density(bins = 100, alpha = 0.5, fill = 'grey') + facet_grid(~origin, scales = "free_x")
-
-```
-```{r warning=FALSE}
-beta.dtf = estim_mu$beta.matrix %>% data.frame()
-beta.dtf.long = beta.dtf %>% reshape2::melt(., value.name = "beta", variable.name = "origin")
-
-
-
-## Standard Error
-B0 = estim_mu$dds.mcols$SE_Intercept
-B1 = estim_mu$dds.mcols$SE_genotype_msn2D_vs_wt
-B2 <- estim_mu$dds.mcols$SE_genotype_msn4D_vs_wt
-B3 <- estim_mu$dds.mcols$SE_env_kcl_vs_control
-B4 <- estim_mu$dds.mcols$SE_genotypemsn2D.envkcl
-B5 <- estim_mu$dds.mcols$SE_genotypemsn4D.envkcl
-
-
-SE_B.dtf <- cbind(B0, B1, B2, B3, B4, B5) %>% data.frame()
-SE_B.dtf.long = SE_B.dtf %>% reshape2::melt(., value.name = "SE_beta", variable.name = "origin")
-
-ggplot(SE_B.dtf.long, aes(x = SE_beta, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + facet_grid(~origin)
-```
-
-```{r warning=FALSE}
-bind_dtf<- cbind(SE_B.dtf.long, beta.dtf.long %>% select(-origin))
-ggplot(bind_dtf, aes(x = beta, y= SE_beta, fill= origin )) +  geom_point(alpha = 0.1) + facet_grid(~origin)
-
-
-#new <- bind_dtf %>% mutate(annot = ifelse(origin == "B4 | B5" && SE_beta > 6 , TRUE, FALSE ))
-#new <- bind_dtf %>% tail
-#new %>% filter(beta == "B4")
-```
-
-
-```{r warning=FALSE}
-#dim(htrs)
-
-#new <- bind_dtf %>% mutate(annot = ifelse(((origin == "B4") | (origin == "B5")) & (SE_beta > 6) , TRUE, FALSE ))
-### WARNING 
-#new %>% dcast(., annot ~ origin)
-
-
-SE_threshold = 6
-SE_B.dtf.annot = SE_B.dtf %>%  mutate(annot = ifelse((B4 > SE_threshold) | (B5 > SE_threshold) , TRUE, FALSE ))
-SE_B.dtf.annot %>% group_by(annot) %>% tally()
-SE_B.dtf.annot.long = SE_B.dtf.annot %>% reshape2::melt(., value.name = "SE_beta", variable.name = "origin")
-
-
-bind_dtf.annot<- cbind(SE_B.dtf.annot.long, beta.dtf.long %>% select(-origin))
-bind_dtf.annot = bind_dtf.annot %>% filter(!is.na(annot))
-ggplot(bind_dtf.annot, aes(x = beta, y= SE_beta, col = annot )) +  geom_point(alpha = 0.1, na.rm = T) + facet_grid(~origin)
-
-
-```
-
-
-## Beta0 vs SE & deviance
-
-
-
-```{r}
-
-B0 = beta.dtf$B0
-
-
-L = SE_B.dtf %>% colnames() %>% length()
-
-
-B0_vector = replicate(L, B0) %>% as.data.frame() %>% flatten() %>% unlist()
-deviance.sqrt_vec = replicate(L , estim_mu$deviance.sqrt )%>% as.data.frame() %>% flatten() %>% unlist()
-SE_B.dtf.annot.long$B0 = B0_vector
-SE_B.dtf.annot.long$deviance.sqrt = estim_mu$deviance.sqrt
-
-ggplot(SE_B.dtf.annot.long, aes(x = B0, y = SE_beta, col= annot )) +  geom_point(alpha = 0.1, na.rm = T) + facet_grid(~origin)
-
-
-SE_B.dtf.annot.long_B0= SE_B.dtf.annot.long %>% filter(origin == "B0") %>% filter(!is.na(annot))
-
-ggplot( SE_B.dtf.annot.long_B0, aes(x = B0, y = 2^deviance.sqrt, col = annot)) +  geom_point(alpha = 0.1, na.rm = TRUE) + scale_y_log10()
-
-
-```
-
-
-
-
-```{undefined eval=FALSE, include=FALSE}
-hist(dds.mcols$dispFit)
-hist(log_qij, )
-hist(log10(alpha.input$alpha))
-max(dds.mcols$dispersion, na.rm = T)
-hist(log(dds.mcols$dispersion))
-max(dds.mcols$deviance, na.rm = T)
-hist(dds.mcols$deviance)
-max(dds.mcols$dispFit, na.rm = T)
-DESeq2::design(dds)
-
-fitted.common.scale = t(t(dds@assays@data$mu)/dds$sizeFactor)
-
-hist(t(t(dds@assays@data$mu)/dds$sizeFactor))
-hist(residual_deseq)
-max(residual_deseq, na.rm = T)
-residual_deseq = (DESeq2::counts(dds, normalized=TRUE) - fitted.common.scale )
-
-
-w =DESeq2::nbinomWaldTest(dds)
-w@dispersionFunction()
-#S4Vectors::assays(dds)[["mu"]]
-
-
-vst = DESeq2::varianceStabilizingTransformation(dds)
-vst@assays@data$
-```
-
-
-
-https://support.bioconductor.org/p/123305/
-https://support.bioconductor.org/p/60567/
-http://bioconductor.org/packages/release/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.pdf
-https://bioinformatics-core-shared-training.github.io/RNAseq-R/slides/LinearModels.pdf
-
-
-```{r eval=FALSE, include=FALSE}
-#install.packages("RUVSeq")
-#BiocManager::install("RUVSeq")
-library(RUVSeq)
-
-mm
-y <- DGEList(counts=counts(tabl_cnts), group=x)
-y <- calcNormFactors(y, method="upperquartile")
-y <- estimateGLMCommonDisp(y, design)
-y <- estimateGLMTagwiseDisp(y, design)
-fit <- glmFit(y, design)
-res <- residuals(fit, type="deviance")
-
-
-```
-
-
-
-
diff --git a/results/v1/2022-04-22_lowCnts_effect.Rmd b/results/v1/2022-04-22_lowCnts_effect.Rmd
deleted file mode 100644
index 323a2abd9ea61e11243128d0025c3a03d0fcb637..0000000000000000000000000000000000000000
--- a/results/v1/2022-04-22_lowCnts_effect.Rmd
+++ /dev/null
@@ -1,436 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-04-21'
-output:   
-  html_document:
- 
-
-css: 
- - css/template.css
- - css/footer.css
-
----
-
-
-## Introduction
-
-
-In the living world, a phenotype is understood as a combination of a genotype effect, an environment effect and an interaction between genotype and environment (G&E). 
-$$Phenotype = Genotype + Environment + Genotype.Environment$$
-The strength of each effect (G, E, G&E) can be quantified by a coefficient $\beta$. 
-Our expression then becomes: 
-$$Phenotype = \beta_{G} * Genotype + \beta_{E}*Environment +  \beta_{G*E} * Genotype.Environment + \epsilon$$
-Notice that each component has its own $\beta$. Furthermore, the expression above introduces $\epsilon$, the residual of the model. $\epsilon$ can be seen as the difference between the observed values and the values predicted by the model.
-
-Gene expression can also be considered as a phenotype. <br> 
-Accordingly, quantifying $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$ for a given gene in a given condition makes it possible to assess how the strengths of these effects differ between conditions.
-
-That's the purpose of Htrsim !
-
-## Htrsim
-
-##### Model
-
-To this end, Htrsim is based on a model. <br> 
-Because of its ease of use, this model is handled by DESeq2.
-Counts $K_{ij}$ for gene i, sample j are modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$. 
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \alpha_i)
-$$
-$$
-\mu_{ij} = s_jq_{ij}
-$$
-$$
-log_2(q_{ij}) = x_j*\beta_i
-$$
-The fitted mean is composed of a sample-specific size factor $s_j$ and a parameter $q_{ij}$ proportional to the expected true concentration of fragments for sample j. 
-The coefficients $\beta_i$ give the log2 fold changes for gene i for each column of the model matrix X. The sample-specific size factors can be replaced by gene-specific normalization factors for each sample using `normalizationFactors`.
-
-
-
-According to the DESEQ2 GLM and our purpose, we can write: 
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-According to this generalized linear model, we wish to estimate $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$ for a given gene i, in a given condition j. Achieving this would allow us to quantify the strength of each effect (G, E, G&E) for a given gene i, in a given condition j.
-
-
-##### Required
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-```
-
-##### Workflow 
-
-Using public libraries (from BioProject PRJNA675209b - Chinese paper) and a standard RNA-seq pipeline, we built actual RNA-seq counts per gene for 3 genotypes and 2 environments.<br>
-<br>
-Using htrsim (in particular DESEQ2) and this count table, we are able to estimate $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$.
-
-
-a. Input 
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Import & reshape table counts
-fn = system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-
-
-keep <- rowSums( tabl_cnts ) >= 5000
-#keep 
-#tabl_cnts <- 
-tabl_cnts <- tabl_cnts[keep,]
-
-## import design of bioProject
-fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-#bioDesign$batch = 1:12
-```
-
-b. Launch DESEQ2
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-```
-
-
-DESEQ returns a dds object which contains many, many things ... <br>
-In particular it contains the $\beta$ coefficients. <br>
-<br>
-You can access the beta coefficients using:
-
-```{r}
-dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-```
-
-c. $\mu_{ij}$ 
-
-Following our model, we can estimate $log_2(q_{ij})$ from the $\beta$ coefficients inferred by DESeq2:
-
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-Then, $\mu_{ij}$ can be estimated:
-
-$$
-\mu_{ij} = s_j * 2^{log_2(q_{ij})}
-$$
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Model matrix per samples
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-
-## Input estimation
-estim_mu = estim.mu(dds, mm)
-mu.input = estim_mu$mu
-```
-
-d. $K_{ij}$
-
-As defined by our model, counts $K_{ij}$ for gene i, sample j are modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$. 
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \alpha_i)
-$$
-The gene-specific dispersion parameter $\alpha_i$ is also stored in the dds object.<br>
-You can access $\alpha_i$ using:
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-alpha.input = estim.alpha(dds)
-```
-
-We now know $\alpha_i$ and $\mu_{ij}$ for each gene and each condition given by BioProject PRJNA675209b (Chinese paper).
-We are therefore able to simulate $K_{ij}$ for each gene and each condition given by BioProject PRJNA675209b.
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# Setup simulation
-input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
-
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-```
-
-
-```{r, error=TRUE}
-sample = htrs %>% select(where(is.numeric)) %>% colnames()
-genotype = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[1]) %>% unlist()
-
-env = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[2]) %>% unlist()
-
-designSimu = cbind(sample, env, genotype) %>% data.frame()
-
-## RESHAPE HTRS
-htrs.reshape = htrs
-rownames(htrs.reshape) = htrs.reshape$gene_id
-htrs.reshape = htrs.reshape %>% select(-gene_id)
-########### LAUNCH DESEQ #############
-## Design model - specify reference
-designSimu$genotype <- factor(x = designSimu$genotype,levels = c('WT','Msn2D', 'Msn4D'))
-designSimu$env <- factor(x = designSimu$env,levels = c('control', 'KCl'))
-
-
-k_ij.simulation = htrs.reshape
-
-## DESEQ standard analysis
-dds_simu = DESeq2::DESeqDataSetFromMatrix( countData = k_ij.simulation , 
-                                           colData = designSimu , 
-                                           design = ~ genotype + env + genotype:env)
-
-max(k_ij.simulation)
-.Machine$integer.max
-
-```
-
-##### Evaluation
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu = htrs %>% select(-gene_id) %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  scale_x_log10()
-```
-
-Comment: the simulated $K_{ij}$ are abnormally huge!
-Comment: the simulated $K_{ij}$ are slightly different from the actual $K_{ij}$!
-
-
-## Why such large differences?
-
-b. $\epsilon$
-
-In our model, $\epsilon_{ij}$ is defined as follows:
-$$
-\epsilon_{ij} \sim {\sf N}(0 ; deviance_i)
-$$
-
-Let's see the distribution of $deviance_{i}$.
-
-
-```{r warning=FALSE}
-#estim_mu$beta.matrix
-
-deviance_i = estim_mu$deviance.sqrt[!is.na(estim_mu$deviance.sqrt)]^2
-#epsilon_ij <- mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt ))  %>% data.frame() %>% flatten() %>% unlist()
-
-
-# Histogram logarithmic y axis
-ggplot(data.frame(deviance_i), aes(deviance_i)) +               
-  geom_histogram(bins = 100) #+ scale_x_log10()
-
-
-```
-
-
-The deviance is also inferred by DESeq2 while fitting its model.
-Most inferred deviance values lie between 100 and 200.  
-
-
-
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-```{r warning=FALSE}
-#estim_mu$beta.matrix
-
-deviance_i.sqrt = estim_mu$deviance.sqrt[!is.na(estim_mu$deviance.sqrt)]
-epsilon_ij <- mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt ))  %>% data.frame() %>% flatten() %>% unlist()
-#epsilon_ij <- mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = 0 ))  %>% data.frame() %>% flatten() %>% unlist()
-
-# Histogram logarithmic y axis
-ggplot(data.frame(epsilon_ij), aes(epsilon_ij)) +               
-  geom_histogram(bins = 100) #+ scale_x_log10()
-
-
-```
-
-
-Comment: Some $\epsilon_{ij}$ are huge!
-Recall: $\epsilon$ can be seen as the difference between the observed values and the values predicted by the model.
-
-A wide range of $\epsilon$ values means that the model does not fit the observed data well.
-
-It means that even if the $\beta$ coefficients are well estimated, $log_2(q_{ij})$ will vary around the value they predict over a wide range (+/- 40).
-
-
-
-
-```{r warning=FALSE}
-
-
-beta.dtf = estim_mu$beta.matrix %>% data.frame()
-beta.dtf.long = beta.dtf %>% reshape2::melt(., value.name = "beta", variable.name = "origin")
-
-ggplot(beta.dtf.long, aes(x = beta )) +  geom_density(bins = 100, alpha = 0.5, fill = 'grey') + facet_grid(~origin, scales = "free_x")
-
-```
-```{r warning=FALSE}
-beta.dtf = estim_mu$beta.matrix %>% data.frame()
-beta.dtf.long = beta.dtf %>% reshape2::melt(., value.name = "beta", variable.name = "origin")
-
-
-
-## Standard Error
-B0 = estim_mu$dds.mcols$SE_Intercept
-B1 = estim_mu$dds.mcols$SE_genotype_msn2D_vs_wt
-B2 <- estim_mu$dds.mcols$SE_genotype_msn4D_vs_wt
-B3 <- estim_mu$dds.mcols$SE_env_kcl_vs_control
-B4 <- estim_mu$dds.mcols$SE_genotypemsn2D.envkcl
-B5 <- estim_mu$dds.mcols$SE_genotypemsn4D.envkcl
-
-
-SE_B.dtf <- cbind(B0, B1, B2, B3, B4, B5) %>% data.frame()
-SE_B.dtf.long = SE_B.dtf %>% reshape2::melt(., value.name = "SE_beta", variable.name = "origin")
-
-ggplot(SE_B.dtf.long, aes(x = SE_beta, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + facet_grid(~origin)
-```
-
-```{r warning=FALSE}
-bind_dtf<- cbind(SE_B.dtf.long, beta.dtf.long %>% select(-origin))
-ggplot(bind_dtf, aes(x = beta, y= SE_beta, fill= origin )) +  geom_point(alpha = 0.1) + facet_grid(~origin)
-
-
-#new <- bind_dtf %>% mutate(annot = ifelse(origin == "B4 | B5" && SE_beta > 6 , TRUE, FALSE ))
-#new <- bind_dtf %>% tail
-#new %>% filter(beta == "B4")
-```
-
-
-```{r warning=FALSE}
-#dim(htrs)
-
-#new <- bind_dtf %>% mutate(annot = ifelse(((origin == "B4") | (origin == "B5")) & (SE_beta > 6) , TRUE, FALSE ))
-### WARNING 
-#new %>% dcast(., annot ~ origin)
-
-
-SE_threshold = 6
-SE_B.dtf.annot = SE_B.dtf %>%  mutate(annot = ifelse((B4 > SE_threshold) | (B5 > SE_threshold) , TRUE, FALSE ))
-SE_B.dtf.annot %>% group_by(annot) %>% tally()
-SE_B.dtf.annot.long = SE_B.dtf.annot %>% reshape2::melt(., value.name = "SE_beta", variable.name = "origin")
-
-
-bind_dtf.annot<- cbind(SE_B.dtf.annot.long, beta.dtf.long %>% select(-origin))
-bind_dtf.annot = bind_dtf.annot %>% filter(!is.na(annot))
-ggplot(bind_dtf.annot, aes(x = beta, y= SE_beta, col = annot )) +  geom_point(alpha = 0.1, na.rm = T) + facet_grid(~origin)
-
-
-```
-
-
-## Beta0 vs SE & deviance
-
-
-
-```{r}
-
-B0 = beta.dtf$B0
-
-
-L = SE_B.dtf %>% colnames() %>% length()
-
-
-B0_vector = replicate(L, B0) %>% as.data.frame() %>% flatten() %>% unlist()
-deviance.sqrt_vec = replicate(L , estim_mu$deviance.sqrt )%>% as.data.frame() %>% flatten() %>% unlist()
-SE_B.dtf.annot.long$B0 = B0_vector
-SE_B.dtf.annot.long$deviance.sqrt = estim_mu$deviance.sqrt
-
-ggplot(SE_B.dtf.annot.long, aes(x = B0, y = SE_beta, col= annot )) +  geom_point(alpha = 0.1, na.rm = T) + facet_grid(~origin)
-
-
-SE_B.dtf.annot.long_B0= SE_B.dtf.annot.long %>% filter(origin == "B0") %>% filter(!is.na(annot))
-
-ggplot( SE_B.dtf.annot.long_B0, aes(x = B0, y = 2^deviance.sqrt, col = annot)) +  geom_point(alpha = 0.1, na.rm = TRUE) + scale_y_log10()
-
-
-```
-
-
-
-
-```{undefined eval=FALSE, include=FALSE}
-hist(dds.mcols$dispFit)
-hist(log_qij, )
-hist(log10(alpha.input$alpha))
-max(dds.mcols$dispersion, na.rm = T)
-hist(log(dds.mcols$dispersion))
-max(dds.mcols$deviance, na.rm = T)
-hist(dds.mcols$deviance)
-max(dds.mcols$dispFit, na.rm = T)
-DESeq2::design(dds)
-
-fitted.common.scale = t(t(dds@assays@data$mu)/dds$sizeFactor)
-
-hist(t(t(dds@assays@data$mu)/dds$sizeFactor))
-hist(residual_deseq)
-max(residual_deseq, na.rm = T)
-residual_deseq = (DESeq2::counts(dds, normalized=TRUE) - fitted.common.scale )
-
-
-w =DESeq2::nbinomWaldTest(dds)
-w@dispersionFunction()
-#S4Vectors::assays(dds)[["mu"]]
-
-
-vst = DESeq2::varianceStabilizingTransformation(dds)
-vst@assays@data$
-```
-
-
-
-https://support.bioconductor.org/p/123305/
-https://support.bioconductor.org/p/60567/
-http://bioconductor.org/packages/release/bioc/vignettes/RUVSeq/inst/doc/RUVSeq.pdf
-https://bioinformatics-core-shared-training.github.io/RNAseq-R/slides/LinearModels.pdf
-
-
-```{r eval=FALSE, include=FALSE}
-#install.packages("RUVSeq")
-#BiocManager::install("RUVSeq")
-library(RUVSeq)
-
-mm
-y <- DGEList(counts=counts(tabl_cnts), group=x)
-y <- calcNormFactors(y, method="upperquartile")
-y <- estimateGLMCommonDisp(y, design)
-y <- estimateGLMTagwiseDisp(y, design)
-fit <- glmFit(y, design)
-res <- residuals(fit, type="deviance")
-
-
-```
-
-
-
-
diff --git a/results/v1/2022-04-24_epsilon_effect.Rmd b/results/v1/2022-04-24_epsilon_effect.Rmd
deleted file mode 100644
index e7b0f276d3ede81df7a2e604225720a67db8e949..0000000000000000000000000000000000000000
--- a/results/v1/2022-04-24_epsilon_effect.Rmd
+++ /dev/null
@@ -1,136 +0,0 @@
----
-title: "Epsilon effect"
-date: '2022-04-21'
-output:   
-  html_document:
- 
-
-css: 
- - css/template.css
-
----
-
-
-
-##### Required
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-```
-
-
-
-## Workflow 
-
-Using public libraries (from BioProject PRJNA675209b - Chinese paper) and a standard RNA-seq pipeline, we built actual RNA-seq counts per gene for 3 genotypes and 2 environments.<br>
-<br>
-Using htrsim (in particular DESEQ2) and this count table, we are able to estimate $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$.
-
-
-##### a. Input 
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Import & reshape table counts
-fn = system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-
-## import design of bioProject
-fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-```
-
-##### b. Launch DESEQ2
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-```
-
-DESEQ returns a dds object which contains many, many things ... <br>
-In particular it contains the $\beta$ coefficients. <br>
-<br>
-You can access the beta coefficients using:
-
-```{r}
-dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-```
-
-##### c. $\mu_{ij}$ 
-
-Following our model, we can estimate $log_2(q_{ij})$ from the $\beta$ coefficients inferred by DESeq2:
-
-$$
-log_2(q_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-Then, $\mu_{ij}$ can be estimated:
-
-$$
-\mu_{ij} = s_j * 2^{log_2(q_{ij})}
-$$
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Model matrix per samples
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-
-## Input estimation
-estim_mu = estim.mu(dds, mm, epsilon =  FALSE)
-mu.input = estim_mu$mu
-```
-
-##### d. $K_{ij}$
-
-As defined by our model, counts $K_{ij}$ for gene i, sample j are modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$. 
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \alpha_i)
-$$
-The gene-specific dispersion parameter $\alpha_i$ is also stored in the dds object.<br>
-You can access $\alpha_i$ using:
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-alpha.input = estim.alpha(dds)
-```
-
-We now know $\alpha_i$ and $\mu_{ij}$ for each gene and each condition given by BioProject PRJNA675209b (Chinese paper).
-We are therefore able to simulate $K_{ij}$ for each gene and each condition given by BioProject PRJNA675209b.
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# Setup simulation
-input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
-
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-```
-
-##### Evaluation
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu = htrs %>% select(-gene_id) %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  scale_x_log10()
-```
diff --git a/results/v1/2022-04-28_beta_SE_filter.Rmd b/results/v1/2022-04-28_beta_SE_filter.Rmd
deleted file mode 100644
index ef206af3924ef3af0528ec777097830f7d114064..0000000000000000000000000000000000000000
--- a/results/v1/2022-04-28_beta_SE_filter.Rmd
+++ /dev/null
@@ -1,293 +0,0 @@
----
-title: "Apply filter on beta SE"
-date: '2022-04-21'
-output:   
-  html_document:
- 
-
-css: 
- - css/template.css
-
----
-
-
-
-##### Required
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-```
-
-
-##### Workflow 
-
-Using public libraries (from BioProject PRJNA675209b - Chinese paper) and a standard RNA-seq pipeline, we built actual RNA-seq counts per gene for 3 genotypes and 2 environments.<br>
-<br>
-Using htrsim (in particular DESEQ2) and this count table, we are able to estimate $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$.
-
-
-##### a. Input 
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Import & reshape table counts
-fn = system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-
-## import design of bioProject
-fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-```
-
-
-##### b. Launch DESEQ2
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-```
-
-DESEQ returns a dds object which contains many, many things ... <br>
-In particular it contains the $\beta$ coefficients. <br>
-<br>
-You can access the beta coefficients using:
-
-```{r}
-dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-```
-
-
-
-## Removing genes with SE_B4 or SE_B5 > threshold
-
-
-##### Some manipulations
-
-```{r warning=FALSE}
-## BETA
-B0 <- dds.mcols$Intercept
-B1 <- dds.mcols$genotype_msn2D_vs_wt
-B2 <- dds.mcols$genotype_msn4D_vs_wt
-B3 <- dds.mcols$env_kcl_vs_control
-B4 <- dds.mcols$genotypemsn2D.envkcl
-B5 <- dds.mcols$genotypemsn4D.envkcl
-
-beta.matrix = cbind(B0, B1,B2,B3,B4,B5) %>% as.matrix()
-## BETA
-beta.dtf = beta.matrix %>% data.frame()
-beta.dtf.long = beta.dtf %>% reshape2::melt(., value.name = "beta", variable.name = "origin")
-
-
-
-## Standard Error
-B0 = dds.mcols$SE_Intercept
-B1 = dds.mcols$SE_genotype_msn2D_vs_wt
-B2 <- dds.mcols$SE_genotype_msn4D_vs_wt
-B3 <- dds.mcols$SE_env_kcl_vs_control
-B4 <- dds.mcols$SE_genotypemsn2D.envkcl
-B5 <- dds.mcols$SE_genotypemsn4D.envkcl
-
-SE_B.dtf <- cbind(B0, B1, B2, B3, B4, B5) %>% data.frame()
-SE_B.dtf.long = SE_B.dtf %>% reshape2::melt(., value.name = "SE_beta", variable.name = "origin")
-
-ggplot(SE_B.dtf.long, aes(x = SE_beta, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + facet_grid(~origin)
-```
-
-##### Filter
-
-```{r warning=FALSE}
-SE_threshold = 2
-
-SE_B.dtf.annot = SE_B.dtf %>%  mutate(toKickOut = ifelse((B4 > SE_threshold) | (B5 > SE_threshold) , TRUE, FALSE ))
-SE_B.dtf.annot$gene_id = rownames(tabl_cnts)
-dtf_forFiltration = SE_B.dtf.annot %>% select(c(gene_id, toKickOut))
-SE_B.dtf.annot.long = SE_B.dtf.annot  %>% reshape2::melt(., value.name = "SE_beta", variable.name = "origin")
-
-
-bind_dtf <- cbind(SE_B.dtf.annot.long, beta.dtf.long %>% select(-origin))
-bind_df.filter = bind_dtf %>% filter(toKickOut == FALSE) 
-ggplot(bind_df.filter, aes(x = beta, y= SE_beta, fill= origin )) +  geom_point(alpha = 0.1) + facet_grid(~origin)
-
-```
-
-
-##### mu for genes kept by the filter (SE_B4 and SE_B5 <= threshold)
-
-```{r warning=FALSE}
-
-beta_filtered.short = bind_df.filter %>% dcast(., gene_id ~ origin, value.var = "beta")
-
-### Estimate mu
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-beta.matrix = beta_filtered.short %>% select(-gene_id) %>% as.matrix()
-
-
-
-deviance_i =dds.mcols$deviance[!SE_B.dtf.annot$toKickOut & !is.na(SE_B.dtf.annot$toKickOut)]
-deviance_i.sqrt = sqrt(deviance_i)
-epsilon_ij = mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt ))  %>% data.frame() %>% as.matrix()
-
-
-#p_ij = B0_i*mm1_j + B1_i*mm2_j + B3_i*mm3_j + B4_i*mm4_j + B5_i*mm5_j
-p_ij = beta.matrix %*% t(mm)
-#message("EPSILON : TRUE")
-log_qij <- p_ij + epsilon_ij
-
-## s_j
-s_j = dds$sizeFactor
-mu_ij = s_j * 2^log_qij
-rownames(mu_ij) = beta_filtered.short$gene_id
-## drop NA in dispersion estimate (link to unexpressed gene)
-### and convert to lovely dataframe
-mu_gene_filtered = mu_ij %>%
-    stats::na.omit() %>%
-    data.frame()
-colnames(mu_gene_filtered) <- rownames(dds@colData)
-mu_gene_filtered <- mu_gene_filtered %>%
-    tibble::rownames_to_column(var = "gene_id")
-```
-
-
-##### alpha for genes kept by the filter (SE_B4 and SE_B5 <= threshold)
-
-```{r}
-alpha.input = estim.alpha(dds)
-alpha.input.filtered = alpha.input %>% filter(gene_id %in% beta_filtered.short$gene_id)
-```
-
-###### Setup simulation
-
-```{r}
-input = reshape_input2setup(mu.dtf = mu_gene_filtered, alpha.dtf = alpha.input.filtered, average_rep = FALSE)
-
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-####
-```
-
-
-##### Evaluation
-
-
-```{r}
-
-#dtf_merged = merge(htrs, dtf_forFiltration, "gene_id") 
-#htrs.filterSE = dtf_merged %>% filter(toKickOut == FALSE) %>% select(-toKickOut)
-
-k_ij.simu = htrs %>% select(-gene_id) %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density( alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  geom_vline(xintercept = 0, col= "#00BFC4" )   +
-  scale_x_log10()
-```
-
-
-
-## Without epsilon
-
-
-##### mu for genes kept by the filter (SE_B4 and SE_B5 <= threshold)
-
-```{r warning=FALSE}
-
-beta_filtered.short = bind_df.filter %>% dcast(., gene_id ~ origin, value.var = "beta")
-
-### Estimate mu
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-beta.matrix = beta_filtered.short %>% select(-gene_id) %>% as.matrix()
-
-#deviance_i.sqrt = sqrt(dds.mcols$deviance)
-#epsilon_ij = mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt ))  %>% data.frame() %>% as.matrix()
-
-
-#p_ij = B0_i*mm1_j + B1_i*mm2_j + B3_i*mm3_j + B4_i*mm4_j + B5_i*mm5_j
-p_ij = beta.matrix %*% t(mm)
-#message("EPSILON : FALSE")
-log_qij <- p_ij #+ epsilon_ij
-
-## s_j
-s_j = dds$sizeFactor
-mu_ij = s_j * 2^log_qij
-rownames(mu_ij) = beta_filtered.short$gene_id
-## drop NA in dispersion estimate (link to unexpressed gene)
-### and convert to lovely dataframe
-mu_gene_filtered = mu_ij %>%
-    stats::na.omit() %>%
-    data.frame()
-colnames(mu_gene_filtered) <- rownames(dds@colData)
-mu_gene_filtered <- mu_gene_filtered %>%
-    tibble::rownames_to_column(var = "gene_id")
-```
-
-
-##### alpha for genes kept by the filter (SE_B4 and SE_B5 <= threshold)
-```{r}
-alpha.input = estim.alpha(dds)
-alpha.input.filtered = alpha.input %>% filter(gene_id %in% beta_filtered.short$gene_id)
-```
-
-###### Setup simulation
-
-```{r}
-input = reshape_input2setup(mu.dtf = mu_gene_filtered, alpha.dtf = alpha.input.filtered, average_rep = FALSE)
-
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-####
-```
-
-
-##### Evaluation
-
-
-```{r}
-
-#dtf_merged = merge(htrs, dtf_forFiltration, "gene_id") 
-#htrs.filterSE = dtf_merged %>% filter(toKickOut == FALSE) %>% select(-toKickOut)
-
-k_ij.simu = htrs %>% select(-gene_id) %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density( alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  geom_vline(xintercept = 0, col= "#00BFC4" )   +
-  scale_x_log10()
-```
diff --git a/results/v1/2022-05-22_alpha_effect.Rmd b/results/v1/2022-05-22_alpha_effect.Rmd
deleted file mode 100644
index d553f17e3c3c1960479d53273327d01e3d273452..0000000000000000000000000000000000000000
--- a/results/v1/2022-05-22_alpha_effect.Rmd
+++ /dev/null
@@ -1,329 +0,0 @@
----
-title: "Alpha effect"
-date: '2022-04-21'
-output:   
-  html_document:
- 
-
-css: 
- - css/template.css
-
----
-
-
-## Required
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-```
-
-
-
-## Workflow
-
-Using public libraries (from BioProject PRJNA675209b - Chinese paper) and a standard RNA-seq pipeline, we build actual RNA-seq counts per gene for 3 genotypes and 2 environments.<br>
-<br>
-Using htrsim (in particular DESEQ2) and this count table, we are able to estimate $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$.
-
-
-##### a. Input 
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Import & reshape table counts
-fn = system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-
-## import design of bioProject
-fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-```
-
-##### Launch DESEQ 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-```
-
-DESEQ returns a dds object which contains many, many things ... <br>
-In particular it contains the $\beta$ coefficients. <br>
-<br>
-You can access the beta coefficients using:
-
-```{r}
-dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-```
-
-##### mu estimation
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Model matrix per samples
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-
-## Input estimation
-estim_mu = estim.mu(dds, mm)
-mu.input = estim_mu$mu
-```
-
-d. $\alpha_{i}$
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-alpha.input = estim.alpha(dds)
-```
-
-
-```{r}
-hist(alpha.input$alpha)
-maximum_alpha = max(alpha.input$alpha, na.rm = T)
-maximum_alpha
-min(alpha.input$alpha, na.rm = T)
-
-```
-
-For `rnbinom(mu = , size = )`, variance = mu + mu^2/size (here mu = 400 and size = alpha).
-
-```{r}
-n = 1000   # number of draws
-alpha = 2.5   # passed to rnbinom() as `size`
-variance = 400 + 400^2/alpha   # mu + mu^2/size with mu = 400; the prob-based formula gave a negative value here
-variance
-hist(rnbinom(n=n, mu = 400, size = alpha))
-```
-
-```{r}
-n = 1000   # number of draws
-alpha = 0.01   # a much smaller `size` inflates the variance
-variance = 400 + 400^2/alpha   # mu + mu^2/size with mu = 400
-variance
-hist(rnbinom(n=n, mu = 400, size = alpha))
-```
-
-
-```{r}
-## filter on alpha
-#alpha.input.filtered = alpha.input %>% filter(gene_id %in% beta_filtered.short$gene_id)
-#alpha.input.filtered = alpha.input %>% filter(alpha > 10)
-
-
-### FIX ALPHA
-N_genes = length(alpha.input$gene_id)
-alpha.input$alpha = runif(n= N_genes, min = 0.1, max = maximum_alpha)
-```
-
-Knowing $\alpha_i$ and $\mu_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b (Chinese paper),
-we are now able to simulate $K_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b.
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## filter mu based on alpha filter
-#mu.input.filtered = mu.input %>% filter(gene_id %in% alpha.input.filtered$gene_id )
-
-# Setup simulation
-input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
-
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-```
-
-##### Evaluation
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu = htrs %>% select(-gene_id) %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% filter(rownames(.) %in% htrs$gene_id ) %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  scale_x_log10()
-```
-
-
-
-## Without Epsilon
-
-##### mu estimation
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Model matrix per samples
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-
-## Input estimation
-estim_mu = estim.mu(dds, mm, epsilon = FALSE)
-mu.input = estim_mu$mu
-```
-
-d. $\alpha_{i}$
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-alpha.input = estim.alpha(dds)
-```
-
-```{r}
-## filter on alpha
-#alpha.input.filtered = alpha.input %>% filter(gene_id %in% beta_filtered.short$gene_id)
-#alpha.input.filtered = alpha.input %>% filter(alpha > 10)
-
-
-### FIX ALPHA
-N_genes = length(alpha.input$gene_id)
-alpha.input$alpha = runif(n= N_genes, min = 0.1, max = maximum_alpha)
-#alpha.input$alpha = rnbinom(n = N_genes, mu = mean(alpha.input$alpha), size = 20)
-#hist(alpha.input$alpha)
-```
-
-Knowing $\alpha_i$ and $\mu_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b (Chinese paper),
-we are now able to simulate $K_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b.
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## filter mu based on alpha filter
-#mu.input.filtered = mu.input %>% filter(gene_id %in% alpha.input.filtered$gene_id )
-
-# Setup simulation
-input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
-#input$gene_id %>% length()
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-```
-
-##### Evaluation
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu = htrs %>% select(-gene_id) %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% filter(rownames(.) %in% htrs$gene_id ) %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  scale_x_log10()
-```
-
-
-```{r}
-#colnames(htrs)
-##colnames(tabl_cnts)
-#tabl_cnts.geneSimulate <- tabl_cnts %>% filter(rownames(.) %in% htrs$gene_id )
-#actual = tabl_cnts.geneSimulate$WT_control_rep1
-#simu = htrs$WT_control_rep1_1
-ggplot() + geom_point(aes(x=k_ij.actual, y = k_ij.simu), alpha=0.5) + geom_abline(intercept = 0, slope = 1)
-#abline(a=0, b=1)
-```
-
-
-## Re-estimate beta
-
-##### Design
-
-```{r}
-sample = htrs %>% select(where(is.numeric)) %>% colnames()
-genotype = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[1]) %>% unlist()
-
-env = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[2]) %>% unlist()
-
-designSimu = cbind(sample, env, genotype) %>% data.frame()
-```
-
-
-##### Launch DESEQ 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## RESHAPE HTRS
-htrs.reshape = htrs
-rownames(htrs.reshape) = htrs.reshape$gene_id
-htrs.reshape = htrs.reshape %>% select(-gene_id)
-########### LAUNCH DESEQ #############
-## Design model - specify reference
-designSimu$genotype <- factor(x = designSimu$genotype,levels = c('WT','Msn2D', 'Msn4D'))
-designSimu$env <- factor(x = designSimu$env,levels = c('control', 'KCl'))
-
-
-k_ij.simulate = htrs.reshape
-
-## DESEQ standard analysis
-dds_simu = DESeq2::DESeqDataSetFromMatrix( countData = k_ij.simulate , colData = designSimu , design = ~ genotype + env + genotype:env)
-
-max(htrs.reshape)
-.Machine$integer.max
-
-dds_simu <- DESeq2::DESeq(dds_simu)
-```
-
-DESEQ returns a dds object which contains many, many things ... <br>
-In particular it contains the $\beta$ coefficients. <br>
-<br>
-You can access the beta coefficients using:
-
-```{r}
-dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
-#rownames(tabl_cnts)
-```
-
-
-## Evaluation of beta inference
-
-
-```{r}
-## BETA INPUT
-beta_input = estim_mu$beta.matrix %>% as.data.frame()
-idx_nonNA = which(!is.na(beta_input$B0))
-beta_input = beta_input[idx_nonNA,]
-beta_input$gene_id = input$gene_id
-## BETA SIMU
-B0 <- dds_simu.mcols$Intercept
-B1 <- dds_simu.mcols$genotype_Msn2D_vs_WT
-B2 <- dds_simu.mcols$genotype_Msn4D_vs_WT
-B3 <- dds_simu.mcols$env_KCl_vs_control
-B4 <- dds_simu.mcols$genotypeMsn2D.envKCl
-B5 <- dds_simu.mcols$genotypeMsn4D.envKCl
-
-
-beta.dtf = cbind(B0, B1,B2,B3,B4,B5) %>% as.data.frame()
-beta.dtf$gene_id = input$gene_id
-beta.dtf$origin = "inference"
-beta_input$origin = "input"
-dtf.merged = rbind(beta.dtf, beta_input)
-
-dtf.merged.long.tmp = dtf.merged %>% reshape2::melt(., value.name = "value", variable.name= "beta")#, variable.name = "origin")()
-dtf.merged.long  = dtf.merged.long.tmp %>% reshape2::dcast(., gene_id + beta ~ origin)
-
-ggplot(dtf.merged.long) + geom_point(aes(x=input, y = inference),alpha =0.1)+ geom_abline(intercept = 0, slope = 1) + facet_grid(~beta)
-```
diff --git a/results/v1/2022-06-24_subsampling.Rmd b/results/v1/2022-06-24_subsampling.Rmd
deleted file mode 100644
index 6a7b9e53dbfa067559ba48678e7f7a7c01eb8d74..0000000000000000000000000000000000000000
--- a/results/v1/2022-06-24_subsampling.Rmd
+++ /dev/null
@@ -1,303 +0,0 @@
----
-title: "Simulated counts subsampling"
-output: html_document
-date: '2022-06-08'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-##### Required
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-```
-
-## Workflow
-
-##### a. Input 
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Import & reshape table counts
-fn = system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-
-## import design of bioProject
-fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-########### LAUNCH DESEQ #############
-## Design model - specify reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c('wt','msn2D', 'msn4D'))
-bioDesign$env <- factor(x = bioDesign$env,levels = c('control', 'kcl'))
-```
-
-##### b. Launch DESEQ2
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-```
-
-DESEQ returns a dds object which contains many, many things ... <br>
-In particular it contains the $\beta$ coefficients. <br>
-<br>
-You can access the beta coefficients using:
-
-```{r}
-dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-```
-
-##### c. $\mu_{ij}$ 
-
-Following our model, we can estimate $\log_2(\mu_{ij})$ from the $\beta$ coefficients inferred by DESEQ2:
-
-$$
-\log_2(\mu_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-Then, $\mu_{ij}$ can be estimated as
-
-$$
-\mu_{ij} = s_j * 2^{\log_2(\mu_{ij})}
-$$
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Model matrix per samples
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-
-## Input estimation
-estim_mu = estim.mu(dds, mm, epsilon =  TRUE)
-mu.input = estim_mu$mu
-```
-
-##### d. $K_{ij}$
-
-As defined by our model, counts $K_{ij}$ for gene i, sample j are modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$. 
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \alpha_i)
-$$
-The gene-specific dispersion parameter $\alpha_i$ is also stored in the dds object.<br>
-You can access $\alpha_i$ using:
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-alpha.input = estim.alpha(dds)
-```
-
-Knowing $\alpha_i$ and $\mu_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b (Chinese paper),
-we are now able to simulate $K_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b.
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# Setup simulation
-input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
-
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-```
-
-
-##### Evaluation after simulation
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu = htrs %>% select(-gene_id) %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  scale_x_log10()
-```
-
-
-
-```{r}
-#max(tabl_cnts)
-
-htrs.filter = htrs %>% column_to_rownames("gene_id") %>% filter_all(all_vars(is.numeric(.) & . < max(tabl_cnts)))
-max(htrs.filter)
-```
-
-
-##### Evaluation after simulation
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu = htrs.filter %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% filter(rownames(.) %in% rownames(htrs.filter)) %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  scale_x_log10()
-```
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu = htrs.filter %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% filter(rownames(.) %in% rownames(htrs.filter)) %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% data.frame()
-
-ggplot(df, aes(x = k_ij.actual, y = k_ij.simu )) +  geom_point() + geom_abline(slope=1, intercept=0)
-```
-
-
-##### Subsampling
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-
-# Spread N_reads over the entries of kij.sample in proportion to their log2-transformed counts.
-sub_sampling <- function(kij.sample, N_reads){
-  # log2-transform the counts; non-finite results (e.g. log2(0) = -Inf) are replaced by 0
-  kij.sample = log2(kij.sample ) %>% replace(!is.finite(.), 0)
-  # running share of the summed log2 values reached at each entry
-  ecdf_maison = cumsum(kij.sample)/sum(kij.sample)
-  decumulativ_vector = c(0, ecdf_maison[-length(ecdf_maison)]) ## build decumulative vector 
-                                                              ## start with 0 and without the last element
-                                                              ## reads_vec_cnsts = ceiling((ecdf_maison - decumulativ_vector) * 10*10^6)
-  reads_vec_cnts = round((ecdf_maison - decumulativ_vector) * N_reads)
-  return(reads_vec_cnts)
-}
-
-htrs.sub = htrs %>% select_if(., is.numeric) %>% map_df(., ~sub_sampling(., 10^6))
-rownames(htrs.sub) = htrs$gene_id
-
-#( rowSums(htrs.sub) > 100 ) %>% table()
-```
-
-```{r}
-k_ij.simu = htrs.sub %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% filter(rownames(.) %in% rownames(htrs.filter)) %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density( alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  scale_x_log10()
-```
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu = htrs.sub %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% filter(rownames(.) %in% rownames(htrs.filter)) %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% data.frame()
-
-ggplot(df, aes(x = k_ij.actual, y = k_ij.simu )) +  geom_point() + geom_abline(slope=1, intercept=0)
-```
-
-
-
-## DESEQ2 on simulation
-
-```{r }
-sample = htrs.sub %>% select(where(is.numeric)) %>% colnames()
-genotype = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[1]) %>% unlist()
-
-env = htrs %>% 
-          select(where(is.numeric)) %>% 
-            colnames() %>% 
-              map(., ~str_split(.,pattern = '_', simplify = T)[2]) %>% unlist()
-
-designSimu = cbind(sample, env, genotype) %>% data.frame()
-
-
-########### LAUNCH DESEQ #############
-## Design model - specify reference
-designSimu$genotype <- factor(x = designSimu$genotype,levels = c('WT','Msn2D', 'Msn4D'))
-designSimu$env <- factor(x = designSimu$env,levels = c('control', 'KCl'))
-
-
-k_ij.simulation = htrs.sub
-
-## DESEQ standard analysis
-dds_simu = DESeq2::DESeqDataSetFromMatrix( countData = k_ij.simulation , 
-                                           colData = designSimu , 
-                                           design = ~ genotype + env + genotype:env)
-
-
-
-dds_simu = run.deseq(tabl_cnts = k_ij.simulation, bioDesign = designSimu)
-
-```
-
-
-```{r}
-dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
-  #rownames(tabl_cnts)
-```
-
-
-## Evaluation of beta inference
-
-
-```{r}
-## BETA INPUT
-beta_input = estim_mu$beta.matrix %>% as.data.frame()
-idx_nonNA = which(!is.na(beta_input$B0))
-beta_input = beta_input[idx_nonNA,]
-beta_input$gene_id = input$gene_id
-## BETA SIMU
-B0 <- dds_simu.mcols$Intercept
-B1 <- dds_simu.mcols$genotype_Msn2D_vs_WT
-B2 <- dds_simu.mcols$genotype_Msn4D_vs_WT
-B3 <- dds_simu.mcols$env_KCl_vs_control
-B4 <- dds_simu.mcols$genotypeMsn2D.envKCl
-B5 <- dds_simu.mcols$genotypeMsn4D.envKCl
-
-plot(B0)
-plot(beta_input$B0)
-
-beta.dtf = cbind(B0, B1,B2,B3,B4,B5) %>% as.data.frame()
-beta.dtf$gene_id = input$gene_id
-beta.dtf$origin = "inference"
-beta_input$origin = "input"
-dtf.merged = rbind(beta.dtf, beta_input)
-
-dtf.merged.long.tmp = dtf.merged %>% reshape2::melt(., value.name = "value", variable.name= "beta")#, variable.name = "origin")()
-dtf.merged.long  = dtf.merged.long.tmp %>% reshape2::dcast(., gene_id + beta ~ origin)
-
-ggplot(dtf.merged.long) + geom_point(aes(x=input, y = inference),alpha =0.1)+ geom_abline(intercept = 0, slope = 1) + facet_grid(~beta)
-```
-
-
-```{r}
-# Actual vs. sub-sampled counts for one sample; abline(a = 0, b = 1) adds the identity line y = x
-plot(x = tabl_cnts$WT_control_rep1[idx_nonNA], y = htrs.sub$WT_control_rep1_1); abline(a = 0, b = 1)
-```
diff --git a/results/v1/2022-06-28_kallistoEffect.Rmd b/results/v1/2022-06-28_kallistoEffect.Rmd
deleted file mode 100644
index 30a89c3b1c041eb666e9bada07b8589944bab063..0000000000000000000000000000000000000000
--- a/results/v1/2022-06-28_kallistoEffect.Rmd
+++ /dev/null
@@ -1,133 +0,0 @@
----
-title: "Kallisto Effect"
-output: html_document
-date: '2022-06-28'
-
-output: html_document
-date: '2022-06-08'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-##### Required
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-```
-
-
-## Workflow
-
-##### a. Input 
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Import & reshape table counts
-fn = system.file("extdata/", "PRJNA675209v2.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-#tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-
-## import design of bioProject
-fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-```
-
-##### b. Launch DESEQ2
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-```
-
-DESEQ returns a dds object which contains many, many things ... <br>
-In particular it contains the $\beta$ coefficients. <br>
-<br>
-You can access the beta coefficients using:
-
-```{r}
-dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-```
-
-##### c. $\mu_{ij}$ 
-
-Following our model, we can estimate $\log_2(\mu_{ij})$ from the $\beta$ coefficients inferred by DESEQ2:
-
-$$
-\log_2(\mu_{ij}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0} + \epsilon_{ij}
-$$
-
-Then, $\mu_{ij}$ can be estimated as
-
-$$
-\mu_{ij} = s_j * 2^{\log_2(\mu_{ij})}
-$$
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Model matrix per samples
-mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-
-## Input estimation
-estim_mu = estim.mu(dds, mm, epsilon =  TRUE)
-mu.input = estim_mu$mu
-```
-
-##### d. $K_{ij}$
-
-As defined by our model, counts $K_{ij}$ for gene i, sample j are modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$. 
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \alpha_i)
-$$
-The gene-specific dispersion parameter $\alpha_i$ is also stored in the dds object.<br>
-You can access $\alpha_i$ using:
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-alpha.input = estim.alpha(dds)
-```
-
-Knowing $\alpha_i$ and $\mu_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b (Chinese paper),
-we are now able to simulate $K_{ij}$ for each gene and each condition given by the BioProject PRJNA675209b.
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# Setup simulation
-input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
-
-#input$gene_id
-setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                     n_rep = 1,
-                                     alpha = input$alpha,
-                                     gene_id = input$gene_id,
-                                     mu = input$mu)
-
-#setup.simulation %>% dim()
-# Simulate counts
-htrs <- generate_counts(setup.simulation)
-
-```
-
-
-##### Evaluation after simulation
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-k_ij.simu = htrs %>% select(-gene_id) %>% flatten() %>% unlist()
-k_ij.actual = tabl_cnts %>% flatten() %>% unlist()
-
-df = cbind(k_ij.actual, k_ij.simu) %>% reshape2::melt(., value.name = "k_ij", variable.name = "origin")
-df$origin = df$Var2
-df = df %>% select(-Var2)
-max_k_ij.simu = df %>% filter(origin == "k_ij.simu") %>% select(k_ij) %>% max()
-max_k_ij.actual = df %>% filter(origin == "k_ij.actual") %>% select(k_ij) %>% max()
-
-ggplot(df, aes(x = k_ij, fill= origin )) +  geom_density(bins = 100, alpha = 0.5) + 
-  geom_vline(xintercept = max_k_ij.actual, col = "#F8766D" ) +  
-  geom_vline(xintercept = (max_k_ij.simu), col= "#00BFC4" )   +
-  scale_x_log10()
-```
diff --git a/results/v1/2022_06_08_investigation.Rmd b/results/v1/2022_06_08_investigation.Rmd
deleted file mode 100644
index 95e071e054cd75871a878dfa947b76891669417b..0000000000000000000000000000000000000000
--- a/results/v1/2022_06_08_investigation.Rmd
+++ /dev/null
@@ -1,30 +0,0 @@
----
-title: "2022_06_08_investigation"
-output: html_document
-date: '2022-06-08'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-
-##### Required
-
-```{r setup, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-library(DESeq2)
-```
-
-
-```{r}
-BiocManager::install('tximport')  # provides tximport()
-BiocManager::install('rhdf5')
-library(rhdf5)
-tximport::tximport("../data/abundance.h5", type = "kallisto", txOut = TRUE)  # namespace-qualified: tximport is installed above but never attached
-```
diff --git a/results/v1/SVA.R b/results/v1/SVA.R
deleted file mode 100644
index 6004178cda5e8562d413050ddc1b45034c4daae4..0000000000000000000000000000000000000000
--- a/results/v1/SVA.R
+++ /dev/null
@@ -1,72 +0,0 @@
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-library(sva)
-
-## Import & reshape table counts
-fn = system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-
-## import design of bioProject
-fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-
-
-########## SVA
-dat  <- DESeq2::counts(dds, normalized = TRUE)
-idx  <- rowMeans(dat) > 1
-dat  <- dat[idx, ]
-mod <- model.matrix(~genotype + env + genotype:env, bioDesign)
-#mod  <- model.matrix(~ dex, colData(dds))
-mod0 <- model.matrix(~   1, bioDesign)
-svseq <- svaseq(dat, mod, mod0, n.sv = 2)
-
-
-plot(svseq$sv)
-
-
-par(mfrow = c(2, 1), mar = c(3,5,3,1))
-for (i in 1:2) {
-  stripchart(svseq$sv[, i] ~ dds$genotype, vertical = TRUE, main = paste0("SV", i))
-  abline(h = 0)
-}
-
-
-## PCA
-data2PCA <- dat %>% data.frame() %>%
-        mutate(gene_id = rownames(.)) %>% remove_rownames() %>%
-        reshape2::melt(.,id = c('gene_id'), variable.name = "sample_id", value.name = "count_norm") %>%
-        dcast(., sample_id ~ gene_id)
-
-rownames(data2PCA) = data2PCA$sample_id
-data2PCA = data2PCA %>% select(-sample_id)
-
-## PCA processed
-pca.obj <- prcomp( data2PCA , scale. = FALSE)
-summary(pca.obj)
-plot(pca.obj)
-## Annotation of the pca object
-sample_id = data2PCA %>% rownames()
-environment = data2PCA %>% rownames() %>% str_split(pattern = "_", simplify = TRUE)%>% .[,2]
-genotype = data2PCA %>% rownames() %>% str_split(pattern = "_", simplify = TRUE)%>% .[,1]
-
-dtp <- data.frame( 'sample_id' = sample_id ,
-                   'genotype' = genotype,
-                   'environment' = environment,
-                   pca.obj$x[,1:2])
-
-## Plot
-P <- ggplot(data = dtp) +
-  geom_point(aes(x = PC1, y = PC2,
-                 col = genotype, ### Possible choice: period, compartments, day, lumen_mucus, ...
-                 shape = environment,  ### Possible choice: period, compartments, day, lumen_mucus, ...
-                 text = paste("ID:", sample_id))) +
-  theme_minimal()
-ggplotly(P,  tooltip = c("text"))
-P
diff --git a/results/v1/SVA.Rmd b/results/v1/SVA.Rmd
deleted file mode 100644
index 21775c187e470dd9e60df7823b5c0c7c37a37a25..0000000000000000000000000000000000000000
--- a/results/v1/SVA.Rmd
+++ /dev/null
@@ -1,180 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-04-21'
-output:   
-  html_document:
- 
-
-css: 
- - css/template.css
- - css/footer.css
-
----
-
-
-```{r setup, include=FALSE}
-library(htrsim)
-library(tidyverse)
-library(reshape2)
-library(sva)
-library(kableExtra)
-library(gridExtra)
-```
-
-## SVA
-
-```{r}
-
-## Import & reshape table counts
-fn = system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-
-## import design of bioProject
-fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-dds = S4Vectors::mcols(dds,use.names=TRUE)
-deviance_i  = dds$deviance
-
-testing_tmp2 = deviance_i %>% data.frame()
-testing_tmp2$n = "0"
-
-```
-
-
-
-
-## Take into account SVA
-
-
-```{r}
-
-########## SVA 
-dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-
-dat  <- DESeq2::counts(dds, normalized = TRUE)
-idx  <- rowMeans(dat) > 1
-dat  <- dat[idx, ]
-mod <- model.matrix(~genotype + env + genotype:env, bioDesign)
-#mod  <- model.matrix(~ dex, colData(dds))
-mod0 <- model.matrix(~   1, bioDesign)
-
-## 1 SVA ####
-svseq <- svaseq(dat, mod, mod0, n.sv = 1)
-ddsva <- dds
-ddsva$SV1 <- svseq$sv[,1]
-DESeq2::design(ddsva) <- ~ SV1 + genotype + env + genotype:env
-ddsva <- DESeq2::DESeq(ddsva)
-
-ddsva.mcols = S4Vectors::mcols(ddsva,use.names=TRUE)
-deviance_i  = ddsva.mcols$deviance
-
-testing_tmp = deviance_i %>% data.frame()
-testing_tmp$n = "1"
-testing <- rbind(testing_tmp, testing_tmp2)
-
-### 2 SVA ####
-svseq <- svaseq(dat, mod, mod0, n.sv = 2)
-ddsva <- dds
-ddsva$SV1 <- svseq$sv[,1]
-ddsva$SV2 <- svseq$sv[,2]
-DESeq2::design(ddsva) <- ~ SV1 + SV2 + genotype + env + genotype:env
-ddsva <- DESeq2::DESeq(ddsva)
-
-ddsva.mcols = S4Vectors::mcols(ddsva,use.names=TRUE)
-deviance_i  = ddsva.mcols$deviance
-
-testing_tmp2 = deviance_i %>% data.frame()
-testing_tmp2$n = "2"
-testing <- rbind(testing, testing_tmp2)
-## 4 SVA ###
-svseq <- svaseq(dat, mod, mod0, n.sv = 4)
-ddsva <- dds
-ddsva$SV1 <- svseq$sv[,1]
-ddsva$SV2 <- svseq$sv[,2]
-ddsva$SV3 <- svseq$sv[,3]
-ddsva$SV4 <- svseq$sv[,4]
-
-DESeq2::design(ddsva) <- ~ SV1 + SV2 + SV3 + SV4 + genotype + env + genotype:env 
-ddsva <- DESeq2::DESeq(ddsva)
-
-ddsva.mcols = S4Vectors::mcols(ddsva,use.names=TRUE)
-deviance_i  = ddsva.mcols$deviance
-
-testing_tmp = deviance_i %>% data.frame()
-testing_tmp$n = "4"
-testing <- rbind(testing, testing_tmp)
-testing<- testing %>% rename(. , 'deviance_i' = .)
-#DESeq2::results(ddsva)
-
-
-
-```
-
-```{r}
-# Histogram logarithmic y axis
-ggplot(data.frame(testing), aes(deviance_i)) +               
-  geom_histogram(bins = 100) + facet_grid(~n, scales = "free") #+ scale_x_log10()
-```
-
-## PCA
-
-```{r  echo = FALSE,echo = FALSE, message=FALSE, warning=FALSE, include=TRUE, fig.align='center', fig.width = 6, fig.cap="Table: PCA results"}
-data2PCA <- dat %>% data.frame() %>% 
-        mutate(gene_id = rownames(.)) %>% remove_rownames() %>%  
-        reshape2::melt(.,id = c('gene_id'), variable.name = "sample_id", value.name = "count_norm") %>%
-        dcast(., sample_id ~ gene_id)
-
-rownames(data2PCA) = data2PCA$sample_id
-data2PCA = data2PCA %>% select(-sample_id)
-
-## PCA processed
-pca.obj <- prcomp( data2PCA , scale. = FALSE)
-
-res = summary(pca.obj)
-
-res$importance[,1:3] %>% 
-  kbl(., caption = "Table: Variance explained per Principal Component", position = "bottom", align = 'c') %>% 
-  kable_styling(full_width = F)
-
-```
-
-
-```{r fig.align='center', fig.width = 10}
-## Annotation of the pca object
-sample_id = data2PCA %>% rownames()
-environment = data2PCA %>% rownames() %>% str_split(pattern = "_", simplify = TRUE)%>% .[,2]
-genotype = data2PCA %>% rownames() %>% str_split(pattern = "_", simplify = TRUE)%>% .[,1]
-
-dtp <- data.frame( 'sample_id' = sample_id ,
-                   'genotype' = genotype,
-                   'environment' = environment,
-                   pca.obj$x[,1:3])
-
-## Plot
-P1 <- ggplot(data = dtp) + 
-  geom_point(aes(x = PC1, y = PC2, 
-                 col = genotype, ### Possible choice: period, compartments, day, lumen_mucus, ...
-                 shape = environment,  ### Possible choice: period, compartments, day, lumen_mucus, ...
-                 text = paste("ID:", sample_id)), size = 5) + 
-  theme_minimal() 
-#ggplotly(P,  tooltip = c("text"))
-#P
-
-P2 <- ggplot(data = dtp) + 
-  geom_point(aes(x = PC2, y = PC3, 
-                 col = genotype, ### Possible choice: period, compartments, day, lumen_mucus, ...
-                 shape = environment,  ### Possible choice: period, compartments, day, lumen_mucus, ...
-                 text = paste("ID:", sample_id)), size = 5) + 
-  theme_minimal() 
-#ggplotly(P,  tooltip = c("text"))
-#P
-
-grid.arrange(P1, P2, ncol = 2)
-
-```
diff --git a/results/v1/css/footer.css b/results/v1/css/footer.css
deleted file mode 100644
index d4bc7f0eb3cd61e0eb8694f8dc0093c2dd6a958e..0000000000000000000000000000000000000000
--- a/results/v1/css/footer.css
+++ /dev/null
@@ -1,29 +0,0 @@
-/*------------FOOTER----------*/
-
-/* Divider line above footer */
-.footer hr{
-  width: 100%;
-}
-
-.footer {
-  font-size: 16px;
-  color: #808080;
-  text-align: center;
-  width: 90%;
-  margin: 3rem auto;
-  font-weight: 300;
-}
-
-.footer.logo {
-  width: 25px;
-  margin: 0px !important;
-}
-
-.rstudio4edu-footer {
-  font-size: 12px;
-  text-transform: uppercase;
-}
-
-.tocify-extend-page {
-  height: 0px !important; /* Gets rid of extra space after footer*/
-}
diff --git a/results/v1/css/template.css b/results/v1/css/template.css
deleted file mode 100644
index d92e7e6febfda482190682f583b0d2f1542145fb..0000000000000000000000000000000000000000
--- a/results/v1/css/template.css
+++ /dev/null
@@ -1,84 +0,0 @@
- /*------------- Whole Document---------------- */
-
-body {
-    font-family: 'Muli';
-    font-size: 19px;
-
-
-}
-
-title {
-    font-size: 40px;
-    margin-top: 200px;
-    text-align: center;
-
-}
-
-
-
-
-h1, .h1 {
-    font-size: 40px;
-    margin-top: 84px;
-    text-align: center;
-    color: #0A3B95;
-    padding: 10px;
-    display: grid;
-    /*background: #0A3B95 ;*/
-    background: #E7EEF1;
-}
-
-
-h2, .h2{
-    font-size: 30px;
-    /*color: #fff;*/
-    color: #0A3B95;
-    padding: 4px;
-    /*background: #0A3B95 ;*/
-    background: #E7EEF1;
-
-    text-align: center;
-    margin-bottom: 0.75em;
-    margin-top: 30px;
-
-}
-
-
-h3, .h3 {
-    font-size: 30px;
-    text-align: center;
-
-
-}
-
-h4, .h4 {
-    font-style: italic;
-    font-size: 20px;
-    text-align: center;
-
-
-}
-
-h5, .h5{
-  /*color:#8A7B60  ;*/
-  text-indent: 5em;
-  font-weight: bold;
-  font-size: 20px;
-  margin-top: 20px;
-  margin-bottom: 0.75em;
-  text-decoration: underline;
-
-}
-
-/* FIG */
-
-.caption {
-  font-size: 0.9em;
-  font-style: italic;
-  color: grey;
-  margin-right: 10%;
-  margin-left: 10%;
-  text-align: center;
-}
-
-
diff --git a/results/v1/lab-meeting_preparation.Rmd b/results/v1/lab-meeting_preparation.Rmd
deleted file mode 100644
index 8bc7cae8a843b96293ed77fe4082cd170453b041..0000000000000000000000000000000000000000
--- a/results/v1/lab-meeting_preparation.Rmd
+++ /dev/null
@@ -1,61 +0,0 @@
----
-title: "lab-meeting"
-output:   
-  html_document
----
-
-
-```{r}
-library(tidyverse)
-
-
-x=runif(100,0,100)
-epsilon.simu = rnorm(100,0,16)
-y = 150 + 4*x + epsilon.simu
-mysimu1 = cbind(x, y)  %>% data.frame()
-mysimu1$epsilon = "eps ~ N(mean = 0, sd = 16)"
-
-ggplot(mysimu1) + geom_point(aes(x=x, y=y),col= "#00BFC4") + geom_abline(intercept = 150, slope = 4, col= "#F8766D") + ylim(100, 500) + xlim(0, 100)
-
-ggplot(mysimu1) + geom_abline(intercept = 150, slope = 4, col= "#F8766D") + ylim(100, 500) + xlim(0, 100)
-
-epsilon.simu= epsilon.simu %>% data.frame() %>% mutate(., epsilon = .) %>% select(-.)
-ggplot(epsilon.simu) + geom_density(aes(x=epsilon))
-
-```
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-
-x=runif(100,0,100)
-epsilon.simu = rnorm(1000,0,160)
-y = 150 + 4*x + epsilon.simu
-mysimu2 = cbind(x, y) %>% data.frame()
-mysimu2$epsilon = "eps ~ N(mean = 0, sd = 160)"
-ggplot(mysimu2) + geom_point(aes(x=x, y=y), col= "#00BFC4") + geom_abline(intercept = 150, slope = 4, col= "#F8766D") + ylim(100, 500) + xlim(0, 100)
-epsilon.simu= epsilon.simu %>% data.frame() %>% mutate(., epsilon = .) %>% select(-.)
-ggplot(epsilon.simu) + geom_density(aes(x=epsilon))
-
-```
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-dtf.merged <- rbind(mysimu1, mysimu2)
-
-ggplot(dtf.merged) + geom_point(aes(x=x, y=y), col= "#00BFC4") + geom_abline(intercept = 150, slope = 4, col= "#F8766D") + ylim(100, 500) + xlim(0, 100) + facet_grid(~epsilon)
-
-```
-
-```{r}
-dtf.merged <- rbind(mysimu1, mysimu2)
-
-ggplot(dtf.merged) + geom_point(aes(x=x, y=y), col= "#00BFC4")  + ylim(100, 500) + xlim(0, 100) + facet_grid(~epsilon)
-```
-
-```{r}
-x=runif(10,40,70)
-epsilon.simu = rnorm(10,0,100)
-y = 150 + 4*x + epsilon.simu
-mysimu1 = cbind(x, y)  %>% data.frame()
-mysimu1$epsilon = "eps ~ N(mean = 0, sd = 16)"
-
-ggplot(mysimu1) + geom_point(aes(x=x, y=y),col= "#00BFC4") + geom_abline(intercept = 150, slope = 4, col= "#F8766D")
-```
diff --git a/results/v2/2022-07-25_dev.Rmd b/results/v2/2022-07-25_dev.Rmd
deleted file mode 100644
index 36cc31ea1425557ca456e1bc7168e3fdb6e45b6c..0000000000000000000000000000000000000000
--- a/results/v2/2022-07-25_dev.Rmd
+++ /dev/null
@@ -1,925 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-07-26'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Simulation without ru_rm_5 - all genes
-
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-
-
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Data viz
-p<- ddsExtraction.viz(dds.extraction = dds.extraction)
-p
-ggsave(filename = "figures/inputParams_distrib.png", plot = p, width = 20, height = 12) 
-
-```
-
-
-
-
-## Design simulation
-
-## Workflow
-
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-#### params #####
-n_G = 10
-n_E = 2
-n_genes = 300
-
-
-##################
-beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 15)
-#rowSums(beta.input) %>% which.max()
-#rowSums(beta.input) %>% max()
-log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-
-
-gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-
-
-#2* 2^log_qij
-mu_ij = getMu_ij(log_qij, 1)
-#max(mu_ij)
-kij.simulated = getK_ij(mu_ij, gene_dispersion)
-#max(kij.simulated)
-
-#kij.simulated = kij.simulated %>% data.frame() %>% filter_all(all_vars(. < 1000000)) 
-
-```
-
-
-## DEseq on simulation
-
-
-```{r}
-#kij.simulated[is.na(kij.simulated)] = 0
-
-dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-```
-
-## Evaluation
-
-
-```{r}
-
-dds_simu.mcols = S4Vectors::mcols(dds_simu, use.names=TRUE)
-
-gene_disp.simu = dds_simu.mcols$dispersion %>% na.omit()
-
-
-deviance_dtf = list(deviance_infered = gene_disp.simu, deviance_actual = gene_dispersion[,1]) %>% data.frame()
-
-p = ggplot(deviance_dtf) + 
-      geom_point(aes(x=deviance_actual, y = deviance_infered ),alpha =0.2) + 
-    geom_abline(intercept = 0, slope = 1)
-      
-p
-
-ggsave(filename = "figures/DESEQ_dispersion.png", plot = p, width = 15, height = 8) 
-
-```
-
-
-## MASS
-
-```{r}
-#### params #####
-n_G = 10
-n_E = 2
-n_genes = 2
-##################
-
-## Get beta for simulation
-beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 3)
-design2simulate$model_matrix %>% dim()
-log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-#log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-
-
-gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                            n_genotypes = n_G, 
-                                            n_environments = n_E, 
-                                            dispersion.vec = dds.extraction$gene_dispersion, 
-                                            model_matrix = design2simulate$model_matrix, 
-                                                      dispUniform_btweenCondition = T)
-    
-    
-    #2* 2^log_qij
-mu_ij = getMu_ij(log_qij, 1)
-    #max(mu_ij)
-
-kij.simulated = getK_ij(mu_ij, gene_dispersion)
-kij.simulated %>% colSums() %>% mean()
-
-
-dim(kij.simulated)
-
-plan(multisession, workers = 4)
-a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
-                                              .x, 
-                                              design2simulate$design2simulate) )
-
-  
-
-c = do.call(rbind, a)
-beta.input.long = beta.input %>% data.frame() %>%
-      tibble::rownames_to_column(., var = "gene_id") %>%
-      dplyr::mutate(origin = "Actual") %>%
-      reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-      dplyr::rename(Actual = "value") %>% 
-      dplyr::select(-origin)
-
-x = merge(c, beta.input.long)
-
-x$type <- factor(x$type, levels = c("Intercept", "G", "E", 'GxE'))
-
-p = ggplot(x) + geom_point(aes(x=Actual* log(2), y = Inference ),alpha =0.2)+ geom_abline(intercept = 0, slope = 1) + facet_grid(~type, scales = "free") + 
-        theme(strip.text.x = element_text(size = 20), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13)) 
-p
-
-ggsave(filename = "figures/GLM_identity.png", plot = p) 
-
-
-infer_dispersion = x %>% arrange(.,gene_id) %>% .$dispersion %>% unique()
-names(infer_dispersion) = x %>% arrange(.,gene_id) %>% .$gene_id %>% unique()
-
-disp_dtf = list(dispersion_infered = infer_dispersion, dispersion_actual = gene_dispersion[,1][names(infer_dispersion)], gene_id = names(infer_dispersion)) %>% data.frame()
-
-p = ggplot(disp_dtf) + 
-      geom_point(aes(x=dispersion_actual, y = 1/dispersion_infered ),alpha =0.2) + 
-    geom_abline(intercept = 0, slope = 1) 
-      
-p
-
-ggsave(filename = "figures/GLM_dispersion.png", plot = p, width = 15, height = 8) 
-
-```
-
-
-```{r}
-
-fitted_models = x %>% group_by(type) %>% do(model = lm(Inference ~ Actual, data = .))
-fitted_models$model
-
-```
-
-## Multidimensional normal distribution
-
-
-```{r}
-library(Rfast)
-library(MASS)
-
-x <- matrnorm(100, 4)
-x = dds.extraction$beta %>% as.matrix()
-fit.mvrnorm <- mvnorm.mle(x)
-x <- NULL
-sample_distribution <- mvrnorm(n = 3,
-                               mu = fit.mvrnorm$mu, 
-                               Sigma = fit.mvrnorm$sigma )
-
-
-plot(density(24 + rnorm(1000,24/10,1)))
-
-
-covar.matrix = cov(dds.extraction$beta)
-beta.mean <- dds.extraction$beta %>% colMeans()
-sample_distribution <- mvrnorm(n = 100,
-                               mu = beta.mean, 
-                               Sigma = covar.matrix)
-rnorm(10,  mean = 0.04, sd = 1)
-beta.simu <- mvrnorm(n = 100, mu = beta.mean, Sigma = covar.matrix)
-
-beta.simu %>% head()
-
-sample_distribution %>% head()
-library(MASS)
-betaG = rep(dds.extraction$beta[,2], 3) %>% matrix(ncol = 3)
-beta0 = dds.extraction$beta[,1]   %>% matrix(ncol = 1)
-betaE = dds.extraction$beta[,3] %>% matrix(ncol = 1)
-betaGE = rep(dds.extraction$beta[,4], 3) %>% matrix(ncol = 3)
-
-beta.custom = cbind(beta0,betaG,betaE, betaGE)
-beta.custom %>% head()
-covar.matrix = cov(beta.custom)
-beta.mean <- beta.custom %>% colMeans()
-sample_distribution <- mvrnorm(n = 100,
-                               mu = beta.mean, 
-                               Sigma = covar.matrix)
-
-
-sample_distribution %>% head()
-```
-
-
-```{r}
-
-
-idx = sample.int(nrow(dds.extraction$beta), size = 2, replace = T )
-beta.simu.tmp = dds.extraction$beta[idx,]
-
-
-n_genotypes = 5
-n_environments = 4
-
-replicate_beta <- function(beta_vec, n){
- beta_vec.rep = rep(beta_vec, n)
- beta_vec.rep + rnorm(length(beta_vec.rep), mean = beta_vec/10, sd = 1)
-}
-#replicate_beta(beta.simu.tmp[,2], 2)
-
-beta0 = beta.simu.tmp[,1]
-beta.simu.tmp = map2(.x = c(2,3,4), .y =  c(n_genotypes-1,
-                                            n_environments-1,
-                                            (n_genotypes-1)*(n_environments-1)),  
-                          ~ replicate_beta(beta.simu.tmp[,.x], .y) %>% matrix(ncol = .y)) %>%
-                do.call(cbind, .)
-
-
-
-beta.simu = cbind(beta0, beta.simu.tmp)
-betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "")
-betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "")
-betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
-matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
-
-colnames(beta.simu) = matrix.colnames
-beta.simu
-
-
-```
-
-
-## dispersion MASS
-
-```{r}
-y = kij.simulated[1,]
-genotype = design2simulate$design2simulate$genotype
-environment = design2simulate$design2simulate$environment
-df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame()
-rownames(df_gene_i) <- NULL
-
-fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = df_gene_i, link = log)
-summa = 
-summa$dispersion
-di
-fit$theta
-
-1/gene_dispersion[1,]
-
-```
-
-## dipsersion exponentielle
-
-```{r}
-plot(density(dds.extraction$gene_dispersion))
-alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
-                          dplyr::rename(., value = ".") %>%
-                          dplyr::mutate(parameter = "dispersion")
-
-
-
-p =ggplot(alpha_obs.dtf.long) + geom_histogram(aes(x= value, y = ..density..), fill = 'white') + facet_grid(~parameter) + geom_density(aes(x = predict_exp))
-fit_exp = fitdistr(alpha_obs.dtf.long$value, "exponential")
-predict_exp = rexp(6643, rate = fit_exp$estimate)
-
-ggsave(filename = "figures/dispersion_exponential_distib.png", plot = p, width = 8, height = 6)
-
-```
-
-
-## gene segemntation
-
-
-```{r}
-dds.extraction$beta %>% dim()
-kmean.res = kmeans(dds.extraction$beta[,c(2,3)], 3)
-#kmean.res$cluster
-
-
-#dds.extraction.beta.flip = dds.extraction$beta %>% rownames_to_column()  %>%
-#      reshape2::melt(.,  value.name = "value", variable.name= "type")  %>%
-#              reshape2::dcast(., type  ~ rowname, value.var = "value") %>% column_to_rownames(., "type")
-
-pca.obj= prcomp(dds.extraction$beta[,c(2,3)])
-
-
-res = summary(pca.obj)
-library(kableExtra)
-res$importance[,1:2] %>% 
-  kbl(., caption = "Table: Variance explained per Principal Component", position = "bottom", align = 'c') %>% 
-  kable_styling(full_width = F)
-
-
-dtp <- data.frame( 'cluster' = kmean.res$cluster ,
-                  pca.obj$x[,1:2]) # the first two components are selected (NB: you can also select 3 for 3D plotting or 3+)
-
-
-dtp$cluster <- factor(dtp$cluster)
-## Plot
-P1 <- ggplot(data = dtp) + 
-       geom_point(aes(x = PC1, y = PC2, 
-                      col = cluster),
-                      size =3) + 
-       theme_minimal() 
-P1 
-
-ggsave("../results/figures/ACP_kmeans.png", P1)
-
-dtf = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster ) %>%
-      reshape2::melt(. ,id=c("cluster"), value.name = "value", variable.name= "type") 
-dtf$cluster <- factor(dtf$cluster)
-
-
-p = ggplot(dtf) + geom_density(aes(x = value, fill = cluster ), alpha = 0.4) + facet_grid(~ type, scales = "free")
-p
-ggsave("../results/figures/densityBeta_kmeans.png", p)
-
-```
-
-
-### test
-
-
-
-```{r}
-
-
-y = kij.simulated[1,]
-genotype = design2simulate$design2simulate$genotype
-environment = design2simulate$design2simulate$environment
-df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame()
-rownames(df_gene_i) <- NULL
-#print(i)
-fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = df_gene_i, link = log)
-library(aod)
-  
-wald.test.res = wald.test(Sigma = vcov(fit), b = coef(fit), Terms = 1, H0 = 0.40)# %>% .[3]
-wald.test.pvalue = wald.test.res$result$chi2 %>% data.frame() %>% .[3,]
-
-
-                                            
-
-
-                                                
-test_wald(fit, 0.3)
-coef(fit) %>% abs %>% which.min() 
-coef(fit) %>% abs %>% min()
-coef(fit) %>% length()
-
-pnorm(summary(m)$coefficients[2,3], lower.tail = F)
-
-res = coef(summary(fit))[,c(1,4)] %>%
-              data.frame() %>%
-              dplyr::rename(., pval = "Pr...z.." , Inference = "Estimate") %>%
-              dplyr::mutate(beta = stringr::str_remove_all(rownames(.), "[//(//)]")) %>%
-              dplyr::mutate(beta = stringr::str_replace(beta, ":", "."))
-
-
-test_wald <- function(model.res, term, threshold){
-  wald.test.res = wald.test(Sigma = vcov(model.res), b = coef(model.res), Terms = term, H0 = threshold)
-  wald.test.pvalue = wald.test.res$result$chi2 %>% data.frame() %>% .[3,]
-  return(wald.test.pvalue)
-}
-
-
-glmglrt::p_value.glm(fit)
-
-list_pval = 1:length(coef(fit)) %>% map(.x = .,  ~ test_wald(fit,.x , 0)) %>% unlist()
-list_pval.adj = p.adjust(list_pval, method = 'fdr')  
-
-glmglrt::p_value_contrast(fit, contrast = x ,  alternative ="two.sided", method = "Wald", H0 = 0)
-
-coef(fit %>% summary)
-res %>% mutate(p.adj = list_pval.adj )
-
-
-
-## news
-coef(fit %>% summary)[2,]
-
-x = c(rep(0, length(coef(fit))-1),1)
-
-
-test_wald2 <- function(model.res, term, threshold, initvec){
-  constrast_vec = replace(initvec, term , 1)
-  wald.test.pvalue = glmglrt::p_value_contrast(fit, contrast = constrast_vec ,  alternative ="two.sided", method = "Wald", H0 = threshold)
-  return(wald.test.pvalue %>% as.numeric())
-}
-
-
-kij.simulated %>% dim()
-design2simulate$model_matrix %>% dim()
-
-vecofzero = rep(0, length(coef(fit)))
-test_wald
-
-list_pval = 1:length(coef(fit)) %>% furrr::future_map(.x = .,  ~ test_wald2(model.res = fit, term= .x ,   threshold = 0.5, initvec = vecofzero) , print("hd") ) %>% unlist()
-
-
-which.min(coef(fit))
-library(aod)
-
-
-coef(fit)
-
-
-
-WaldTest = function(L,thetahat,Vn,h=0) # H0: L theta = h
- # Note Vn is the asymptotic covariance matrix, so it's the
-# Consistent estimator divided by n. For true Wald tests 
-# based on numerical MLEs, just use the inverse of the Hessian. 
-{
-WaldTest = numeric(3)
-names(WaldTest) = c("W","df","p-value")
-r = dim(L)[1]
-W = t(L%*%thetahat-h) %*% solve(L%*%Vn%*%t(L)) %*% (L%*%thetahat-h)
-W = as.numeric(W)
-pval = 1-pchisq(W,r)
-WaldTest[1] = W; WaldTest[2] = r; WaldTest[3] = pval
-WaldTest
-} # End function WaldTest
-
-WaldTest(coef(fit),thetahat,vcov(fit))
-p <- 1 - pchisq(0.3, df = 1)
-
-
-which.min(coef(fit))
-coef(fit)[41]
-
-
-wald.test.res = wald.test(b = abs(coef(fit)), Sigma = vcov(fit), Terms = 41, H0 = 1) 
-stat.chi2  = wald.test.res$result$chi2[1] %>% as.numeric()
-pchisq(stat.chi2, df = 1, lower.tail = T)
-
-
-
-initvec = rep(0, length(coef(fit)))
-x = replace(initvec, 41 , 1)
-
-
-
-
-```
-
-
-
-
-## mvronrom
-
-```{r}
-getBetaforSimulation2 <- function(n_genes = 100, n_genotypes = 20, n_environments = 2, beta.dtf, theta = 10 ){
-
-  x = beta.dtf %>% as.matrix()
-  fit.mvrnorm <- Rfast::mvnorm.mle(x)
-  print(fit.mvrnorm$mu)
-  x <- NULL
-  print('base')
-  print(diag(fit.mvrnorm$sigma))
-  print(MASS::mvrnorm(n = n_genes,
-                                 mu = fit.mvrnorm$mu,
-                                 Sigma = fit.mvrnorm$sigma ) %>% data.frame() %>% summarise_if(is.numeric, sd))
-  print("modif")
-  diag(fit.mvrnorm$sigma) <- diag(fit.mvrnorm$sigma) + 10
-  beta.matrix.tmp <- MASS::mvrnorm(n = n_genes,
-                                 mu = fit.mvrnorm$mu,
-                                 Sigma = fit.mvrnorm$sigma )
-
-  print(fit.mvrnorm$sigma) 
-  print(diag(fit.mvrnorm$sigma))
-  print(beta.matrix.tmp %>% cov())
-  replicate_beta <- function(beta_vec, n, theta){
-    beta_vec.rep = rep(beta_vec, n)
-    beta_vec.rep + rnorm(length(beta_vec.rep), mean = 0, sd = abs(beta_vec/theta))
-  }
-
-  beta0 = beta.matrix.tmp[,1]
-  beta.matrix.tmp = purrr::map2(.x = c(2,3,4), .y =  c(n_genotypes-1,
-                                              n_environments-1,
-                                              (n_genotypes-1)*(n_environments-1)),
-                            ~ replicate_beta(beta.matrix.tmp[,.x], .y, theta) %>% matrix(ncol = .y)) %>%
-                    do.call(cbind, .)
-
-
-
-  beta.matrix = cbind(beta0, beta.matrix.tmp)
-  betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "")
-  betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "")
-  betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
-  matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
-
-  colnames(beta.matrix) = matrix.colnames
-  rownames(beta.matrix) = base::paste("gene", 1:(n_genes), sep = "")
-
-  return(beta.matrix)
-}
-
-beta.input2 = getBetaforSimulation2(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-```
-
-
-```{r}
-
-# set up the custom data simulation function
-n_subj = 100 # number of subjects
-n_ingroup = 25 # number of ingroup stimuli
-n_outgroup = 25 # number of outgroup stimuli
-beta_0 = 800 # grand mean
-beta_1 = 50 # effect of category
-omega_0 = 80 # by-item random intercept sd
-tau_0 = 100 # by-subject random intercept sd
-tau_1 = 40 # by-subject random slope sd
-rho = 0.2 # correlation between intercept and slope
-sigma = 200 
-
-  items <- data.frame(
-      item_id = seq_len(n_ingroup + n_outgroup),
-      category = rep(c("ingroup", "outgroup"), c(n_ingroup, n_outgroup)),
-      X_i = rep(c(-0.5, 0.5), c(n_ingroup, n_outgroup)),
-      O_0i = rnorm(n = n_ingroup + n_outgroup, mean = 0, sd = omega_0))
-  items    
-  # variance-covariance matrix
-  cov_mx <- matrix(
-      c(tau_0^2, rho * tau_0 * tau_1,
-        rho * tau_0 * tau_1, tau_1^2 ),
-        nrow = 2, byrow = TRUE)
-  
-      subjects <- data.frame(subj_id = seq_len(n_subj),
-      
-                              MASS::mvrnorm(n = n_subj,
-                                mu = c(T_0s = 0, T_1s = 0),
-                                  Sigma = cov_mx))
-      
-      crossing(subjects, items) %>%
-            mutate(e_si = rnorm(nrow(.), mean = 0, sd = sigma),
-                RT = beta_0 + T_0s + O_0i + (beta_1 + T_1s) * X_i + e_si) %>%
-                  select(subj_id, item_id, category, X_i, RT)
-
-
-
-
-my_sim_data()
-```
-
-
-```{r}
-get_beta_gene_i <- function(fit.mvnorm, n_G, n_E){
-  beta.matrix.tmp <- MASS::mvrnorm(n = (n_E-1)*(n_G-1),
-                                 mu = fit.mvnorm$mu,
-                                 Sigma = fit.mvnorm$sigma )
-  
-  beta0 = beta.matrix.tmp[1,1] %>% unname()
-  betaG = beta.matrix.tmp[1:(n_G-1),2]
-  betaE = beta.matrix.tmp[1:(n_E-1),3]
-  betaGE = beta.matrix.tmp[,4]
-  
-  ### name
-  betaG.colnames = base::paste("genotype", "G", 1:(n_G-1), sep = "")
-  betaE.colnames = base::paste("environment", "E", 1:(n_E-1), sep = "")
-  betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
-  matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
-  
-  beta_gene_i <- c( beta0, betaG, betaE, betaGE)
-  names(beta_gene_i) = matrix.colnames
-
-  return(beta_gene_i)
-}
-
-
-n_g = 1
-n_group_gene = 3
-n_E = 2
-n_G = 400
-
-
-dds.extraction$beta 
-kmean.res = kmeans(dds.extraction$beta[,c(2,3)], n_group_gene)
-
-
-beta.cluster1= dds.extraction$beta[kmean.res$cluster == 1,]
-beta.cluster2= dds.extraction$beta[kmean.res$cluster == 2,]
-beta.cluster3= dds.extraction$beta[kmean.res$cluster == 3,]
-
-
-x = beta.cluster2 %>% as.matrix()
-fit.mvrnorm <- Rfast::mvnorm.mle(x)
-fit.mvrnorm$sigma[c(2,4),c(2,4)] = fit.mvrnorm$sigma[c(2,4),c(2,4)]*3
-#diag(fit.mvrnorm$sigma) <- diag(fit.mvrnorm$sigma) 
-
-#### BETAG.E #####
-fit.mvrnorm$mu[4] = 0
-fit.mvrnorm$sigma[,4] = 0
-fit.mvrnorm$sigma[4,] = 0
-#################
-#fit.mvrnorm$mu[0] = 30
-
-#### BETA0 #####
-fit.mvrnorm$mu[1] = 0
-fit.mvrnorm$sigma[,1] = 0
-fit.mvrnorm$sigma[1,] = 0
-#################
-
-#### BETAE #####
-fit.mvrnorm$mu[3] = 0
-fit.mvrnorm$sigma[,3] = 0
-fit.mvrnorm$sigma[3,] = 0
-
-#fit.mvrnorm$mu[2] = 8
-#fit.mvrnorm$sigma[2,2]= 6
-x <- NULL
-
-
-n_gene = n_g
-a = purrr::map(.x = 1:n_gene, ~ get_beta_gene_i(fit.mvrnorm, n_G, n_E)) 
-beta.matrix = do.call(rbind, a) 
-rownames(beta.matrix) = base::paste("gene", 1:(n_g), sep = "")
-
-
-
-x = beta.matrix %>% data.frame() %>% #%>% select(!starts_with("environment")) %>%
-  rownames_to_column('gene_id') %>% reshape2::melt(., id = "gene_id") %>% dplyr::mutate(type = dplyr::case_when(
-      str_detect(variable, "genotypeG\\d+\\.environment") ~ "GxE",
-      str_detect(variable, "genotypeG\\d+$") ~ "G",
-      str_detect(variable, "environmentE\\d+$") ~ "E",
-      str_detect(variable, "Intercept$") ~ "Intercept")) %>%  reshape2::dcast(., gene_id  ~ type, value.var = "value", fun.aggregate = list)
-
-
-g = x$G
-names(g) <- x$gene_id
-df_tmp2 = data.frame(g) %>% mutate(type = 'betaG') %>% reshape2::melt( id = "type", value.name = "betaG")
-g = x$GxE
-
-names(g) <- x$gene_id
-df_tmp3 = data.frame(g) %>% mutate(type = 'GxE') %>% reshape2::melt( id = "type", value.name = "betaGxE")
-
-
-df = cbind(df_tmp2, df_tmp3) %>% dplyr::select(c(betaGxE, betaG)) %>% mutate(from = 'Simulated') %>% mutate(cluster = 3)
-df_simu = df
-df_simu = rbind(df_simu, df)
-
-
-df_simu$cluster <- factor(df_simu$cluster)
-ggplot(df_simu) + geom_point(aes(x = betaG, y = betaGxE, col = cluster), alpha = 0.2)
-
-```
-
-```{r}
-
-### Visualisation
-dtf = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster ) %>%
-      reshape2::melt(. ,id=c("cluster"), value.name = "value", variable.name= "type") 
-dtf$cluster <- factor(dtf$cluster)
-
-
-p = ggplot(dtf) + geom_density(aes(x = value, fill = cluster ), alpha = 0.4) + facet_grid(~ type, scales = "free")
-
-p
-ggsave("figures/densityBeta_kmeans.png", p)
-
-
-betas = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster) %>% mutate(from = "Actual")
-betas$cluster = factor(betas$cluster)
-df2 <- rbind(betas %>% dplyr::select(betaG, betaGE, from, cluster), df_simu %>% rename(betaGE = "betaGxE"))
-p = ggplot(df2) + geom_point(aes(x= betaG, betaGE, col = cluster), alpha = 0.1) + facet_grid(~from)
-
-p
-ggsave("figures/scatterplot_clustering_covIncreased.png", p, width = 10)
-
-```
-
-```{r}
-
-beta.input = beta.matrix
-n_genes = n_g
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 15)
-#rowSums(beta.input) %>% which.max()
-#rowSums(beta.input) %>% max()
-log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-
-
-gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-
-
-#2* 2^log_qij
-mu_ij = getMu_ij(log_qij, 1)
-#max(mu_ij)
-kij.simulated = getK_ij(mu_ij, gene_dispersion)
-
-```
-
-
-## Mixte model
-
-
-```{r}
-library(lme4)
-kij.simulated %>% dim()
-df_2glmmm <- function(y , design2simulate, i){
-  genotype = design2simulate$design2simulate$genotype
-  environment = design2simulate$design2simulate$environment
-  #message("Fitting model ...")
-
-  df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame() %>% mutate(inter = paste(environment, genotype, sep = '_'))
-  df_gene_i$inter <- factor( df_gene_i$inter )
-  rownames(df_gene_i) <- NULL
-  df_gene_i = df_gene_i %>% mutate(gene_id = paste("gene", i, sep = ""))
-  return(df_gene_i)
-}
-
-
-library(broom.mixed)
-overdisp_fun <- function(model) {
-    rdf <- df.residual(model)
-    rp <- residuals(model,type="pearson")
-    Pearson.chisq <- sum(rp^2)
-    prat <- Pearson.chisq/rdf
-    pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
-    c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
-}
-
-tidy_quasi <- function(model, phi=overdisp_fun(model)["ratio"], conf.level=0.95) {
-    tt <- (tidy(model, effects="ran_vals") %>% 
-              mutate(std.error=std.error*sqrt(phi),
-                   statistic=estimate/std.error,
-                   p.value=2*pnorm(abs(statistic), lower.tail=FALSE))
-    )
-    return(tt)
-}
-
-```
-
-```{r}
-
-beta.matrix.long = beta.matrix %>% data.frame() %>% rownames_to_column('gene_id') %>% reshape2::melt(.,id= "gene_id",  value.name = "value", variable.name= "beta")
-beta.matrix.long = beta.matrix.long %>% filter(gene_id == "gene1")  %>% dplyr::mutate(type = dplyr::case_when(
-      str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
-      str_detect(beta, "genotypeG\\d+$") ~ "G",
-      str_detect(beta, "environmentE\\d+$") ~ "E",
-      str_detect(beta, "Intercept$") ~ "Intercept"))
-
-beta.long.actual = beta.matrix.long %>% filter(type == "G") %>% mutate(from = "actual") %>% dplyr::select(-gene_id)
-
-```
-
-
-
-```{r}
-library(lme4)
-f = map(1:1, ~df_2glmmm(kij.simulated[1,], design2simulate, .x) )
-n = do.call(rbind, f)
-n2 = n %>% mutate(group = paste(gene_id, genotype, sep = "_"))
-data_glmm = n
-m.nb <- glmer.nb(y ~ environment +  ( 1 | genotype ) , data=data_glmm, verbose=TRUE)
-
-summary(m.nb)
-tidy(m.nb)
-fit.mvrnorm$sigma
-fit.mvrnorm$mu
-beta.input[1,]
-tidy_quasi(m.nb)
-mean(res.glmm$value)
-sd(res.glmm$value)
-sd((ranef(m.nb)$genotype[,1]))
-
-plot(density(coef(m.nb)$genotype[,1]))
-
-res.glmm = tidy_quasi(m.nb) %>% dplyr::select(estimate, level)  %>% 
-      dplyr::mutate(beta = str_replace(level, "G", "genotypeG")) %>% rename(c("type" = level, 'value' =  estimate))
-
-B0 = tidy(m.nb, effects = 'fixed') %>% .$estimate %>% .[1]
-
-res.glmm$value = res.glmm$value + B0
-coef(m.nb)
-
-## TIPS
-tidy_quasi(m.nb) %>% rbind(tidy(m.nb, effects = 'fixed') %>% mutate(group ="NA") %>% mutate(level = 'NA'))
-tidy(m.nb)
-
-```
-
-
-```{r}
-res.glmm = res.glmm %>% mutate(from = 'infered')
-beta.long.actual$value = beta.long.actual$value + beta.matrix %>% data.frame() %>% .$Intercept
-#beta.long.actual$value = beta.long.actual$value*log(2)
-df_distrib = rbind(beta.long.actual, res.glmm)
-
-
-B0 = tidy(m.nb, effects = 'fixed') %>% .$estimate %>% .[1]
-stadd = tidy(m.nb)  %>% .$estimate %>% .[2]
-rnorm_df = list(beta = df_distrib$beta %>% unique(), value = rnorm(n = 100000, mean = B0, sd = stadd  ), type = "G", from = paste('rnorm(', B0 %>% signif(2),",", stadd %>% signif(2), ')', sep = '')) %>% data.frame()
-
-df_distrib = rbind(df_distrib, rnorm_df)
-ggplot(df_distrib) + geom_density(aes(x = value, col = from ), alpha = 0.5)
-
-
-df_distrib.short = df_distrib %>% filter(from %in% c("actual", 'infered')) %>% reshape2::dcast(., beta  ~ from, value.var = "value")
-ggplot(df_distrib.short) + geom_point(aes(x = actual, y = infered )) + geom_abline(slope=1, intercept=0)#
-
-
-fit.mvrnorm
-
-```
-
-```{r}
-
-beta0.input = beta.input[,1]
-######## beta G
-mean_betaG = beta.input[,2:n_G] %>% mean()
-mean.input = beta0.input + mean_betaG
-sd_betaG = beta.input[,2:n_G] %>% sd()
-######## beta E
-mean_betaE = beta.input[,(n_G+1):(n_G+n_E-1)] %>% mean()
-
-
-rnorm_df_input = list(beta = df_distrib$beta %>% unique(), value = rnorm(n = 100000, mean = mean.input, sd = sd_betaG  ), type = "G", from = "Actual") %>% data.frame()
-
-df_distrib = rbind(rnorm_df_input, rnorm_df)
-ggplot(df_distrib) + geom_density(aes(x = value, col = from ), alpha = 0.5)
-
-```
-
-
-### GLMTMBB
-
-```{r}
-library(glmmTMB)
-
-glmmTMB::glmmTMB(y ~ environment +  ( genotype | genotype ) , data=data_glmm,
-  family=nbinom2, verbose = T)
-
-```
diff --git a/results/v2/2022-07-26_replicatesEffect.Rmd b/results/v2/2022-07-26_replicatesEffect.Rmd
deleted file mode 100644
index 58903187bec33dacde02e1cfbe5954df98c6f0af..0000000000000000000000000000000000000000
--- a/results/v2/2022-07-26_replicatesEffect.Rmd
+++ /dev/null
@@ -1,278 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-07-26'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Simulation without ru_rm_5 - all genes
-
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-
-
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-
-## Replicates effect
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-#### params #####
-n_G = 50
-n_E = 2
-n_genes = 40
-
-##################
-
-
-## Get beta for simulation
-beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-
-rm(df_comparison)
-## Replicates effect
-rep_vector = c(15, 50, 75)
-
-for (n_rep in rep_vector ){
-    print(n_rep)
-  
-    design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = n_rep)
-
-    log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-    #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-    
-    
-    gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                                      n_genotypes = n_G, 
-                                                      n_environments = n_E, 
-                                                      dispersion.vec = dds.extraction$gene_dispersion, 
-                                                      model_matrix = design2simulate$model_matrix, 
-                                                      dispUniform_btweenCondition = T)
-    
-    
-    #2* 2^log_qij
-    mu_ij = getMu_ij(log_qij, 1)
-    #max(mu_ij)
-    kij.simulated = getK_ij(mu_ij, gene_dispersion)
-    
-    kij.simulated = kij.simulated[kij.simulated %>% rowSums() < 100000000,]
-    kij.simulated = kij.simulated + 1
-    idx = which(kij.simulated %>% rowSums() < 100000000)
-    beta.input = beta.input[idx,] 
-
-    dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-    
-    if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
-      df_tmp = getDfComparison(dds_simu = dds_simu , model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-      df_tmp = df_tmp %>% dplyr::mutate(p.adjusted = if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-      df_tmp = df_tmp %>% dplyr::mutate(n_replicates = n_rep )
-      df_comparison = rbind(df_comparison, df_tmp)
-    }
-    else{
-    df_comparison = getDfComparison(dds_simu = dds_simu , model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-    df_comparison = df_comparison %>% dplyr::mutate(p.adjusted = if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-    df_comparison = df_comparison %>% dplyr::mutate(n_replicates = n_rep )
-    }
-  
-  
-}
-
-
-
-df_comparison$p.adjusted <- factor(df_comparison$p.adjusted, levels = c( ">0.05", "<0.05"))
-
-p <- ggplot(df_comparison) + geom_point(aes(x=Actual, y = Inference, col = p.adjusted , fill = p.adjusted),alpha =0.2)+ geom_abline(intercept = 0, slope = 1) + facet_grid(type~n_replicates, scales = "free") + 
-        theme(strip.text.x = element_text(size = 20), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13))
-
-p
-ggsave(filename = "figures/nRepEffect_beta-comp_50genotypes.png", plot = p, width = 20, height = 12)
-
-p <- ggplot(df_comparison) + geom_density(aes(x = Inference, fill = p.adjusted, col = p.adjusted), alpha = 0.3) + facet_grid(type~n_replicates, scales = "free") +
-        theme(strip.text.x = element_text(size = 20), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13))
-p
-ggsave(filename = "figures/nRepEffect_beta-density_50genotypes.png", plot = p, width = 20, height = 12) 
-
-
-
-df_tally_signifGenes = df_comparison %>% 
-                  select(c(beta, n_replicates, p.adjusted)) %>% 
-                  group_by(beta, n_replicates, p.adjusted) %>% 
-                  tally() %>%
-                  dplyr::filter(p.adjusted == "<0.05") 
-                  
-
-#df_tally_signifGenes$n_replicates <- factor(df_tally_signifGenes$n_replicates,levels = rep_vector )
-p <- ggplot(df_tally_signifGenes) + 
-        geom_point(aes(x = n_replicates, y = n), size = 5) + 
-    facet_grid(type~beta) + 
-  theme(strip.text.x = element_text(size = 20), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13))
-
-#p
-ggsave(filename = "figures/nRepEffect_nSignifGenes_50genotypes.png", plot = p, width = 12, height = 6)
-
-
-```
-
-
-
-## GLM
-
-
-```{r}
-
-#### params #####
-n_G = 50
-n_E = 2
-n_genes = 40
-
-##################
-
-
-## Get beta for simulation
-beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-
-rm(df_comparison)
-## Replicates effect
-rep_vector = c(15, 50, 75)
-
-for (n_rep in rep_vector ){
-    print(n_rep)
-
-
-
-    
-    design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = n_rep)
-    design2simulate$model_matrix %>% dim()
-    log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-    #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-    
-    
-    gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                                n_genotypes = n_G, 
-                                                n_environments = n_E, 
-                                                dispersion.vec = dds.extraction$gene_dispersion, 
-                                                model_matrix = design2simulate$model_matrix, 
-                                                          dispUniform_btweenCondition = F)
-        
-        
-    mu_ij = getMu_ij(log_qij, 1)
-    
-    kij.simulated = getK_ij(mu_ij, gene_dispersion)
-    
-    
-    
-    plan(multisession, workers = 4)
-    a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
-                                                  .x, 
-                                                  design2simulate$design2simulate) )
-    
-    c = do.call(rbind, a)
-    beta.input.long = beta.input %>% data.frame() %>%
-          tibble::rownames_to_column(., var = "gene_id") %>%
-          dplyr::mutate(origin = "Actual") %>%
-          reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-          dplyr::rename(Actual = "value") %>% 
-          dplyr::select(-origin)
-    
-    df_tmp = merge(c, beta.input.long) %>% mutate(Nreplicates = n_rep)
-    
-    if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
-        df_comparison = rbind(df_comparison, df_tmp)
- 
-    }
-    else{
-        df_comparison = df_tmp
-    }
-          
-}
-
-
-df_comparison$type <- factor(df_comparison$type, levels = c("Intercept", "G", "E", "GxE"))
-
-p = ggplot(df_comparison %>% filter(abs(Inference) < 100)) + geom_point(aes(x=Actual, y = Inference ),alpha =0.2)+ geom_abline(intercept = 0, slope = 1) + facet_grid(type~Nreplicates, scales = "free") + 
-        theme(strip.text.x = element_text(size = 20), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13))
-p
-ggsave(filename = "figures/GLM_nGenoEffect_beta-comp_50geno.png", plot = p, width = 20, height = 12)
-
-
-```
diff --git a/results/v2/2022-07-28_sizeFactorEffect.Rmd b/results/v2/2022-07-28_sizeFactorEffect.Rmd
deleted file mode 100644
index a347a9121c5aed9a136bf2481c05d8221361a902..0000000000000000000000000000000000000000
--- a/results/v2/2022-07-28_sizeFactorEffect.Rmd
+++ /dev/null
@@ -1,183 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-07-26'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Simulation without ru_rm_5 - all genes
-
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-
-
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-
-
-## Size factor effect
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-#### params #####
-n_G = 2
-n_E = 2
-n_genes = 6000
-
-##################
-
-
-## Get beta for simulation
-#beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-rm(df_comparison)
-
-## Replicates effect
-sizeFac_vector = c(0.01, 0.1, 0.5, 1, 2, 3)
-
-listLevel = c()
-for (sj in sizeFac_vector ){
-    print(sj)
-  
-    design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 5)
-
-    log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-    #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-    
-    
-    gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                                      n_genotypes = n_G, 
-                                                      n_environments = n_E, 
-                                                      dispersion.vec = dds.extraction$gene_dispersion, 
-                                                      model_matrix = design2simulate$model_matrix, 
-                                                      dispUniform_btweenCondition = T)
-    
-    
-    #2* 2^log_qij
-    mu_ij = getMu_ij(log_qij, sj)
-    #max(mu_ij)
-    kij.simulated = getK_ij(mu_ij, gene_dispersion)
-    
-    dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-    
-    mean_readsPerSample = kij.simulated %>% colSums() %>% mean() %>% round() %>% format(., scientific = FALSE, big.mark = ',' )
-    
-    listLevel = c(listLevel, mean_readsPerSample)
-    
-    if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
-      df_tmp = getDfComparison(dds_simu = dds_simu , model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-      df_tmp = df_tmp %>% dplyr::mutate(p.adjusted = if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-      df_tmp = df_tmp %>% dplyr::mutate(readsPerSample.mean = mean_readsPerSample  )
-      df_comparison = rbind(df_comparison, df_tmp)
-    }
-    else{
-    df_comparison = getDfComparison(dds_simu = dds_simu , model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-    df_comparison = df_comparison %>% dplyr::mutate(p.adjusted = if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-    df_comparison = df_comparison %>% dplyr::mutate(readsPerSample.mean = mean_readsPerSample )
-    }
-  
-  
-}
-
-#kij.simulated %>% colSums() %>% mean()
-
-
-df_comparison$p.adjusted <- factor(df_comparison$p.adjusted, levels = c( ">0.05", "<0.05"))
-df_comparison$readsPerSample.mean <- factor(df_comparison$readsPerSample.mean, levels = listLevel)
-
-p <-ggplot(df_comparison) + 
-  geom_point(aes(x=Actual, y = Inference, col = p.adjusted , fill = p.adjusted),alpha =0.2)+   geom_abline(intercept = 0, slope = 1) + facet_grid(beta~readsPerSample.mean, scales = "free") + 
-        theme(strip.text.x = element_text(size = 20), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13))
-ggsave(filename = "figures/sizeFactorEffect_beta-comp.png", plot = p, width = 20, height = 12)
-
-
-p <- ggplot(df_comparison) + 
-  geom_density(aes(x = Inference, fill = p.adjusted, col = p.adjusted), alpha = 0.3) + facet_grid(beta~readsPerSample.mean, scales = "free")  +
-        theme(strip.text.x = element_text(size = 20), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13))
-ggsave(filename = "figures/sizeFactorEffect_beta-density.png", plot = p, width = 20, height = 12)
-
-
- df_tally_signifGenes = df_comparison %>% select(c(beta, p.adjusted, readsPerSample.mean)) %>% 
-                  group_by(beta, p.adjusted, readsPerSample.mean) %>% 
-                  tally() %>%
-                  dplyr::filter(p.adjusted == "<0.05")
-
-p<- ggplot(df_tally_signifGenes) + 
-        geom_point(aes(x = readsPerSample.mean, y = n), size = 4) + 
-    facet_grid(~beta) + 
-  theme(strip.text.x = element_text(size = 15), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13, angle=45, hjust = 1))
-
-p
-ggsave(filename = "figures/sizeFactorEffect_nSignifGenes.png", plot = p, width = 12, height = 6)
-
-```
diff --git a/results/v2/2022-07-29_dispersionEffect.Rmd b/results/v2/2022-07-29_dispersionEffect.Rmd
deleted file mode 100644
index fb828a3414c65f6b38c54535b3979b4e975050a3..0000000000000000000000000000000000000000
--- a/results/v2/2022-07-29_dispersionEffect.Rmd
+++ /dev/null
@@ -1,139 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-07-26'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Simulation without ru_rm_5 - all genes
-
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-## Uniform dispersion
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-#### params #####
-n_G = 2
-n_E = 2
-n_genes = 6000
-##################
-
-
-## Get beta for simulation
-beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-rm(df_comparison)
-
-## Replicates effect
-bool_vector = c(TRUE, FALSE)
-
-#listLevel = c()
-for (logik in bool_vector ){
-    print(logik)
-  
-    design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 5)
-
-    log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-    #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-    
-    
-    gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                            n_genotypes = n_G, 
-                                            n_environments = n_E, 
-                                            dispersion.vec = dds.extraction$gene_dispersion, 
-                                            model_matrix = design2simulate$model_matrix, 
-                                                      dispUniform_btweenCondition = logik)
-    
-    
-    #2* 2^log_qij
-    mu_ij = getMu_ij(log_qij, 2)
-    #max(mu_ij)
-    kij.simulated = getK_ij(mu_ij, gene_dispersion)
-    
-    dds_simu = run.deseq(tabl_cnts = kij.simulated , 
-                        bioDesign = design2simulate$design2simulate )
-
-    str_logik = ifelse(logik == TRUE,  "Uniform dispersion" , "Non-uniform dispersion" )
-
-    if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
-      df_tmp = getDfComparison(dds_simu = dds_simu , 
-                               model_matrix = design2simulate$model_matrix, 
-                               beta.actual.matrix = beta.input)
-      
-      df_tmp = df_tmp %>% dplyr::mutate(p.adjusted = if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-      df_tmp = df_tmp %>% dplyr::mutate(dispUniform_btweenCondition = str_logik  )
-      df_comparison = rbind(df_comparison, df_tmp)
-    }
-    else{
-    df_comparison = getDfComparison(dds_simu = dds_simu , 
-                    model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-    df_comparison = df_comparison %>% dplyr::mutate(p.adjusted = 
-                                                if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-    df_comparison = df_comparison %>% dplyr::mutate(dispUniform_btweenCondition = str_logik )
-    }
-  
-  
-}
-
-#kij.simulated %>% colSums() %>% mean()
-
-
-df_comparison$p.adjusted <- factor(df_comparison$p.adjusted, levels = c( ">0.05", "<0.05"))
-
-
-ggplot(df_comparison) + geom_point(aes(x=Actual, y = Inference, col = p.adjusted , fill = p.adjusted),alpha =0.2)+ geom_abline(intercept = 0, slope = 1) + facet_grid(beta~dispUniform_btweenCondition, scales = "free")
-
-ggplot(df_comparison) + geom_density(aes(x = Inference, fill = p.adjusted, col = p.adjusted), alpha = 0.3) + facet_grid(beta~dispUniform_btweenCondition, scales = "free")
-
-
-
-df_tally_signifGenes = df_comparison %>% 
-                  select(c(beta, p.adjusted, dispUniform_btweenCondition)) %>% 
-                  group_by(beta, p.adjusted, dispUniform_btweenCondition) %>% 
-                  tally() %>%
-                  dplyr::filter(p.adjusted == "<0.05")
-
-ggplot(df_tally_signifGenes) + 
-        geom_point(aes(x = dispUniform_btweenCondition, y = n)) + 
-    facet_grid(~beta)
-
-ggplot(df_comparison) + geom_point(aes(x=p.adjusted, y =  Inference - Actual,fill = p.adjusted ,col = p.adjusted), alpha = 0.4) +  facet_grid(beta~dispUniform_btweenCondition, scales = "free")
-
-```
diff --git a/results/v2/2022-08-01_NgenesEffect.Rmd b/results/v2/2022-08-01_NgenesEffect.Rmd
deleted file mode 100644
index 6abc427bd2c523db88721af4020207ef3e311176..0000000000000000000000000000000000000000
--- a/results/v2/2022-08-01_NgenesEffect.Rmd
+++ /dev/null
@@ -1,89 +0,0 @@
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-#### params #####
-n_G = 2
-n_E = 2
-n_genes = 30000
-##################
-
-
-## Get beta for simulation
-beta.inputFull = getBetaforSimulation(n_genes = 30000, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-rm(df_comparison)
-
-## Replicates effect
-nGene_vector = c(100, 500, 2000, 6000, 10000, 20000, 30000)
-
-#listLevel = c()
-for (ngene in nGene_vector ){
-    print(ngene)
-    beta.input = beta.inputFull[1:ngene,]
-  
-    design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 5)
-
-    log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-    #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-    
-    
-    gene_dispersion = getGenesDispersionsForSimulation(n_genes = ngene, 
-                                            n_genotypes = n_G, 
-                                            n_environments = n_E, 
-                                            dispersion.vec = dds.extraction$gene_dispersion, 
-                                            model_matrix = design2simulate$model_matrix, 
-                                                      dispUniform_btweenCondition = TRUE)
-    
-    
-    #2* 2^log_qij
-    mu_ij = getMu_ij(log_qij, 2)
-    #max(mu_ij)
-    kij.simulated = getK_ij(mu_ij, gene_dispersion)
-    
-    dds_simu = run.deseq(tabl_cnts = kij.simulated , 
-                        bioDesign = design2simulate$design2simulate )
-
-
-    if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
-      df_tmp = getDfComparison(dds_simu = dds_simu , 
-                               model_matrix = design2simulate$model_matrix, 
-                               beta.actual.matrix = beta.input)
-      
-      df_tmp = df_tmp %>% dplyr::mutate(p.adjusted = if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-      df_tmp = df_tmp %>% dplyr::mutate(nGenes = ngene  )
-      df_comparison = rbind(df_comparison, df_tmp)
-    }
-    else{
-    df_comparison = getDfComparison(dds_simu = dds_simu , 
-                    model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-    df_comparison = df_comparison %>% dplyr::mutate(p.adjusted = 
-                                                if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-    df_comparison = df_comparison %>% dplyr::mutate(nGenes = ngene )
-    }
-  
-  
-}
-
-#kij.simulated %>% colSums() %>% mean()
-
-
-df_comparison$p.adjusted <- factor(df_comparison$p.adjusted, levels = c( ">0.05", "<0.05"))
-
-
-ggplot(df_comparison) + geom_point(aes(x=Actual, y = Inference, col = p.adjusted , fill = p.adjusted),alpha =0.2)+ geom_abline(intercept = 0, slope = 1) + facet_grid(beta~nGenes, scales = "free")
-
-ggplot(df_comparison) + geom_density(aes(x = Inference, fill = p.adjusted, col = p.adjusted), alpha = 0.3) + facet_grid(beta~nGenes, scales = "free")
-
-
-
-df_tally_signifGenes = df_comparison %>% 
-                  dplyr::select(c(beta, p.adjusted, nGenes)) %>% 
-                  group_by(beta, p.adjusted, nGenes) %>% 
-                  tally() %>%
-                  dplyr::filter(p.adjusted == "<0.05")
-
-ggplot(df_tally_signifGenes) + 
-        geom_point(aes(x = nGenes, y = n/nGenes)) + 
-    facet_grid(~beta)
-
-ggplot(df_comparison) + geom_point(aes(x=p.adjusted, y =  Inference - Actual,fill = p.adjusted ,col = p.adjusted), alpha = 0.4) +  facet_grid(beta~nGenes, scales = "free")
-
-```
diff --git a/results/v2/2022-08-20_NgenotypesEffect.Rmd b/results/v2/2022-08-20_NgenotypesEffect.Rmd
deleted file mode 100644
index 50cf50d5adc02a05619a43c5825558c6903fd9f7..0000000000000000000000000000000000000000
--- a/results/v2/2022-08-20_NgenotypesEffect.Rmd
+++ /dev/null
@@ -1,252 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-07-26'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Simulation without ru_rm_5 - all genes
-
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-
-
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-
-beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter")
-
-alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
-                          dplyr::rename(., value = ".") %>%
-                          dplyr::mutate(parameter = "dispersion")
-
-dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long)
-
-
-
-p = ggplot(dtf.params_obs, aes(x= value)) +
-      geom_histogram(aes(y=..density..), colour="black", fill="white") + facet_grid(~parameter)+
-        theme(strip.text.x = element_text(size = 13),
-        axis.title = element_text(size = 5),
-        axis.text  = element_text(size = 5))
-
-p
-ggsave(filename = "figures/params_observed.png", plot = p, width = 6, height = 3) 
-
-```
-
-## N genotypes effect - GLM
-
-```{r}
-
-rm(df_comparison)
-## Replicates effect
-gen_vector = c(3, 50, 100, 500)
-
-for (n_genotype in gen_vector ){
-
-    #### params #####
-    n_G = n_genotype
-    n_E = 2
-    n_genes = 100
-    ##################
-    
-    ## Get beta for simulation
-    beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-    
-    design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 10)
-    design2simulate$model_matrix %>% dim()
-    log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-    #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-    
-    
-    gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                                n_genotypes = n_G, 
-                                                n_environments = n_E, 
-                                                dispersion.vec = dds.extraction$gene_dispersion, 
-                                                model_matrix = design2simulate$model_matrix, 
-                                                          dispUniform_btweenCondition = T)
-        
-        
-    mu_ij = getMu_ij(log_qij, 1)
-    
-    kij.simulated = getK_ij(mu_ij, gene_dispersion)
-    
-    
-    plan(multisession, workers = 4)
-    a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
-                                                  .x, 
-                                                  design2simulate$design2simulate) )
-    
-    c = do.call(rbind, a)
-    beta.input.long = beta.input %>% data.frame() %>%
-          tibble::rownames_to_column(., var = "gene_id") %>%
-          dplyr::mutate(origin = "Actual") %>%
-          reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-          dplyr::rename(Actual = "value") %>% 
-          dplyr::select(-origin)
-    
-    df_tmp = merge(c, beta.input.long) %>% mutate(Ngenotype = n_genotype)
-    
-    if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
-        df_comparison = rbind(df_comparison, df_tmp)
- 
-    }
-    else{
-        df_comparison = df_tmp
-    }
-          
-}
-
-
-df_comparison$type <- factor(df_comparison$type, levels = c("Intercept", "G", "E", "GxE"))
-
-p = ggplot(df_comparison) + geom_point(aes(x=Actual, y = Inference ),alpha =0.2)+ geom_abline(intercept = 0, slope = 1) + facet_grid(type~Ngenotype, scales = "free") + 
-        theme(strip.text.x = element_text(size = 20), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13))
-p
-ggsave(filename = "figures/GLM_nGenoEffect_beta-comp.png", plot = p, width = 20, height = 12)
-
-#########deeviance ###
-df_comparison %>% dim()
-
-infer_deviance = df_comparison %>% filter(Ngenotype == 500) %>% arrange(.,gene_id) %>% .$deviance
-names(infer_deviance) = df_comparison %>% filter(Ngenotype == 500) %>% arrange(.,gene_id) %>% .$gene_id
-
-deviance_dtf = list(deviance_infered = infer_deviance, deviance_actual = gene_dispersion[,1][names(infer_deviance)], gene_id = names(infer_deviance)) %>% data.frame()
-
-
-p = ggplot(deviance_dtf) + 
-    geom_point(aes(x=deviance_actual, y = deviance_infered ),alpha =0.2) + 
-    geom_abline(intercept = 0, slope = 1)
-      
-p
-ggsave(filename = "figures/GLM_dspersion.png", plot = p, width = 20, height = 12)
-
-
-```
-
-
-
-## N genotypes effect - DESEQ2
-
-
-```{r}
-rm(df_comparison)
-## Replicates effect
-gen_vector = c(300)
-
-for (n_genotype in gen_vector ){
-
-    #### params #####
-    n_G = n_genotype
-    n_E = 2
-    n_genes = 100
-    ##################
-    
-     ## Get beta for simulation
-    beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-    design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 10)
-    log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-    #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-    
-    
-    gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                                n_genotypes = n_G, 
-                                                n_environments = n_E, 
-                                                dispersion.vec = dds.extraction$gene_dispersion, 
-                                                model_matrix = design2simulate$model_matrix, 
-                                                          dispUniform_btweenCondition = T)
-        
-        
-    mu_ij = getMu_ij(log_qij, 1)
-    
-    kij.simulated = getK_ij(mu_ij, gene_dispersion)
-
-    dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-    if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
-      df_tmp = getDfComparison(dds_simu = dds_simu , model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-      df_tmp = df_tmp %>% dplyr::mutate(p.adjusted = if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-      df_tmp = df_tmp %>% dplyr::mutate(nGenotype = n_genotype )
-      df_comparison = rbind(df_comparison, df_tmp)
-    }
-    else{
-    df_comparison = getDfComparison(dds_simu = dds_simu , model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-    df_comparison = df_comparison %>% dplyr::mutate(p.adjusted = if_else(padj < 0.05, "<0.05", ">0.05")) %>% drop_na()
-    df_comparison = df_comparison %>% dplyr::mutate(nGenotype = n_genotype )
-    }
-  
-  
-}
-
-
-
-df_comparison$p.adjusted <- factor(df_comparison$p.adjusted, levels = c( ">0.05", "<0.05"))
-
-p <- ggplot(df_comparison) + geom_point(aes(x=Actual, y = Inference),alpha =0.2)+ geom_abline(intercept = 0, slope = 1) + facet_grid(type~nGenotype, scales = "free") + 
-        theme(strip.text.x = element_text(size = 20), 
-              strip.text.y = element_text(size = 15), 
-              legend.key.size = unit(1, 'cm'), #change legend key size
-        legend.key.height = unit(1, 'cm'), #change legend key height
-        legend.key.width = unit(1, 'cm'), #change legend key width
-        legend.title = element_text(size=20), #change legend title font size
-        legend.text = element_text(size=15), #change legend text font size)
-        axis.title = element_text(size = 15),
-        axis.text  = element_text(size = 13))
-p
-
-ggsave(filename = "figures/DESEQ_nGenoEffect_beta-comp.png", plot = p, width = 20, height = 12)
-```
diff --git a/results/v2/2022-09-01_simulationBetaDemo.Rmd b/results/v2/2022-09-01_simulationBetaDemo.Rmd
deleted file mode 100644
index bd812d29076dac146f9dd3338b982f61e93f8a59..0000000000000000000000000000000000000000
--- a/results/v2/2022-09-01_simulationBetaDemo.Rmd
+++ /dev/null
@@ -1,333 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-09-01'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Public data
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-
-## params visualization
-
-
-```{r}
-beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter")
-
-alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
-                          dplyr::rename(., value = ".") %>%
-                          dplyr::mutate(parameter = "dispersion")
-
-dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long)
-
-
-
-p = ggplot(dtf.params_obs, aes(x= value)) +
-      geom_histogram(aes(y=..density..), colour="black", fill="white") + facet_grid(~parameter)+
-        theme(strip.text.x = element_text(size = 13),
-        axis.title = element_text(size = 5),
-        axis.text  = element_text(size = 5))
-
-p
-```
-
-
-## MRVNORM
-
-```{r}
-library(Rfast)
-library(MASS)
-
-
-## fit mvrnorm
-x = dds.extraction$beta %>% as.matrix()
-fit.mvrnorm <- Rfast::mvnorm.mle(x)
-x <- NULL
-
-## sampling from mvrnorm fitting
-n_genes = 1
-beta.matrix.template <- MASS::mvrnorm(n = n_genes,
-                               mu = fit.mvrnorm$mu,
-                               Sigma = fit.mvrnorm$sigma )
-
-replicate_beta <- function(beta_vec, n, theta){
-  beta_vec.rep = rep(beta_vec, n)
-  beta_vec.rep + rnorm(length(beta_vec.rep), mean = 0, sd = abs(beta_vec/theta))
-}
-
-betaG = beta.matrix.template[2]
-n_genotypes = 50
-
-
-betaG + rnorm(4, mean = 0, sd = abs(betaG/10))
-hist(replicate_beta(betaG, n_genotypes, theta = 10))
-
-
-n_environments = 2
-theta = 10
-beta.matrix.tmp = purrr::map2(.x = c(2,3,4), .y =  c(n_genotypes-1,
-                                            n_environments-1,
-                                            (n_genotypes-1)*(n_environments-1)),
-                          ~ replicate_beta(beta.matrix.template[.x], .y, theta) %>% matrix(ncol = .y)) %>%
-                  do.call(cbind, .)
-
-beta.matrix = cbind(beta.matrix.template[1], beta.matrix.tmp)
-betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "")
-betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "")
-betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
-matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
-
-colnames(beta.matrix) = matrix.colnames
-rownames(beta.matrix) = base::paste("gene", 1:(n_genes), sep = "")
-beta.matrix %>% dim()
-```
-
-
-## kij simu
-
-```{r}
-
-n_G = n_genotypes
-n_E = 2
-
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 25)
-design2simulate$model_matrix %>% dim()
-log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.matrix)
-#log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-
-
-gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                            n_genotypes = n_G, 
-                                            n_environments = n_E, 
-                                            dispersion.vec = dds.extraction$gene_dispersion, 
-                                            model_matrix = design2simulate$model_matrix, 
-                                                      dispUniform_btweenCondition = T)
-    
-    
-mu_ij = getMu_ij(log_qij, 1)
-
-kij.simulated = getK_ij(mu_ij, gene_dispersion)
-    
-kij.simulated %>% dim()
-```
-
-## GLM 
-
-```{r}
-options(future.globals.maxSize = 800 * 1024^2)
-
-plan(multisession, workers = 1)
-a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
-                                              .x, 
-                                              design2simulate$design2simulate) )
-
-c = do.call(rbind, a) %>% mutate(p.val.adj = p.adjust(p.val, method= "fdr"))
-beta.input.long = beta.matrix %>% data.frame() %>%
-      tibble::rownames_to_column(., var = "gene_id") %>%
-      dplyr::mutate(origin = "Actual") %>%
-      reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-      dplyr::rename(Actual = "value") %>% 
-      dplyr::select(-origin)
-
-
-df_comparison = merge(c, beta.input.long) %>% mutate(Ngenotype = n_genotypes) %>% mutate(pred = ifelse(p.val.adj < 0.05, "DE", "nonDE"))
-
-df_comparison %>% dplyr::filter(type %in% c("G", "GxE")) %>% group_by(type, pred) %>% tally()
-
-ggplot(df_comparison %>% dplyr::filter(type %in% c("G", "GxE")) )  +
-    geom_histogram(aes(x=Inference, y = ..density.., col = pred, fill = pred),  ) +
-   geom_density(aes(x = Actual*log(2))) +
-    facet_grid(~type, scales = "free") 
-df_comparison$type
-ggplot(df_comparison %>% dplyr::filter(type %in% c("Intercept")) )  +
-  geom_point(aes(x = Actual, y = Inference))
-```
-
-
-## Simu
-
-
-```{r}
-
-rm(df_comparison)
-
-
-## utils
-beta.matrix.template <- MASS::mvrnorm(n = n_genes,
-                               mu = fit.mvrnorm$mu,
-                               Sigma = fit.mvrnorm$sigma )
-
-
-replicate_beta <- function(beta_vec, n, theta){
-  beta_vec.rep = rep(beta_vec, n)
-  beta_vec.rep + rnorm(length(beta_vec.rep), mean =  beta_vec/theta , sd = 1)
-}
-
-
-## Params
-n_genes = 1
-n_environments = 2
-theta = 10
-#gen_vector = c(100, 300 ,500, 700, 1000)
-gen_vector = 150
-n_genotypes = 150
-thr =  c(0, 0.07, 0.48, 1)
-#thr = c(0.04, 1)
-
-beta.matrix.tmp = purrr::map2(.x = c(2,3,4), .y =  c(n_genotypes-1,
-                                            n_environments-1,
-                                            (n_genotypes-1)*(n_environments-1)),
-                          ~ replicate_beta(beta.matrix.template[.x], .y, theta) %>% matrix(ncol = .y)) %>%
-                  do.call(cbind, .)
-
-beta.matrix = cbind(beta.matrix.template[1], beta.matrix.tmp)
-betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "")
-betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "")
-betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
-matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
-
-colnames(beta.matrix) = matrix.colnames
-beta.matrix %>% dim()
-rownames(beta.matrix) = base::paste("gene", 1:(n_genes), sep = "")
-beta.input = beta.matrix
-#beta.matrix %>% dim()
-
-########## kij ################
-n_G = n_genotypes
-n_E = 2
-
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 10)
-design2simulate$model_matrix %>% dim()
-log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.matrix)
-#log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-
-
-gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                            n_genotypes = n_G, 
-                                            n_environments = n_E, 
-                                            dispersion.vec = dds.extraction$gene_dispersion, 
-                                            model_matrix = design2simulate$model_matrix, 
-                                                      dispUniform_btweenCondition = T)
-    
-    
-mu_ij = getMu_ij(log_qij, 1)
-
-kij.simulated = getK_ij(mu_ij, gene_dispersion)
-
-for (t in thr){
-  for (n_genotypes in gen_vector ){
-  
-   
-    
-    ############# GLM fitting ###################
-    plan(multisession, workers = 1)
-    a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
-                                                .x, 
-                                                design2simulate$design2simulate, threshold = t) )
-  
-    #kij.simulated %>% dim()
-    ############ save res #####
-    c = do.call(rbind, a)
-    beta.input.long = beta.input %>% data.frame() %>%
-            tibble::rownames_to_column(., var = "gene_id") %>%
-            dplyr::mutate(origin = "Actual") %>%
-            reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-            dplyr::rename(Actual = "value") %>% 
-            dplyr::select(-origin)
-      
-      df_tmp = merge(c, beta.input.long) %>% mutate(Ngenotype = n_genotypes)  %>% mutate(p.val.adj = p.adjust(p.val, method= "fdr")) %>% mutate(threshold = t)
-      
-      if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
-          df_comparison = rbind(df_comparison, df_tmp) 
-   
-      }
-      else{
-            df_comparison = df_tmp
-      }
-            
-  }
-}
-
-df_comparison$threshold %>% table()
-
-df_comparison$type <- factor(df_comparison$type, levels = c("Intercept", "G", "E", "GxE"))
-
-
-df_comparison = df_comparison %>% mutate(pred = ifelse(p.val.adj < 0.05, "DE", "nonDE"))
-
-
-
-ggplot(df_comparison) + geom_bar(aes(x= p.val)) + xlim(c(0,0.1))
-
-
-df_comparison %>% filter(type %in% c("G", "GxE")) %>% group_by(threshold, type, pred, Ngenotype  ) %>% tally()
-
-df_comparison %>%  filter(type %in% c("G", "GxE")) %>% group_by(type, threshold, pred) %>% tally()
-
-
-df_comparison$Ngenotype %>% table()
-
-
-#df_comparison = df_comparison %>% filter(gene_id == "gene1")
-p = ggplot(df_comparison %>% dplyr::filter(type %in% c("G", "GxE")))  +
-    geom_histogram(aes(x = Inference, y=..density..) , fill = 'black' ) + 
-    geom_histogram(data = df_comparison %>% dplyr::filter(type %in% c("G", "GxE")) %>% dplyr::filter(pred == "DE"),  aes(x=Inference, y = ..density.. , fill = pred )) +
-    facet_grid(type~threshold , scales = "free") +
-    geom_vline(aes(xintercept= -threshold), linetype="dotted", 
-                color = "blue") +
-  geom_vline(aes(xintercept= threshold), linetype="dotted", 
-                color = "blue") +
-   geom_density(aes(x = Actual * log(2) )) 
-p
-ggsave(filename = "../../results/v2/figures/GLM_distrib.png", plot = p, width = 20, height = 12)
-
-
-```
diff --git a/results/v2/2022-09-07_Postinference_filtering.Rmd b/results/v2/2022-09-07_Postinference_filtering.Rmd
deleted file mode 100644
index ffbeb9299cb3d21339a32280a75860e2865ee234..0000000000000000000000000000000000000000
--- a/results/v2/2022-09-07_Postinference_filtering.Rmd
+++ /dev/null
@@ -1,360 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-09-01'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Public data
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-
-## params visualization
-
-
-```{r}
-beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter")
-
-alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
-                          dplyr::rename(., value = ".") %>%
-                          dplyr::mutate(parameter = "dispersion")
-
-dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long)
-
-
-
-p = ggplot(dtf.params_obs, aes(x= value)) +
-      geom_histogram(aes(y=..density..), colour="black", fill="white") + facet_grid(~parameter)+
-        theme(strip.text.x = element_text(size = 13),
-        axis.title = element_text(size = 5),
-        axis.text  = element_text(size = 5))
-
-p
-```
-
-
-## DESEQ threshold 0
-
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-#### params #####
-n_G = 10
-n_E = 2
-n_genes = 300
-
-
-##################
-beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 15)
-#rowSums(beta.input) %>% which.max()
-#rowSums(beta.input) %>% max()
-log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-
-
-gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-
-
-#2* 2^log_qij
-mu_ij = getMu_ij(log_qij, 1)
-#max(mu_ij)
-kij.simulated = getK_ij(mu_ij, gene_dispersion)
-#max(kij.simulated)
-
-#kij.simulated = kij.simulated %>% data.frame() %>% filter_all(all_vars(. < 1000000)) 
-
-
-```
-
-
-## DEseq on simulation
-
-
-```{r}
-#kij.simulated[is.na(kij.simulated)] = 0
-
-dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-```
-
-
-## Evaluation
-
-
-```{r}
-n_G = 40
-n_E = 2
-n_genes = 10
-
-
-rm(df2return)
-threshold_list = c(0, 0.04, 0.07, 0.2, 0.48, 1)
-beta.actual.matrix = beta.input 
-
-for (thr in threshold_list){
-
-  listBeta = DESeq2::resultsNames(dds_simu)
-  plan(multisession, workers = 4)
-  res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x), lfcThreshold = thr) %>% data.frame() %>% .$padj)
-  padj.matrix = do.call("cbind", res)
-  
-  
-  
-  dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
-  
-  dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
-                                      stringr::str_replace(., "_vs_G0", "") %>%
-                                      stringr::str_replace(., "_vs_E0", "") %>%
-                                      stringr::str_replace_all(., "_", "") %>%
-                                      stringr::str_replace(., "\\.", ":")
-  
-  columnOfInterest =  design2simulate$model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "")
-  #dds_simu.mcols[,columnOfInterest]
-  
-  ## Get only column of interest
-  idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped)
-  beta.infered = dds_simu.mcols[,idx_cols]
-  
-  ## homogeneize column names & rownames
-  idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames())
-  beta.actual.matrix = beta.actual.matrix[,idx_cols]
-  colnames(beta.infered) = base::colnames(beta.actual.matrix)
-  colnames(padj.matrix) = base::colnames(beta.actual.matrix)
-  rownames(padj.matrix) = base::rownames(beta.actual.matrix)
-  
-  beta.infer.long = beta.infered %>% data.frame() %>%
-                            tibble::rownames_to_column(., var = "gene_id") %>%
-                            dplyr::mutate(origin = "Inference") %>%
-                            reshape2::melt(., value.name = "value", variable.name= "beta")
-  beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>%
-                            tibble::rownames_to_column(., var = "gene_id") %>%
-                            dplyr::mutate(origin = "Actual") %>%
-                            reshape2::melt(., value.name = "value", variable.name= "beta")
-  padj.matrix.long = padj.matrix  %>% data.frame() %>%
-                            tibble::rownames_to_column(., var = "gene_id") %>%
-                            dplyr::mutate(origin = "padj") %>%
-                            reshape2::melt(., value.name = "value", variable.name= "beta")
-  
-  beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long)
-  #beta.merged.long$beta %>% unique()
-  
-  beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when(
-    str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
-    str_detect(beta, "genotypeG\\d+$") ~ "G",
-    str_detect(beta, "environmentE\\d+$") ~ "E",
-    str_detect(beta, "Intercept$") ~ "Intercept")
-  )
-  
-  
-  beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(.,  gene_id + beta + type ~ origin)
-  beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE"))
-  beta.merged.long.reshape2$threshold = thr
-  
-  
-   if (exists('df2return') && is.data.frame(get('df2return'))){
-            df2return = rbind(df2return, beta.merged.long.reshape2)
-     
-        }
-        else{
-            df2return = beta.merged.long.reshape2
-        }
-}  
-```
-
-```{r}
-  
-df_comparison = df2return  
-
-
-  
-df_comparison = df_comparison  %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-                    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-
-df_comparison$label <- factor(df_comparison$label)
-df_comparison$threshold <- factor(df_comparison$threshold)
-
-
-  
-```
-
-```{r}
-library(plotROC)
-df_comparison %>% filter(type != "Intercept") %>% select(-threshold)
-## ROC curve
-df_comparison %>% filter(type != "Intercept") %>% filter(threshold != 0) %>% group_by(threshold, type, label_bin) %>% tally()
-df_comparison$threshold = factor(df_comparison$threshold)
-
-
-p = ggplot(df_comparison %>% filter(type != "Intercept") , aes(d = label , m = padj, color = threshold)) + 
-  geom_roc(n.cuts = 0, labels = F)  + facet_grid(~type) + 
-  scale_colour_manual(values = c("#F2F2F2","#E1E0DC", "#C6C2BB", "#908A86", "#777572", "#16323B")) + 
-  theme(panel.background = element_rect(fill = "#a4c2a5",colour = "#a4c2a5"), 
-                            panel.grid.major = element_blank(), 
-                             panel.grid.minor = element_blank())
- 
-p  + style_roc()
-ggsave(filename = "figures/ROCcurve.png", plot = p + style_roc(), width = 15, height = 8) 
-
-p = ggplot(calc_auc(p), aes(x = threshold, y = AUC, group=1)) + geom_point() + geom_line() + facet_grid(~type) 
-ggsave(filename = "figures/AUC.png", plot = p, width = 15, height = 10) 
-### boxplot FP
-
-df_comparison2 = df_comparison %>% 
-      filter(type != "Intercept") %>% 
-      group_by(label, prediction, type, threshold) %>% 
-      tally() %>% 
-      ungroup() %>% 
-      group_by(label, type, threshold) %>% 
-      mutate(tt = sum(n)) %>% 
-      mutate(proportion = n/tt) %>%
-      mutate(predict = ifelse(label == prediction, "true", "false"))
-
-
-df_comparison2$threshold = factor(df_comparison2$threshold)
-p = ggplot(df_comparison2) + geom_bar(aes(x = threshold , y = proportion, fill = predict),stat="identity") + facet_grid(type~label)
-p
-p = ggsave(filename = "figures/boxplotFP.png", plot = p, width = 15, height = 8) 
-
-```
-
-
-## post inference filtering
-
-
-```{r}
-library(plotROC)
-
-## ROC curve
-#df_postInf =
-  
-list_df = map( threshold_list, ~df_comparison %>% 
-                                filter(type != "Intercept") %>% 
-                                filter(threshold == 0) %>%
-                                mutate( prediction = ifelse(padj < 0.05 & abs(Inference) > .x , "DE", "nonDE"  )) %>% 
-                                mutate(threshold = .x))
-df_postInf = do.call("rbind", list_df) %>% mutate(method = "H0:|beta|>0 & FC>thr")
-df_h0 = df_comparison %>% mutate(method = "H0:|beta|>thr")
-
-
-
-df = rbind(df_postInf, df_h0)
-df$method %>% table()
-p = ggplot(df %>% filter(type != "Intercept") %>% filter(threshold !=0) , aes(d = label , m = padj, color = method)) + 
-  geom_roc(n.cuts = 0, labels = F)  + facet_grid(type~threshold) + 
-  scale_colour_manual(values = c("#E1E0DC", "#16323B")) + 
-  theme(panel.background = element_rect(fill = "#a4c2a5",colour = "#a4c2a5"), 
-                            panel.grid.major = element_blank(), 
-                             panel.grid.minor = element_blank())
-
-
-
-p + style_roc()
-
-p
-ggsave(filename = "figures/ROCcurvePostInf.png", plot = p + style_roc(), width = 20 , height = 15) 
-
-
-p = ggplot(calc_auc(p), aes(x = threshold, y = AUC, group=1)) + geom_point() + geom_line() + facet_grid(~type) 
-ggsave(filename = "figures/AUC.png", plot = p, width = 15, height = 10) 
-
-
-### boxplot FP
-
-
- df %>% 
-      filter(type != "Intercept") %>% 
-      group_by(label, type, threshold, method) %>% 
-      tally() 
-
-df_2 = df %>% 
-      filter(type != "Intercept") %>% 
-      group_by(label, prediction, type, threshold, method) %>% 
-      tally() %>% 
-      ungroup() %>% 
-      group_by(label, type, threshold, method) %>% 
-      mutate(tt = sum(n)) %>% 
-      mutate(proportion = n/tt) %>%
-      mutate(predict = ifelse(label == prediction, "true", "false"))
-
-
-df_comparison2$threshold = factor(df_comparison2$threshold)
-p = ggplot(df_2) + geom_bar(aes(x = method , y = proportion, fill = predict),stat="identity") + 
-          facet_grid(type~label + threshold) + theme(axis.text.x = element_text(angle = 50, vjust = 1, hjust= 1.01))
-p
-ggsave(filename = "figures/boxplotFPpostInf.png", plot = p, width = 20, height = 15) 
-
-```
-
-
-
-
-```{r}
-
-dds_simu.mcols = S4Vectors::mcols(dds_simu, use.names=TRUE)
-
-df_comparison = getDfComparison(dds_simu = dds_simu , model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-
-gene_disp.simu = dds_simu.mcols$dispersion %>% na.omit()
-
-
-dispersion_dtf = list(dispersion_infered = gene_disp.simu, dispersion_actual = gene_dispersion[,1]) %>% data.frame()
-
-p = ggplot(dispersion_dtf) + 
-      geom_point(aes(x=dispersion_actual, y = dispersion_infered ),alpha =0.2) + 
-    geom_abline(intercept = 0, slope = 1)
-      
-p
-
-ggsave(filename = "figures/DESEQ_dispersion.png", plot = p, width = 15, height = 8) 
-
-```
diff --git a/results/v2/2022-09-07_timeProcessing.Rmd b/results/v2/2022-09-07_timeProcessing.Rmd
deleted file mode 100644
index 041b699b7119b2e37d64e233675943198a7ae14a..0000000000000000000000000000000000000000
--- a/results/v2/2022-09-07_timeProcessing.Rmd
+++ /dev/null
@@ -1,222 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-09-07'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Public data
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-## Simu
-
-
-```{r}
-
-rm(df_timeProcess)
-
-
-
-## Params
-n_genes_vec = c(100, 1000, 6000, 30000)
-n_environments = 2
-theta = 10
-gen_vector = c(2, 5, 10, 50, 100, 1000)
-
-
-for (n_genes in n_genes_vec){
-    print(paste("nombre genes :", n_genes, sep = " "))
-    for (n_genotypes in gen_vector ){
-    print(paste("nombre genotypes :", n_genotypes, sep = " "))
-
-     beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_genotypes, n_environments = n_environments, beta.dtf=dds.extraction$beta)
-    
-      design2simulate = buildDesign2simulate(n_genotype = n_genotypes, n_environment = n_environments, n_replicate = 4)
-      log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-      #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-      
-      
-      gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                                  n_genotypes = n_genotypes, 
-                                                  n_environments = n_environments, 
-                                                  dispersion.vec = dds.extraction$gene_dispersion, 
-                                                  model_matrix = design2simulate$model_matrix, 
-                                                            dispUniform_btweenCondition = T)
-          
-          
-      mu_ij = getMu_ij(log_qij, 1)
-      
-      kij.simulated = getK_ij(mu_ij, gene_dispersion)
-      
-      ############# GLM fitting ###################
-      
-      start_time <- Sys.time()
-    
-      plan(multisession, workers = 4)
-      a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
-                                                  .x, 
-                                                  design2simulate$design2simulate) )
-    
-      
-      ############ save res #####
-      #c = do.call(rbind, a)
-      #  beta.input.long = beta.input %>% data.frame() %>%
-      #        tibble::rownames_to_column(., var = "gene_id") %>%
-      #        dplyr::mutate(origin = "Actual") %>%
-      #        reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-      #        dplyr::rename(Actual = "value") %>% 
-      #        dplyr::select(-origin)
-        
-        end_time <- Sys.time()
-        timePr = as.numeric(difftime(end_time, start_time, units="mins"))
-        
-        df_tmp = list(n_genes = n_genes, n_genotype = n_genotypes, timeProcessing = timePr  ) %>% data.frame()
-        
-        if (exists('df_timeProcess') && is.data.frame(get('df_timeProcess'))){
-            df_timeProcess = rbind(df_timeProcess, df_tmp)
-     
-        }
-        else{
-            df_timeProcess = df_tmp
-        }
-              
-    }
-}
-
-df_timeProcess_glm = df_timeProcess %>% mutate(tool = "MASS::GLM")
-
-
-ggplot(df_timeProcess_glm) + geom_point(aes(x = n_genotype, y = timeProcessing)) + facet_grid(~n_genes)
-ggsave(filename = "figures/GLM_genotype_distrib.png", plot = p, width = 20, height = 12)
-
-
-```
-## DESEQ2
-
-```{r}
-
-rm(df_timeProcess)
-
-
-
-## Params
-n_genes_vec = c(100)
-n_environments = 2
-theta = 10
-gen_vector = c(75, 100)
-
-
-for (n_genes in n_genes_vec){
-    print(paste("nombre genes :", n_genes, sep = " "))
-    for (n_genotypes in gen_vector ){
-    print(paste("nombre genotypes :", n_genotypes, sep = " "))
-
-     beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_genotypes, n_environments = n_environments, beta.dtf=dds.extraction$beta)
-    
-      design2simulate = buildDesign2simulate(n_genotype = n_genotypes, n_environment = n_environments, n_replicate = 4)
-      log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-      #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-      
-      
-      gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                                  n_genotypes = n_genotypes, 
-                                                  n_environments = n_environments, 
-                                                  dispersion.vec = dds.extraction$gene_dispersion, 
-                                                  model_matrix = design2simulate$model_matrix, 
-                                                            dispUniform_btweenCondition = T)
-          
-          
-      mu_ij = getMu_ij(log_qij, 1)
-      
-      kij.simulated = getK_ij(mu_ij, gene_dispersion)
-      
-      ############# DESEQ fitting ###################
-      start_time <- Sys.time()
-
-      dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-      df_tmp = getDfComparison(dds_simu = dds_simu , model_matrix = design2simulate$model_matrix, beta.actual.matrix = beta.input)
-      
-       end_time <- Sys.time()
-      timePr = as.numeric(difftime(end_time, start_time, units="mins"))
-      
-      df_tmp = list(n_genes = n_genes, n_genotype = n_genotypes, timeProcessing = timePr  ) %>% data.frame()
-      
-      if (exists('df_timeProcess') && is.data.frame(get('df_timeProcess'))){
-            df_timeProcess = rbind(df_timeProcess, df_tmp)
-     
-        }
-        else{
-            df_timeProcess = df_tmp
-        }
-      
-      
-    }
-}
-
-df_timeProcess_deseq = df_timeProcess %>% mutate(tool = "DESEQ2")
-
-p =ggplot(df_timeProcess_deseq) + geom_point(aes(x = n_genotype, y = timeProcessing), col = "#F8766D", size = 6) + facet_grid(~n_genes) + scale_x_log10() 
-ggsave(filename = "figures/DESEQ_timeProcess.png", plot = p, width = 15, height = 10)
-
-
-
-```
-
-
-```{r}
-
-df = rbind(df_timeProcess_deseq, df_timeProcess_glm) %>% filter(n_genes == 100)
-
-p = ggplot(df) + geom_point(aes(x = n_genotype, y = timeProcessing, col = tool),size = 5 ) + facet_grid(~n_genes) + scale_x_log10()
-p
-ggsave(filename = "figures/timeProcess.png", plot = p, width = 20, height = 12)
-
-```
diff --git a/results/v2/2022-09-22_NgenotypesEffectBIS.Rmd b/results/v2/2022-09-22_NgenotypesEffectBIS.Rmd
deleted file mode 100644
index 120c215aa3575a4b948148365693f05a51b62a1f..0000000000000000000000000000000000000000
--- a/results/v2/2022-09-22_NgenotypesEffectBIS.Rmd
+++ /dev/null
@@ -1,187 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-07-26'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-
-## Simu
-
-```{r}
-n_genes = 80
-n_genotypes_list = c( 100, 200, 400 )
-threshold = 0.5
-n_E = 2
-
-
-rm(df_roc)
-
-
-for (n_G in n_genotypes_list){
-  
-  beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-  design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 3)
-  log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-  gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                                        n_genotypes = n_G, n_environments = n_E, 
-                                                        dispersion.vec = dds.extraction$gene_dispersion, 
-                                                        model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-  mu_ij = getMu_ij(log_qij, 1)
-  kij.simulated = getK_ij(mu_ij, gene_dispersion)
-  
-  
-  
-  ######################### DESEQ
-      beta.actual.matrix = beta.input
-      ## 2 modifY
-      #dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-      
-      #############
-      # Param
-      threshold = 0.5
-      ############
-      
-      #listBeta = DESeq2::resultsNames(dds_simu)
-      #plan(multisession, workers = 4)
-      #res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x), lfcThreshold = threshold) %>% data.frame() %>% .$padj)
-      #padj.matrix = do.call("cbind", res)
-        
-        
-        
-      #dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
-        
-      #dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
-      #                                      stringr::str_replace(., "_vs_G0", "") %>%
-      #                                      stringr::str_replace(., "_vs_E0", "") %>%
-      #                                      stringr::str_replace_all(., "_", "") %>%
-      #                                      stringr::str_replace(., "\\.", ":")
-        
-      #columnOfInterest =  design2simulate$model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "")
-      #dds_simu.mcols[,columnOfInterest]
-      
-      ## Get only column of interest
-      #idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped)
-      #beta.infered = dds_simu.mcols[,idx_cols]
-      
-      ## homogeneize column names & rownames
-      #idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames())
-      #beta.actual.matrix = beta.actual.matrix[,idx_cols]
-      #colnames(beta.infered) = base::colnames(beta.actual.matrix)
-      #colnames(padj.matrix) = base::colnames(beta.actual.matrix)
-      #rownames(padj.matrix) = base::rownames(beta.actual.matrix)
-      #beta.actual.matrix %>% dim()
-      #padj.matrix %>% dim()
-      #beta.infer.long = beta.infered %>% data.frame() %>%
-      #                          tibble::rownames_to_column(., var = "gene_id") %>%
-      #                          dplyr::mutate(origin = "Inference") %>%
-      #                          reshape2::melt(., value.name = "value", variable.name= "beta")
-      #beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>%
-      #                          tibble::rownames_to_column(., var = "gene_id") %>%
-      #                          dplyr::mutate(origin = "Actual") %>%
-      #                          reshape2::melt(., value.name = "value", variable.name= "beta")
-      #padj.matrix.long = padj.matrix  %>% data.frame() %>%
-      #                          tibble::rownames_to_column(., var = "gene_id") %>%
-      #                          dplyr::mutate(origin = "padj") %>%
-      #                          reshape2::melt(., value.name = "value", variable.name= "beta")
-      
-      #beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long)
-      #beta.merged.long$beta %>% unique()
-      
-      #beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when(
-      #  str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
-      #  str_detect(beta, "genotypeG\\d+$") ~ "G",
-      #  str_detect(beta, "environmentE\\d+$") ~ "E",
-      #  str_detect(beta, "Intercept$") ~ "Intercept")
-      #)
-      
-      
-      #beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(.,  gene_id + beta + type ~ origin)
-      #beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE"))
-      #beta.merged.long.reshape2$threshold = threshold
-      
-      
-      
-      ## 2 modifY
-      #df_roc_deseq = beta.merged.long.reshape2  %>% mutate(from = "DESEQ2") %>% mutate(n_genotype = n_G)
-  
-    ############## GLM
-      a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], ## 2 modify !!!
-                                              .x, 
-                                              design2simulate$design2simulate, threshold = threshold) )
-
-      ############ save res #####
-      c = do.call(rbind, a)
-      beta.input.long = beta.input %>% data.frame() %>%
-              tibble::rownames_to_column(., var = "gene_id") %>%
-              dplyr::mutate(origin = "Actual") %>%
-              reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-              dplyr::rename(Actual = "value") %>% 
-              dplyr::select(-origin)
-        
-      df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(pval, method= "fdr")) %>% mutate(threshold = threshold)
-      
-      
-      
-      
-      
-      ## 2 modify
-      df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval)) %>% mutate(from = 'MASS::glm.nb') %>% mutate(n_genotype = n_G)
-      
-      #### merge 
-      
-      #df_roc_deseq = df_roc_deseq %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-      #              mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-      df_roc_glm = df_roc_glm %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-                          mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-      
-      #df_tmp2 = rbind(df_roc_deseq, df_roc_glm)
-      df_tmp2 = df_roc_glm
-      if (exists('df_roc') && is.data.frame(get('df_roc'))){
-      df_roc = rbind(df_roc, df_tmp2)
-      }
-      else{
-      df_roc = df_tmp2
-      }
-
-      
-     
-}
-
-df_roc %>% group_by(from, n_genotype) %>% tally()
-```
-
-```{r}
-df_roc %>% filter(n_genotype == 100)
-
-
-df_roc$n_genotype %>% table()
-library(plotROC)
-p = ggplot(df_roc %>% filter(type %in% c("G","GxE"))  , aes(d = label , m = padj, color = from)) + 
-  geom_roc(n.cuts = 0, labels = F)  + facet_grid(~n_genotype) 
- 
-p  + style_roc()
-ggsave(filename = "figures/ROCcurve18.png", plot = p + style_roc(), width = 20) 
-
-p = ggplot(calc_auc(p), aes(x = threshold, y = AUC, group=1)) + geom_point() + geom_line() + facet_grid(~type) 
-ggsave(filename = "figures/AUC.png", plot = p, width = 15, height = 10) 
-### boxplot FP
-
-```
-
diff --git a/results/v2/2022-09-22_kmean.Rmd b/results/v2/2022-09-22_kmean.Rmd
deleted file mode 100644
index ab2e4c3343fd723530910501d5deb472e00f1667..0000000000000000000000000000000000000000
--- a/results/v2/2022-09-22_kmean.Rmd
+++ /dev/null
@@ -1,327 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-09-22'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Public data
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-
-## params visualization
-
-
-```{r}
-beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter")
-
-alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
-                          dplyr::rename(., value = ".") %>%
-                          dplyr::mutate(parameter = "dispersion")
-
-dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long)
-
-
-
-p = ggplot(dtf.params_obs, aes(x= value)) +
-      geom_histogram(aes(y=..density..), colour="black", fill="white") + facet_grid(~parameter)+
-        theme(strip.text.x = element_text(size = 13),
-        axis.title = element_text(size = 5),
-        axis.text  = element_text(size = 5))
-
-p
-```
-
-
-
-## gene segemntation
-
-
-```{r}
-dds.extraction$beta 
-kmean.res = kmeans(dds.extraction$beta[,c(2,3)], 3)
-
-pca.obj= prcomp(dds.extraction$beta[,c(2,3)])
-
-
-res = summary(pca.obj)
-library(kableExtra)
-res$importance[,1:2] %>% 
-  kbl(., caption = "Table: Variance explained per Principal Component", position = "bottom", align = 'c') %>% 
-  kable_styling(full_width = F)
-
-
-dtp <- data.frame( 'cluster' = kmean.res$cluster ,
-                  pca.obj$x[,1:2]) # the first two components are selected (NB: you can also select 3 for 3D plotting or 3+)
-
-
-dtp$cluster <- factor(dtp$cluster)
-## Plot
-P1 <- ggplot(data = dtp) + 
-       geom_point(aes(x = PC1, y = PC2, 
-                      col = cluster),
-                      size =3) + 
-       theme_minimal() 
-P1 
-
-ggsave("../results/figures/ACP_kmeans.png", P1)
-
-dtf = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster ) %>%
-      reshape2::melt(. ,id=c("cluster"), value.name = "value", variable.name= "type") 
-dtf$cluster <- factor(dtf$cluster)
-
-
-p = ggplot(dtf) + geom_density(aes(x = value, fill = cluster ), alpha = 0.4) + facet_grid(~ type, scales = "free")
-p
-ggsave("figures/densityBeta_kmeans.png", p)
-
-
-betas = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster)
-betas$cluster = factor(betas$cluster)
-p = ggplot(betas) + geom_point(aes(x= betaG, betaGE, col = cluster))
-p
-ggsave("figures/scattrplot_clustering.png", p)
-
-```
-
-
-```{r}
-dds.extraction.bis = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster)
-
-
-beta.cluster1= dds.extraction$beta[kmean.res$cluster == 1,]
-beta.cluster2= dds.extraction$beta[kmean.res$cluster == 2,]
-beta.cluster3= dds.extraction$beta[kmean.res$cluster == 3,]
-
-
-```
-
-
-
-
-
-
-## all-in
-
-```{r}
-
-beta.input1 = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = beta.cluster1)
-beta.input2  = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = beta.cluster2)
-rownames(beta.input2) =  base::paste("gene", 101:(200), sep = "")
-beta.input3  = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = beta.cluster3)
-rownames(beta.input3) =  base::paste("gene", 201:(300), sep = "")
-
-beta.input = rbind(beta.input1, beta.input2, beta.input3)
-
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 15)
-log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-mu_ij = getMu_ij(log_qij, 1)
-kij.simulated = getK_ij(mu_ij, gene_dispersion)
-
-kij.simulated %>% dim()
-```
-
-## Deseq 
-
-```{r}
-
-beta.actual.matrix = beta.input
-## 2 modifY
-dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-#############
-# Param
-threshold = 0.5
-############
-listBeta = DESeq2::resultsNames(dds_simu)
-plan(multisession, workers = 4)
-res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x), lfcThreshold = threshold) %>% data.frame() %>% .$padj)
-padj.matrix = do.call("cbind", res)
-  
-  
-  
-dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
-  
-dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
-                                      stringr::str_replace(., "_vs_G0", "") %>%
-                                      stringr::str_replace(., "_vs_E0", "") %>%
-                                      stringr::str_replace_all(., "_", "") %>%
-                                      stringr::str_replace(., "\\.", ":")
-  
-columnOfInterest =  design2simulate$model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "")
-#dds_simu.mcols[,columnOfInterest]
-
-## Get only column of interest
-idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped)
-beta.infered = dds_simu.mcols[,idx_cols]
-
-## homogeneize column names & rownames
-idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames())
-beta.actual.matrix = beta.actual.matrix[,idx_cols]
-colnames(beta.infered) = base::colnames(beta.actual.matrix)
-colnames(padj.matrix) = base::colnames(beta.actual.matrix)
-rownames(padj.matrix) = base::rownames(beta.actual.matrix)
-beta.actual.matrix %>% dim()
-padj.matrix %>% dim()
-beta.infer.long = beta.infered %>% data.frame() %>%
-                          tibble::rownames_to_column(., var = "gene_id") %>%
-                          dplyr::mutate(origin = "Inference") %>%
-                          reshape2::melt(., value.name = "value", variable.name= "beta")
-beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>%
-                          tibble::rownames_to_column(., var = "gene_id") %>%
-                          dplyr::mutate(origin = "Actual") %>%
-                          reshape2::melt(., value.name = "value", variable.name= "beta")
-padj.matrix.long = padj.matrix  %>% data.frame() %>%
-                          tibble::rownames_to_column(., var = "gene_id") %>%
-                          dplyr::mutate(origin = "padj") %>%
-                          reshape2::melt(., value.name = "value", variable.name= "beta")
-
-beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long)
-#beta.merged.long$beta %>% unique()
-
-beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when(
-  str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
-  str_detect(beta, "genotypeG\\d+$") ~ "G",
-  str_detect(beta, "environmentE\\d+$") ~ "E",
-  str_detect(beta, "Intercept$") ~ "Intercept")
-)
-
-
-beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(.,  gene_id + beta + type ~ origin)
-beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE"))
-beta.merged.long.reshape2$threshold = threshold
-
-
-
-## 2 modifY
-df_roc_deseq = beta.merged.long.reshape2  %>% mutate(from = "DESEQ2")
-
-
-df_roc_deseq %>% group_by(gene_id, type) %>% tally()
-df_roc_deseq$gene_id %>% unique() %>% length
-```
-
-## GLM
-
-
-```{r}
-kij.simulated %>% dim()
-############# GLM fitting ###################
-plan(multisession, workers = 4)
-a = 1:300 %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], ## 2 modify !!!
-                                              .x, 
-                                              design2simulate$design2simulate, threshold = threshold) )
-
-#kij.simulated %>% dim()
-############ save res #####
-
-c = do.call(rbind, a)
-beta.input.long = beta.input %>% data.frame() %>%
-        tibble::rownames_to_column(., var = "gene_id") %>%
-        dplyr::mutate(origin = "Actual") %>%
-        reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-        dplyr::rename(Actual = "value") %>% 
-        dplyr::select(-origin)
-  
-df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(pval, method= "fdr")) %>% mutate(threshold = threshold)
-
-
-
-
-
-## 2 modify
-df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval)) %>% mutate(from = 'MASS::glm.nb')
-
-
-df_roc_glm %>% group_by(gene_id, type) %>% tally()
-df_roc_glm$gene_id %>% unique() %>% length()
-```
-
-
-## merge df
-
-```{r}
-
-df_roc_deseq = df_roc_deseq %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-                    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-df_roc_glm = df_roc_glm %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-                    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-
-df_roc = rbind(df_roc_deseq, df_roc_glm)
-
-
-cluster1 =  base::paste("gene", 1:(100), sep = "")
-cluster2 =  base::paste("gene", 101:(200), sep = "")
-cluster3 =  base::paste("gene", 201:(300), sep = "")
-
-df_roc = df_roc %>% mutate(cluster = ifelse(gene_id %in% cluster1, 1, NA)) %>% mutate(cluster = ifelse(gene_id %in% cluster2, 2, cluster)) %>% mutate(cluster = ifelse(gene_id %in% cluster3, 3, cluster))
-df_roc$cluster = factor(df_roc$cluster)
-```
-
-
-```{r}
-
-df_roc %>% dplyr::filter(cluster == 2 ) %>%  group_by(type, label, from ) %>% tally()
-#%>% filter(cluster == 2 )
-
-library(plotROC)
-p = ggplot(df_roc %>% filter(type != "Intercept") , aes(d = label , m = padj, color = from)) + 
-  geom_roc(n.cuts = 0, labels = F)  + facet_grid(type~cluster) 
- 
-p  + style_roc()
-ggsave(filename = "figures/ROCcurve3.png", plot = p + style_roc(), width = 15) 
-
-p = ggplot(calc_auc(p), aes(x = threshold, y = AUC, group=1)) + geom_point() + geom_line() + facet_grid(~type) 
-ggsave(filename = "figures/AUC.png", plot = p, width = 15, height = 10) 
-### boxplot FP
-
-```
-
diff --git a/results/v2/2022-11-23_dev2.Rmd b/results/v2/2022-11-23_dev2.Rmd
deleted file mode 100644
index eafef0c4056680f2ede0a67298ce49af7435a399..0000000000000000000000000000000000000000
--- a/results/v2/2022-11-23_dev2.Rmd
+++ /dev/null
@@ -1,418 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-11-23'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Simulation without ru_rm_5 - all genes
-
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-
-
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Data viz
-p<- ddsExtraction.viz(dds.extraction = dds.extraction)
-p
-ggsave(filename = "figures/inputParams_distrib.png", plot = p, width = 20, height = 12) 
-
-```
-
-## Gene segmentation
-
-```{r}
-
-n_group_gene = 3
-dds.extraction$beta 
-kmean.res = kmeans(dds.extraction$beta[,c(2,3)], n_group_gene)
-
-
-beta.cluster1= dds.extraction$beta[kmean.res$cluster == 1,]
-beta.cluster2= dds.extraction$beta[kmean.res$cluster == 2,]
-beta.cluster3= dds.extraction$beta[kmean.res$cluster == 3,]
-
-
-x = beta.cluster1 %>% as.matrix()
-fit.mvrnorm <- Rfast::mvnorm.mle(x)
-#### BETAG.E #####
-fit.mvrnorm$sigma[,2] = fit.mvrnorm$sigma[,2]
-fit.mvrnorm$sigma[4,] = fit.mvrnorm$sigma[4,]*2
-```
-
-
-
-## BETA
-
-
-```{r}
-
-getGenesDispersions <- function( n_genes, sample_ids, dispersion.vec ,dispUniform_btweenCondition = T ){
-
-   if (dispUniform_btweenCondition == T ) {
-          gene_dispersion.dtf = base::sample(  dispersion.vec , replace = T, size = n_genes) %>% base::data.frame()
-          n_rep =  length(sample_ids)
-          gene_dispersion.dtf = gene_dispersion.dtf[,base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)]
-          rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
-          colnames(gene_dispersion.dtf) = sample_ids
-
-  }
-
-  else {
-
-          replication_table = sample_ids %>% stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table()
-          gene_dispersion.dtf = replication_table %>% purrr::map(., ~sample(  dispersion.vec, replace = T, size = n_genes) ) %>% data.frame()
-          gene_dispersion.dtf = gene_dispersion.dtf[,rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())]
-          colnames(gene_dispersion.dtf) = sample_ids
-          rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
-
-  }
-
-  return(gene_dispersion.dtf %>% as.matrix)
-
-}
-
-
-get_kij <- function(mu_ij.matx, dispersion.matx, n_genes, sample_id_list, idx_replicat){
-  n_sples = length(sample_id_list)
-  alpha_gene = 1/dispersion.matx
-  k_ij = stats::rnbinom(length(mu_ij.matx), size = alpha_gene  , mu = mu_ij.matx) %>% matrix(. , nrow = n_genes, ncol = n_sples )
-  k_ij[is.na(k_ij)] = 0
-  colnames(k_ij) = base::paste(sample_id_list, idx_replicat, sep = '_')
-  rownames(k_ij) = rownames(mu_ij.matrix) 
-  return(k_ij)
-}
-
-######### Utils ###########################
-uniform_replication <- function(maxN, n_samples) return(rep(T, time = maxN) %>% rep(., each = n_samples ) %>% matrix(ncol = n_samples))
-
-random_replication <- function(maxN, n_samples){
-  replicating <- function(maxN) return(sample(x = c(T,F), size = maxN, replace = T))
-  res = purrr::map(1:n_samples, ~replicating(maxN-1))
-  rep_table = do.call(cbind, res)
-  rep_table = rbind(rep(T, times = n_samples), rep_table)
-  return(rep_table)
-}
-#################################
-
-```
-
-```{r}
-######### Settings #############
-n_genes = 4
-n_genotypes = 100
-n_E = 2
-max_n_replicates = 15
-#################################
-
-
-##### Sampling from mvnorm ########
-beta.matrix <- MASS::mvrnorm(n = n_genes*(n_genotypes),
-                                 mu = fit.mvrnorm$mu,
-                                 Sigma = fit.mvrnorm$sigma )
-###################################
-
-####  Some reshaping ##############
-genes_vec = base::paste("gene", 1:n_genes, sep = "") 
-genotype_vec = base::paste("G", 0:(n_genotypes-1), sep = "")
-environment_vec = base::paste("E", 0:(n_E-1), sep = "")
-########################################
-m = c(1,1,0,0,1,1,1,1)
-design.matrix = matrix(data =  m , ncol = 2, byrow = F)
-colnames(design.matrix) = environment_vec
-rownames(design.matrix) = beta.matrix %>% colnames()
-##################################
-
-#### Computing log_qij & mu_ij ###########
-log_qij = beta.matrix %*% design.matrix
-mu_ij = getMu_ij(log_qij, 2)
-
-
-label_genotype = genotype_vec %>% rep(time = n_genes)
-label_gene = rep(genes_vec, each = n_genotypes)
-
-
-##### Preparing data for simulation ########"
-mu_ij.matrix = mu_ij %>%
-      data.frame() %>% 
-      dplyr::mutate(genotype = label_genotype) %>%
-      dplyr::mutate(gene_id = label_gene ) %>%
-      #dplyr::mutate(replicate_idx = 1 ) %>% 
-      reshape2::melt(., id.vars = c("gene_id", 'genotype'),  
-          value.name = "mu_ij", variable.name= "environment") %>% 
-      reshape2::dcast(., gene_id  ~  genotype + environment , value.var = "mu_ij") %>% 
-      column_to_rownames("gene_id") %>% as.matrix()
-
-
-#########################################
-
-#replication.matrix = random_replication(maxN = max_n_replicates, n_samples = n_genotypes*n_E)
-replication.matrix  = uniform_replication(maxN = max_n_replicates, n_samples = n_genotypes*n_E)
-
-########### SIMU 
-sample_ids = colnames(mu_ij.matrix)
-dispersion.matrix = getGenesDispersions(n_genes, sample_ids, dispersion.vec = dds.extraction$gene_dispersion, dispUniform_btweenCondition = T )
-kij.list = purrr::map(.x = 1:max_n_replicates, .f = ~get_kij(mu_ij.matrix[ ,replication.matrix[.x, ]], 
-                                                    dispersion.matrix[ ,replication.matrix[.x, ]],
-                                                    n_genes = n_genes,
-                                                    sample_ids[replication.matrix[.x, ]],
-                                                    .x
-                                                    )  )
-kij.simulated = do.call(cbind, kij.list)
-
-kij.simulated %>% dim() 
-  
-
-```
-## FIT
-
-
-
-```{r}
-## benchmarking lme4 / glmmTMB
-library(lme4)
-library(glmmTMB)
-library(broom.mixed)
-library(mice)
-
-df_2glmmm <- function(y , design_simulation, gene_name){
-  df_gene_i = cbind(design_simulation, y)
-  df_gene_i = df_gene_i %>% mutate(gene_id = gene_name)
-  return(df_gene_i)
-}
-
-
-fit_extraction <- function(fit){
-    fit.res = broom.mixed::tidy(fit)  %>% arrange(term)%>% .$estimate %>% as.numeric()
-    B0 = fit.res[3] 
-    BE = fit.res[4]
-    sd_BGE_E0 = fit.res[5]
-    sd_BGE_E1 = fit.res[7]
-    sd_BG_E0 = fit.res[6] 
-    sd_BG_E1 = fit.res[8] 
-    correlation_genotype =  fit.res[2]
-    correlation_interaction =  fit.res[1]
-    ####################################################################
-    res = list(mean_E0 = B0, mean_E1 = BE,  sd_BGE_E0 = sd_BGE_E0, sd_BGE_E1 = sd_BGE_E1, sd_BG_E0 = sd_BG_E0, sd_BG_E1 = sd_BG_E1, correlation_genotype = correlation_genotype, correlation_interaction = correlation_interaction) %>% data.frame() 
-    return(res)
-}
-
-fit_glmm <- function(data_glmm, i){
-    gene_i = paste('gene', i, sep = "")
-    data_glmm.filter = data_glmm %>% filter(gene_id == gene_i)
-    ###########################  FIT LME4   #################################
-    print("LME4")
-    m.nb <- glmer.nb(y ~ 0 + environment  + ( 1 + environment | genotype/environment )  , data= data_glmm.filter , verbose=F)
-    res_lme4 = fit_extraction(m.nb)
-    res_lme4 = res_lme4 %>% mutate(gene_id = gene_i) %>% mutate(from = "lme4")
-    
-    #####################  FIT GMTMB 1  #########################
-    print("glmTMB 1")
-    m.nb <- glmmTMB::glmmTMB(y ~  0 + environment  + ( 1 + environment | genotype/environment )   , data=data_glmm.filter, family=nbinom1, verbose = F)
-    res_glmTMB1 = fit_extraction(m.nb)
-    res_glmTMB1 = res_glmTMB1 %>% mutate(gene_id = gene_i) %>% mutate(from = "glmTMB nbinom1")
-    
-    #####################  FIT GMTMB 2  #########################
-    print("glmTMB 2")
-    
-    m.nb <- glmmTMB::glmmTMB(y ~ 0 + environment  + ( 1 + environment | genotype/environment )  , data=data_glmm.filter, family=nbinom2, verbose = F)
-    res_glmTMB2 = fit_extraction(m.nb)
-    res_glmTMB2 = res_glmTMB2 %>% mutate(gene_id = gene_i) %>% mutate(from = "glmTMB nbinom2")
-    
-    res = rbind(res_lme4, res_glmTMB1, res_glmTMB2)
-    return(res)
-}
-
-```
-
-```{r}
-####### ESTIMATION ############*
-experimental_design = colnames(kij.simulated) %>% 
-                            str_split(., pattern = '_', simplify = T)  %>%
-                              .[,c(1,2)] %>% data.frame() 
-colnames(experimental_design) = c("genotype", "environment")
-gene_id_list = rownames(kij.simulated)
-f = map(.x = gene_id_list, ~df_2glmmm(kij.simulated[.x,], experimental_design, .x) )
-data_glmm = do.call(rbind, f)
-plan(multisession, workers = 4)
-results = furrr::future_map(.x = 1:n_genes, ~fit_glmm(data_glmm, .x) )
-res = do.call(rbind, results)
-#################################################################
-
-ground_truth = beta.matrix %>% data.frame() %>%
-                  mutate(gene_id = label_gene) %>%
-                  mutate(genotype = label_genotype) %>%
-                  mutate(E0 = beta0 + betaG   ) %>%
-                  mutate(E1 = beta0 + betaE + betaG + betaGE  )
-
-gt_glmm = ground_truth %>% group_by(gene_id) %>% 
-              summarize( mean_E0 = mean(E0),
-                         mean_E1 = mean(E1),
-                         sd_BG_E0 = sd(betaG), ## ok
-                         sd_BG_E1 = sd(betaGE),
-                         sd_BGE_E0 = sd(betaE),                       
-                         sd_BGE_E1 = sd(betaE)) %>%
-              mutate(correlation = NA)
-
-
-######### JOIN #####
-res.long = res %>% reshape2::melt( id = c("gene_id", "from"), value.name = "Inference") #%>% mutate(type = 'Inference')
-input.long = gt_glmm %>% reshape2::melt( id = "gene_id", value.name = "Actual")
-df_merged = merge(res.long, input.long, by = c("gene_id", "variable")) #%>% mutate(n_genotype = n_G)
-df_benchmark = df_merged
-```
-
-```{r}
-
-p = ggplot(df_benchmark) + geom_point(aes(x = Actual , y = Inference, color = from ), alpha = 0.5, size = 4) + 
-                          facet_wrap(~variable, scales = "free", ncol = 2 ) + geom_abline(slope=1, intercept=0) +
-                          scale_color_manual(values = c("#D55E00", "#E69F00", "#0072B2")) 
-p
-ggsave(filename = 'figures/benchmark_identity8.png',p, width = 12)
-
-
-
-df_benchmark %>% filter(variable == 'correlation_interaction') %>% .$Inference %>% mean()
-df_benchmark %>% filter(variable == 'mean_E1') %>% .$Inference %>% mean()
-
-6.5-22+2.34
-
-
-fit.mvrnorm$sigma[2,3] = 0.1
-fit.mvrnorm$sigma[3,2] = 0.1
-
-
-mean(res$correlation)
-mean(res$sd_E0*res$sd_E1*res$correlation)
-```
-
-## GLM classique
-
-```{r}
-
-run.glm2 <- function(data_glm, i, threshold = 0) {
-  rownames(data_glm) <- NULL
-  tryCatch({
-    fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = data_glm, link = log)
-    return(reshapeGlmRes(fit, i, threshold))
-  },
-
-  error = function(cnd){
-    print('noop')
-    return(reshapeGlmRes(NULL, i, threshold  , error_bool = T))
-}
-)
-}
-
-run.glm3 <- function(data_glm, i, threshold = 0) {
-  rownames(data_glmm) <- NULL
-  #data_glm = data_glm %>% select(-gene_id)
-  #print(data_glm)
-  fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = data_glm, link = log)
-  reshapeGlmRes(fit, i, threshold)
-}
-
-############# GLM fitting ###################
-data_glm  = data_glmm
-rownames(data_glm) <- NULL
-plan(multisession, workers = 4)
-a = gene_id_list %>% furrr::future_map(.x = ., ~run.glm3(data_glm %>% filter(gene_id == .x), 
-                                              .x, 
-                                              threshold = 0.5) )
-
-
-
-.############ save res #####
-c = do.call(rbind, a)
-
-c %>% filter(beta == 'genotypeG5.environmentE1')
-```
-
-```{r}
-ground_truth = beta.matrix %>% data.frame() %>%
-                  mutate(gene_id = label_gene) %>%
-                  mutate(genotype = label_genotype) %>%
-                  reshape2::melt(., id = c("gene_id", 'genotype'), value.name = "Actual", variable.name = "beta") 
-
-
-part1  = ground_truth %>% filter(beta %in% c("beta0", 'betaE')) %>%
-                  group_by(gene_id, beta) %>%
-                  summarize( Actual = mean(Actual)) %>%
-                  mutate(beta = ifelse(beta == "beta0", "Intercept", 'betaE')) 
-                  
-part2 = ground_truth %>% filter(beta == "betaG") %>%
-                          mutate(beta = base::paste("genotype", genotype, sep = "")) %>% select(-genotype)
-
-part3 = ground_truth %>% filter(beta == "betaGE") %>%
-                          mutate(beta = base::paste("genotype", genotype, ".environmentE1", sep = "")) %>% select(-genotype)
-
-
-input = rbind(part1,part2, part3) %>% ungroup()
-input %>% dim()
-df_merged = merge(c, input, by = c("gene_id", "beta")) #%>% mutate(n_genotype = n_G)
-
-```
-
-```{r}
-
-
-
-p = ggplot(df_merged) + geom_point(aes(x = Actual * log(2) , y = Inference ), alpha = 0.5, size = 4) + 
-                          facet_grid(~type, scales = "free" ) + geom_abline(slope=1, intercept=0) +
-                          scale_color_manual(values = c("#D55E00", "#E69F00", "#0072B2")) 
-p
-
-
-```
-
-
diff --git a/results/v2/glmm_benchmark.Rmd b/results/v2/glmm_benchmark.Rmd
deleted file mode 100644
index 11978f9a920216806de8d1a2247ef014cd6d302d..0000000000000000000000000000000000000000
--- a/results/v2/glmm_benchmark.Rmd
+++ /dev/null
@@ -1,421 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-07-26'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#library(tidyverse)
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Simulation without ru_rm_5 - all genes
-
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-
-
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-## Data viz
-p<- ddsExtraction.viz(dds.extraction = dds.extraction)
-p
-ggsave(filename = "figures/inputParams_distrib.png", plot = p, width = 20, height = 12) 
-
-```
-
-## Gene segmentation
-
-```{r}
-get_beta_gene_i <- function(fit.mvnorm, n_G, n_E){
-  beta.matrix.tmp <- MASS::mvrnorm(n = (n_E-1)*(n_G-1),
-                                 mu = fit.mvnorm$mu,
-                                 Sigma = fit.mvnorm$sigma )
-  
-  beta0 = beta.matrix.tmp[1,1] %>% unname()
-  betaG = beta.matrix.tmp[1:(n_G-1),2]
-  betaE = beta.matrix.tmp[1:(n_E-1),3]
-  betaGE = beta.matrix.tmp[,4]
-  
-  ### name
-  betaG.colnames = base::paste("genotype", "G", 1:(n_G-1), sep = "")
-  betaE.colnames = base::paste("environment", "E", 1:(n_E-1), sep = "")
-  betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
-  matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
-  
-  beta_gene_i <- c( beta0, betaG, betaE, betaGE)
-  names(beta_gene_i) = matrix.colnames
-
-  return(beta_gene_i)
-}
-
-
-n_g = 1
-n_group_gene = 3
-n_E = 2
-n_G = 400
-
-
-dds.extraction$beta 
-kmean.res = kmeans(dds.extraction$beta[,c(2,3)], n_group_gene)
-
-
-beta.cluster1= dds.extraction$beta[kmean.res$cluster == 1,]
-beta.cluster2= dds.extraction$beta[kmean.res$cluster == 2,]
-beta.cluster3= dds.extraction$beta[kmean.res$cluster == 3,]
-
-
-x = beta.cluster3 %>% as.matrix()
-fit.mvrnorm <- Rfast::mvnorm.mle(x)
-fit.mvrnorm$sigma[c(2,4),c(2,4)] = fit.mvrnorm$sigma[c(2,4),c(2,4)]*3
-#diag(fit.mvrnorm$sigma) <- diag(fit.mvrnorm$sigma) 
-
-#### BETAG.E #####
-fit.mvrnorm$mu[4] = 0
-fit.mvrnorm$sigma[,4] = 0
-fit.mvrnorm$sigma[4,] = 0
-#################
-#fit.mvrnorm$mu[0] = 30
-
-#### BETA0 #####
-fit.mvrnorm$mu[1] = 0
-fit.mvrnorm$sigma[,1] = 0
-fit.mvrnorm$sigma[1,] = 0
-#################
-
-#### BETAE #####
-fit.mvrnorm$mu[3] = 0
-fit.mvrnorm$sigma[,3] = 0
-fit.mvrnorm$sigma[3,] = 0
-
-#fit.mvrnorm$mu[2] = 8
-#fit.mvrnorm$sigma[2,2]= 6
-x <- NULL
-
-
-n_gene = n_g
-a = purrr::map(.x = 1:n_gene, ~ get_beta_gene_i(fit.mvrnorm, n_G, n_E)) 
-beta.matrix = do.call(rbind, a) 
-rownames(beta.matrix) = base::paste("gene", 1:(n_g), sep = "")
-
-
-
-x = beta.matrix %>% data.frame() %>% #%>% select(!starts_with("environment")) %>%
-  rownames_to_column('gene_id') %>% reshape2::melt(., id = "gene_id") %>% dplyr::mutate(type = dplyr::case_when(
-      str_detect(variable, "genotypeG\\d+\\.environment") ~ "GxE",
-      str_detect(variable, "genotypeG\\d+$") ~ "G",
-      str_detect(variable, "environmentE\\d+$") ~ "E",
-      str_detect(variable, "Intercept$") ~ "Intercept")) %>%  reshape2::dcast(., gene_id  ~ type, value.var = "value", fun.aggregate = list)
-
-
-g = x$G
-names(g) <- x$gene_id
-df_tmp2 = data.frame(g) %>% mutate(type = 'betaG') %>% reshape2::melt( id = "type", value.name = "betaG")
-g = x$GxE
-
-names(g) <- x$gene_id
-df_tmp3 = data.frame(g) %>% mutate(type = 'GxE') %>% reshape2::melt( id = "type", value.name = "betaGxE")
-
-
-df = cbind(df_tmp2, df_tmp3) %>% dplyr::select(c(betaGxE, betaG)) %>% mutate(from = 'Simulated') %>% mutate(cluster = 3)
-#df_simu = df
-df_simu = rbind(df_simu, df)
-
-
-df_simu$cluster <- factor(df_simu$cluster)
-ggplot(df_simu) + geom_point(aes(x = betaG, y = betaGxE, col = cluster), alpha = 0.2)
-
-```
-
-```{r}
-
-### Visualisation
-dtf = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster ) %>%
-      reshape2::melt(. ,id=c("cluster"), value.name = "value", variable.name= "type") 
-dtf$cluster <- factor(dtf$cluster)
-
-
-p = ggplot(dtf) + geom_density(aes(x = value, fill = cluster ), alpha = 0.4) + facet_grid(~ type, scales = "free")
-
-p
-ggsave("figures/densityBeta_kmeans.png", p)
-
-
-betas = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster) %>% mutate(from = "Actual")
-betas$cluster = factor(betas$cluster)
-df2 <- rbind(betas %>% dplyr::select(betaG, betaGE, from, cluster), df_simu %>% rename(betaGE = "betaGxE"))
-p = ggplot(df2) + geom_point(aes(x= betaG, betaGE, col = cluster), alpha = 0.1) + facet_grid(~from)
-
-p
-ggsave("figures/scatterplot.png", p, width = 10)
-
-```
-
-## Build simulated counts 
-
-```{r}
-df_2glmmm <- function(y , design2simulate, i){
-  genotype = design2simulate$design2simulate$genotype
-  environment = design2simulate$design2simulate$environment
-  #message("Fitting model ...")
-
-  df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame() %>% mutate(inter = paste(environment, genotype, sep = '_'))
-  df_gene_i$inter <- factor( df_gene_i$inter )
-  rownames(df_gene_i) <- NULL
-  df_gene_i = df_gene_i %>% mutate(gene_id = paste("gene", i, sep = ""))
-  return(df_gene_i)
-}
-
-## benchmarking lme4 / glmmTMB
-library(lme4)
-library(glmmTMB)
-library(broom.mixed)
-library(mice)
-
-
-list_nb_G = c( 10, 100, 400, 700, 1000)
-n_attempt = 1:6
-remove(res_benchmark)
-
-n_G = 10
-for (n_G in list_nb_G){
-      ################### Simulation counts ###############
-      n_gene = 4
-      a = purrr::map(.x = 1:n_gene, ~ get_beta_gene_i(fit.mvrnorm, n_G, n_E)) 
-      beta.matrix = do.call(rbind, a) 
-      beta.input = beta.matrix
-      n_genes = n_g
-      design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 10)
-      
-      log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-      gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-      mu_ij = getMu_ij(log_qij, 1)
-      kij.simulated = getK_ij(mu_ij, gene_dispersion)
-      #######################################################
-      f = map(.x = 1:n_genes, ~df_2glmmm(kij.simulated[.x,], design2simulate, .x) )
-      data_glmm = do.call(rbind, f)
-      
-      
-      ## benchmark lme4
-      print('lme4')
-      start_time <- Sys.time()
-      m.nb <- glmer.nb(y ~ environment +  ( 1 | genotype ) , data=data_glmm, verbose=F)
-      end_time <- Sys.time()
-      time_process = difftime(end_time, start_time, units = "secs") %>% as.numeric()
-      res_lme4 = list(package = "lme4", n_genotype = n_G, duration = time_process, attempt = i) %>% data.frame()
-      
-      
-      print('glmm')
-      start_time <- Sys.time()
-      m.nb <- glmmTMB::glmmTMB(y ~ environment +  ( 1 | genotype ) , data=data_glmm, family=nbinom1, verbose = F)
-      end_time <- Sys.time()
-      time_process = difftime(end_time, start_time, units = "secs") %>% as.numeric()
-      res_glmmTMB = list(package = "glmmTMB nbinom1", n_genotype = n_G, duration = time_process, attempt = i) %>% data.frame()
-      
-      ## bencbmark glmTMB
-      print('glmm')
-      start_time <- Sys.time()
-      m.nb <- glmmTMB::glmmTMB(y ~ environment +  ( 1 | genotype ) , data=data_glmm, family=nbinom2, verbose = F)
-      end_time <- Sys.time()
-      time_process = difftime(end_time, start_time, units = "secs") %>% as.numeric()
-      res_glmmTMB2 = list(package = "glmmTMB nbinom2", n_genotype = n_G, duration = time_process, attempt = i) %>% data.frame()
-      
-      tmp = rbind( res_lme4, res_glmmTMB, res_glmmTMB2 )
-      
-      if (exists("res_benchmark")) res_benchmark = rbind( res_benchmark, tmp )
-      else res_benchmark = tmp
-  
-}
-
-
-```
-
-```{r}
-
-p= ggplot(res_benchmark) + geom_violin(aes(x = duration, y = package, fill = package )) + scale_fill_manual(values = c("#D55E00", "#E69F00", "#0072B2"))
-
-ggsave(filename = 'figures/benchmark_violin_glmmm.png',p, height = 6)
-
-
-res_benchmark2 = res_benchmark
-res_benchmark2$n_genotype = factor(res_benchmark2$n_genotype)
-p = ggplot(res_benchmark2, aes(x = n_genotype, y = duration, color = package )) + geom_boxplot() + geom_point(position = position_jitterdodge()) + scale_y_log10() + ylab('duration (sec)') + scale_color_manual(values = c("#D55E00", "#E69F00", "#0072B2"))
-
-p
-
-ggsave(filename = 'figures/benchmark_glmm.png',p, height = 6)
-
-```
-
-
-```{r}
-
-fit_extraction <- function(fit){
-    fit.res = broom.mixed::tidy(fit)  %>% arrange(term)%>% .$estimate %>% as.numeric()
-    B0 = fit.res[2] 
-    BE = fit.res[3]
-    sd_BGE = fit.res[5]
-    correlation =  fit.res[1]
-    sd_BG = fit.res[4] 
-    ####################################################################
-    res = list(B0 = B0, BE = BE,  sd_BG = sd_BG, sd_BGE = sd_BGE, correlation = correlation) %>% data.frame() 
-    return(res)
-}
-
-fit_glmm <- function(data_glmm, i){
-    gene_i = paste('gene', i, sep = "")
-    data_glmm.filter = data_glmm %>% filter(gene_id == gene_i)
-    ###########################  FIT LME4   #################################
-    print("LME4")
-    m.nb <- glmer.nb(y ~ 0 + environment  + ( 1 + environment | genotype )  , data= data_glmm.filter , verbose=F)
-    res_lme4 = fit_extraction(m.nb)
-    res_lme4 = res_lme4 %>% mutate(gene_id = gene_i) %>% mutate(from = "lme4")
-    
-    #####################  FIT GMTMB 1  #########################
-    print("glmTMB 1")
-    m.nb <- glmmTMB::glmmTMB(y ~  0 + environment  + ( 1 + environment | genotype )   , data=data_glmm.filter, family=nbinom1, verbose = F)
-    res_glmTMB1 = fit_extraction(m.nb)
-    res_glmTMB1 = res_glmTMB1 %>% mutate(gene_id = gene_i) %>% mutate(from = "glmTMB nbinom1")
-    
-    #####################  FIT GMTMB 2  #########################
-    print("glmTMB 2")
-    
-    m.nb <- glmmTMB::glmmTMB(y ~ 0 + environment  + ( 1 + environment | genotype )  , data=data_glmm.filter, family=nbinom2, verbose = F)
-    res_glmTMB2 = fit_extraction(m.nb)
-    res_glmTMB2 = res_glmTMB2 %>% mutate(gene_id = gene_i) %>% mutate(from = "glmTMB nbinom2")
-    
-    res = rbind(res_lme4, res_glmTMB1, res_glmTMB2)
-    return(res)
-}
-
-
-######################## 
-##        SETUP
-########################
-n_gene = 4
-list_nb_G = c(200)
-remove(df_benchmark)
-#########################
-for (n_G in list_nb_G){
-      ################### Simulation counts ###############
-      a = purrr::map(.x = 1:n_gene, ~ get_beta_gene_i(fit.mvrnorm, n_G, n_E)) 
-      beta.matrix = do.call(rbind, a) 
-      beta.input = beta.matrix
-      design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 8)
-      log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-      gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_gene, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-      mu_ij = getMu_ij(log_qij, 1)
-      kij.simulated = getK_ij(mu_ij, gene_dispersion)
-      
-      
-      ######## INPUT #######
-      beta0.input = beta.input[,1] 
-      mean_betaG = beta.input[,2:n_G] %>% rowMeans() 
-      mean_betaE = beta.input[,(n_G+1):(n_G+n_E-1)] 
-      mean_betaGE = beta.input[,(n_G+n_E):dim(beta.input)[2]] %>% rowMeans() 
-      beta0 = beta0.input + mean_betaG
-      #sd(beta0)
-      betaE = beta0.input + mean_betaE + mean_betaGE + mean_betaG
-      #sd_betaG = sd(beta0)
-      #sd_betaGE = sd(betaE)
-      sd_betaG = apply(beta.input[,2:n_G], 1, sd) 
-      sd_betaGE = apply(beta.input[,(n_G+n_E):dim(beta.input)[2]], 1, sd) 
-      
-      sd_betaG = apply(beta.input, 1, sd) 
-      sd_betaGE = apply(beta.input[,2:dim(beta.input)[2]], 1, sd) 
-      input = list(B0 = beta0, BE = betaE, sd_BG = sd_betaG, sd_BGE = sd_betaGE, gene_id = paste('gene', 1:n_gene, sep = ''), correlation = NA) %>% data.frame()
-      #######################################################
-     
-      
-      
-      ####### ESTIMATION ############*
-      f = map(.x = 1:n_gene, ~df_2glmmm(kij.simulated[.x,], design2simulate, .x) )
-      data_glmm = do.call(rbind, f)
-      plan(multisession, workers = 4)
-      results = furrr::future_map(.x = 1:n_gene, ~fit_glmm(data_glmm, .x) )
-      res = do.call(rbind, results)
-      #################################################################
-      
-      ########## JOIN #####
-      res.long = res %>% reshape2::melt( id = c("gene_id", "from"), value.name = "Inference") #%>% mutate(type = 'Inference')
-      input.long = input %>% reshape2::melt( id = "gene_id", value.name = "Actual")
-      df_merged = merge(res.long, input.long, by = c("gene_id", "variable")) %>% mutate(n_genotype = n_G)
-
-      if (exists("df_benchmark")) df_benchmark = rbind( df_benchmark, df_merged )
-      else df_benchmark = df_merged
-  
-}
-
-```
-
-```{r}
-
-p = ggplot(df_benchmark %>% filter(variable != 'correlation')) + geom_point(aes(x = Actual * log(2), y = Inference, color = from ), alpha = 0.5, size = 4) + 
-                          facet_wrap(n_genotype~variable, ncol = 4, scales = "free_x") + geom_abline(slope=1, intercept=0) +
-                          scale_color_manual(values = c("#D55E00", "#E69F00", "#0072B2")) 
-p
-ggsave(filename = 'figures/benchmark_identity8.png',p, width = 12)
-
-
-df_benchmark %>% filter(variable == 'sd_BGE') %>% .$Inference %>% mean()
-df_benchmark %>% filter(variable == 'sd_BG') %>% .$Inference %>% mean()
-
-df_benchmark %>% filter(variable == 'correlation') %>% .$Inference %>% mean()
-
-df_benchmark %>% filter(variable == 'correlation') %>% .$Inference %>% mean()*(df_benchmark %>% filter(variable == 'sd_BG') %>% .$Inference %>% mean() * df_benchmark %>% filter(variable == 'sd_BGE') %>% .$Inference %>% mean())
-
-fit.mvrnorm$sigma[4,2] = 0.0001
-fit.mvrnorm$sigma[2,4] = 0.0001
-
-fit.mvrnorm$mu[4] = -2
-```
-
-
-
-
-
diff --git a/results/v2/rocCuves.Rmd b/results/v2/rocCuves.Rmd
deleted file mode 100644
index af79e1ee367e0269343ef34fd52716e03cd1c565..0000000000000000000000000000000000000000
--- a/results/v2/rocCuves.Rmd
+++ /dev/null
@@ -1,294 +0,0 @@
----
-title: "HTRSIM"
-date: '2022-09-22'
-output:   
-  html_document:
-
-css: 
- - css/template.css
-
----
-
-
-```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-library(HTRSIM)
-library(furrr)
-#install.packages("tidyverse")
-#library(reshape2)
-#library(kableExtra)
-#library(gridExtra)
-#library(MatrixGenerics)
-```
-
-
-## Public data
-
-```{r}
-## Import & reshape table counts
-fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
-tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
-## DESIGN
-fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
-bioDesign <- read.table(file = fn, header = T, sep = ';')
-
-## defining reference
-bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
-bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
-
-
-bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
-tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
-#tabl_cntsA = tabl_cnts
-
-```
-
-```{r }
-## Launch DESEQ2
-dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
-
-## Extract
-
-dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
-#dds.extraction$gene_dispersion
-
-```
-
-## params visualization
-
-
-```{r}
-beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter")
-
-alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
-                          dplyr::rename(., value = ".") %>%
-                          dplyr::mutate(parameter = "dispersion")
-
-dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long)
-
-
-
-p = ggplot(dtf.params_obs, aes(x= value)) +
-      geom_histogram(aes(y=..density..), colour="black", fill="white") + facet_grid(~parameter)+
-        theme(strip.text.x = element_text(size = 13),
-        axis.title = element_text(size = 5),
-        axis.text  = element_text(size = 5))
-
-p
-```
-
-
-## Simu
-
-```{r}
-#### params #####
-n_G = 30
-n_E = 2
-n_genes = 100
-
-
-##################
-beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 15)
-#rowSums(beta.input) %>% which.max()
-#rowSums(beta.input) %>% max()
-log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
-
-
-gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-
-
-#2* 2^log_qij
-mu_ij = getMu_ij(log_qij, 1)
-#max(mu_ij)
-kij.simulated = getK_ij(mu_ij, gene_dispersion)
-#max(kij.simulated)
-
-#kij.simulated = kij.simulated %>% data.frame() %>% filter_all(all_vars(. < 1000000)) 
-
-
-```
-
-
-## DESEQ2
-
-```{r}
-
-beta.actual.matrix = beta.input
-dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-#############
-# Param
-threshold = 0.07
-############
-listBeta = DESeq2::resultsNames(dds_simu)
-plan(multisession, workers = 4)
-res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x), lfcThreshold = threshold) %>% data.frame() %>% .$padj)
-padj.matrix = do.call("cbind", res)
-  
-  
-  
-  dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
-  
-  dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
-                                      stringr::str_replace(., "_vs_G0", "") %>%
-                                      stringr::str_replace(., "_vs_E0", "") %>%
-                                      stringr::str_replace_all(., "_", "") %>%
-                                      stringr::str_replace(., "\\.", ":")
-  
-  columnOfInterest =  design2simulate$model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "")
-  #dds_simu.mcols[,columnOfInterest]
-  
-  ## Get only column of interest
-  idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped)
-  beta.infered = dds_simu.mcols[,idx_cols]
-  
-  ## homogeneize column names & rownames
-  idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames())
-  beta.actual.matrix = beta.actual.matrix[,idx_cols]
-  colnames(beta.infered) = base::colnames(beta.actual.matrix)
-  colnames(padj.matrix) = base::colnames(beta.actual.matrix)
-  rownames(padj.matrix) = base::rownames(beta.actual.matrix)
-  beta.actual.matrix %>% dim()
-  padj.matrix %>% dim()
-  beta.infer.long = beta.infered %>% data.frame() %>%
-                            tibble::rownames_to_column(., var = "gene_id") %>%
-                            dplyr::mutate(origin = "Inference") %>%
-                            reshape2::melt(., value.name = "value", variable.name= "beta")
-  beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>%
-                            tibble::rownames_to_column(., var = "gene_id") %>%
-                            dplyr::mutate(origin = "Actual") %>%
-                            reshape2::melt(., value.name = "value", variable.name= "beta")
-  padj.matrix.long = padj.matrix  %>% data.frame() %>%
-                            tibble::rownames_to_column(., var = "gene_id") %>%
-                            dplyr::mutate(origin = "padj") %>%
-                            reshape2::melt(., value.name = "value", variable.name= "beta")
-  
-  beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long)
-  #beta.merged.long$beta %>% unique()
-  
-  beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when(
-    str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
-    str_detect(beta, "genotypeG\\d+$") ~ "G",
-    str_detect(beta, "environmentE\\d+$") ~ "E",
-    str_detect(beta, "Intercept$") ~ "Intercept")
-  )
-  
-  
-  beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(.,  gene_id + beta + type ~ origin)
-  beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE"))
-  beta.merged.long.reshape2$threshold = threshold
-```
-
-```{r}
-  
-df_comparison = beta.merged.long.reshape2  
-
-
-  
-df_comparison = df_comparison  %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-                    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-
-df_comparison$label <- factor(df_comparison$label)
-df_comparison$threshold <- factor(df_comparison$threshold)
-
-
-  
-```
-
-
-```{r}
-## ROC curve dtf
-df_comparison$threshold = factor(df_comparison$threshold)
-df_roc_deseq = df_comparison
-
-```
-
-## GLM
-
-
-```{r}
-design2simulate$design2simulate %>% dim()
-kij.simulated %>% dim()
-
-############# GLM fitting ###################
-plan(multisession, workers = 4)
-a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
-                                              .x, 
-                                              design2simulate$design2simulate, threshold = threshold) )
-
-#kij.simulated %>% dim()
-############ save res #####
-c = do.call(rbind, a)
-beta.input.long = beta.input %>% data.frame() %>%
-        tibble::rownames_to_column(., var = "gene_id") %>%
-        dplyr::mutate(origin = "Actual") %>%
-        reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-        dplyr::rename(Actual = "value") %>% 
-        dplyr::select(-origin)
-  
-df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(p.val, method= "fdr")) %>% mutate(threshold = threshold)
-
-
-df_tmp = df_tmp  %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-                    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-
-df_tmp$label <- factor(df_tmp$label)
-df_tmp$threshold <- factor(df_tmp$threshold)
-
-```
-
-
-## merge DESEQ GLM
-
-```{r}
-df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval, -p.val))
-df_roc_glm = df_roc_glm %>% mutate(from = 'MASS::glm.nb')
-
-df_roc_deseq = df_roc_deseq %>% mutate(from = "DESEQ2") 
-
-
-df_roc = rbind(df_roc_deseq, df_roc_glm)
-
-```
-
-
-
-
-
-```{r}
-
-library(plotROC)
-p = ggplot(df_roc %>% filter(type != "Intercept") , aes(d = label , m = padj, color = from)) + 
-  geom_roc(n.cuts = 0, labels = F)  + facet_grid(~type) 
- 
-p  + style_roc()
-ggsave(filename = "figures/ROCcurve2.png", plot = p + style_roc(), width = 15, height = 8) 
-
-p = ggplot(calc_auc(p), aes(x = threshold, y = AUC, group=1)) + geom_point() + geom_line() + facet_grid(~type) 
-ggsave(filename = "figures/AUC.png", plot = p, width = 15, height = 10) 
-### boxplot FP
-
-df_comparison2 = df_comparison %>% 
-      filter(type != "Intercept") %>% 
-      group_by(label, prediction, type, threshold) %>% 
-      tally() %>% 
-      ungroup() %>% 
-      group_by(label, type, threshold) %>% 
-      mutate(tt = sum(n)) %>% 
-      mutate(proportion = n/tt) %>%
-      mutate(predict = ifelse(label == prediction, "true", "false"))
-
-
-df_comparison2$threshold = factor(df_comparison2$threshold)
-p = ggplot(df_comparison2) + geom_bar(aes(x = threshold , y = proportion, fill = predict),stat="identity") + facet_grid(type~label)
-p
-p = ggsave(filename = "figures/boxplotFP.png", plot = p, width = 15, height = 8) 
-
-```
-
diff --git a/results/v2/roccurvesBISBIS.R b/results/v2/roccurvesBISBIS.R
deleted file mode 100644
index 3d7053e5a205ec5ef2688a05625d352919141eda..0000000000000000000000000000000000000000
--- a/results/v2/roccurvesBISBIS.R
+++ /dev/null
@@ -1,186 +0,0 @@
-## Simu
-
-
-n_genes = 80
-n_rep_list = c( 2, 3, 5, 15, 25 )
-threshold = 0.5
-n_E = 2
-n_G = 100
-
-
-rm(df_roc)
-
-
-
-
-beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-for (n_rep in n_rep_list){
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = n_rep)
-log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes,
-                                                   n_genotypes = n_G, n_environments = n_E,
-                                                   dispersion.vec = dds.extraction$gene_dispersion,
-                                                   model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-mu_ij = getMu_ij(log_qij, 1)
-kij.simulated = getK_ij(mu_ij, gene_dispersion)
-
-
-
-######################### DESEQ
-beta.actual.matrix = beta.input
-## 2 modifY
-#dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-#############
-# Param
-threshold = 0.5
-
-############## GLM
-a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], ## 2 modify !!!
-                                                     .x,
-                                                     design2simulate$design2simulate, threshold = threshold) )
-
-############ save res #####
-c = do.call(rbind, a)
-beta.input.long = beta.input %>% data.frame() %>%
-  tibble::rownames_to_column(., var = "gene_id") %>%
-  dplyr::mutate(origin = "Actual") %>%
-  reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-  dplyr::rename(Actual = "value") %>%
-  dplyr::select(-origin)
-
-df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(pval, method= "fdr")) %>% mutate(threshold = threshold)
-
-
-
-
-
-## 2 modify
-df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval)) %>% mutate(from = 'MASS::glm.nb') %>% mutate(n_rep = n_rep)
-
-#### merge
-
-#df_roc_deseq = df_roc_deseq %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-#              mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-df_roc_glm = df_roc_glm %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-  mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-
-#df_tmp2 = rbind(df_roc_deseq, df_roc_glm)
-df_tmp2 = df_roc_glm
-if (exists('df_roc') && is.data.frame(get('df_roc'))){
-  df_roc = rbind(df_roc, df_tmp2)
-}
-else{
-  df_roc = df_tmp2
-}
-
-
-
-}
-
-df_roc %>% filter(type %in% c("G","GxE"))%>% group_by(from, n_rep) %>% tally()
-
-df_roc$n_rep <- factor(df_roc$n_rep)
-df_roc_REP = df_roc
-library(plotROC)
-p = ggplot(df_roc %>% filter(type %in% c("G","GxE"))  , aes(d = label , m = padj, color = n_rep)) +
-  geom_roc(n.cuts = 0, labels = F)
-
-
-p2  = p + style_roc() + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D", "#696969" ))
-ggsave(filename = "../../../results/v2/figures/ROCcurve9.png", plot = p2, height = 6, width = 8)
-
-######################
-
-n_genes = 80
-sizeFac_vector = c(0.1, 1, 5, 10, 20 )
-threshold = 0.5
-n_E = 2
-n_G = 20
-
-
-rm(df_roc)
-
-
-
-
-beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
-
-for (sj in sizeFac_vector ){
-  print(sj)
-
-  design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 5)
-  log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
-  gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes,
-                                                     n_genotypes = n_G, n_environments = n_E,
-                                                     dispersion.vec = dds.extraction$gene_dispersion,
-                                                     model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
-  mu_ij = getMu_ij(log_qij, sj)
-  kij.simulated = getK_ij(mu_ij, gene_dispersion)
-
-
-  ######################### DESEQ
-  beta.actual.matrix = beta.input
-  ## 2 modifY
-  #dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
-
-  #############
-  # Param
-  threshold = 0.5
-
-  ############## GLM
-  a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], ## 2 modify !!!
-                                                       .x,
-                                                       design2simulate$design2simulate, threshold = threshold) )
-
-  ############ save res #####
-  c = do.call(rbind, a)
-  beta.input.long = beta.input %>% data.frame() %>%
-    tibble::rownames_to_column(., var = "gene_id") %>%
-    dplyr::mutate(origin = "Actual") %>%
-    reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-    dplyr::rename(Actual = "value") %>%
-    dplyr::select(-origin)
-
-  df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(pval, method= "fdr")) %>% mutate(threshold = threshold)
-
-
-
-
-
-  ## 2 modify
-  mean_readsPerSample = kij.simulated %>% colSums() %>% mean() %>% round() %>% format(., scientific = FALSE, big.mark = ',' )
-  df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval)) %>% mutate(from = 'MASS::glm.nb') %>% mutate(Depth_seq = mean_readsPerSample)
-
-  #### merge
-
-  #df_roc_deseq = df_roc_deseq %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-  #              mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-  df_roc_glm = df_roc_glm %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
-    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
-
-  #df_tmp2 = rbind(df_roc_deseq, df_roc_glm)
-  df_tmp2 = df_roc_glm
-  if (exists('df_roc') && is.data.frame(get('df_roc'))){
-    df_roc = rbind(df_roc, df_tmp2)
-  }
-  else{
-    df_roc = df_tmp2
-  }
-
-
-
-}
-
-df_roc$Depth_seq %>% unique()
-df_roc$Depth_seq <- factor(df_roc$Depth_seq, levels = c("24,546", "250,278" , "1,223,084", "2,470,148","4,995,472"))
-#df_roc_REP = df_roc
-library(plotROC)
-p = ggplot(df_roc %>% filter(type %in% c("G","GxE"))  , aes(d = label , m = padj, color = Depth_seq)) +
-  geom_roc(n.cuts = 0, labels = F)
-p
-p + style_roc() + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D", "#696969" ))
-p
-p2  = p + style_roc() + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D", "#696969" ))
-ggsave(filename = "../../../results/v2/figures/ROCcurve10.png", plot = p2, height = 6, width = 8)
diff --git a/results/v3/2022-12-04_dev.Rmd b/results/v3/2022-12-04_dev.Rmd
deleted file mode 100644
index 259adc0dc0f91eb6f1bece423b4b4e014bc789dd..0000000000000000000000000000000000000000
--- a/results/v3/2022-12-04_dev.Rmd
+++ /dev/null
@@ -1,158 +0,0 @@
----
-title: "HTRsim"
-date: '2022-11-23'
-output:   
-  html_document:
----
-
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-```
-
-## Simulation
-
-### Parameters
-
-```{r}
-# -- SETUP
-n_G = 1000
-n_genoT = 3
-n_env = 2
-FixIntercept = T
-max_n_rep = 10
-sequencing_fact = 1
-n_clus = 1
-thr = 2
-###########
-```
-
-### Random intercept 
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Simulate counts
-mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = FixIntercept, n_clusters = n_clus)
-# -- count table & experimental design
-count_table = mock$countTable %>% as.data.frame()
-bioDesign = mock$design
-
-# -- ground truth
-beta.actual = mock$actualParam$beta
-
-```
-
-
-### get time to fit 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", eval=FALSE}
-rm(dtf2evaluation)
-rm (dispersion_comparison.glm)
-#            -- MASS::glm --           #x
-dtf2fit = HTRfit::reshapeCounTable(count_table,
-                                          bioDesign)
-l.glm = HTRfit::launch.glm(dtf2fit)
-fitDtf.glm =listFit2dtf(l.glm)
-fitDtf.glm$inference$from = 'MASS::glm'
-expectation = getExpectation(beta.actual,
-                                    toEval = "glm" , threshold = thr)
-prediction = getPrediction(fitDtf.glm$inference, 
-                                 threshold = thr, alphaRisk = 0.05)
-comp.glm = getComparison(actual.dtf = expectation, 
-                                 inference.dtf = prediction)
-
-dispersion_estimated = fitDtf.glm$dispersion
-# -- one dispersion per gene
-dispersion_actual = mock$actualParameters$dispersion %>% 
-          rowMeans() %>% 
-          as.data.frame() %>% 
-          dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id")
-
-
-dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id")
-dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id")
-dtf_idxMvrnorm = expectation %>% dplyr::select(gene_id, idx_mvrnom)
-dtf_idxMvrnorm = data.table::data.table(dtf_idxMvrnorm, key = "gene_id")
-dtf_idxMvrnorm = dtf_idxMvrnorm[!duplicated(dtf_idxMvrnorm)]
-dispersion_comparison.glm = dispersion_actual[dispersion_estimated]
-dispersion_comparison.glm = dtf_idxMvrnorm[dispersion_comparison.glm]
-dispersion_comparison.glm = dispersion_comparison.glm %>% mutate(from = "MASS::glm")
-dispersion_comparison.glm$dispersion.estimate = 1/dispersion_comparison.glm$dispersion.estimate
-#            -- DESEQ2 --           #
-dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
-deseqFitdtf = getCoefficientsFromDds(dds_simu)
-prediction = getPrediction(deseqFitdtf, threshold = thr, 
-                        alphaRisk = 0.05, pvalCorrection = T)
-comp.deseq = getComparison(actual.dtf = expectation, 
-                                inference.dtf = prediction )
-comp.deseq$from = 'DESEQ2'
-
-dispersion_estimated = getDispersionFromDDS(dds_simu)
-dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id")
-dispersion_comparison.deseq = dispersion_actual[dispersion_estimated]
-dispersion_comparison.deseq = dtf_idxMvrnorm[dispersion_comparison.deseq]
-dispersion_comparison.deseq = dispersion_comparison.deseq %>% mutate(from = "DESEQ2")
-
-
-#       ---- Saving results -----          #
-tmp = rbind(comp.deseq, comp.glm)
-dispersion_comparison = rbind(dispersion_comparison.deseq, dispersion_comparison.glm)
-if (exists('dtf2evaluation')){
-    dtf2evaluation = rbind(dtf2evaluation, tmp)
-}
-else{
-    dtf2evaluation = tmp
-}
-
-
-```
-
-```{r}
-
-
-dtf2evaluation$idx_mvrnom = factor(dtf2evaluation$idx_mvrnom)
-stderror = dtf2evaluation %>% filter(beta != "(Intercept)" ) %>% reshape2::dcast(., gene_id + beta + term + idx_mvrnom ~ from, value.var = 'std.error')
-p1 = ggplot(stderror) + geom_point(aes(`MASS::glm`, DESEQ2, col = idx_mvrnom)) +
-  geom_abline(intercept = 0, slope = 1) + scale_x_log10() + scale_y_log10() + xlab("Std.error_MASS") + ylab("Std.error_DESEQ2")
-p1
-
-
-dispersion_comparison$idx_mvrnom = factor(dispersion_comparison$idx_mvrnom)
-p2 = ggplot(dispersion_comparison) + geom_point(aes(dispersion.actual, dispersion.estimate, col = idx_mvrnom, shape = from) ) +
-  geom_abline(intercept = 0, slope = 1)    + scale_x_log10() + scale_y_log10()
-
-p3 = ggplot(dtf2evaluation %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, color = idx_mvrnom), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) + 
-  facet_grid(beta~from, scales = "free")
-
-p3_bis = ggplot(dtf2evaluation %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) + 
-  facet_grid(beta~from, scales = "free")
-
-p4 = ggplot(dtf2evaluation %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, col = from)) +
-  geom_roc(n.cuts = 0, labels = F)  #+ facet_grid(~beta, scales = "free")
-
-
-
-beta.actual$idx_mvrnom <- factor(beta.actual$idx_mvrnom)
-p5  = ggplot(beta.actual) + geom_point(aes(x = betaG, y= betaGE, col = idx_mvrnom))
-  
-library(gridExtra)
-a = grid.arrange( p3, p3_bis, nrow = 2, ncol = 1)
-b = grid.arrange(p1,p2, nrow = 2)
-f = grid.arrange(a,b, ncol = 2)
-
-ggsave('test_export2.png',f, width = 14, height = 10)
-
-
-dispersion_comparison %>% filter(idx_mvrnom == 3) %>% .$gene_id  %>% unique()
-dtf2evaluation %>% filter(idx_mvrnom == 3) %>% .$gene_id %>% unique()
-```
diff --git a/results/v3/2023_02_10_benchmarking.Rmd b/results/v3/2023_02_10_benchmarking.Rmd
deleted file mode 100644
index 2554d7463c7e8adc94aa8f76adfee572efd24491..0000000000000000000000000000000000000000
--- a/results/v3/2023_02_10_benchmarking.Rmd
+++ /dev/null
@@ -1,237 +0,0 @@
----
-title: "Benchmarking packages"
-date: "2023-02-08"
-output: 
-  html_document:
-    toc: true 
-css: css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-library(Rmisc)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r}
-# -- SETUP
-n_G = 100
-n_genoT_list = c(1000)
-n_env = 2
-FixIntercept = F
-max_n_rep = 8
-sequencing_fact = 1
-n_clus = 3
-thr = 2
-number_ofRepetition = 4
-###########
-```
-
-### get time to fit 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", eval=FALSE}
-#       ----------------        #
-#      /!\ Very very long
-# Not exectuted in the notebook
-#   -------------------------   #
-
-#rm(dtf2evaluation)
-for (n_genoT in n_genoT_list){
-  for (i in 1:number_ofRepetition){
-    # -- traceback
-    print(paste("Number of genotype:", n_genoT, sep = " "))
-    print(paste("Iteration:", i, sep = " "))
-    
-    #     -- Simulate counts --     #
-    mock_random = rnaMock(n_genes = n_G, 
-                          n_genotypes = n_genoT, 
-                          n_environments = n_env, 
-                          max_n_replicates = max_n_rep, 
-                          sequencing_factor = sequencing_fact, 
-                          fixIntercept = FixIntercept, 
-                          n_clusters = n_clus)
-    
-    # -- count table & experimental design
-    count_table_random = mock_random$countTable %>% as.data.frame()
-    bioDesign_random = mock_random$design
-    #      -- ground truth --       #
-    beta.actual_random = mock_random$actualParam$beta
-    
-    #     --- Glm mixte fitting ---     #
-    # -- lme4::glm.nb
-    dtf2fit_random = HTRfit::reshapeCounTable(count_table_random,
-                                              bioDesign_random)
-    start_time <- Sys.time()
-    l_random.lme4 = HTRfit::launch.glm_mixte(dtf2fit_random, 
-                                             package = "lme4")
-    end_time <- Sys.time()
-
-    fitDtf_random.lme4 =listFit2dtf(l_random.lme4)
-    fitDtf_random.lme4$inference$timeProcess = difftime(end_time, 
-                            start_time, units = "secs") %>% as.numeric()
-    fitDtf_random.lme4$inference$from = 'lme4' 
-    fitDtf_random.lme4$inference$n_genotypes = n_genoT
-    # -- glmmTMB::glmmTMB
-    start_time <- Sys.time()
-    l_random.tmb = HTRfit::launch.glm_mixte(dtf2fit_random, package = "glmmTMB")
-    end_time <- Sys.time()
-
-    fitDtf_random.tmb =listFit2dtf(l_random.tmb)
-    fitDtf_random.tmb$inference$timeProcess = difftime(end_time, 
-                            start_time, units = "secs") %>% as.numeric()
-    fitDtf_random.tmb$inference$from = 'glmmTMB'
-    fitDtf_random.tmb$inference$n_genotypes = n_genoT
-    #       -------------------------        #
-    
-    #     -- join LME4 & glmmTMB res --      #
-    fitDtf.inference = rbind(fitDtf_random.tmb$inference, 
-                             fitDtf_random.lme4$inference)
-    
-    expectation = getExpectation(beta.actual_random,
-                                        toEval = "glm_mixte" , threshold = thr)
-    prediction = getPrediction(fitDtf.inference, threshold = thr, alphaRisk = 0.05)
-    
-      #      -- Glm mixte : join actual & inference --       #
-    actual2join.dtf <- data.table::data.table(expectation, 
-                                            key = c("gene_id", "term"))
-    inference2join.dtf <- data.table::data.table(prediction, 
-                                               key = c("gene_id", "term"))
-    comp.glmMixte <- actual2join.dtf[inference2join.dtf] %>% select(-group, -effect)
-    
-    #            -- MASS::glm --           #x
-    if (n_genoT < 300){
-    start_time <- Sys.time()
-    l_random.glm = HTRfit::launch.glm(dtf2fit_random)
-    end_time <- Sys.time()
-
-    fitDtf_random.glm =listFit2dtf(l_random.glm)
-    fitDtf_random.glm$inference$timeProcess = difftime(end_time, 
-                            start_time, units = "secs") %>% as.numeric()
-    fitDtf_random.glm$inference$from = 'MASS::glm'
-    fitDtf_random.glm$inference$n_genotypes = n_genoT
-    # -- convert to log base 10
-    fitDtf.glm$inference$estimate = fitDtf.glm$inference$estimate/log(2) 
-    fitDtf.glm$inference$std.error = fitDtf.glm$inference$std.error/log(2)
-    expectation = getExpectation(beta.actual_random,
-                                        toEval = "glm" , threshold = thr)
-    prediction = getPrediction(fitDtf_random.glm$inference, 
-                                     threshold = thr, alphaRisk = 0.05)
-    comp.glm = getComparison(actual.dtf = expectation, 
-                                     inference.dtf = prediction)
-    }
-    #            -- DESEQ2 --           #
-    if (n_genoT <= 100){
-    start_time <- Sys.time()
-    dds_simu = HTRsim::fit_deseq(count_table_random, bioDesign_random)
-    end_time <- Sys.time()
-
-    deseqFitdtf = getCoefficientsFromDds(dds_simu)
-    prediction = getPrediction(deseqFitdtf, threshold = thr, 
-                            alphaRisk = 0.05, pvalCorrection = T)
-    comp.deseq = getComparison(actual.dtf = expectation, 
-                                    inference.dtf = prediction )
-    comp.deseq$timeProcess = difftime(end_time, 
-                            start_time, units = "secs") %>% as.numeric()
-    comp.deseq$from = 'DESEQ2'
-    comp.deseq$n_genotypes = n_genoT
-    }
-    
-    #       ---- Saving results -----          #
-    list_column = c("gene_id", "term", "actual.value" ,"estimate", "std.error", "timeProcess", "from", "n_genotypes", "statistic"     , "p.value", "padj" , "prediction.label")
-    
-    if (n_genoT <=100) {
-      comp.deseq = comp.deseq %>% select(list_column)
-    tmp = rbind(comp.deseq, comp.glm, comp.glmMixte)
-    }
-    if (n_genoT <=300) {
-    comp.glm = comp.glm %>% select(list_column)
-    tmp = rbind(comp.glm, comp.glmMixte)
-    }
-    else tmp =  comp.glmMixte
-    
-    if (exists('dtf2evaluation')){
-        dtf2evaluation = rbind(dtf2evaluation, tmp)
-    }
-    else{
-        dtf2evaluation = tmp
-    }
-  }
-}
-
-
-#write_tsv(tgc, file = "2023_01_17-tgc_backup.tsv")
-#write_tsv(dtf2evaluation, file = "2023_01_17-dfEval_random_backup.tsv")
-
-```
-
-## Evaluation
-
-### Preparing dataframe
-
-```{r}
-dtf2evaluation <- read_tsv('2023_01_17-dfEval_random_backup.tsv', show_col_types = FALSE)
-dtf2evaluation$timeProcess = dtf2evaluation$timeProcess/60
-tgc <- Rmisc::summarySE(dtf2evaluation,
-                 measurevar="timeProcess",
-                 groupvars=c("n_genotypes","from"))
-# -- backup
-#write_tsv(tgc, file = "2023_01_17-tgc_backup.tsv")
-
-#tgc <- read_tsv('2023_01_17-tgc_backup.tsv', show_col_types = FALSE)
-```
-
-### Build graph for evaluation 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-#tgc$n_genotypes <- factor(tgc$n_genotypes)
-#tgc$n_genotypes = as.numeric(as.character(tgc$n_genotypes))
-tgc$from = factor(tgc$from, levels = c("DESEQ2","MASS::glm",  "lme4", "glmmTMB" ))
-
-tgc = tgc %>% filter(from %in% c('DESEQ2', "MASS::glm", "glmmTMB"))
-p= ggplot(tgc, aes(x = n_genotypes, y = timeProcess, colour = from)) + 
-  geom_line(aes(x = n_genotypes, y= timeProcess+sd), 
-            linetype = "dashed", alpha = 0.6) +
-  geom_line(aes(x = n_genotypes, y= timeProcess-sd), 
-            linetype = "dashed", alpha = 0.6) +
-  geom_ribbon(aes(ymin = timeProcess-sd , 
-                  ymax= timeProcess+sd, fill = from), alpha=0.4) + 
-  geom_line(aes(x = n_genotypes, y= timeProcess), 
-            linetype = "solid", alpha = 0.6) +
-  #geom_point() +  
-  scale_y_log10() + scale_x_log10() +
-    scale_color_manual(values = c("#16A085", "#2C3E50",  '#FFC30F' )) +
-  scale_fill_manual(values = c("#16A085", "#2C3E50", '#FFC30F' )) + ylab('Time processing (min)')
-p
-ggsave("timeprocessing3.png", p, width = 8, height = 5)
-df2ROC = dtf2evaluation %>% 
-              filter(from %in% c("MASS::glm", "DESEQ2"))%>% 
-              filter(term != "(Intercept)") %>%
-              dplyr::mutate(
-                actual.label =
-                    dplyr::if_else(abs(actual.value) < 2,
-                      "nonDE", "DE"
-                )
-        )
-df2ROC$n_genotypes <- factor(df2ROC$n_genotypes)
-# -- ROC curve
-p1 = ggplot(df2ROC, 
-           aes(d = actual.label , m = padj, col = from)) + 
-  geom_roc(n.cuts = 0, labels = F)   + facet_grid(~n_genotypes) 
-  scale_color_manual(values = c("#yellow","#BDBDBD"))
-p1
-```
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE}
-p
-```
diff --git a/results/v3/2023_02_10_benchmarkingv2.Rmd b/results/v3/2023_02_10_benchmarkingv2.Rmd
deleted file mode 100644
index 2a0a82a55bd677dfb7bdace1f8e61781f4870ef8..0000000000000000000000000000000000000000
--- a/results/v3/2023_02_10_benchmarkingv2.Rmd
+++ /dev/null
@@ -1,174 +0,0 @@
----
-title: "Benchmarking packages"
-date: "2023-02-08"
-output: 
-  html_document:
-    toc: true 
-css: css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-library(Rmisc)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r}
-# -- SETUP
-n_G = 100
-n_genoT_list = c(100)
-n_env = 2
-FixIntercept = T
-max_n_rep = 15
-sequencing_fact = 2
-n_clus = 3
-thr = 2
-number_ofRepetition = 3
-###########
-```
-
-### get time to fit 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", eval=FALSE}
-#       ----------------        #
-#      /!\ Very very long
-# Not executed in the notebook
-#   -------------------------   #
-
-#rm(dtf2evaluation)
-for (n_genoT in n_genoT_list){
-  for (i in 1:number_ofRepetition){
-    # -- traceback
-    print(paste("Number of genotype:", n_genoT, sep = " "))
-    print(paste("Iteration:", i, sep = " "))
-    
-    #     -- Simulate counts --     #
-    mock_fixed = rnaMock(n_genes = n_G, 
-                          n_genotypes = n_genoT, 
-                          n_environments = n_env, 
-                          max_n_replicates = max_n_rep, 
-                          sequencing_factor = sequencing_fact, 
-                          fixIntercept = FixIntercept, 
-                          n_clusters = n_clus)
-    
-    # -- count table & experimental design
-    count_table_fixed = mock_fixed$countTable %>% as.data.frame()
-    bioDesign_fixed = mock_fixed$design
-    #      -- ground truth --       #
-    beta.actual_fixed = mock_fixed$actualParam$beta
-    
-    #            -- MASS::glm --           #x
-    dtf2fit_fixed = HTRfit::reshapeCounTable(count_table_fixed,
-                                              bioDesign_fixed)
-    start_time <- Sys.time()
-    l_fixed.glm = HTRfit::launch.glm(dtf2fit_fixed)
-    end_time <- Sys.time()
-
-    fitDtf_fixed.glm =listFit2dtf(l_fixed.glm)
-    fitDtf_fixed.glm$inference$timeProcess = difftime(end_time, 
-                            start_time, units = "secs") %>% as.numeric()
-    fitDtf_fixed.glm$inference$from = 'MASS::glm'
-    fitDtf_fixed.glm$inference$n_genotypes = n_genoT
-    expectation = getExpectation(beta.actual_fixed,
-                                        toEval = "glm" , threshold = thr)
-    prediction = getPrediction(fitDtf_fixed.glm$inference, 
-                                     threshold = thr, alphaRisk = 0.05)
-    comp.glm = getComparison(actual.dtf = expectation, 
-                                     inference.dtf = prediction)
-    
-    #            -- DESEQ2 --           #
-    start_time <- Sys.time()
-    dds_simu = HTRsim::fit_deseq(count_table_fixed, bioDesign_fixed)
-    end_time <- Sys.time()
-
-    deseqFitdtf = getCoefficientsFromDds(dds_simu)
-    prediction = getPrediction(deseqFitdtf, threshold = thr, 
-                            alphaRisk = 0.05, pvalCorrection = T)
-    comp.deseq = getComparison(actual.dtf = expectation, 
-                                    inference.dtf = prediction )
-    comp.deseq$timeProcess = difftime(end_time, 
-                            start_time, units = "secs") %>% as.numeric()
-    comp.deseq$from = 'DESEQ2'
-    comp.deseq$n_genotypes = n_genoT
-    
-    
-    #       ---- Saving results -----          #
-    list_column = c("gene_id", "term", "actual.value" ,"estimate", "std.error", "timeProcess", "from", "n_genotypes", "statistic"     , "p.value", "padj" , "prediction.label")
-    
-    #comp.deseq = comp.deseq %>% select(list_column)
-    #print(colnames(comp.deseq))
-    #print(colnames(comp.glm))
-    tmp = rbind(comp.deseq, comp.glm)
-    
-    if (exists('dtf2evaluation')){
-        dtf2evaluation = rbind(dtf2evaluation, tmp)
-    }
-    else{
-        dtf2evaluation = tmp
-    }
-  }
-}
-
-
-#write_tsv(tgc, file = "2023_01_17-tgc_backup.tsv")
-#write_tsv(dtf2evaluation, file = "2023_02_03-dfEval_fixed_backup.tsv")
-
-```
-
-## Evaluation
-
-### Preparing dataframe
-
-```{r}
-
-tgc <- Rmisc::summarySE(dtf2evaluation, # per-group summary of timeProcess; the plot below reads its timeProcess and sd columns
-                 measurevar="timeProcess",
-                 groupvars=c("n_genotypes","from"))
-```
-
-### Build graph for evaluation 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-
-tgc$from = factor(tgc$from, levels = c("DESEQ2","MASS::glm"))
-p= ggplot(tgc, aes(x = n_genotypes, y = timeProcess, colour = from)) + 
-  geom_line(aes(x = n_genotypes, y= timeProcess+sd), 
-            linetype = "dashed", alpha = 0.6) +
-  geom_line(aes(x = n_genotypes, y= timeProcess-sd), 
-            linetype = "dashed", alpha = 0.6) +
-  geom_ribbon(aes(ymin = timeProcess-sd , 
-                  ymax= timeProcess+sd, fill = from), alpha=0.4) + 
-  geom_line(aes(x = n_genotypes, y= timeProcess), 
-            linetype = "solid", alpha = 0.6) +
-  #geom_point() +  
-  scale_y_log10() + scale_x_log10() +
-    scale_color_manual(values = c("#16A085", "#2C3E50", '#EC7063', '#FFC30F' )) +
-  scale_fill_manual(values = c("#16A085", "#2C3E50", '#EC7063', '#FFC30F' ))
-
-p
-df2ROC = dtf2evaluation %>% 
-              filter(from %in% c("MASS::glm", "DESEQ2"))%>% 
-              filter(term != "(Intercept)") %>%
-              dplyr::mutate(
-                actual.label =
-                    dplyr::if_else(abs(actual.value) < 2,
-                      "nonDE", "DE"
-                )
-        )
-df2ROC$n_genotypes <- factor(df2ROC$n_genotypes)
-# -- ROC curve
-p1 = ggplot(df2ROC, aes(d = actual.label , m = padj, col = from)) + 
-  geom_roc(n.cuts = 0, labels = F)   + facet_grid(~n_genotypes) +
-    scale_color_manual(values = c("#16A085", "#2C3E50"))
-p1
-ggsave(p1, filename = "benchmark_fig.png", height = 8, width = 10)
-```
diff --git a/results/v3/2023_02_10_benchmarkingv3.Rmd b/results/v3/2023_02_10_benchmarkingv3.Rmd
deleted file mode 100644
index 0307f782aad85a783a5c83c7f6f013e5d51b45c2..0000000000000000000000000000000000000000
--- a/results/v3/2023_02_10_benchmarkingv3.Rmd
+++ /dev/null
@@ -1,161 +0,0 @@
----
-title: "Random intercept"
-date: "2023-02-08"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r}
-# -- SETUP
-n_G = 4
-n_genoT = 300
-n_env = 2
-FixIntercept = F
-max_n_rep = 7
-sequencing_fact = 2
-n_clus = 1
-thr = 1
-###########
-```
-
-### Random intercept 
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Simulate counts
-mock_random = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = FixIntercept, n_clusters = n_clus)
-
-# -- count table & experimental design
-count_table_random = mock_random$countTable %>% as.data.frame()
-bioDesign_random = mock_random$design
-
-# -- ground truth
-beta.actual_random = mock_random$actualParam$beta
-
-```
-
-## Estimation
-
-### Launch glmmTMB::glmmTMB on simulated data (random intercept)
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- glmmTMB::glmmTMB
-dtf2fit_random = HTRfit::reshapeCounTable(count_table_random, bioDesign_random)
-l_random.tmb = HTRfit::launch.glm_mixte(dtf2fit_random, package = "glmmTMB")
-fitDtf_random.tmb =listFit2dtf(l_random.tmb)
-```
-
-```{r}
-# -- MASS::glm
-l_random.glm = HTRfit::launch.glm(dtf2fit_random)
-fitDtf_random.glm =listFit2dtf(l_random.glm)
-prediction = getPrediction(fitDtf_random.glm$inference, 
-                                     threshold = thr, alphaRisk = 0.05)
-
-
-glm.dtf_lng = fitDtf_random.glm$inference %>%
-        dplyr::mutate(type = dplyr::case_when(
-      str_detect(term, "genotypeG\\d+\\:environment") ~ "betaGE",
-      str_detect(term, "genotypeG\\d+$") ~ "betaG",
-      str_detect(term, "environmentE\\d+$") ~ "betaE",
-      str_detect(term, "(Intercept)") ~ "(Intercept)")) %>%
-      reshape2::dcast(., gene_id ~ type, value.var = "estimate", fun.aggregate = list) %>% unnest(c(`(Intercept)`,betaE, betaG, betaGE))
-
-
-glm.dtf = getExpectation(glm.dtf_lng , toEval = "glm_mixte" , threshold = thr) %>% rename(estimate = "actual.value")  %>% mutate(from = "prediction Mass::glm")
-
-```
-
-### Join Actual & Inference
-
-```{r}
-###### -- GLMTMB -- #######
-expectation_random = getExpectation(beta.actual_random,toEval = "glm_mixte" , threshold = thr)
-prediction_glmMixte = getPrediction(fitDtf_random.tmb$inference, threshold = thr, alphaRisk = 0.05)  %>% mutate(from  = 'prediction glm mixte' )
-# - rbind glm & glm mixte
-prediction.dtf = rbind(prediction_glmMixte %>% select(estimate, term, gene_id, from), glm.dtf %>% select(estimate, term, gene_id, from))
-
-# -- join actual & inference
-actual2join.dtf <- data.table::data.table(expectation_random, key = c("gene_id", "term"))
-inference.dtf <- data.table::data.table(prediction.dtf, key = c("gene_id", "term"))
-
-comparison.tmb <- actual2join.dtf[inference.dtf]
-
-comparison.dtf = comparison.tmb
-```
-
-## Evaluation
-
-### Build graph for evaluation mixte model
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-
-# -- preparing dtf
-comparison.dtf$term = factor(comparison.dtf$term, levels = c("(Intercept)", "environmentE1","cor__(Intercept).environmentE1", "sd__(Intercept)", "sd__environmentE1"))
-
-
-# -- Identity plot
-p = ggplot(comparison.dtf)  + 
-  geom_point(aes(x = actual.value, 
-                 y = estimate, col = from), alpha = 0.6, size = 3) +
-  geom_abline(intercept = 0, slope = 1) + 
-  facet_wrap(~term, scales = "free")  + 
-  theme(strip.text.x = element_text(size = 7)) +
-  scale_color_manual(values = c('#581845', '#FFC30F'))
-p
-#ggsave("../img/graph/poc_glmm2_1000.png", p,) 
-```
-
-
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE}
-
-library(ggridges)
-
-pred_mixte = prediction_glmMixte %>% select(estimate, term, gene_id, from)  %>% reshape2::dcast(., gene_id + from ~ term, value.var = "estimate")
-pred_mixte = pred_mixte %>% dplyr::rowwise() %>% mutate(betaG_E0 = list(rnorm(1000, mean = `(Intercept)` , sd = `sd__(Intercept)` ))) %>% # rowwise(): each row draws its 1000 values from its own mean/sd instead of one shared draw recycled to every row
-      mutate(betaG_E1 = list(rnorm(1000, mean = environmentE1 + `(Intercept)` , sd = `sd__(Intercept)` ))) %>%
-      mutate(betaGE_E1 = list(rnorm(1000, mean = environmentE1 + `(Intercept)` , sd = sd__environmentE1 ))) %>% dplyr::ungroup() %>% # drop the row-wise grouping before reshaping
-      select(gene_id, betaG_E0, betaG_E1, betaGE_E1, from) %>% 
-      unnest(c(betaG_E0, betaG_E1, betaGE_E1))
-
-
-pred_glm = glm.dtf_lng %>% mutate(betaG_E0 = `(Intercept)` + betaG) %>% 
-                       mutate(betaG_E1 = `(Intercept)` + betaE + betaG  ) %>%
-                       mutate(betaGE_E1 = `(Intercept)` + betaE + betaGE) %>% select(gene_id, betaG_E0, betaG_E1, betaGE_E1)  %>% mutate(from = "prediction MASS::glm")
-
-actu = beta.actual_random %>% mutate(betaG_E0 = `(Intercept)` + betaG) %>% 
-                       mutate(betaG_E1 = `(Intercept)` + betaE + betaG  ) %>%
-                       mutate(betaGE_E1 = `(Intercept)` + betaE + betaGE) %>% select(gene_id, betaG_E0, betaG_E1, betaGE_E1)  %>% mutate(from = "Actual")
-
-
-dtf = rbind(actu, pred_glm, pred_mixte) %>% reshape2::melt(
-      id.vars = c("gene_id", 'from'),
-      value.name = "value",
-      variable.name = 'env')
-
-dtf %>% dplyr::filter(gene_id %in% c("gene1"))
-p = ggplot(dtf %>% filter(gene_id %in% c("gene1"))) + 
-  geom_density_ridges(aes(x = value , y = from, fill = from), alpha = 0.6) + facet_grid(env~gene_id, scales = 'free_x')  + scale_fill_manual(values = c("#FFE699", "#EC7063", "#2C3E50"))
-p
-ggsave("../img/graph/poc_glmm1000.png", p, height = 6, width = 8) 
-
-```
diff --git a/results/v3/2023_02_21_benchmarking_distrib.Rmd b/results/v3/2023_02_21_benchmarking_distrib.Rmd
deleted file mode 100644
index 09963207d665e083b52e8d3567730d5a72e2ea3a..0000000000000000000000000000000000000000
--- a/results/v3/2023_02_21_benchmarking_distrib.Rmd
+++ /dev/null
@@ -1,201 +0,0 @@
----
-title: "Random intercept"
-date: "2023-02-08"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r}
-# -- SETUP
-n_G = 5
-n_genoT = 200
-n_env = 2
-FixIntercept = F
-max_n_rep = 15
-sequencing_fact = 2
-n_clus = 7
-thr = 1
-###########
-```
-
-### Random intercept 
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Simulate counts
-mock_random = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = FixIntercept, n_clusters = n_clus)
-
-# -- count table & experimental design
-count_table_random = mock_random$countTable %>% as.data.frame()
-bioDesign_random = mock_random$design
-
-# -- ground truth
-beta.actual_random = mock_random$actualParam$beta
-
-beta.actual_random$idx_mvrnom %>% unique()
-beta.actual_random %>% filter(gene_id == "gene10")
-```
-
-## Estimation
-
-### Launch glmmTMB::glmmTMB on simulated data (random intercept)
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- glmmTMB::glmmTMB
-dtf2fit_random = HTRfit::reshapeCounTable(count_table_random, bioDesign_random)
-#dtf2fit_random2 = dtf2fit_random %>% filter(genotype != "G0")
-l_random.tmb = HTRfit::launch.glm_mixte(dtf2fit_random, package = "glmmTMB") # dtf2fit_random2 is never created (its definition above is commented out)
-fitDtf_random.tmb =listFit2dtf(l_random.tmb)
-```
-
-```{r}
-# -- MASS::glm
-l_random.glm = HTRfit::launch.glm(dtf2fit_random)
-fitDtf_random.glm =listFit2dtf(l_random.glm)
-```
-
-
-```{r}
-prediction_glm = fitDtf_random.glm$inference %>%
-        dplyr::mutate(type = dplyr::case_when(
-      str_detect(term, "genotypeG\\d+\\:environment") ~ "betaGE",
-      str_detect(term, "genotypeG\\d+$") ~ "betaG",
-      str_detect(term, "environmentE\\d+$") ~ "betaE",
-      str_detect(term, "(Intercept)") ~ "(Intercept)")) %>%
-      reshape2::dcast(., gene_id ~ type, value.var = "estimate", fun.aggregate = list) %>% unnest(c(`(Intercept)`,betaE, betaG, betaGE))
-glm.dtf = getExpectation(prediction_glm , toEval = "glm_mixte" , threshold = thr) %>% dplyr::rename(estimate = "actual.value")  %>% mutate(from = "prediction Mass::glm")
-
-
-prediction_glmMixte = getPrediction(fitDtf_random.tmb$inference, threshold = thr, alphaRisk = 0.05)  %>% mutate(from  = 'prediction glm mixte' )
-
-```
-
-
-```{r, message=FALSE, warning=FALSE}
-
-library(ggridges)
-
-prediction_glmMixte = prediction_glmMixte %>% select(estimate, term, gene_id, from)  %>% reshape2::dcast(., gene_id + from ~ term, value.var = "estimate")
-nb_tirage = 10000
-prediction_glmMixte = prediction_glmMixte %>%  
-    mutate(betaG_E0 = pmap(list(nb_tirage, `(Intercept)`, `sd__(Intercept)`), 
-                           function (n, mu, sd) rnorm(n, mu, sd))) %>%
-  mutate(betaG_E1 = pmap(list(nb_tirage, environmentE1 + `(Intercept)`, `sd__(Intercept)`), 
-                         function (n, mu, sd) rnorm(n, mu, sd))) %>%
-  mutate(betaGE_E1 = pmap(list(nb_tirage, environmentE1, `sd__environmentE1`), 
-                         function (n, mu, sd) rnorm(n, mu, sd)))  %>% 
-      select(gene_id, betaG_E0, betaG_E1, betaGE_E1, from) %>% 
-      unnest(c(betaG_E0, betaG_E1, betaGE_E1)) %>% as.data.frame()
-
-
-
-prediction_glm = prediction_glm %>% mutate(betaG_E0 = `(Intercept)` + betaG ) %>% 
-                       mutate(betaG_E1 = `(Intercept)` + betaE + betaG  ) %>%
-                       mutate(betaGE_E1 = betaE + betaGE) %>% 
-                        select(gene_id, betaG_E0, betaG_E1, betaGE_E1)  %>% mutate(from = "prediction MASS::glm") %>% as.data.frame()
-
-actu = beta.actual_random %>% mutate(betaG_E0 = `(Intercept)` + betaG ) %>% 
-                       mutate(betaG_E1 = `(Intercept)` + betaE + betaG  ) %>%
-                       mutate(betaGE_E1 = betaE + betaGE  ) %>% select(gene_id, betaG_E0, betaG_E1, betaGE_E1)  %>% 
-                      mutate(from = "Actual") %>%
-                      as.data.frame()
-
-
-dtf = rbind(actu, prediction_glm,prediction_glmMixte) %>% reshape2::melt(
-      id.vars = c("gene_id", 'from'),
-      value.name = "value",
-      variable.name = 'env') %>% as.data.frame() %>% separate(env, c("type", "Env"), sep = "_")
-
-
-#dtf %>% dplyr::filter(gene_id == "gene1")
-p = ggplot( dtf %>% dplyr::filter(gene_id %in% c("gene3")) )+ 
-  geom_density_ridges(aes(x = value , y = from, fill = Env), quantile_lines=TRUE,
-                      quantile_fun=function(x,...)mean(x), alpha = 0.6) + facet_wrap(~type, ncol = 2, scales = "free_x")  + scale_fill_manual(values = c("#2E75B6", "#F4B183"))
-p
-
-ggsave("../../img/distrib/geneE_whole.svg", p, height = 4, width = 6) 
-
-```
-
-
-```{r}
-
-#' Summarise the actual effects per gene and return them in long format.
-#' @param actual.dtf data frame holding gene_id, `(Intercept)`, betaG, betaE and betaGE columns.
-#' @return long data frame with columns gene_id, term and actual.value.
-reshapeActualDtf_glm_mixte2 <- function(actual.dtf) {
-    per_gene <- dplyr::summarise(
-        dplyr::group_by(actual.dtf, gene_id),
-        # temporary name, presumably so the expressions below still see the raw `(Intercept)` column
-        tmp = mean(`(Intercept)` + betaG),
-        environmentE1 = mean(betaE + betaGE),
-        "sd__(Intercept)" = sd(`(Intercept)` + betaG),
-        sd__environmentE1 = sd(betaGE + betaE),
-        "cor__(Intercept).environmentE1" = cor((betaGE + betaE), (`(Intercept)` + betaG))
-    )
-    per_gene <- dplyr::rename(per_gene, "(Intercept)" = tmp)
-    reshape2::melt(per_gene, id = "gene_id", value.name = "actual.value", variable.name = "term")
-}
-
-```
-
-
-```{r}
-beta.actual_random$idx_mvrnom = factor(beta.actual_random$idx_mvrnom)
-ggplot(beta.actual_random) + geom_point(aes( x = betaG, y = betaGE, col = idx_mvrnom ))
-ggplot(beta.actual_random ) + geom_point(aes( x = betaG, y = betaGE, col = gene_id ))
-
-
-###### -- GLMTMB -- #######
-expectation_random = reshapeActualDtf_glm_mixte2(beta.actual_random)
-prediction_glmMixte = getPrediction(fitDtf_random.tmb$inference, threshold = thr, alphaRisk = 0.05)  %>% mutate(from  = 'prediction glm mixte' )
-
-# - rbind glm & glm mixte
-prediction.dtf = rbind(prediction_glmMixte %>% select(estimate, term, gene_id, from), glm.dtf %>% select(estimate, term, gene_id, from))
-
-# -- join actual & inference
-actual2join.dtf <- data.table::data.table(expectation_random, key = c("gene_id", "term"))
-inference.dtf <- data.table::data.table(prediction.dtf, key = c("gene_id", "term"))
-
-comparison.tmb <- actual2join.dtf[inference.dtf]
-
-comparison.dtf = comparison.tmb
-```
-
-## Evaluation
-
-### Build graph for evaluation mixte model
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-
-# -- preparing dtf
-comparison.dtf$term = factor(comparison.dtf$term, levels = c("(Intercept)", "environmentE1","cor__(Intercept).environmentE1", "sd__(Intercept)", "sd__environmentE1"))
-
-
-# -- Identity plot
-p = ggplot(comparison.dtf %>% filter(from == "prediction glm mixte") %>% filter(!is.na(term)))  + 
-  geom_point(aes(x = actual.value, 
-                 y = estimate, col = gene_id), alpha = 0.6, size = 3) +
-  geom_abline(intercept = 0, slope = 1) + 
-  facet_wrap(~term, scales = "free")  + 
-  theme(strip.text.x = element_text(size = 7)) #+
-  scale_color_manual(values = c('#581845', '#FFC30F'))
-p
-#ggsave("../img/graph/poc_glmm2_1000.png", p,) 
-```
diff --git a/results/v3/2023_02_21_benchmarkingv3.Rmd b/results/v3/2023_02_21_benchmarkingv3.Rmd
deleted file mode 100644
index 825acab05958eaf103e30fd815333f069ea76c75..0000000000000000000000000000000000000000
--- a/results/v3/2023_02_21_benchmarkingv3.Rmd
+++ /dev/null
@@ -1,193 +0,0 @@
----
-title: "Benchmarking packages"
-date: "2023-02-08"
-output: 
-  html_document:
-    toc: true 
-css: css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-library(Rmisc)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r}
-# -- SETUP
-n_G = 100
-n_genoT_list = c(2, 3, 6)
-n_env = 2
-FixIntercept = T
-max_n_rep = 10
-sequencing_fact = 2
-n_clus = 1
-thr = 1.2
-number_ofRepetition = 2
-###########
-```
-
-### get time to fit 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", eval=FALSE}
-#       ----------------        #
-#      /!\ Very very long
-# Not executed in the notebook
-#   -------------------------   #
-
-#rm(dtf2evaluation)
-for (n_genoT in n_genoT_list){
-  for (i in 1:number_ofRepetition){
-    # -- traceback
-    print(paste("Number of genotype:", n_genoT, sep = " "))
-    print(paste("Iteration:", i, sep = " "))
-    
-    #     -- Simulate counts --     #
-    mock_random = rnaMock(n_genes = n_G, 
-                          n_genotypes = n_genoT, 
-                          n_environments = n_env, 
-                          max_n_replicates = max_n_rep, 
-                          sequencing_factor = sequencing_fact, 
-                          fixIntercept = FixIntercept, 
-                          n_clusters = n_clus)
-    
-    # -- count table & experimental design
-    count_table_random = mock_random$countTable %>% as.data.frame()
-    bioDesign_random = mock_random$design
-    #      -- ground truth --       #
-    beta.actual_random = mock_random$actualParam$beta
-    
-    #            -- MASS::glm --           #x
-    
-    dtf2fit_random = HTRfit::reshapeCounTable(count_table_random,
-                                              bioDesign_random)
-    start_time <- Sys.time()
-    l_random.glm = HTRfit::launch.glm(dtf2fit_random)
-    end_time <- Sys.time()
-
-    fitDtf.glm =listFit2dtf(l_random.glm)
-    fitDtf.glm$inference$timeProcess = difftime(end_time, 
-                            start_time, units = "secs") %>% as.numeric()
-    fitDtf.glm$inference$from = 'MASS::glm'
-    fitDtf.glm$inference$n_genotypes = n_genoT
-    # -- convert to log base 2 (x / log(2), assuming natural-log estimates)
-    fitDtf.glm$inference$estimate = fitDtf.glm$inference$estimate/log(2) 
-    fitDtf.glm$inference$std.error = fitDtf.glm$inference$std.error/log(2)
-    expectation = getExpectation(beta.actual_random,
-                                        toEval = "glm" , threshold = thr)
-    prediction = getPrediction(fitDtf.glm$inference, 
-                                     threshold = thr, alphaRisk = 0.05)
-    comp.glm = getComparison(actual.dtf = expectation, 
-                                     inference.dtf = prediction)
-    
-    #            -- DESEQ2 --           #
-    if (n_genoT < 100 ){ 
-    start_time <- Sys.time()
-    dds_simu = HTRsim::fit_deseq(count_table_random, bioDesign_random)
-    end_time <- Sys.time()
-
-    deseqFitdtf = getCoefficientsFromDds(dds_simu)
-    prediction = getPrediction(deseqFitdtf, threshold = thr, 
-                            alphaRisk = 0.05, pvalCorrection = T)
-    comp.deseq = getComparison(actual.dtf = expectation, 
-                                    inference.dtf = prediction )
-    comp.deseq$timeProcess = difftime(end_time, 
-                            start_time, units = "secs") %>% as.numeric()
-    comp.deseq$from = 'DESEQ2'
-    comp.deseq$n_genotypes = n_genoT
-    
-    #       ---- Saving results -----          #
-    #list_column = c("gene_id", "term", "actual.value" ,"estimate", "std.error", "timeProcess", "from", "n_genotypes", "statistic"     , "p.value", "padj" , "prediction.label")
-    
-    tmp = rbind(comp.deseq, comp.glm)
-    }
-    else{
-      tmp = comp.glm
-    }
-
-    if (exists('dtf2evaluation')){
-        dtf2evaluation = rbind(dtf2evaluation, tmp)
-    }
-    else{
-        dtf2evaluation = tmp
-    }
-  }
-}
-
-
-#write_tsv(tgc, file = "2023_01_17-tgc_backup.tsv")
-#write_tsv(dtf2evaluation, file = "2023_02_21-dfEval_random_backup2.tsv")
-dtf2evaluation %>% group_by(from, n_genotypes) %>% tally()
-```
-
-## Evaluation
-
-### Preparing dataframe
-
-```{r}
-#dtf2evaluation2 = dtf2evaluation
-#dtf2evaluation <- read_tsv('2023_02_21-dfEval_random_backup2.tsv', show_col_types = FALSE)
-#dtf2evaluation = rbind( dtf2evaluation2, dtf2evaluation )
-tgc <- Rmisc::summarySE(dtf2evaluation,
-                 measurevar="timeProcess",
-                 groupvars=c("n_genotypes","from"))
-# -- backup
-#write_tsv(tgc, file = "2023_01_17-tgc_backup.tsv")
-
-#tgc <- read_tsv('2023_01_17-tgc_backup.tsv', show_col_types = FALSE)
-```
-
-### Build graph for evaluation 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-#tgc$n_genotypes <- factor(tgc$n_genotypes)
-#tgc$n_genotypes = as.numeric(as.character(tgc$n_genotypes))
-tgc$from = factor(tgc$from, levels = c("DESEQ2","MASS::glm",  "lme4", "glmmTMB" ))
-p= ggplot(tgc, aes(x = n_genotypes, y = timeProcess, colour = from)) + 
-  geom_line(aes(x = n_genotypes, y= timeProcess+sd), 
-            linetype = "dashed", alpha = 0.6) +
-  geom_line(aes(x = n_genotypes, y= timeProcess-sd), 
-            linetype = "dashed", alpha = 0.6) +
-  geom_ribbon(aes(ymin = timeProcess-sd , 
-                  ymax= timeProcess+sd, fill = from), alpha=0.4) + 
-  geom_line(aes(x = n_genotypes, y= timeProcess), 
-            linetype = "solid", alpha = 0.6) +
-  #geom_point() +  
-  scale_y_log10() + scale_x_log10() +
-    scale_color_manual(values = c("#16A085", "#2C3E50", '#EC7063', '#FFC30F' )) +
-  scale_fill_manual(values = c("#16A085", "#2C3E50", '#EC7063', '#FFC30F' ))
-
-p
-df2ROC = dtf2evaluation %>% 
-              filter(from %in% c("MASS::glm", "DESEQ2"))%>% 
-              filter(term != "(Intercept)") %>%
-              dplyr::mutate(
-                actual.label =
-                    dplyr::if_else(abs(actual.value) < 2,
-                      "nonDE", "DE"
-                )
-        )
-df2ROC$n_genotypes <- factor(df2ROC$n_genotypes)
-# -- ROC curve
-p1 = ggplot(df2ROC %>% filter(n_genotypes != 5) , 
-           aes(d = actual.label , m = padj, col = from)) + 
-  geom_roc(n.cuts = 0, labels = F)   + facet_grid(~n_genotypes) + 
-    scale_color_manual(values = c("#16A085", "#2C3E50")) + theme(axis.text.x = element_text(size = 6))
-p1
-ggsave(filename = "ROCcurves_genotincr.png", p1 , height = 4, width = 8)
-```
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE}
-p
-```
diff --git a/results/v3/DESEQ2/2022_02_03_dispersion.Rmd b/results/v3/DESEQ2/2022_02_03_dispersion.Rmd
deleted file mode 100644
index 346798bfd44e6c0be5c67a4212899dbd6ff63c24..0000000000000000000000000000000000000000
--- a/results/v3/DESEQ2/2022_02_03_dispersion.Rmd
+++ /dev/null
@@ -1,75 +0,0 @@
----
-title: "Dispersion"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-```
-
-
-## Simulation
-
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- SETUP
-n_G = 1000
-n_genoT = 3
-n_env = 2
-max_n_rep = 15
-sequencing_fact = 1
-thr = 2
-###########
-
-# -- Simulate counts
-mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
-
-# -- count table & experimental design
-count_table = mock$countTable %>% as.data.frame()
-bioDesign = mock$design
-
-# -- dispersion value per gene & per condition
-mock$actualParameters$dispersion %>% head()
-
-```
-
-## Estimation
-
-### Launch DESEQ2 on simulated data
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- DESEQ2
-dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
-dispersion_estimated = getDispersionFromDDS(dds_simu)
-```
-
-### Join Actual & Inference
-
-```{r}
-# -- one dispersion per gene
-dispersion_actual = mock$actualParameters$dispersion %>% 
-          rowMeans() %>% 
-          as.data.frame() %>% 
-          dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id")
-
-dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id")
-dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id")
-dispersion_comparison = dispersion_actual[dispersion_estimated]
-
-```
-
-### Identity plot 
-
-```{r}
-ggplot(dispersion_comparison) + 
-  geom_point(aes(x = dispersion.actual, y = dispersion.estimate), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) + scale_y_log10() + scale_x_log10()
-```
diff --git a/results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd b/results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd
deleted file mode 100644
index 58b4549e5b039e8b1e494c863e3e586e6b5a7f25..0000000000000000000000000000000000000000
--- a/results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd
+++ /dev/null
@@ -1,120 +0,0 @@
----
-title: "Post Inference Selection"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-
-## Required
-
-
-```{r setup_2, message=FALSE, warning=FALSE, results="hide"}
-library(HTRfit)
-library(HTRsim)
-library(plotROC)
-library(gridExtra)
-```
-
-
-## Simulation
-
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- SETUP
-n_G = 100
-n_genoT = 10 
-n_env = 2
-max_n_rep = 3
-sequencing_fact = 1
-thr = 2
-###########
-
-# -- Simulate counts
-mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
-count_table = mock$countTable %>% as.data.frame()
-bioDesign = mock$design
-```
-
-## Prediction
-
-### Launch DESEQ2 on simulated data
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- DESEQ2
-dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
-deseqFitdtf = getCoefficientsFromDds(dds_simu)
-
-```
-
-### p(|beta| > 0) > 0.95 & |beta| > T => DE
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- prediction with post inference selection method
-prediction = getPrediction(deseqFitdtf, threshold = thr, 
-                            alphaRisk = 0.05, postInferenceSelection = T)
-expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
-dtf.comp.annot.PIS = getComparison(actual.dtf = expectation, 
-                                    inference.dtf = prediction )
-```
-
-### p(|beta| > T) > 0.95 => DE
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- prediction
-prediction = getPrediction(deseqFitdtf, threshold = thr, 
-                            alphaRisk = 0.05, postInferenceSelection = F)
-expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
-dtf.comp.annot= getComparison(actual.dtf = expectation, 
-                              inference.dtf = prediction )
-```
-
-## Evaluation
-
-
-### Build graph for evaluation
-
-```{r message=FALSE, warning=FALSE, echo = T, results = 'hide' ,include=TRUE}
-# -- Venn Diag Post inference selection
-p1 = getVennDiagramm(dtf.comp.annot.PIS, 
-                        title = "p(|beta| > 0) > 0.95 & |beta| > T")
-# -- Venn Diag 
-p2 = getVennDiagramm(comparisonDTF  = dtf.comp.annot,
-                     title = "p(|beta| > T) > 0.95")
-
-# -- ROC curves post inference selection
-dtf1 = dtf.comp.annot.PIS %>% 
-                  mutate(method = "p(|beta| > 0) > 0.95 & |beta| > T")
-# -- ROC curves
-dtf2 = dtf.comp.annot %>% 
-                  mutate(method = "p(|beta| > T) > 0.95")
-dtf = rbind(dtf1, dtf2)
-dtf$annotation <- factor(dtf$annotation, levels = c("TRUE", "FALSE"))
-
-# -- Identity plot
-p3 = ggplot(dtf %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, col = padj), alpha = 0.5) +  
-  geom_abline(intercept = 0, slope = 1) + 
-  geom_vline(xintercept = c(-thr, thr), linetype = "dotted") +
-  facet_grid(method~beta, scales = "free") #+ 
-  #scale_color_brewer(palette = "Blues")
-
-p4 = ggplot(dtf %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, color = method)) + 
-  geom_roc(n.cuts = 0, labels = F)
-```
-
-### Conclusion
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-# -- Venn diagram
-grid.arrange(p1, p2, nrow = 1, ncol = 2)
-# -- Identity plot
-p3
-# -- ROC curves
-p4
-```
-
diff --git a/results/v3/DESEQ2/2023_02_03_FDR.Rmd b/results/v3/DESEQ2/2023_02_03_FDR.Rmd
deleted file mode 100644
index fa564ca1ce0c02ecde1cd7bc554f47ba0d522135..0000000000000000000000000000000000000000
--- a/results/v3/DESEQ2/2023_02_03_FDR.Rmd
+++ /dev/null
@@ -1,117 +0,0 @@
----
-title: "False discovery rate"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRfit)
-library(HTRsim)
-library(plotROC)
-library(gridExtra)
-```
-
-
-## Simulation
-
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- SETUP
-n_G = 100
-n_genoT = 5
-n_env = 2
-max_n_rep = 3
-sequencing_fact = 1
-thr = 2
-###########
-
-# -- Simulate counts
-mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
-count_table = mock$countTable %>% as.data.frame()
-bioDesign = mock$design
-mock$actualParameters$dispersion
-
-```
-
-## Prediction
-
-### Launch DESEQ2 on simulated data
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- DESEQ2
-dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
-deseqFitdtf = getCoefficientsFromDds(dds_simu)
-```
-
-### pvalue adjusted
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- prediction with p-value correction enabled (pvalCorrection = T)
-prediction = getPrediction(deseqFitdtf, threshold = thr, 
-                            alphaRisk = 0.05, pvalCorrection = T)
-expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
-dtf.comp.annot.adjusted = getComparison(actual.dtf = expectation, 
-                                    inference.dtf = prediction )
-```
-### pvalue unadjusted
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- prediction 
-prediction = getPrediction(deseqFitdtf, threshold = thr, 
-                            alphaRisk = 0.05, pvalCorrection = F)
-dtf.comp.annot.unadjusted = getComparison(actual.dtf = expectation, 
-                                    inference.dtf = prediction )
-```
-
-
-
-## Evaluation
-
-### Build graph for evaluation
-
-```{r message=FALSE, warning=FALSE, echo = T, results = 'hide' ,include=TRUE}
-# -- Venn Diag adjusted pvalue
-p1 = getVennDiagramm(dtf.comp.annot.adjusted, 
-                        title = "padjusted")
-# -- Venn Diag unadjusted pvalue
-p2 = getVennDiagramm(comparisonDTF  = dtf.comp.annot.unadjusted,
-                     title = "pvalue")
-
-# -- ROC input: comparison obtained with adjusted p-values
-dtf1 = dtf.comp.annot.adjusted %>% 
-                  mutate(method = "padjusted")
-# -- ROC input: comparison obtained with unadjusted p-values
-dtf2 = dtf.comp.annot.unadjusted %>% 
-                  mutate(method = "pvalue")
-dtf = rbind(dtf1, dtf2)
-dtf$annotation <- factor(dtf$annotation, levels = c("TRUE", "FALSE"))
-
-# -- Identity plot
-p3 = ggplot(dtf %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, col = padj), alpha = 0.5) +  
-  geom_abline(intercept = 0, slope = 1) + 
-  geom_vline(xintercept = c(-thr, thr), linetype = "dotted") +
-  facet_grid(method~beta, scales = "free") #+ 
-  #scale_color_brewer(palette = "Set2")
-p4 = ggplot(dtf %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, color = method)) + 
-  geom_roc(n.cuts = 0, labels = F)
-```
-
-### Visualization
-
-```{r message=FALSE, warning=FALSE, include=TRUE}
-# -- Venn diagram
-grid.arrange(p1, p2, nrow = 1, ncol = 2)
-# -- Identity plot
-p3
-# -- ROC curves
-p4
-```
diff --git a/results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd b/results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd
deleted file mode 100644
index c6f9f753e662fe94ab0bbd6977bc4eecee10ef8a..0000000000000000000000000000000000000000
--- a/results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd
+++ /dev/null
@@ -1,128 +0,0 @@
----
-title: "Sequencing depth effect"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(plotROC)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- SETUP
-n_G = 1000
-n_genoT = 3
-n_env = 2
-max_n_rep = 3
-thr = 2
-```
-
-### Ground truth
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Fit mvnorm
-dds.extraction = loadEmbedded_ObservedValues()
-fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
-# -- Ground truth
-beta.actual <- getBetaforSimulation(
-                      n_G,
-                      n_genoT,
-                      fit.mvnorm)
-# -- build input for simulation 
-model.matx <- getModelMatrix()
-log_qij <- getLog_qij(beta.actual, model.matx)
-```
-
-### Sequencing depth increased
-
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-
-# -- sequencing factors to loop on
-sequencing_factor_list <- c(0.01, 0.1, 1, 2)
-
-# -- one comparison table per factor, bound once after the loop (a fresh list
-#    avoids appending to a stale df2evaluation when the chunk is re-run)
-comparison_list <- list()
-for (sequencing_factor in sequencing_factor_list) {
-  mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-  sample_ids <- colnames(mu_ij)
-  gene_dispersion.vec <- dds.extraction$gene_dispersion
-  dispersion.matrix <- getGenesDispersions(n_G,
-                            sample_ids,
-                            dispersion.vec = gene_dispersion.vec)
-  # -- Design replicates
-  designReplication.matx <- getReplicationDesign(
-                                max_n_rep,
-                                n_genoT,
-                                n_env)
-
-  # -- build counts table
-  count_table <- getCountTable(mu_ij, dispersion.matrix,
-                    n_G, n_genoT,
-                    sample_id_list = sample_ids,
-                    replication.matx = designReplication.matx)
-  bioDesign <- summariseDesign(count_table)
-
-  # -- total simulated reads, formatted as a label
-  reads_counts <- count_table %>%
-                      colSums() %>%
-                      sum() %>%
-                      format(., scientific = TRUE, digits = 1)
-
-  # -- DESEQ2 on simulated data
-  dds_simu <- HTRsim::fit_deseq(count_table, bioDesign)
-  deseqFitdtf <- getCoefficientsFromDds(dds_simu)
-  prediction <- getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05)
-  expectation <- getExpectation(beta.actual, threshold = thr)
-  # -- join actual & inference
-  comparison <- getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  # -- annotation
-  comparison <- comparison %>%
-              mutate(reads_sequenced = reads_counts)
-  # -- save results of this iteration
-  comparison_list[[length(comparison_list) + 1]] <- comparison
-}
-
-# -- bind every iteration into the evaluation table
-df2evaluation <- do.call(rbind, comparison_list)
-```
-
-## Evaluation
-
-### Build graph for evaluation 
-
-```{r, message=FALSE, warning=FALSE,}
-# -- ROC curve
-df2evaluation$reads_sequenced <- factor(df2evaluation$reads_sequenced, levels = unique(df2evaluation$reads_sequenced))
-p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, col = reads_sequenced)) + 
-  geom_roc(n.cuts = 0, labels = F)   + 
-  scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D"))
-#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
-
-# Identity plot
-p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) + 
-  facet_grid(beta~reads_sequenced, scales = "free")
-```
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE,}
-p1
-p2
-```
diff --git a/results/v3/DESEQ2/2023_02_06_replicates.Rmd b/results/v3/DESEQ2/2023_02_06_replicates.Rmd
deleted file mode 100644
index 81c79667f64fd2472dfd7619680bd37f4fd636e1..0000000000000000000000000000000000000000
--- a/results/v3/DESEQ2/2023_02_06_replicates.Rmd
+++ /dev/null
@@ -1,126 +0,0 @@
----
-title: "Number of replicates effect"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(plotROC)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- SETUP
-n_G = 300
-n_genoT = 3
-n_env = 2
-sequencing_factor = 1
-thr = 2
-```
-
-### Ground truth
-
-```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Fit mvnorm
-dds.extraction = loadEmbedded_ObservedValues()
-fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
-# -- Ground truth
-beta.actual <- getBetaforSimulation(
-                      n_G,
-                      n_genoT,
-                      fit.mvnorm)
-# -- build input for simulation 
-model.matx <- getModelMatrix()
-log_qij <- getLog_qij(beta.actual, model.matx)
-mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-sample_ids <- colnames(mu_ij)
-gene_dispersion.vec <- dds.extraction$gene_dispersion
-dispersion.matrix <- getGenesDispersions(n_G,
-                          sample_ids,
-                          dispersion.vec = gene_dispersion.vec)
-```
-
-### Number of replicates increased
-
-```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- numbers of replicates to loop on
-n_rep_list <- c(2, 5, 10, 25)
-
-# -- number of replicates effect: one comparison table per n_rep, bound once
-#    after the loop (avoids appending to a stale df2evaluation on re-run)
-comparison_list <- list()
-for (n_rep in n_rep_list) {
-  # -- Design replicates
-  designReplication.matx <- getReplicationDesign(
-                                n_rep,
-                                n_genoT,
-                                n_env)
-
-  # -- build counts table
-  count_table <- getCountTable(mu_ij, dispersion.matrix,
-                    n_G, n_genoT,
-                    sample_id_list = sample_ids,
-                    replication.matx = designReplication.matx)
-  bioDesign <- summariseDesign(count_table)
-
-  # -- total simulated reads (not used further in this chunk)
-  reads_counts <- count_table %>%
-                      colSums() %>%
-                      sum() %>%
-                      format(., scientific = TRUE, digits = 1)
-
-  # -- DESEQ2 on simulated data
-  dds_simu <- HTRsim::fit_deseq(count_table, bioDesign)
-  deseqFitdtf <- getCoefficientsFromDds(dds_simu)
-  prediction <- getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05)
-  expectation <- getExpectation(beta.actual, threshold = thr)
-  # -- join actual & inference
-  comparison <- getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  # -- annotation
-  comparison <- comparison %>%
-              mutate(n_replicates = n_rep)
-  # -- save results of this iteration
-  comparison_list[[length(comparison_list) + 1]] <- comparison
-}
-
-# -- bind every iteration into the evaluation table
-df2evaluation <- do.call(rbind, comparison_list)
-```
-
-## Evaluation
-
-### Build graph for evaluation 
-
-```{r, message=FALSE, warning=FALSE,}
-# -- ROC curve
-df2evaluation$n_replicates <- factor(df2evaluation$n_replicates, levels = unique(df2evaluation$n_replicates))
-p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, col = n_replicates)) + 
-  geom_roc(n.cuts = 0, labels = F)   + 
-  scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D"))
-#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
-
-# Identity plot
-p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) + 
-  facet_grid(beta~n_replicates, scales = "free")
-```
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE,}
-p1
-p2
-```
diff --git a/results/v3/GLM/2023_02_04_dispersion.Rmd b/results/v3/GLM/2023_02_04_dispersion.Rmd
deleted file mode 100644
index cecb263e291db181aed3758c3619ac37baeb58c7..0000000000000000000000000000000000000000
--- a/results/v3/GLM/2023_02_04_dispersion.Rmd
+++ /dev/null
@@ -1,78 +0,0 @@
----
-title: "Dispersion"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-```
-
-
-## Simulation
-
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- SETUP
-n_G = 300
-n_genoT = 3
-n_env = 2
-max_n_rep = 15
-sequencing_fact = 1
-thr = 2
-###########
-
-# -- Simulate counts
-mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
-
-# -- count table & experimental design
-count_table = mock$countTable %>% as.data.frame()
-bioDesign = mock$design
-
-# -- dispersion value per gene & per condition
-mock$actualParameters$dispersion %>% head()
-
-```
-
-## Estimation
-
-### Launch MASS::glm.nb on simulated data
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- MASS:glm.nb
-dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign)
-l = HTRfit::launch.glm(dtf2fit)
-fitDtf =listFit2dtf(l)
-dispersion_estimated = fitDtf$dispersion
-```
-
-### Join Actual & Inference
-
-```{r}
-# -- one dispersion per gene
-dispersion_actual = mock$actualParameters$dispersion %>% 
-          rowMeans() %>% 
-          as.data.frame() %>% 
-          dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id")
-
-dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id")
-dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id")
-dispersion_comparison = dispersion_actual[dispersion_estimated]
-
-```
-
-### Identity plot 
-
-```{r}
-ggplot(dispersion_comparison) + 
-  geom_point(aes(x = dispersion.actual, y = 1/dispersion.estimate), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) + scale_y_log10() + scale_x_log10()
-```
diff --git a/results/v3/GLM/2023_02_05_sequencingDepth.Rmd b/results/v3/GLM/2023_02_05_sequencingDepth.Rmd
deleted file mode 100644
index 081a0d73617291f06aa950ceaf521ffdb63d0592..0000000000000000000000000000000000000000
--- a/results/v3/GLM/2023_02_05_sequencingDepth.Rmd
+++ /dev/null
@@ -1,129 +0,0 @@
----
-title: "Sequencing depth effect"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- SETUP
-n_G = 1000
-n_genoT = 3
-n_env = 2
-max_n_rep = 3
-thr = 2
-```
-
-### Ground truth
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Fit mvnorm
-dds.extraction = loadEmbedded_ObservedValues()
-fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
-# -- Ground truth
-beta.actual <- getBetaforSimulation(
-                      n_G,
-                      n_genoT,
-                      fit.mvnorm)
-# -- build input for simulation 
-model.matx <- getModelMatrix()
-log_qij <- getLog_qij(beta.actual, model.matx)
-```
-
-### Sequencing depth increased
-
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-
-# -- sequencing factors to loop on
-sequencing_factor_list <- c(0.01, 0.1, 1, 2)
-
-# -- one comparison table per factor, bound once after the loop (a fresh list
-#    avoids appending to a stale df2evaluation when the chunk is re-run)
-comparison_list <- list()
-for (sequencing_factor in sequencing_factor_list) {
-  mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-  sample_ids <- colnames(mu_ij)
-  gene_dispersion.vec <- dds.extraction$gene_dispersion
-  dispersion.matrix <- getGenesDispersions(n_G,
-                            sample_ids,
-                            dispersion.vec = gene_dispersion.vec)
-  # -- Design replicates
-  designReplication.matx <- getReplicationDesign(
-                                max_n_rep,
-                                n_genoT,
-                                n_env)
-
-  # -- build counts table
-  count_table <- getCountTable(mu_ij, dispersion.matrix,
-                    n_G, n_genoT,
-                    sample_id_list = sample_ids,
-                    replication.matx = designReplication.matx)
-  bioDesign <- summariseDesign(count_table)
-
-  # -- total simulated reads, formatted as a label
-  reads_counts <- count_table %>%
-                      colSums() %>%
-                      sum() %>%
-                      format(., scientific = TRUE, digits = 1)
-  # -- GLM fit through HTRfit::launch.glm
-  dtf2fit <- HTRfit::reshapeCounTable(count_table, bioDesign)
-  l <- HTRfit::launch.glm(dtf2fit)
-  fitDtf <- listFit2dtf(l)
-  prediction <- getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05)
-  expectation <- getExpectation(beta.actual, threshold = thr)
-  # -- join actual & inference
-  comparison <- getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  # -- annotation
-  comparison <- comparison %>%
-              mutate(reads_sequenced = reads_counts)
-  # -- save results of this iteration
-  comparison_list[[length(comparison_list) + 1]] <- comparison
-}
-
-# -- bind every iteration into the evaluation table
-df2evaluation <- do.call(rbind, comparison_list)
-```
-
-## Evaluation
-
-### Build graph for evaluation 
-
-```{r, message=FALSE, warning=FALSE,}
-# -- ROC curve
-df2evaluation$reads_sequenced <- factor(df2evaluation$reads_sequenced, levels = unique(df2evaluation$reads_sequenced))
-p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, col = reads_sequenced)) + 
-  geom_roc(n.cuts = 0, labels = F)   + 
-  scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D"))
-#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
-
-# Identity plot
-p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) + 
-  facet_grid(beta~reads_sequenced, scales = "free")
-```
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE,}
-p1
-p2
-```
diff --git a/results/v3/GLM/2023_02_06_replicates.Rmd b/results/v3/GLM/2023_02_06_replicates.Rmd
deleted file mode 100644
index a95d5f522af9ef9e92503a7b4efe2b8e52ad60cb..0000000000000000000000000000000000000000
--- a/results/v3/GLM/2023_02_06_replicates.Rmd
+++ /dev/null
@@ -1,128 +0,0 @@
----
-title: "Number of replicates effect"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- SETUP
-n_G = 300
-n_genoT = 3
-n_env = 2
-sequencing_factor = 1
-thr = 2
-```
-
-### Ground truth
-
-```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Fit mvnorm
-dds.extraction = loadEmbedded_ObservedValues()
-fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
-# -- Ground truth
-beta.actual <- getBetaforSimulation(
-                      n_G,
-                      n_genoT,
-                      fit.mvnorm)
-# -- build input for simulation 
-model.matx <- getModelMatrix()
-log_qij <- getLog_qij(beta.actual, model.matx)
-mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-sample_ids <- colnames(mu_ij)
-gene_dispersion.vec <- dds.extraction$gene_dispersion
-dispersion.matrix <- getGenesDispersions(n_G,
-                          sample_ids,
-                          dispersion.vec = gene_dispersion.vec)
-```
-
-### Number of replicates increased
-
-```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- numbers of replicates to loop on
-n_rep_list <- c(2, 5, 10, 25)
-
-# -- number of replicates effect: one comparison table per n_rep, bound once
-#    after the loop (avoids appending to a stale df2evaluation on re-run)
-comparison_list <- list()
-for (n_rep in n_rep_list) {
-  # -- Design replicates
-  designReplication.matx <- getReplicationDesign(
-                                n_rep,
-                                n_genoT,
-                                n_env)
-
-  # -- build counts table
-  count_table <- getCountTable(mu_ij, dispersion.matrix,
-                    n_G, n_genoT,
-                    sample_id_list = sample_ids,
-                    replication.matx = designReplication.matx)
-  bioDesign <- summariseDesign(count_table)
-
-  # -- total simulated reads (not used further in this chunk)
-  reads_counts <- count_table %>%
-                      colSums() %>%
-                      sum() %>%
-                      format(., scientific = TRUE, digits = 1)
-
-  # -- GLM fit through HTRfit::launch.glm
-  dtf2fit <- HTRfit::reshapeCounTable(count_table, bioDesign)
-  l <- HTRfit::launch.glm(dtf2fit)
-  fitDtf <- listFit2dtf(l)
-  prediction <- getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05)
-  expectation <- getExpectation(beta.actual, threshold = thr)
-  # -- join actual & inference
-  comparison <- getComparison(actual.dtf = expectation, inference.dtf = prediction)
-  # -- annotation
-  comparison <- comparison %>%
-              mutate(n_replicates = n_rep)
-  # -- save results of this iteration
-  comparison_list[[length(comparison_list) + 1]] <- comparison
-}
-
-# -- bind every iteration into the evaluation table
-df2evaluation <- do.call(rbind, comparison_list)
-```
-
-## Evaluation
-
-### Build graph for evaluation 
-
-```{r, message=FALSE, warning=FALSE,}
-# -- ROC curve
-df2evaluation$n_replicates <- factor(df2evaluation$n_replicates, levels = unique(df2evaluation$n_replicates))
-p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), 
-           aes(d = actual.label , m = padj, col = n_replicates)) + 
-  geom_roc(n.cuts = 0, labels = F)   + 
-  scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D"))
-#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
-
-# Identity plot
-p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + 
-  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) + 
-  facet_grid(beta~n_replicates, scales = "free")
-```
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE,}
-p1
-p2
-```
diff --git a/results/v3/GLM/2023_02_08_randomIntercept.Rmd b/results/v3/GLM/2023_02_08_randomIntercept.Rmd
deleted file mode 100644
index 9ef903c0d836a50382b880af0f4296be133318d3..0000000000000000000000000000000000000000
--- a/results/v3/GLM/2023_02_08_randomIntercept.Rmd
+++ /dev/null
@@ -1,141 +0,0 @@
----
-title: "Random intercept"
-date: "2023-02-08"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r}
-# -- SETUP
-n_G = 2000
-n_genoT = 26
-n_env = 2
-max_n_rep = 3
-sequencing_fact = 2
-thr = 2
-###########
-```
-
-### Intercept fixed
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Simulate counts
-mock_fixed = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = T)
-
-# -- count table & experimental design
-count_table_fixed = mock_fixed$countTable %>% as.data.frame()
-bioDesign_fixed = mock_fixed$design
-
-# -- ground truth
-beta.actual_fixed = mock_fixed$actualParam$beta
-```
-
-### Random intercept 
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Simulate counts
-mock_random = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = F)
-
-# -- count table & experimental design
-count_table_random = mock_random$countTable %>% as.data.frame()
-bioDesign_random = mock_random$design
-
-# -- ground truth
-beta.actual_random = mock_random$actualParam$beta
-
-```
-
-## Estimation
-
-### Launch MASS::glm.nb on simulated data (fixed intercept)
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- MASS:glm.nb
-dtf2fit_fixed = HTRfit::reshapeCounTable(count_table_fixed, bioDesign_fixed)
-l_fixed = HTRfit::launch.glm(dtf2fit_fixed)
-fitDtf_fixed =listFit2dtf(l_fixed)
-```
-
-### Launch MASS::glm.nb on simulated data (random intercept)
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- MASS:glm.nb
-dtf2fit_random = HTRfit::reshapeCounTable(count_table_random, bioDesign_random)
-l_random = HTRfit::launch.glm(dtf2fit_random)
-fitDtf_random =listFit2dtf(l_random)
-```
-
-### Join Actual & Inference
-
-```{r}
-
-###### -- Fixed Intercept -- ########
-expectation_fixed = getExpectation(beta.actual_fixed, threshold = thr)
-prediction_fixed = getPrediction(fitDtf_fixed$inference, threshold = thr, alphaRisk = 0.05)
-# -- join actual & inference
-comparison_fixed = getComparison(actual.dtf = expectation_fixed, inference.dtf = prediction_fixed)
-#####################################
-
-###### -- Random Intercept -- #######
-expectation_random = getExpectation(beta.actual_random, threshold = thr)
-prediction_random = getPrediction(fitDtf_random$inference, threshold = thr, alphaRisk = 0.05)
-# -- join actual & inference
-comparison_random = getComparison(actual.dtf = expectation_random, inference.dtf = prediction_random)
-#####################################
-
-```
-
-### Join random & fixed dataframe
-
-```{r}
-# -- Annotations
-comparison_random  = comparison_random %>% dplyr::mutate(from = "Random intercept")
-comparison_fixed = comparison_fixed %>% dplyr::mutate(from = "Fixed intercept")
-# -- join
-df2evaluation = rbind(comparison_fixed, comparison_random)
-```
-
-
-## Evaluation
-
-### Build graph for evaluation 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- ROC curve (default colour scale; the manual scale below is disabled)
-p1 <- ggplot(df2evaluation %>% filter(beta != "(Intercept)"),
-           aes(d = actual.label, m = padj, col = from)) +
-  geom_roc(n.cuts = 0, labels = FALSE) #+
-  #scale_color_manual(values = c("yellow", "#BDBDBD"))
-#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
-
-# Identity plot: estimate against actual value, one panel per beta and origin
-p2 <- ggplot(df2evaluation %>% filter(beta != "(Intercept)")) +
-  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) +
-  facet_grid(beta~from, scales = "free")
-```
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE}
-p1
-p2
-```
diff --git a/results/v3/GLM_mixte/2023_01_17-tgc_backup.tsv b/results/v3/GLM_mixte/2023_01_17-tgc_backup.tsv
deleted file mode 100644
index 38660541a04c3d310d5d1d02bf98dc0b640f5868..0000000000000000000000000000000000000000
--- a/results/v3/GLM_mixte/2023_01_17-tgc_backup.tsv
+++ /dev/null
@@ -1,11 +0,0 @@
-n_genotypes	from	N	timeProcess	sd	se	ci
-20	glmmTMB	200	15.130752444267273	2.6080884089635843	0.1844196999912184	0.3636676342866279
-20	lme4	192	20.31276837239663	2.8278579904444543	0.20408307150164248	0.40254610129193336
-50	glmmTMB	200	20.799276530742645	5.514627455256488	0.38994304693293763	0.7689507432848744
-50	lme4	200	40.54833024740219	19.021427312622208	1.345018024060217	2.6523170946821053
-100	glmmTMB	200	25.420783281326294	4.2043931998868596	0.29729549424146057	0.5862537954460072
-100	lme4	200	37.98638904094696	4.275002040382448	0.3022882932340756	0.5960993780936279
-200	glmmTMB	200	39.15629369020462	5.9148858659293335	0.4182455905743096	0.8247621294374486
-200	lme4	200	68.16497600078583	12.042067627674905	0.8515027679035926	1.67912645561597
-400	glmmTMB	200	66.25662302970886	7.7012016308806155	0.5445571896480582	1.0738431138458309
-400	lme4	200	136.99138724803925	35.908038766273386	2.539081771074134	5.0069589148599665
diff --git a/results/v3/GLM_mixte/2023_02_06_dispersion.Rmd b/results/v3/GLM_mixte/2023_02_06_dispersion.Rmd
deleted file mode 100644
index 909b2059b6239827ba8ab1b8c581f85696946b3a..0000000000000000000000000000000000000000
--- a/results/v3/GLM_mixte/2023_02_06_dispersion.Rmd
+++ /dev/null
@@ -1,78 +0,0 @@
----
-title: "Dispersion"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-```
-
-
-## Simulation
-
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- SETUP
-n_G = 10
-n_genoT = 200
-n_env = 2
-max_n_rep = 5
-sequencing_fact = 1
-thr = 2
-###########
-
-# -- Simulate counts
-mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
-
-# -- count table & experimental design
-count_table = mock$countTable %>% as.data.frame()
-bioDesign = mock$design
-
-# -- dispersion value per gene & per condition
-mock$actualParameters$dispersion[1:3, 1:3]
-
-```
-
-## Estimation
-
-### Launch HTRfit::launch.glm_mixte on simulated data
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- mixed GLM via HTRfit::launch.glm_mixte (default backend not visible here)
-dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign)
-l = HTRfit::launch.glm_mixte(dtf2fit)
-fitDtf =listFit2dtf(l)
-dispersion_estimated = fitDtf$dispersion
-```
-
-### Join Actual & Inference
-
-```{r}
-# -- one dispersion per gene
-dispersion_actual = mock$actualParameters$dispersion %>% 
-          rowMeans() %>% 
-          as.data.frame() %>% 
-          dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id")
-
-dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id")
-dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id")
-dispersion_comparison = dispersion_actual[dispersion_estimated]
-
-```
-
-### Identity plot 
-
-```{r}
-ggplot(dispersion_comparison) + 
-  geom_point(aes(x = dispersion.actual, y = 1/dispersion.estimate), alpha = 0.5) +
-  geom_abline(intercept = 0, slope = 1) + scale_y_log10() + scale_x_log10()
-```
diff --git a/results/v3/GLM_mixte/2023_02_08_benchmarkingPackages.Rmd b/results/v3/GLM_mixte/2023_02_08_benchmarkingPackages.Rmd
deleted file mode 100644
index 4e86cb942c65d00f5794699ed8faeb999d0822d6..0000000000000000000000000000000000000000
--- a/results/v3/GLM_mixte/2023_02_08_benchmarkingPackages.Rmd
+++ /dev/null
@@ -1,159 +0,0 @@
----
-title: "Benchmarking packages"
-date: "2023-02-08"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-#library(plotROC)
-library(Rmisc)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r}
-# -- SETUP
-n_G = 10
-n_genoT_list = c(20, 50, 100, 200, 400)
-n_env = 2
-FixIntercept = F
-max_n_rep = 5
-sequencing_fact = 1
-n_clus = 3
-thr = 2
-number_ofRepetition = 4
-###########
-```
-
-### get time to fit 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", eval=FALSE}
-#       ----------------        #
-#      /!\ Very very long
-# Not executed in the notebook
-#   -------------------------   #
-
-# -- one joined table per (n_genoT, repetition), bound once after the loops
-results_list <- list()
-for (n_genoT in n_genoT_list) {
-  for (i in seq_len(number_ofRepetition)) {
-    # -- traceback
-    print(paste("Number of genotype:", n_genoT, sep = " "))
-    print(paste("Iteration:", i, sep = " "))
-
-    #     -- Simulate counts --     #
-    mock_random <- rnaMock(n_genes = n_G,
-                          n_genotypes = n_genoT,
-                          n_environments = n_env,
-                          max_n_replicates = max_n_rep,
-                          sequencing_factor = sequencing_fact,
-                          fixIntercept = FixIntercept,
-                          n_clusters = n_clus)
-
-    # -- count table & experimental design
-    count_table_random <- mock_random$countTable %>% as.data.frame()
-    bioDesign_random <- mock_random$design
-    #      -- ground truth --       #
-    beta.actual_random <- mock_random$actualParam$beta
-
-    #     --- Model fitting ---     #
-    # -- reshape once, outside both timers, so lme4 and glmmTMB are timed on
-    #    the fitting step only (it was previously charged to lme4 alone)
-    dtf2fit_random <- HTRfit::reshapeCounTable(count_table_random,
-                                              bioDesign_random)
-    # -- lme4 backend (package = "lme4")
-    start_time <- Sys.time()
-    l_random.lme4 <- HTRfit::launch.glm_mixte(dtf2fit_random,
-                                             package = "lme4")
-    fitDtf_random.lme4 <- listFit2dtf(l_random.lme4)
-    end_time <- Sys.time()
-    fitDtf_random.lme4$inference$timeProcess <- difftime(end_time,
-                            start_time, units = "secs") %>% as.numeric()
-    fitDtf_random.lme4$inference$from <- 'lme4'
-    fitDtf_random.lme4$inference$n_genotypes <- n_genoT
-    # -- glmmTMB backend (package = "glmmTMB")
-    start_time <- Sys.time()
-    l_random.tmb <- HTRfit::launch.glm_mixte(dtf2fit_random, package = "glmmTMB")
-    fitDtf_random.tmb <- listFit2dtf(l_random.tmb)
-    end_time <- Sys.time()
-    fitDtf_random.tmb$inference$timeProcess <- difftime(end_time,
-                            start_time, units = "secs") %>% as.numeric()
-    fitDtf_random.tmb$inference$from <- 'glmmTMB'
-    fitDtf_random.tmb$inference$n_genotypes <- n_genoT
-    #       -------------------------        #
-
-    #     -- join LME4 & glmmTMB res --      #
-    fitDtf.inference <- rbind(fitDtf_random.tmb$inference,
-                             fitDtf_random.lme4$inference)
-
-    expectation <- getExpectation(beta.actual_random,
-                                        toEval = "glm_mixte" , threshold = thr)
-    prediction <- getPrediction(fitDtf.inference, threshold = thr, alphaRisk = 0.05)
-
-    #      -- join actual & inference --       #
-    actual2join.dtf <- data.table::data.table(expectation,
-                                            key = c("gene_id", "term"))
-    inference2join.dtf <- data.table::data.table(prediction,
-                                               key = c("gene_id", "term"))
-    tmp <- actual2join.dtf[inference2join.dtf]
-
-    #       ---- Saving results -----          #
-    results_list[[length(results_list) + 1]] <- tmp
-  }
-}
-# -- bind every run; a fresh list cannot pick up a stale dtf2evaluation
-dtf2evaluation <- do.call(rbind, results_list)
-
-
-```
-
-## Evaluation
-
-### Preparing dataframe
-
-```{r}
-
-#tgc <- Rmisc::summarySE(dtf2evaluation, 
-#                 measurevar="timeProcess", groupvars=c("n_genotypes","from"))
-# -- backup
-#write_tsv(tgc, file = "2023_01_17-tgc_backup.tsv")
-
-tgc <- read_tsv('2023_01_17-tgc_backup.tsv', show_col_types = FALSE)
-```
-
-### Build graph for evaluation 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-#tgc$n_genotypes <- factor(tgc$n_genotypes)
-#tgc$n_genotypes = as.numeric(as.character(tgc$n_genotypes))
-p= ggplot(tgc, aes(x = n_genotypes, y = timeProcess, colour = from)) + 
-  geom_line(aes(x = n_genotypes, y= timeProcess+sd), 
-            linetype = "dashed") +
-  geom_line(aes(x = n_genotypes, y= timeProcess-sd), 
-            linetype = "dashed") +
-  geom_ribbon(aes(ymin = timeProcess-sd , 
-                  ymax= timeProcess+sd, fill = from), alpha=0.2) + 
-  geom_line(aes(x = n_genotypes, y= timeProcess), 
-            linetype = "solid") +
-  geom_point() +  
-  scale_y_log10() +
-  scale_color_manual(values = c('#581845', '#FFC30F')) + 
-  scale_fill_manual(values = c('#581845', '#FFC30F'))
-```
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE}
-p
-```
diff --git a/results/v3/GLM_mixte/2023_02_08_randomIntercept.Rmd b/results/v3/GLM_mixte/2023_02_08_randomIntercept.Rmd
deleted file mode 100644
index 2be7f63a0ae09c0d5babb6df259de0a22d0ae177..0000000000000000000000000000000000000000
--- a/results/v3/GLM_mixte/2023_02_08_randomIntercept.Rmd
+++ /dev/null
@@ -1,152 +0,0 @@
----
-title: "Random intercept"
-date: "2023-02-08"
-output: 
-  html_document:
-    toc: true 
-css: ../css/air.css
----
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-library(HTRfit)
-library(plotROC)
-```
-
-
-## Simulation
-
-### Parameters
-
-```{r}
-# -- SETUP
-n_G = 10
-n_genoT = 300
-n_env = 2
-FixIntercept = F
-max_n_rep = 15
-sequencing_fact = 1
-n_clus = 3
-thr = 2
-###########
-```
-
-### Random intercept 
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- Simulate counts
-mock_random = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
-    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = FixIntercept, n_clusters = n_clus)
-
-# -- count table & experimental design
-count_table_random = mock_random$countTable %>% as.data.frame()
-bioDesign_random = mock_random$design
-
-# -- ground truth
-beta.actual_random = mock_random$actualParam$beta
-
-```
-
-## Estimation
-
-### Launch glmmTMB::glmmTMB on simulated data (random intercept)
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- glmmTMB::glmmTMB
-dtf2fit_random = HTRfit::reshapeCounTable(count_table_random, bioDesign_random)
-l_random.tmb = HTRfit::launch.glm_mixte(dtf2fit_random, package = "glmmTMB")
-fitDtf_random.tmb =listFit2dtf(l_random.tmb)
-```
-
-### Launch lme4::glmer.nb on simulated data (random intercept)
-
-```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-# -- lme4::glm.nb
-l_random.lme4 = HTRfit::launch.glm_mixte(dtf2fit_random, package = "lme4")
-fitDtf_random.lme4 =listFit2dtf(l_random.lme4)
-```
-
-### Join Actual & Inference
-
-```{r}
-###### -- LME4 -- #######
-expectation_random = getExpectation(beta.actual_random,toEval = "glm_mixte" , threshold = thr)
-prediction_random = getPrediction(fitDtf_random.lme4$inference, threshold = thr, alphaRisk = 0.05)
-# -- join actual & inference
-actual2join.dtf <- data.table::data.table(expectation_random, key = c("gene_id", "term"))
-inference2join.dtf <- data.table::data.table(prediction_random, key = c("gene_id", "term"))
-comparison.lme4 <- actual2join.dtf[inference2join.dtf]
-
-
-###### -- GLMTMB -- #######
-expectation_random = getExpectation(beta.actual_random,toEval = "glm_mixte" , threshold = thr)
-prediction_random = getPrediction(fitDtf_random.tmb$inference, threshold = thr, alphaRisk = 0.05)
-# -- join actual & inference
-actual2join.dtf <- data.table::data.table(expectation_random, key = c("gene_id", "term"))
-inference2join.dtf <- data.table::data.table(prediction_random, key = c("gene_id", "term"))
-comparison.tmb <- actual2join.dtf[inference2join.dtf]
-
-```
-
-### Join lme4 & glmmTMB
-
-```{r}
-###### -- Annotations
-comparison.tmb = comparison.tmb %>% dplyr::mutate(from = "glmmTMB")
-comparison.lme4 = comparison.lme4 %>% dplyr::mutate(from = "lme4")
-# -- join
-comparison.dtf = rbind(comparison.lme4, comparison.tmb)
-```
-## Evaluation
-
-### Build graph for evaluation 
-
-```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
-
-# -- preparing dtf
-comparison.dtf$term = factor(comparison.dtf$term, levels = c("(Intercept)", "environmentE1","cor__(Intercept).environmentE1", "sd__(Intercept)", "sd__environmentE1"))
-comparison.dtf = comparison.dtf %>% 
-  mutate(actual.value = 
-           if_else(str_detect(term, "cor_"), 
-                   actual.value, actual.value*log(2) ))
-
-# -- Identity plot
-p = ggplot(comparison.dtf)  + 
-  geom_point(aes(x = actual.value, 
-                 y = estimate, col = from), alpha = 0.6, size = 3) +
-  geom_abline(intercept = 0, slope = 1) + 
-  facet_wrap(~term, scales = "free")  + 
-  theme(strip.text.x = element_text(size = 7)) +
-  scale_color_manual(values = c('#581845', '#FFC30F'))
-p
-#ggsave("../img/graph/poc_glmm2_1000.png", p,) 
-```
-
-### Visualization
-
-```{r, message=FALSE, warning=FALSE}
-p
-
-library(ggridges)
-
-x = comparison.dtf %>% reshape2::dcast(., gene_id ~ term, value.var = "estimate")
-gene_vec = rep(x$gene_id, 1000)
-a = rnorm(n_G* 1000, sd = x$`sd__(Intercept)`, mean = x$`(Intercept)` )  %>% data.frame() %>% mutate(Env = "(Intercept)") %>% mutate(gene_id = gene_vec)
-b = rnorm(n_G*1000, sd = x$sd__environmentE1, mean = x$`sd__(Intercept)` )  %>% data.frame() %>% mutate(Env = "E1")  %>% mutate(gene_id = gene_vec)
-dtf = rbind(a,b) %>% dplyr::rename(value = ".") %>% mutate(from = "prediction")
-
-x2 = comparison.dtf %>% reshape2::dcast(., gene_id ~ term, value.var = "actual.value")
-gene_vec = rep(x$gene_id, 1000)
-a = rnorm(n_G* 1000, sd = x2$`sd__(Intercept)`, mean = x2$`(Intercept)` )  %>% data.frame() %>% mutate(Env = "(Intercept)") %>% mutate(gene_id = gene_vec)
-b = rnorm(n_G*1000, sd = x2$sd__environmentE1, mean = x2$`sd__(Intercept)` )  %>% data.frame() %>% mutate(Env = "E1")  %>% mutate(gene_id = gene_vec)
-dtf2 = rbind(a,b) %>% dplyr::rename(value = ".") %>% mutate(from = "actual")
-
-dtf = rbind(dtf, dtf2)
-p = ggplot(dtf %>% filter(gene_id %in% c("gene1", "gene2", "gene3"))) + geom_density_ridges(aes(x = value , y = from, fill = from), alpha = 0.6) + facet_wrap(gene_id~Env, scales = 'free_x', ncol = 2) + xlim(c(-15,15)) + scale_fill_manual(values = c("#FFE699", "#1F4E79"))
-p
-ggsave("../img/graph/poc_glmm1000.png", p, height = 6, width = 8) 
-
-```
diff --git a/results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd b/results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd
deleted file mode 100644
index a18ef59e9b3f404d7d066e56172fb72ae1a5c75c..0000000000000000000000000000000000000000
--- a/results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd
+++ /dev/null
@@ -1,55 +0,0 @@
----
-title: "Range of observed values"
-date: "2023-02-01"
-output: 
-  html_document:
-    toc: true 
-css: css/air.css
----
-
-
-## Required
-
-
-```{r setup, message=FALSE, warning=FALSE, results="hide"}
-library(HTRsim)
-```
-
-## Step by step
-
-### Load counts table
-
-```{r}
-counTable = loadEmbedded_CounTable()
-counTable %>% head()
-```
-
-### Load design
-
-```{r}
-design = loadEmbedded_design()
-design %>% head
-```
-
-### DESEQ2
-
-```{r}
-dds <- fit_deseq(counTable, bioDesign = design)
-```
-
-### Beta & dispersion extraction
-
-```{r}
-dds.extraction <- extraction_embeddedDds(dds_obj = dds)
-dds.extraction$beta %>% head()
-dds.extraction$gene_dispersion %>% head()
-```
-
-
-## all-in
-
-```{r}
-observedValues = embedded_CounTable2observedValues()
-observedValues$beta %>% head()
-observedValues$gene_dispersion %>% head()
-```
diff --git a/results/v3/css/air.css b/results/v3/css/air.css
deleted file mode 100644
index 5b8f3b347c8db6ea9a6852178570304eb4ae6916..0000000000000000000000000000000000000000
--- a/results/v3/css/air.css
+++ /dev/null
@@ -1,219 +0,0 @@
-
-@media print {
-    *,
-    *:before,
-    *:after {
-      background: transparent !important;
-      color: #000 !important;
-      box-shadow: none !important;
-      text-shadow: none !important;
-    }
-  
-    a,
-    a:visited {
-      text-decoration: underline;
-    }
-  
-    a[href]:after {
-      content: " (" attr(href) ")";
-    }
-  
-    abbr[title]:after {
-      content: " (" attr(title) ")";
-    }
-  
-    a[href^="#"]:after,
-    a[href^="javascript:"]:after {
-      content: "";
-    }
-  
-    pre,
-    blockquote {
-      border: 1px solid #999;
-      page-break-inside: avoid;
-    }
-  
-    thead {
-      display: table-header-group;
-    }
-  
-    tr,
-    img {
-      page-break-inside: avoid;
-    }
-  
-    img {
-      max-width: 100% !important;
-    }
-  
-    p,
-    h2,
-    h3 {
-      orphans: 3;
-      widows: 3;
-    }
-  
-    h2,
-    h3 {
-      page-break-after: avoid;
-    }
-  }
-  
-  html {
-    font-size: 12px;
-  }
-  
-  @media screen and (min-width: 32rem) and (max-width: 48rem) {
-    html {
-      font-size: 15px;
-    }
-  }
-  
-  @media screen and (min-width: 48rem) {
-    html {
-      font-size: 16px;
-    }
-  }
-  
-  body {
-    line-height: 1.85;
-  }
-  
-  p,
-  .air-p {
-    font-size: 1rem;
-    margin-bottom: 1.3rem;
-  }
-  
-  h1,
-  .air-h1,
-  h2,
-  .air-h2,
-  h3,
-  .air-h3,
-  h4,
-  .air-h4 {
-    margin: 1.414rem 0 .5rem;
-    font-weight: inherit;
-    line-height: 1.42;
-  }
-  
-  h1,
-  .air-h1 {
-    text-align: center;
-    margin-top: 0;
-    font-size: 3.998rem;
-  }
-  
-  h2,
-  .air-h2 {
-    font-size: 2.827rem;
-  }
-  
-  h3,
-  .air-h3 {
-    font-size: 1rem;
-    text-align: left;
-    font-style: italic;
-  }
-  
-  h4,
-  .air-h4 {
-    font-size: 1.414rem;
-    text-align: center;
-  }
-  
-  h5,
-  .air-h5 {
-    font-size: 1.121rem;
-  }
-  
-  h6,
-  .air-h6 {
-    font-size: .88rem;
-  }
-  
-  small,
-  .air-small {
-    font-size: .707em;
-  }
-  
-  /* https://github.com/mrmrs/fluidity */
-  
-  img,
-  canvas,
-  iframe,
-  video,
-  svg,
-  select,
-  textarea {
-    max-width: 100%;
-  }
-  
-  @import url(http://fonts.googleapis.com/css?family=Open+Sans:300italic,300);
-  
-  body {
-    color: #444;
-    font-family: 'Open Sans', Helvetica, sans-serif;
-    font-weight: 300;
-    margin: 6rem auto 1rem;
-    max-width: 48rem;
-    text-align: justify;
-  }
-  
-  img {
-    border-radius: 0%;
-    justify-content: center;
-    align-items: center;
-  }
-  
-  a,
-  a:visited {
-    color: #000;
-    text-decoration: none;
-  }
-  
-  a:hover,
-  a:focus,
-  a:active {
-    color: #000;
-    text-decoration:  underline dotted;
-
-  }
-  
-  pre {
-    background-color: #fafafa;
-    padding: 1rem;
-    text-align: left;
-  }
-  
-  blockquote {
-    margin: 0;
-    border-left: 5px solid #7a7a7a;
-    font-style: italic;
-    padding: 1.33em;
-    text-align: left;
-  }
-  
-  ul,
-  ol,
-  li {
-    text-align: left;
-  }
-  
-  p {
-    color: #777;
-  }
-
-  #header {
-    border-bottom: thick double #333F50;
-    margin: 1em 6em 1em;
-
-  }
-
-  #TOC {
-      background-color: #f8f8f8;
-      padding: .5em;
-      padding-bottom: 2px;
-      
-  }
diff --git a/results/v3/css/splendor.css b/results/v3/css/splendor.css
deleted file mode 100644
index 4121b51a2bca4da9b80ea4544b8fc63ce5f59f9e..0000000000000000000000000000000000000000
--- a/results/v3/css/splendor.css
+++ /dev/null
@@ -1,225 +0,0 @@
-@media print {
-    *,
-    *:before,
-    *:after {
-      background: transparent !important;
-      color: #000 !important;
-      box-shadow: none !important;
-      text-shadow: none !important;
-    }
-  
-    a,
-    a:visited {
-      text-decoration: underline;
-    }
-  
-    a[href]:after {
-      content: " (" attr(href) ")";
-    }
-  
-    abbr[title]:after {
-      content: " (" attr(title) ")";
-    }
-  
-    a[href^="#"]:after,
-    a[href^="javascript:"]:after {
-      content: "";
-    }
-  
-    pre,
-    blockquote {
-      border: 1px solid #999;
-      page-break-inside: avoid;
-    }
-  
-    thead {
-      display: table-header-group;
-    }
-  
-    tr,
-    img {
-      page-break-inside: avoid;
-    }
-  
-    img {
-      max-width: 100% !important;
-    }
-  
-    p,
-    h2,
-    h3 {
-      orphans: 3;
-      widows: 3;
-    }
-  
-    h2,
-    h3 {
-      page-break-after: avoid;
-    }
-  }
-  
-  html {
-    font-size: 12px;
-  }
-  
-  @media screen and (min-width: 32rem) and (max-width: 48rem) {
-    html {
-      font-size: 15px;
-    }
-  }
-  
-  @media screen and (min-width: 48rem) {
-    html {
-      font-size: 16px;
-    }
-  }
-  
-  body {
-    line-height: 1.85;
-  }
-  
-  p,
-  .splendor-p {
-    font-size: 1rem;
-    margin-bottom: 1.3rem;
-  }
-  
-  h1,
-  .splendor-h1,
-  h2,
-  .splendor-h2,
-  h3,
-  .splendor-h3,
-  h4,
-  .splendor-h4 {
-    margin: 1.414rem 0 .5rem;
-    font-weight: inherit;
-    line-height: 1.42;
-  }
-  
-  h1,
-  .splendor-h1 {
-    margin-top: 0;
-    font-size: 3.998rem;
-  }
-  
-  h2,
-  .splendor-h2 {
-    font-size: 2.827rem;
-  }
-  
-  h3,
-  .splendor-h3 {
-    font-size: 1.999rem;
-  }
-  
-  h4,
-  .splendor-h4 {
-    font-size: 1.414rem;
-  }
-  
-  h5,
-  .splendor-h5 {
-    font-size: 1.121rem;
-  }
-  
-  h6,
-  .splendor-h6 {
-    font-size: .88rem;
-  }
-  
-  small,
-  .splendor-small {
-    font-size: .707em;
-  }
-  
-  /* https://github.com/mrmrs/fluidity */
-  
-  img,
-  canvas,
-  iframe,
-  video,
-  svg,
-  select,
-  textarea {
-    max-width: 100%;
-  }
-  
-  @import url(http://fonts.googleapis.com/css?family=Merriweather:300italic,300);
-  
-  html {
-    font-size: 18px;
-    max-width: 100%;
-  }
-  
-  body {
-    color: #444;
-    font-family: 'Merriweather', Georgia, serif;
-    margin: 0;
-    max-width: 100%;
-  }
-  
-  /* === A bit of a gross hack so we can have bleeding divs/blockquotes. */
-  
-  p,
-  *:not(div):not(img):not(body):not(html):not(li):not(blockquote):not(p) {
-    margin: 1rem auto 1rem;
-    max-width: 36rem;
-    padding: .25rem;
-  }
-  
-  div {
-    width: 100%;
-  }
-  
-  div img {
-    width: 100%;
-  }
-  
-  blockquote p {
-    font-size: 1.5rem;
-    font-style: italic;
-    margin: 1rem auto 1rem;
-    max-width: 48rem;
-  }
-  
-  li {
-    margin-left: 2rem;
-  }
-  
-  /* Counteract the specificity of the gross *:not() chain. */
-  
-  h1 {
-    padding: 4rem 0 !important;
-  }
-  
-  /*  === End gross hack */
-  
-  p {
-    color: #555;
-    height: auto;
-    line-height: 1.45;
-  }
-  
-  pre,
-  code {
-    font-family: Menlo, Monaco, "Courier New", monospace;
-  }
-  
-  pre {
-    background-color: #fafafa;
-    font-size: .8rem;
-    overflow-x: scroll;
-    padding: 1.125em;
-  }
-  
-  a,
-  a:visited {
-    color: #3498db;
-  }
-  
-  a:hover,
-  a:focus,
-  a:active {
-    color: #2980b9;
-  }
\ No newline at end of file
diff --git a/src/beta_test/htrsim_library.R b/src/beta_test/htrsim_library.R
deleted file mode 100644
index 0a2fd55099a42b069d0fdcc8f358a69e2281c371..0000000000000000000000000000000000000000
--- a/src/beta_test/htrsim_library.R
+++ /dev/null
@@ -1,151 +0,0 @@
-
-library(methods)
-
-######################## HTRsim definition ###########################################
-setClass("HTRsim", 
-         slots=list(
-           name="character",         
-           genes_NB_params = "data.frame", 
-            n_replicates = "numeric", 
-            tbl_counts='data.frame'))
-
-# Print method
-setMethod("print", "HTRsim", function(x) {
-            cat(paste0("Sample: ",x@name,"\n"))
-            cat(paste0("Number of replicates: ", x@n_replicates, "\n"))
-            cat("Access to NB params used to simulate this library using <myvar>@genes_NB_params\n")
-            cat("Access to table counts for this library using <myvar>@tbl_counts\n")
-            #print(x@tbl_counts)
-          }  
-)  
-
-setMethod("dim", "HTRsim", function(x) {
- return(list(x@n_replicates, length(x@genes_NB_params)))
-}  
-)  
-
-setMethod("length", "HTRsim", function(x) {
-  return(x@n_replicates)
-}  
-) 
-#setMethod("[", "Replicates", function(x) {
-#  m <- callNextMethod()
-#  return(x@tbl_counts[m])
-#})
-
-setMethod("[", "HTRsim",
-          function(x, i, j, ..., drop) {
-            x@tbl_counts <- x@tbl_counts[i]
-            x
-          })
-
-setMethod("show", "HTRsim", function(object) {
-  print(object@tbl_counts)
-}  
-)  
-
-############################## CONSTRUCTOR ##################################
-build_tbl_cnts <- function(name, genes_info, mu ,n_replicates){
-  tmp_obj = map2(.x=mu, .y=genes_info$alpha, .f=~(rnbinom(mu=.x, size=.y, n = n_replicates)))
-  my_tbl = data.frame(Reduce(rbind, tmp_obj), row.names = genes_info$name)
-  
-  ### rearrange for convenience
-  vec_of_replicates = rep(name, n_replicates)
-  vec_of_replicates_index = n_replicates %>% map(~seq(1,.)) %>% unlist()
-  samples_full_name = paste0(vec_of_replicates,"_" ,vec_of_replicates_index)
-  colnames(my_tbl) = samples_full_name 
-  return(my_tbl)
-}
-
-
-htrsim <- function(name, genes_info, mu ,n_replicates) {
-  new("HTRsim",
-      name= name,
-      genes_NB_params = cbind(genes_info, mu) ,
-      n_replicates = n_replicates,
-      tbl_counts = build_tbl_cnts(name, genes_info, mu, n_replicates)
-  )
-}
-
-############################ USEFUL FUNCTIONS ###############################
-## generate library from input :
-## - samples info dtf 
-## -and genes_info dtf
-library_generator <- function(samples_info, mu, genes_info){
-  HTR_sim_list <- map2(.x=samples_info$name, .y=samples_info$n_rep, .f=~(htrsim(name = .x , genes_info = genes_info, mu = mu,  n_replicates = .y)))
-  return(HTR_sim_list)
-}
-
-## convert list of HTRsim to dataframe
-## can be used as DESEQ input
-getDESEQ_input <- function(list_HTRsim){
-  HTR_sim_dtf <- list_HTRsim %>% map(~(return(.x@tbl_counts))) %>% do.call("cbind", .)
-  return(HTR_sim_dtf)
-}
-
-
-source("htrsim_library.R")
-
-
-################################### Get started : HTRSIM object #########################################
-
-
-## Inputs params
-set_gene_name = paste0('gene', 1:5)
-set_alpha = runif(2,100, n = 5) ## 5 values between 2 and 100
-#set_alpha
-set_mu = runif(100,1000, n = 5) ## 5 value between 100 & 1000
-#set_mu
-
-genes <- list(name = set_gene_name , alpha = set_alpha ) %>% data.frame()
-genes
-
-sample1_simulated <- htrsim( name ="mysample", genes_info = genes, mu = set_mu, n_replicates = 3)
-
-## All informations used for simulating <mysample> library are save in Htrsim object
-## By default when you print htrsim object you have only access to tbl_counts simulated
-sample1_simulated
-
-## Others information are available thanks to these commands
-sample1_simulated@name
-sample1_simulated@genes_NB_params
-sample1_simulated@n_replicates
-sample1_simulated@tbl_counts
-
-## using print you have access to a summary
-print(sample1_simulated)
-dim(sample1_simulated)
-
-samples
-tmp_obj = map2(.x=genes$mu, .y=genes$alpha, .f=~(rnbinom(mu=.x, size=.y, n = 5)))
-my_tbl = data.frame(Reduce(rbind, tmp_obj), row.names = genes$name)
-head(my_tbl)
-vec_of_replicates = rep(samples$name,samples$n_rep )
-vec_of_replicates_index = 4 %>% map(~seq(1,.)) %>% unlist()
-samples_full_name = paste0(vec_of_replicates,"_" ,vec_of_replicates_index)
-colnames(my_tbl) = samples_full_name
-
-
-#list(name_sample = samples_name, name_gene = paste0('gene', 1:N_genes), mu = runif(100,1000, n = N_genes))
-
-## samples and genes dtf will serve as input for our simulation
-## using samples and genes dtf, library_generator function build a list of HTRsim obj
-lib_sim <- library_generator(samples_info = samples, genes)
-lib_sim
-
-## each element of the list is an HTRsim obj
-## each HTRsim obj is corresponding to a sample (-> remember : N_samples = 5)
-length(lib_sim)
-samples
-
-##For each element of the list you can used HTRsim attributes
-lib_sim[[4]]@genes_NB_params
-lib_sim[[3]]@n_replicates
-
-## Finally you can convertlist of HTRsim to an usual data.frame 
-## which can be used as input by DESEQ
-my_lib_for_deseq <- getDESEQ_input(lib_sim)
-head(my_lib_for_deseq)
-
-
-
diff --git a/src/beta_test/main.R b/src/beta_test/main.R
deleted file mode 100644
index 0544f77d7256b5790786ea80cc3ea4855e0ffc64..0000000000000000000000000000000000000000
--- a/src/beta_test/main.R
+++ /dev/null
@@ -1,90 +0,0 @@
-############################# PCKGE REQUIRED ##############################
-library(DESeq2)
-library(ggplot2)
-library(tydiverse)
-### maybe others ###
-
-
-########################## change your home path  ##########################
-setwd("mydatalocal/counts_simulation/src")
-
-# fix seed
-set.seed(123)
-
-
-
-##########################    IMPORT FUN     ################################
-## simulation functions
-source("simulators.R")
-
-
-#visualization function
-source("visualization_fun.R")
-
-
-########################## INPUT PARAMS #####################################
-N_cond = 2
-N_gene = 6000
-n_rep_sim = seq(2, 5, by = 1) ### number of replicate to assessed
-
-
-############################ MU effect  #######################################
-
-mu_simul_dtf_res <- data.frame()
-for (N_rep in n_rep_sim){
-  mu_simul = seq(2500, 12000, by = 200)
-  #mu_simul
-  #mu_simul <- rep.int(1500, 8)
-  res_simul <- mu_effect(alpha = 2, mu_simul)
-  res_simul$N_rep <- N_rep
-  tmp_reshape_res_simul <- res_simul %>% reshape2::melt(.,id = c("vec_of_mu", "N_rep"))
-  mu_simul_dtf_res <- rbind(mu_simul_dtf_res, tmp_reshape_res_simul)
-}
-
-######  LOG transform  #######
-
-# -> SEE linearity of var observed & mu
-#mu_simul_dtf_res$value[mu_simul_dtf_res$variable=="var_observ"]<-log(mu_simul_dtf_res$value[mu_simul_dtf_res$variable=="var_observ"])
-#mu_simul_dtf_res$vec_of_mu <- log(mu_simul_dtf_res$vec_of_mu)
-
-
-###### Visualization ######
-figure_mu_effect <- mu_effect_visualization(mu_simul_dtf_res)
-figure_mu_effect
-
-
-########################### ALPHA effect ####################################
-n_rep_sim = seq(2, 5, by = 1)
-
-alpha_simul_dtf_res <- data.frame()
-for (N_rep in n_rep_sim){
-  alpha_simul = seq(0.2, 7, by = 0.2)
-  alpha_simul
-  res_simul <- size_effect(mu = 10000, alpha_simul)
-  res_simul$N_rep <- N_rep
-  tmp_reshape_res_simul <- res_simul %>% reshape2::melt(.,id = c("vec_of_alpha", "N_rep"))
-  alpha_simul_dtf_res <- rbind(alpha_simul_dtf_res, tmp_reshape_res_simul)
-}
-
-
-###### Visualization ######
-alpha_simul_dtf_res
-figure_alpha_effect <- size_effect_visualization(alpha_simul_dtf_res)
-figure_alpha_effect
-
-
-
-########################### EXPORT RESULTS #################################
-
-svg("mydatalocal/counts_simulation/img/fig_mu_effect.svg")
-figure_mu_effect
-dev.off()
-
-svg("mydatalocal/counts_simulation/img/fig_size_effect.svg")
-figure_alpha_effect
-dev.off()
-
-
-
-###########################  bETA TEST #####################################
-
diff --git a/src/beta_test/run_deseq.R b/src/beta_test/run_deseq.R
deleted file mode 100644
index 547a484514548f9c7c78f6374f6c6a25b3ede638..0000000000000000000000000000000000000000
--- a/src/beta_test/run_deseq.R
+++ /dev/null
@@ -1,96 +0,0 @@
-library("DESeq2")
-
-directory <- "mydatalocal/counts_simulation/results/"
-
-
-sampleFiles <- grep("*tsv",list.files(directory),value=TRUE)
-sampleName <- sub("*.tsv","",sampleFiles)
-
-sampleName
-
-sampleCondition <- as.character(sampleName)
-for (i in 1:length(sampleCondition)){
-  sampleCondition[i] <- gsub("_[A-B]", "", sampleCondition[i])
-}
-
-
-sampleCondition
-
-sampleTable <- data.frame(sampleName = sampleName,
-                          fileName = sampleFiles,
-                          condition = sampleCondition)
-sampleTable
-str(sampleTable)
-
-ddsInput <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable,
-                                       directory = directory,
-                                       design = ~ condition)
-
-
-
-dds <- DESeq(ddsInput)
-
-resultsNames(dds)
-
-resLFC <- lfcShrink(dds, coef=2, type="apeglm")
-resLFC
-# an alternate analysis: likelihood ratio test
-ddsLRT <- DESeq(dds, test="LRT", reduced= ~ 1)
-resLRT <- results(ddsLRT)
-resLRT
-
-
-dds$sizeFactor*(res$log2FoldChange)
-
-coef(dds)
-coef(dds, SE=TRUE)[1,]
-
-dds <- estimateSizeFactors(dds)
-dds$condition
-head(counts(dds, normalized=TRUE))
-
-resGA <- results(dds, contrast=c("condition","env1","env2"), lfcThreshold=.4, altHypothesis="greaterAbs")
-table(resGA$padj < 0.05)
-
-
-
-########################" poUR ALLER CHERHCERH DES TRUC INTERESSANT
-## simulation functions
-source("mydatalocal/counts_simulation/src/simulators.R")
-
-var(rnbinom(10000, mu=1000, size=200))
-
-
-N_gene= 6000
-N_cond= 2
-N_rep= 2
-
-
-mtrx<-matrix_generator(1000, 2)
-cond <- factor(rep(1:2, each=N_rep))
-dds <- DESeqDataSetFromMatrix(cnts, DataFrame(cond), ~ cond)
-
-# standard analysis
-dds <- DESeq(dds, fitType='local')
-res <- results(dds)
-
-mcols(dds,use.names=TRUE)[1:4,]
-substr(names(mcols(dds)),1,10) 
-mcols(mcols(dds), use.names=TRUE)[1:4,]
-assays(dds)[["mu"]]
-
-head(assays(dds)[["mu"]][which(res$padj<0.05),])
-
-head(assays(dds)[["mu"]][which(res$padj>0.05),])
-
-dispersions(dds)
-
-assays(dds)[["cooks"]][27,]
-
-sizeFactors(dds)
-b =mcols(dds)
-assays(dds)[["H"]]
-
-
-alpha_sample = dispersions(dds)
-
diff --git a/src/beta_test/simulators.R b/src/beta_test/simulators.R
deleted file mode 100644
index ed2fbcea96e00a869c26080bc679d436d5d7f09a..0000000000000000000000000000000000000000
--- a/src/beta_test/simulators.R
+++ /dev/null
@@ -1,100 +0,0 @@
-############################## FUNCTIONS  ###################################
-
-## count_generator(int, int, int) -> vec of length n_value
-count_generator <- function(n_value, mu_theo, size_theo){
-  rnbinom(n=n_value, mu = mu_theo, size = size_theo)
-}
-
-## MATRIX_generator(int, int) -> matrice de dim(Ncol = N_cond*N_rep, Nrow = N_gene)
-matrix_generator <- function(mu_theo, size_theo){
-  n_value = N_gene*N_cond*N_rep #number of counts expected
-  mtx <- matrix(count_generator(n= n_value , mu = mu_theo, size = size_theo),  ncol= N_cond*N_rep)
-  return(mtx)
-}
-
-
-
-
-mu_effect <- function(alpha, vec_of_mu){ 
-  mu_observ <- c()
-  var_observ <- c()
-  statistical_power <- c() ## Init results of Differential expression analysis
-  res_DEA <- c() 
-  for (mu in vec_of_mu){
-    
-    # Print advancement message
-    cat(sprintf("Simulation for mu = %d\n", mu))
-    
-    cnts <- matrix_generator(mu, alpha)
-    cond <- factor(rep(1:2, each=N_rep))
-    dds <- DESeqDataSetFromMatrix(cnts, DataFrame(cond), ~ cond)
-    
-    # standard analysis
-    dds <- DESeq(dds, fitType='local')
-    res <- results(dds)
-    
-    #mu_observed 
-    mu_observ <- c(mu_observ, mean(cnts))
-    #var
-    var_observ <- c(var_observ, mean(rowVars(cnts)))
-    
-    
-    # results of DEA
-    cat(sprintf("Length table = %d\n", length(table(res$padj < 0.05))))
-    if (dim(table(res$padj < 0.05)) == 1){
-      cat(sprintf("NO DEG = %d\n", mu))
-      cat(table(res$padj < 0.05))
-      
-      res_DEA <- c(res_DEA, 0) ## case 1 : no DEG found by DESEQ2
-      statistical_power <- c(statistical_power, NA)
-    }
-    else {
-      res_DEA <- c(res_DEA, table(res$padj < 0.05)[["TRUE"]]) ## case 2 : Nb DEG found by deseq2
-      statistical_power <- c(statistical_power, min(abs(res$log2FoldChange[res$padj < 0.05]),na.rm=TRUE))
-    }
-  }
-  return (data.frame(vec_of_mu, mu_observ, res_DEA, statistical_power, var_observ)) 
-}
-
-
-
-size_effect <- function(mu,vec_of_alpha){ 
-  mu_observ <- c()
-  var_observ <- c()
-  statistical_power <- c() ## Init results of Differential expression analysis
-  res_DEA <- c() 
-  for (alpha_params in vec_of_alpha){
-    
-    # Print advancement message
-    cat(sprintf("Simulation for alpha = %f\n", alpha_params))
-    
-    cnts <- matrix_generator(mu, alpha_params)
-    cond <- factor(rep(1:2, each=N_rep))
-    dds <- DESeqDataSetFromMatrix(cnts, DataFrame(cond), ~ cond)
-    
-    # standard analysis
-    dds <- DESeq(dds, fitType='local')
-    res <- results(dds)
-    
-    #mu_observed 
-    mu_observ <- c(mu_observ, mean(cnts))
-    #var
-    var_observ <- c(var_observ, mean(rowVars(cnts)))
-    
-    
-    # results of DEA
-    cat(sprintf("Length table = %d\n", length(table(res$padj < 0.05))))
-    if (dim(table(res$padj < 0.05)) == 1){
-      cat(sprintf("NO DEG = %d\n", mu))
-      cat(table(res$padj < 0.05))
-      
-      res_DEA <- c(res_DEA, 0) ## case 1 : no DEG found by DESEQ2
-      statistical_power <- c(statistical_power, NA)
-    }
-    else {
-      res_DEA <- c(res_DEA, table(res$padj < 0.05)[["TRUE"]]) ## case 2 : Nb DEG found by deseq2
-      statistical_power <- c(statistical_power, min(abs(res$log2FoldChange[res$padj < 0.05]),na.rm=TRUE))
-    }
-  }
-  return (data.frame(vec_of_alpha, mu_observ, res_DEA, statistical_power, var_observ)) 
-}
diff --git a/src/beta_test/visualization_fun.R b/src/beta_test/visualization_fun.R
deleted file mode 100644
index fe111357b1a6d97984fc906c7198760f61521ebe..0000000000000000000000000000000000000000
--- a/src/beta_test/visualization_fun.R
+++ /dev/null
@@ -1,18 +0,0 @@
-library(ggplot2)
-
-# visualization functions
-mu_effect_visualization <- function(mu_effect_res){
-  label_wrap <- c("mu observed", "N gene DE", "min(|logFC|)", "var observed")
-  names(label_wrap) <- c("mu_observ", "res_DEA", "statistical_power", "var_observ")
-  figure = mu_effect_res %>% ggplot(., aes(x=vec_of_mu, y = value, col=factor(N_rep))) +
-    geom_point() + facet_wrap(~variable, scales = "free_y", labeller = labeller(variable = label_wrap))  + labs(color = "N replicates")
-  return(figure)
-}
-
-size_effect_visualization <- function(alpha_effect_res){
-  label_wrap <- c("mu observed", "N gene DE", "min(|logFC|)", "var observed")
-  names(label_wrap) <- c("mu_observ", "res_DEA", "statistical_power", "var_observ")
-  figure = alpha_effect_res %>% ggplot(., aes(x=vec_of_alpha, y = value, col=factor(N_rep))) +
-    geom_point() + facet_wrap(~variable, scales = "free_y", labeller = labeller(variable = label_wrap))  + labs(color = "N replicates")
-  return(figure)
-}
\ No newline at end of file
diff --git a/src/htrsim_beta/counts_generator.R b/src/htrsim_beta/counts_generator.R
deleted file mode 100644
index e3a49112a5f521491548cf840f5e0b398b8eb835..0000000000000000000000000000000000000000
--- a/src/htrsim_beta/counts_generator.R
+++ /dev/null
@@ -1,24 +0,0 @@
-#########################     BUILD COUNTS TABLES        ######################
-
-### Simulate N values from a Negative binomial distribution
-rn_sim <- function(mu, alpha, n_replicates, ...){
-  simul<- rnbinom(mu=mu, size=alpha, n = n_replicates)
-  return(simul)
-}
-
-### Simulate counts and convert to lovely deseq input
-generate_counts <- function(setup_dtf){
-  message("reading and processing counts per genes ...")
-  cnt.list = setup_dtf %>% 
-                      purrr::pmap(rn_sim)
-  message("reshaping to dataframe ...")
-  cnt.dtf <- cnt.list %>%
-                  plyr::ldply(., rbind) %>% 
-                  BiocGenerics::cbind(setup_dtf %>% select(c("gene_id", "name"))) %>%
-                  reshape2::melt(.,id=c('name','gene_id'),value.name = "counts") %>% 
-                  tidyr::unite(full_name, name, variable) %>%
-                  tidyr::drop_na(counts) %>%
-                  reshape2::dcast(., gene_id ~ full_name, value.var= "counts")
-  return(cnt.dtf)
-}
-
diff --git a/src/htrsim_beta/input_estimation.R b/src/htrsim_beta/input_estimation.R
deleted file mode 100644
index 18cc20c3dd8852e3321113814cd29b524a1d94d5..0000000000000000000000000000000000000000
--- a/src/htrsim_beta/input_estimation.R
+++ /dev/null
@@ -1,45 +0,0 @@
-###################      Estimate alpha per gene        ########################
-
-estim.alpha <- function(dds){
-    #N.B: alpha = dispersion per gene
-    #dds  <- DESeq2::estimateDispersions(dds, quiet = F)
-    #dispersion estimation
-    dispersion_estimate <- dispersions(dds) 
-    
-    ## Shape and export
-    names(dispersion_estimate) <- tabl_cnts %>% rownames()
-    
-    ## drop NA in dispersion estimate (link to unexpress gene)
-    ### and convert to lovely dataframe
-    expressed_gene_dispersion <- dispersion_estimate[!is.na(dispersion_estimate)] %>%       
-      data.frame() %>% 
-      rownames_to_column() %>% 
-      rename(., "alpha" = ., gene_id = "rowname")
-    return(expressed_gene_dispersion)
-    
-    #disp_gene_express %>% dim
-    #write_tsv(disp_gene_express, 'results/2022-03-03/estimate_dispersion.tsv')
-}
-
-
-#################     Estimate mu distribution        #########################
-estim.mu <- function(dds){
-  mu_estimate <- dds@assays@data$mu
-  #dds@assays@data$mu %>% dim()
-  #mu_estimate %>% dim()
-  rownames(mu_estimate) = rownames(dds@assays@data$counts)
-  ## drop NA in dispersion estimate (link to unexpress gene)
-  ### and convert to lovely dataframe
-  mu_gene_express = mu_estimate %>% 
-                        na.omit()  %>%
-                        data.frame() 
-  
-  colnames(mu_gene_express) <- colnames(tabl_cnts)
-  mu_gene_express
-  mu_gene_express <- mu_gene_express %>% 
-                        mutate(gene_id = rownames(.)) %>% 
-                        select(gene_id, everything())
-  return(mu_gene_express)
-  
-  #write_tsv(mu_gene_express, 'results/2022-03-03/estimate_mu.tsv')
-}
\ No newline at end of file
diff --git a/src/htrsim_beta/launch_deseq.R b/src/htrsim_beta/launch_deseq.R
deleted file mode 100644
index 7ad08f7158ba8cb9705602dce6939b3114734e5d..0000000000000000000000000000000000000000
--- a/src/htrsim_beta/launch_deseq.R
+++ /dev/null
@@ -1,9 +0,0 @@
-########### LAUNCH DESEQ #############
-
-run.deseq <- function(tabl_cnts, bioDesign ){
-  ## Design model
-  dds = DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = ~ mutant + env + mutant:env)
-  ## DESEQ standard analysis
-  dds <- DESeq(dds)
-  return(dds)
-}
\ No newline at end of file
diff --git a/src/htrsim_beta/main.R b/src/htrsim_beta/main.R
deleted file mode 100644
index 073ec92c7368d29ae31cd5cff45ab178bd47a7f2..0000000000000000000000000000000000000000
--- a/src/htrsim_beta/main.R
+++ /dev/null
@@ -1,25 +0,0 @@
-
-htrsim <- function(countData, bioDesign, N_replicates){
-
-
-  source(file = "/home/rstudio/mydatalocal/counts_simulation/src/htrsim/launch_deseq.R")
-  dds <- run.deseq(countData, bioDesign)
-
-  source(file="/home/rstudio/mydatalocal/counts_simulation/src/htrsim/input_estimation.R")
-  mu.input = estim.mu(dds)
-  alpha.input = estim.alpha(dds)
-
-
-  source(file="/home/rstudio/mydatalocal/counts_simulation/src/htrsim/setup_cntsGenerator.R")
-  input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input)
-
-  setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                       n_rep = N_replicates,
-                                       alpha = input$alpha,
-                                       gene_id = input$gene_id,
-                                       mu = input$mu)
-  ## Generate counts
-  source(file= "/home/rstudio/mydatalocal/counts_simulation/src/htrsim/counts_generator.R" )
-  htrs <- generate_counts(setup.simulation)
-  return(list(simul_cnts = htrs, mu.input = mu.input, alpha.input = alpha.input, setup = setup.simulation))
-}
diff --git a/src/htrsim_beta/setup_cntsGenerator.R b/src/htrsim_beta/setup_cntsGenerator.R
deleted file mode 100644
index 109957502e9ab2fb9b56721805b0de07ec2181d5..0000000000000000000000000000000000000000
--- a/src/htrsim_beta/setup_cntsGenerator.R
+++ /dev/null
@@ -1,131 +0,0 @@
-########## BUILD INPUT DATAFRAME ###########
-
-reshape_input2setup <- function(mu.dtf, alpha.dtf){
-  ## Defining sample names
-  bioSample_id <- mu.dtf %>% 
-      select(-gene_id) %>% 
-      colnames() %>% 
-      purrr::map(., ~stringr::str_split(.,"_")[[1]][1:2] %>% 
-                 BiocGenerics::paste(., collapse='_') )  %>% 
-      BiocGenerics::unlist() %>% BiocGenerics::unique() 
-  
-  
-  ############### Mu is same for biosample replicate ############# 
-  ### case 1: choose 1  replicate 
-  #mu_params <- mu_params %>% dplyr::select(., contains("rep1")) 
-  ## rename mu_params colnames to ensure corresponding with sample_names 
-  #colnames(mu_params) <- sample_names
-  ### case 2: average replicates
-  average_rep <- function(x, dtf) {
-    varname <- x
-    dtf %>% 
-      select(.,contains(x)) %>% 
-      mutate(!!varname := rowMeans(.)) %>% 
-      select(varname)
-  }
-  mu_avg.dtf <- bioSample_id %>% map(.x = ., .f = ~average_rep(.x, mu.dtf))  %>% data.frame() 
-  mu_avg.dtf$gene_id <- alpha.dtf$gene_id
-
-  return(list(alpha = alpha.dtf , mu = mu_avg.dtf, bioSample_id = bioSample_id, gene_id = alpha.dtf$gene_id))
-} 
-
-
-
-handle_except <- function(bioSample, n_rep , gene_id , alpha, n_genes){
-    
-    if(is.numeric(n_rep) && length(n_rep) == 1){
-      message("Homogeneous number of replicates between samples: ", n_rep, " replicates per samples\n")
-      n_rep = rep(n_rep, length(bioSample))
-    }
-  
-    if(!is.null(n_rep) && length(bioSample) != length(n_rep)) stop("ERROR: unconsistent length between samples_names and n_rep\n")
-  
-    
-    if(is.null(n_genes)) {
-       if(!is.null(gene_id) || !is.null(alpha)){
-         ifelse(length(alpha) == length(alpha), 
-                  (n_genes = length(gene_id)), ## if 
-                  stop("ERROR: unconsistent value between n_genes, length(gene_names) and length(gene_disp)\n")) ## else
-       } 
-    }
-  
-    if(!is.null(n_genes)) {
-      if(is.null(gene_id) && is.null(alpha)) {
-        ### Precised alpha params for each genes
-        alpha = runif(0.2,120, n = n_genes) ## randomly defined between 2 and 100
-        id = paste0('gene', 1:n_genes)
-        alpha <-  list(gene_id = id, alpha = alpha) 
-      }
-    }
-    
-    
-  
-    if(is.null(n_genes) && is.null(gene_disp) && is.null(gene_id)) {
-      message("Number of genes unspecified\nAssuming n_genes = 3\nAssuming gene dispersion (alpha) follow a uniform law between 2 and 100\n")
-      n_genes = 3
-      alpha = runif(2,100, n = n_genes)
-      gene_id = paste0('gene', 1:n_genes)
-    }
-    
-    if(is.null(gene_id) && is.null(alpha)) {
-      message("n_genes = ", n_genes, "\nAssuming gene dispersion (alpha) follow a uniform law between 2 and 100\n")
-      alpha = runif(2,100, n = n_genes)
-      gene_id = paste0('gene', 1:n_genes)
-    }
-  
-    if(length(bioSample) == 1 && bioSample == "my_first_lib") message("No sample name is provided.\nAssuming only one library will be setup\n")
-  
-    if(is.null(n_rep)){
-      message("Number of replicates not provided.\nAssuming 10 replicates per sample will be setup")
-      n_rep = 10
-    }
-
-  
-    if(is.null(gene_id)) gene_id = paste0('gene', 1:n_genes)
-    
-  
-  my_list = list(bioSample = bioSample, rep = n_rep, n_g = n_genes,   alpha = alpha)
-  return(my_list)
-}
-
-
-
-
-setup_countGener <- function(bioSample_id = "my_first_lib", n_rep = NULL , gene_id = NULL , alpha = NULL, n_genes = NULL, mu = NULL ){
-  
-  ######### HANDLE EXCEPTION #######
-  setup = handle_except(bioSample_id, n_rep , gene_id , alpha, n_genes)
-  ######## HANDLE TYPE MU ##########
-  if(is.null(mu)) mu = .mu_generator # default function to generate mu
-  
-  if(is.function(mu)) {  #mu = function
-    mu.set = mu(setup$n_g)
-    ######## BUILD AN INPUT DTF FOR count_generator ############
-    nBinom_params <- purrr::map2(.x= setup$bioSample, .y = setup$rep, 
-                                  ~(list(name=.x, #sample_name
-                                         n_replicates = .y, # random int between 1 & max_N_replicates 
-                                         gene_id = setup$alpha$gene_id,  # gene_id
-                                         mu = mu.set ,  #mu(ij)
-                                         alpha = setup$alpha$alpha))) %>%  # alpha(i)
-                    data.table::rbindlist(.) %>% as.data.frame() ## convert to lovely dtf
-  }
-  
-  
-  if(is.data.frame(mu)) {  # mu = data.frame
-    mu.dtf = mu
-    ######## BUILD AN INPUT DTF FOR count_generator ############
-    nBinom_params <- purrr::map2(.x= setup$bioSample, .y = setup$rep, 
-                                 ~(list(name=.x, #sample_name
-                                        n_replicates = .y, # number replicates
-                                        gene_id = setup$alpha$gene_id,  # gene_name
-                                        mu = mu.dtf %>% dplyr::select(all_of(.x)) %>% unlist() ,  #mu(ij)
-                                        alpha = setup$alpha$alpha))) %>%  # alpha(i)
-                      data.table::rbindlist(.) %>% as.data.frame() ## convert to lovely dtf
-  }
-  
-  return(nBinom_params)
- 
-}
-
-
-.mu_generator <- function(x) return(runif(100,1000, n = x ))
diff --git a/src/htrsim_beta/setup_deseq.R b/src/htrsim_beta/setup_deseq.R
deleted file mode 100644
index 82c774942f28805fdd2055afbeae8915413e93f8..0000000000000000000000000000000000000000
--- a/src/htrsim_beta/setup_deseq.R
+++ /dev/null
@@ -1,17 +0,0 @@
-########## BUILD INPUT DESEQ ###########
-
-setup_deseq = function(samples, htrs){
-  
-  samples = samples %>% data.frame()
-  
-  
-  design.deseq <- list(samples = colnames(htrs.deseq))
-  origin_replicates <- map(colnames(htrs.deseq), ~str_split(., pattern = "_")[[1]][1]) %>% unlist()
-  idx <- match(origin_replicates, samples$name)
-  design.deseq$genotype = samples$genotype[idx]
-  design.deseq$env = samples$env[idx]
-  
-  design.deseq = design.deseq %>% data.frame()
-  
-  return (design.deseq)
-}
\ No newline at end of file
diff --git a/src/kallisto_output2TablCnts.R b/src/kallisto_output2TablCnts.R
deleted file mode 100644
index 66fadcadceca3546ac18b7dda14dff9210d7e580..0000000000000000000000000000000000000000
--- a/src/kallisto_output2TablCnts.R
+++ /dev/null
@@ -1,49 +0,0 @@
-library(tidyverse)
-library(rhdf5)
-library(tximport)
-
-# code contributed from Andrew Morgan
-read_kallisto_h5 <- function(fpath, ...) {
-  if (!requireNamespace("rhdf5", quietly=TRUE)) {
-    stop("reading kallisto results from hdf5 files requires Bioconductor package `rhdf5`")
-  }
-  counts <- rhdf5::h5read(fpath, "est_counts")
-  ids <- rhdf5::h5read(fpath, "aux/ids")
-  efflens <- rhdf5::h5read(fpath, "aux/eff_lengths")
-
-  # as suggested by https://support.bioconductor.org/p/96958/#101090
-  ids <- as.character(ids)
-
-  stopifnot(length(counts) == length(ids))
-  stopifnot(length(efflens) == length(ids))
-
-  result <- data.frame(target_id = ids,
-                       eff_length = efflens,
-                       est_counts = counts,
-                       stringsAsFactors = FALSE)
-  normfac <- with(result, (1e6)/sum(est_counts/eff_length))
-  result$tpm <- with(result, normfac*(est_counts/eff_length))
-  return(result)
-}
-
-
-setwd("/home/arnux/counts_simulation/data/SRP217588/")
-listFile = list.files(path= ".", pattern=NULL, all.files=FALSE, full.names=FALSE)
-
-h52dtf <- function(fn){
-  print(fn)
-  h5 = tximport(files = fn,
-           type = "kallisto",
-           txOut = TRUE, importer = read_kallisto_h5 )
-  return(h5$counts)
-}
-
-a = listFile %>% map(~h52dtf(.x))
-tbl_cnts = do.call("cbind", a)
-colnames(tbl_cnts) = listFile %>% str_split(pattern = ".h5", simplify = TRUE)%>% .[,1] %>% str_replace(., '-', '_' ) %>% str_replace(., '-', '_' )
-tbl_cnts= tbl_cnts %>% data.frame()
-tbl_cnts = tbl_cnts %>% rownames_to_column("gene_id")
-write_tsv(tbl_cnts, "../../src/htrsim/inst/extdata/SRP217588.tsv" )
-#write_tsv(tbl_cnts, "../../src/htrsim/inst/extdata/SRP217588.tsv" )
-
-
diff --git a/src/model.fit/HTRfit.Rcheck/00_pkg_src/HTRfit/R/preprocessing.R b/src/model.fit/HTRfit.Rcheck/00_pkg_src/HTRfit/R/preprocessing.R
deleted file mode 100644
index 045ff07ec4307bb75ebf730696bf3c90fd0e4db0..0000000000000000000000000000000000000000
--- a/src/model.fit/HTRfit.Rcheck/00_pkg_src/HTRfit/R/preprocessing.R
+++ /dev/null
@@ -1,33 +0,0 @@
-#' get df2fit for a given gene
-#' @param k_ij 
-#' @param design_simulation
-#' @param gene_name
-#' @import dplyr
-#' @return dataframe 
-#' @export
-#'
-#' @examples
-getdf2fit <- function(k_ij , design_simulation, gene_name){
-  df_gene_i = cbind(design_simulation, k_ij) %>% 
-                        dplyr::mutate(gene_id = gene_name) %>% 
-                        dplyr::mutate(gene_id = gene_name)
-  rownames(df_gene_i) <- NULL
-  return(df_gene_i)
-}   
-
-#' get df2fit for a given gene
-#' @param countTable 
-#' @param experimental_design
-#' @import purrr
-#' @return dataframe 
-#' @export
-#'
-#' @examples
-reshapeCounTable<- function(countTable, experimental_design ){
-
-    gene_id_list = rownames( countTable )
-    list_df2fit = purrr::map( .x = gene_id_list, ~getdf2fit( countTable[ .x, ], 
-                                           experimental_design, .x) )
-    data2fit = do.call( rbind, list_df2fit )
-    return(data2fit)
-}
\ No newline at end of file
diff --git a/src/model.fit/HTRfit/DESCRIPTION b/src/model.fit/HTRfit/DESCRIPTION
deleted file mode 100644
index a5a95ef8f3cf548d062589810a2785dd101c186b..0000000000000000000000000000000000000000
--- a/src/model.fit/HTRfit/DESCRIPTION
+++ /dev/null
@@ -1,25 +0,0 @@
-Package: HTRfit
-Title: What the Package Does (One Line, Title Case)
-Version: 0.0.0.9000
-Authors@R: 
-    person("First", "Last", , "first.last@example.com", role = c("aut", "cre"),
-           comment = c(ORCID = "YOUR-ORCID-ID"))
-Description: What the package does (one paragraph).
-License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
-    license
-Encoding: UTF-8
-Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.2
-Depends: 
-    tidyverse
-Imports: 
-    broom,
-    broom.mixed,
-    dplyr,
-    furrr,
-    futile.logger,
-    future,
-    MASS,
-    purrr,
-    stats,
-    tibble
diff --git a/src/model.fit/HTRfit/NAMESPACE b/src/model.fit/HTRfit/NAMESPACE
deleted file mode 100644
index c3cb52f58d274f3b9711f056d7e60af5b2879721..0000000000000000000000000000000000000000
--- a/src/model.fit/HTRfit/NAMESPACE
+++ /dev/null
@@ -1,17 +0,0 @@
-# Generated by roxygen2: do not edit by hand
-
-export(fit.glm)
-export(getStatisticWaldTest)
-export(getdf2fit)
-export(launch.glm)
-export(listFit2dtf)
-export(reshapeCounTable)
-export(tidySummary)
-export(wald_test)
-import(MASS)
-import(broom)
-import(dplyr)
-import(furrr)
-import(future)
-import(purrr)
-import(stats)
diff --git a/src/model.fit/HTRfit/R/model_fitting.R b/src/model.fit/HTRfit/R/model_fitting.R
deleted file mode 100644
index 337afcd80a02c7f24f89d9b85b66ed09b7d5a9cd..0000000000000000000000000000000000000000
--- a/src/model.fit/HTRfit/R/model_fitting.R
+++ /dev/null
@@ -1,260 +0,0 @@
-
-#' launch glm on data
-#' @param data2fit
-#' @param model2fit
-#' @param fit_by
-#' @param threads
-#' @import future
-#' @import furrr
-#' @import futile.logger
-#' @return fit list
-#' @export
-#'
-#' @examples
-launch.glm <- function(data2fit,
-                       model2fit = k_ij ~ genotype + environment + genotype:environment,
-                       fit_by = "gene_id",
-                       threads = 4) {
-    stopifnot(fit_by %in% colnames(data2fit))
-    # -- log
-    futile.logger::flog.info("Fit started per %s", fit_by)
-
-    data2fit <- data2fit %>% as.data.frame()
-    iteration_list <- data2fit[, fit_by] %>%
-        unique() %>%
-        unlist() %>%
-        unname() ## get list gene
-    future::plan(multisession, workers = threads)
-    fit_list <- iteration_list %>% furrr::future_map(
-        .x = .,
-        ~ fit.glm(
-            data2fit[which(data2fit[, fit_by] == .x), ],
-            .x,
-            model2fit,
-            fit_by
-        )
-    )
-
-    # -- log
-    futile.logger::flog.info("Fit ended\n")
-
-    return(fit_list)
-}
-
-
-#' fit model on data
-#' @param data
-#' @param id
-#' @import MASS
-#' @import dplyr
-#' @import futile.logger
-#' @return fit
-#' @export
-#'
-#' @examples
-fit.glm <- function(data, id, model2fit, fit_by) {
-    # -- function executed if all perform well
-    f <- function(data, id, model2fit, fit_by) {
-        fit <- MASS::glm.nb(model2fit, data = data, link = log, control = glm.control(maxit=1000))
-        fit.dtf <- tidySummary(fit, "glm")
-        fit.dtf$inference <- fit.dtf$inference %>% dplyr::mutate(!!fit_by := id)
-        # -- convert estimation from natural logarithm to log base 2
-        fit.dtf$inference$estimate <- fit.dtf$inference$estimate/log(2)
-        fit.dtf$inference$std.error <- fit.dtf$inference$std.error/log(2)
-        fit.dtf$fitQuality <- fit.dtf$fitQuality %>% dplyr::mutate(!!fit_by := id)
-        fit.dtf$dispersion <- list(dispersion.estimate = fit$theta) %>%
-            as.data.frame() %>%
-            dplyr::mutate(!!fit_by := id)
-        futile.logger::flog.info("%s: ok", id)
-        return(fit.dtf)
-    }
-
-    tryCatch(
-        expr = {
-            withCallingHandlers(
-                f(data, id, model2fit, fit_by),
-                warning = function(w) {
-                    futile.logger::flog.info("%s: %s ", id, w)
-                    invokeRestart("muffleWarning")
-                }
-            )
-        },
-        error = function(e) {
-            futile.logger::flog.error("%s: %s ", id, e)
-
-            inference <- list(estimate = NA, std.error = NA, term = NA) %>%
-                as.data.frame() %>%
-                dplyr::mutate(!!fit_by := id)
-            fitQuality <- list(null.deviance = NA, df.null = NA, logLik = NA, AIC = NA, BIC = NA, deviance = NA, df.residual = NA, nobs = NA) %>%
-                as.data.frame() %>%
-                dplyr::mutate(!!fit_by := id)
-            dispersion <- list(dispersion.estimate = NA) %>%
-                as.data.frame() %>%
-                dplyr::mutate(!!fit_by := id)
-            fit.dtf <- list(inference = inference, fitQuality = fitQuality, dispersion = dispersion)
-
-            return(fit.dtf)
-        }
-    )
-}
-
-#' fit model on data
-#' @param fit
-#' @param modelType
-#' @import broom.mixed
-#' @import dplyr
-#' @return list of element
-#' @export
-#'
-#' @examples
-tidySummary <- function(fit, modelType = "glm") {
-    dtf <- broom.mixed::tidy(fit)
-    if (modelType == "glm") {
-        dtf <- dtf %>%
-            dplyr::select(estimate, std.error, term)
-    }
-    if (modelType == "glm_mixte") {
-        dtf <- dtf %>%
-            dplyr::select(effect, group, estimate, std.error, term)
-    }
-    gl <- broom::glance(fit)
-    return(list(inference = dtf, fitQuality = gl))
-}
-
-#' convert list to dataframe
-#' @param list_fit
-#' @return list of dtf
-#' @export
-#'
-#' @examples
-listFit2dtf <- function(list_fit) {
-    tmp <- do.call(cbind, list_fit)
-    inference.dtf <- do.call(rbind, tmp[1, ])
-    fitQuality.dtf <- do.call(rbind, tmp[2, ])
-    dispersion.dtf <- do.call(rbind, tmp[3, ])
-    return(list(inference = inference.dtf, fitQuality = fitQuality.dtf, dispersion = dispersion.dtf))
-}
-
-
-#' launch glm mixte on data
-#' @param data
-#' @param model2fit
-#' @param fit_by
-#' @param threads
-#' @param package
-#' @import future
-#' @import furrr
-#' @import futile.logger
-#' @return fit list
-#' @export
-#'
-#' @examples
-launch.glm_mixte <- function(data2fit,
-                             model2fit = k_ij ~ environment + (1 + environment | genotype),
-                             fit_by = "gene_id", package = "glmmTMB",
-                             threads = 4) {
-    # -- log
-    futile.logger::flog.info("Fit started per %s", fit_by)
-    futile.logger::flog.info("GLM mixte: %s", package)
-
-    data2fit <- data2fit %>% as.data.frame()
-    iteration_list <- data2fit[, fit_by] %>%
-        unique() %>%
-        unlist() %>%
-        unname() ## get list gene
-    future::plan(multisession, workers = threads)
-    fit_list <- iteration_list %>% furrr::future_map(
-        .x = .,
-        ~ fit.glm_mixte(
-            data2fit[which(data2fit[, fit_by] == .x), ],
-            .x,
-            model2fit,
-            fit_by, package
-        )
-    )
-    # -- log
-    futile.logger::flog.info("Fit ended\n")
-    return(fit_list)
-}
-
-
-
-
-#' fit model on data
-#' @param data
-#' @param id
-#' @param model2fit
-#' @param fit_by
-#' @param package
-#' @import lme4
-#' @import dplyr
-#' @import futile.logger
-#' @import glmmTMB
-#' @return fit
-#' @export
-#'
-#' @examples
-fit.glm_mixte <- function(data, id, model2fit, fit_by, package = "glmmTMB") {
-    # -- function executed if all perform well
-    f <- function(data, id, model2fit, fit_by, package) {
-        stopifnot(package %in% c("glmmTMB", "lme4"))
-        if (package == "glmmTMB") {
-            fit <- glmmTMB::glmmTMB(model2fit, data = data, family = nbinom1, verbose = F)
-            theta <- glmmTMB::sigma(fit) # dispersion
-        }
-        if (package == "lme4") {
-            fit <- lme4::glmer.nb(model2fit, data = data, verbose = FALSE)
-            theta <- lme4::getME(fit, "glmer.nb.theta") # dispersion
-        }
-
-        fit.dtf <- tidySummary(fit, "glm_mixte")
-        fit.dtf$inference <- fit.dtf$inference %>% dplyr::mutate(!!fit_by := id)
-        fit.dtf$fitQuality <- fit.dtf$fitQuality %>% dplyr::mutate(!!fit_by := id)
-        # -- convert estimation from natural logarithm to log base 2
-        fit.dtf$inference = fit.dtf$inference %>% dplyr::mutate(
-                                estimate = if_else(str_detect(term, "cor_"), 
-                                        estimate, estimate/log(2) ))
-        #  avoid error if missing columns => set to NA
-        if (!("AIC" %in% colnames(fit.dtf$fitQuality))) fit.dtf$fitQuality$AIC = NA
-        if (!("BIC" %in% colnames(fit.dtf$fitQuality))) fit.dtf$fitQuality$BIC = NA
-        if (!("logLik" %in% colnames(fit.dtf$fitQuality))) fit.dtf$fitQuality$logLik = NA
-        if (!("deviance" %in% colnames(fit.dtf$fitQuality))) fit.dtf$fitQuality$deviance = NA
-
-        fit.dtf$dispersion <- list(dispersion.estimate = theta) %>%
-            as.data.frame() %>%
-            dplyr::mutate(!!fit_by := id)
-
-
-        # -- log all rigth
-        futile.logger::flog.info("%s: ok", id)
-        return(fit.dtf)
-    }
-
-
-    tryCatch(
-        expr = {
-            withCallingHandlers(
-                f(data, id, model2fit, fit_by, package),
-                warning = function(w) {
-                    futile.logger::flog.info("%s: %s ", id, w)
-                    invokeRestart("muffleWarning")
-                }
-            )
-        },
-        error = function(e) {
-            # -- log error
-            futile.logger::flog.error("%s: %s ", id, e)
-            inference <- list(effect = NA, group = NA,estimate = NA, std.error = NA, term = NA) %>%
-                as.data.frame() %>%
-                dplyr::mutate(!!fit_by := id)
-            fitQuality <- list( sigma = NA, logLik = NA, AIC = NA, BIC = NA, deviance = NA, df.residual = NA, nobs = NA) %>%
-                as.data.frame() %>%
-                dplyr::mutate(!!fit_by := id)
-            dispersion <- list(dispersion.estimate = NA) %>%
-                as.data.frame() %>%
-                dplyr::mutate(!!fit_by := id)
-            fit.dtf <- list(inference = inference, fitQuality = fitQuality, dispersion = dispersion)
-            return(fit.dtf)
-        }
-    )
-}
diff --git a/src/model.fit/HTRfit/R/preprocessing.R b/src/model.fit/HTRfit/R/preprocessing.R
deleted file mode 100644
index 96fc39911299abe32569d34b85e1545180d1718e..0000000000000000000000000000000000000000
--- a/src/model.fit/HTRfit/R/preprocessing.R
+++ /dev/null
@@ -1,25 +0,0 @@
-#' get df2fit 
-#' @param countTable
-#' @param experimental_design
-#' @import data.table
-#' @import reshape2
-#' @import tibble
-#' @return dataframe
-#' @export
-#'
-#' @examples
-reshapeCounTable <- function(countTable, experimental_design) {
-  countTable = countTable %>% data.frame()
-  countTable.long <- countTable %>%
-    tibble::rownames_to_column("gene_id") %>%
-    reshape2::melt(
-      id.vars = "gene_id",
-      value.name = "k_ij",
-      variable.name = "sample_id"
-    )
-  countTable2join <- data.table::data.table(countTable.long, key = "sample_id")
-  experimentalDesign2join <- data.table::data.table(experimental_design, key = "sample_id")
-  data2fit <- countTable2join[experimentalDesign2join]
-
-  return(data2fit)
-}
diff --git a/src/model.fit/HTRfit/R/wald_test.R b/src/model.fit/HTRfit/R/wald_test.R
deleted file mode 100644
index f63a0fa569a34a4d8794d4d88c0bb70799b99ac8..0000000000000000000000000000000000000000
--- a/src/model.fit/HTRfit/R/wald_test.R
+++ /dev/null
@@ -1,45 +0,0 @@
-#' fit model on data
-#' @param W
-#' @param altHypothesis
-#' @import stats
-#' @return pvalue
-#' @export
-#'
-#' @examples
-wald_test <- function(w, altHypothesis = "greaterAbs") {
-    if (altHypothesis == c("greaterAbs")) {
-        ## greaterAbs
-        pval <- 2 * (stats::pnorm(w, mean = 0, sd = 1, lower.tail = FALSE))
-        pval[pval > 1] <- 1
-    }
-    if (altHypothesis %in% c("greater", "lessAbs")) {
-        pval <- stats::pnorm(w, mean = 0, sd = 1, lower.tail = FALSE)
-        pval[pval > 1] <- 1
-    }
-    return(pval)
-}
-
-
-#' fit model on data
-#' @param estimate
-#' @param threshold
-#' @param stdError
-#' @param altHypothesis
-#' @return W
-#' @export
-#'
-#' @examples
-getStatisticWaldTest <- function(estimate, stdError, threshold, altHypothesis = "greaterAbs") {
-    ## cf https://en.wikipedia.org/wiki/Wald_test
-    stopifnot(threshold >= 0)
-    if (altHypothesis == "greaterAbs") {
-        wald_stat <- (abs(estimate) - threshold) / stdError
-    }
-    if (altHypothesis == "lessAbs") {
-        wald_stat <- (threshold - abs(estimate)) / stdError
-    }
-    if (altHypothesis == "greater") {
-        wald_stat <- (estimate - threshold) / stdError
-    }
-    return(wald_stat)
-}
diff --git a/src/model.fit/HTRfit/devtools_history.R b/src/model.fit/HTRfit/devtools_history.R
deleted file mode 100644
index 630791dde4a0792d6839ff6af2144f82e58a9602..0000000000000000000000000000000000000000
--- a/src/model.fit/HTRfit/devtools_history.R
+++ /dev/null
@@ -1,15 +0,0 @@
-usethis::use_build_ignore("devtools_history.R")
-usethis::use_package('tidyverse', type = "depends")
-usethis::use_package("dplyr")
-usethis::use_package("purrr")
-usethis::use_package("furrr")
-usethis::use_package("MASS")
-usethis::use_package("future")
-usethis::use_package("stats")
-usethis::use_package("broom.mixed")
-usethis::use_package("tibble")
-usethis::use_package("futile.logger")
-
-
-
-
diff --git a/src/tuto_beta/tutorial_htrsim.R b/src/tuto_beta/tutorial_htrsim.R
deleted file mode 100644
index 7a3049e3d5806ecbb95b6be51be5cfdd8783cf4c..0000000000000000000000000000000000000000
--- a/src/tuto_beta/tutorial_htrsim.R
+++ /dev/null
@@ -1,124 +0,0 @@
-################################### Getting started  ########################################################
-### Required
-library(tidyverse)
-library(data.table)
-
-setwd("~/mydatalocal/counts_simulation/")
-
-###############################################################################
-#######################     Experimental design     #######################
-N_samples = 1 ## number of samples
-N_replicates = 2000 ## Nb of replicates per samples
-N_genes = 3 # number of genes in your favorite organism
-###########################################################################
-
-
-#########################     Define NB params    ######################
-### GENERATE SAMPLE INFO (name + nb of replicates per sample)
-samples <- list(name = paste0('sample', 1:N_samples), n_rep = rep(N_replicates,N_samples))
-samples
-
-### GENERATE Negative binomial params (dispersion(i) & mu(ij)) 
-## N.B: dispersion != between gene 
-##      but dispersion == between sample (for same gene)
-set_alpha_per_gene = runif(2,100, n = N_genes)
-set_gene_name = paste0('gene', 1:N_genes)
-## N.B: mu != between gene and between sample 
-genes <-  list(names = set_gene_name, alpha = set_alpha_per_gene)
-
-genes
-
-source(file = "src/htrsim/setup_cntsGenerator.R")
-nBinom_params <- setup_countGener(sample_names = samples$name , n_rep = samples$n_rep , gene_names = genes$names, alpha_gene = genes$alpha )
-
-
-setup_countGener()
-#genes_NB_params
-## Use filter to understand our dtf
-## Notice that alpha is equal for equivalent gene between sample
-nBinom_params %>% filter(name_gene == "gene1")
-## But not mu !
-
-
-source(file = "src/htrsim/counts_generator.R")
-htrs <- generate_counts(nBinom_params)
-htrs %>% head()
-data2plot <- htrs %>% reshape2::melt( ., id=c("name_gene"), variable.name = "Run")
-data2plot <- data2plot %>% group_by(name_gene) %>% mutate(mean_obs = mean(value)) %>% ungroup()
-#data2plot %>% select() %>% 
-
-## ###########################" plot 2 understand ############################
-figure = data2plot %>% ggplot(., aes(x=value)) +
-  geom_histogram(fill= "grey") + 
-  facet_wrap(~name_gene, scales = "free_y") +
-  geom_vline(aes(xintercept = mean_obs), col="#0072B2") +
-  geom_text(aes(x = mean_obs, y = 0, label = paste0("Mean obs\n", mean_obs), vjust = -2, hjust=-0.2), col="#0072B2")#, vjust = -1, angle = 45))
-
-nBinom_params
-figure
-### check if counts tables create is consistent with the design previously defined
-dim(htrs[, -1]) ## dimension without column name_gene
-genes_NB_params %>% filter(name_gene == "gene1") %>% select(n_replicates) %>% sum()
-
-
-
-
-
-################################### COMPLEX LIBRARIES SIMULATOR  ########################################################
-
-#######################     Experimental design     #######################
-N_samples = 200
-max_N_replicates = 5 ## maximum number of replicates per sample -> heterogeneous design
-N_genes = 6000
-##############################################################################
-
-
-
-#########################     Define NB params    ######################
-### GENERATE SAMPLE INFO (name + nb of replicates per sample)
-samples <- list(name = paste0('sample', 1:N_samples), n_rep = sample(1:max_N_replicates, N_samples, replace=TRUE))
-samples
-
-
-### GENERATE Negative binomial params (dispersion(i) & mu(ij)) 
-## N.B: dispersion != between gene 
-##      but dispersion == between sample (for same gene)
-set_alpha_per_gene = runif(2,100, n = N_genes)
-set_gene_name = paste0('gene', 1:N_genes)
-## N.B: mu != between gene and between sample 
-genes_NB_params <- samples$name %>% 
-  map(~(list(name=., #sample_name
-             n_replicates = sample(1:max_N_replicates, 1), #random int between 1 & max_N_replicates 
-             name_gene = set_gene_name,  # gene_name
-             mu = runif(100,1000, n = N_genes),  #mu(ij)
-             alpha = set_alpha_per_gene))) %>%  # alpha(i)
-  rbindlist(.) %>% data.frame() ## convert to dtf
-
-genes_NB_params
-## Use filter to understand our dtf
-## Notice that alpha is equal for equivalent gene between sample
-genes_NB_params %>% filter(name_gene == "gene251")
-## But not mu !
-
-
-#########################     BUILD COUNTS TABLES        ######################
-
-# a bit long :/
-
-ptm <- proc.time()
-htrs<- htrSim(genes_NB_params)
-proc.time() - ptm
-
-
-### check if the dtf create is consistent with the design previously defined
-dim(htrs[, -1]) ## dimension without column name_gene
-genes_NB_params %>% filter(name_gene == "gene1") %>% select(n_replicates)%>% sum()
-
-
-
-htrs %>% head()
-
-
-
-
-
diff --git a/src/tuto_beta/tutorial_htrsim.Rmd b/src/tuto_beta/tutorial_htrsim.Rmd
deleted file mode 100644
index 1c201fe629d7a1c93cea80638405d4c0366afff8..0000000000000000000000000000000000000000
--- a/src/tuto_beta/tutorial_htrsim.Rmd
+++ /dev/null
@@ -1,785 +0,0 @@
----
-title: "HTRSIM Getting started"
-output: html_document
----
-
-# A. Introduction
-
-$$
-Phenotype = Genotype + Environment + Genotype.Environment
-$$
-From this expression, $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$ can be seen as coefficients that quantify the contribution of each factor (Genotype, Environment and the Genotype/Environment interaction).
-In mathematical terms, this leads to a linear expression such as:
-$$
-P = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0}
-$$
-In order to estimate these coefficients, a Generalized Linear Model (GLM) can be used.
-
-# B. HTRSIM getting started
-
-  <u>a. Required</u> 
-
-```{r required, message=FALSE, echo = T, results = "hide"}
-library(data.table)
-library(tidyverse)
-library(DESeq2)
-```
-
-
-```{r setworkdir}
-# on berthollet
-#setwd("/home2/aduvermy/counts_simulation/src/")
-# on VM
-setwd("/home/rstudio/mydatalocal/counts_simulation/src/")
-```
-
-
-  <u>b. Workflow</u> 
-
-
-```{r echo=FALSE, out.width='50%'}
-knitr::include_graphics('../img/schema_loop.jpg')
-```
-
-  <u>c. RNA-seq pipeline</u> 
-
-You can use your favorite pipeline to obtain a count table from real data.
-If you don't know how to obtain such a count table, see [rna-seq_public_library_investigations](https://gitbio.ens-lyon.fr/aduvermy/rna-seq_public_library_investigations).
-
-
-  <u> d. BioProject PRJNA675209b as input</u>
-
-To easily test *HTRSIM* we produced a typical count table from BioProject PRJNA675209b.
-Take the time to clean up your count table.
-
-```{r}
-tabl_cnts <- read.table("/home/rstudio/mydatalocal/rna-seq_public_library_investigations/results/2022-02-09/salmon.merged.gene_counts.tsv", header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-tabl_cnts
-```
-
-   <u> e. Launch HTRSIM</u>
-   
-```{r message=FALSE, warning=FALSE}
-## import design of bioProject
-bioDesign <- read.table(file = "/home/rstudio/mydatalocal/rna-seq_public_library_investigations/data/design_deseq__PRJNA675209.csv")
-#bioDesign
-source(file = "htrsim/main.R")
-tabl_cnts %>% dim()
-bioDesign %>% dim()
-
-```
-
-
-
-
-# C. Advance user
-
-
-This is a tutorial for *htrsim* utilization
-
-To perform counts per genes *htrsim* needs some inputs.</br>
-Following parameters are required:</br>
-- number of samples</br>
-- number of replicates per samples</br>
-- number of genes in your favorite organism</br>
-
-
-Before performing counts per genes, you should provide to *htrsim* a setup dataframe.</br>
-This setup dataframe will precise the design and the input parameters per genes and samples to use during simulation.</br>
-
-## A. Performing simple design
-
-As a first step we will simulate a single library with 3 replicates.</br>
-
-  <u>a. Setup the simulation</u> 
-  
-```{r simple design, echo=TRUE, message=FALSE, warning=FALSE}
-N_samples = 1 ## number of samples
-N_replicates = 3 ## Nb of replicates per samples
-N_genes = 3 # number of genes in your favorite organism
-
-### SAMPLE INFO (name + nb of replicates per sample)
-samples <- list(name = "my_sample", n_rep = N_replicates)
-samples
-
-### SETUP SIMULATION 
-source(file = "htrsim/setup_cntsGenerator.R")
-setup.simulation <- setup_countGener(sample_names = samples$name,
-                                     n_rep= samples$n_rep,
-                                     n_genes = N_genes)
-setup.simulation
-```
-
-  <u>b. Generate counts per genes</u>
-
-Now you can generate a lovely counts table from parameters *mu* and *alpha* stored in ```setup.simulation```.</br>
-
-```{r generate counts, echo=TRUE, message=FALSE, warning=FALSE}
-## GENERATE COUNTS
-source(file = "htrsim/counts_generator.R")
-htrs <- generate_counts(setup.simulation)
-htrs %>% dim()
-htrs %>% head()
-```
-
-   <u>c. Understand mu & alpha </u>
-  
-Regarding ```setup.simulation``` you should remark columns *mu* and *alpha*.</br>
-*mu* and *alpha* are input parameters used in *htrsim* to generate counts.</br>
-
-Htrsim uses a negative binomial distribution to generate counts per gene.</br>
-</br>
-</br>
-</br>
-**Let a gene i</br>**
-**Let a sample j</br>**
-**Let c(ij) a read count for sample j and gene i</br>**
-</br>
-</br>
-$$
-c(ij) \sim {\sf Nbinom}(\mu_{ij}, \alpha_{i}) 
-$$
-</br>
-<br>
-Then, $\mu_{ij}$ and $\alpha_{i}$ define a negative binomial distribution from which counts for sample j and gene i are randomly sampled.
-</br>
-</br>
-</br>
-Please redo the simulation with a large set of replicates for your library.  </br>
-
-```{r understand mu & alpha, echo=TRUE, message=FALSE, warning=FALSE}
-N_samples = 1 ## number of samples
-N_replicates = 2000 ## Nb of replicates per samples
-N_genes = 3 # number of genes in your favorite organism
-
-### SAMPLE INFO (name + nb of replicates per sample)
-samples <- list(name = "my_sample", n_rep = N_replicates) 
-
-### SETUP SIMULATION 
-source(file = "htrsim/setup_cntsGenerator.R")
-setup.simulation <- setup_countGener(sample_names= samples$name, 
-                                     n_rep= samples$n_rep, 
-                                     n_genes = N_genes)
-### GENERATE COUNTS
-source(file = "htrsim/counts_generator.R")
-htrs <- generate_counts(setup.simulation)
-```
-
-
-Next, plot your distribution of simulated counts per gene.</br>
-
-<span style="color:red">*WARNING: The code below is only informative with one library, a small set of genes and a lot of replicates!*</span></br>
-
-```{r plot hist, message=FALSE, warning=FALSE}
-## Reshape and build dataframe easy to plot
-data2plot <- htrs %>% reshape2::melt( ., id=c("name_gene"), variable.name = "Run")
-data2plot <- data2plot %>% 
-                dplyr::group_by(name_gene) %>% 
-                dplyr::mutate(mean_obs = mean(value)) %>% 
-                dplyr::ungroup()
-
-figure = data2plot %>% ggplot(., aes(x=value)) +
-            geom_histogram(fill= "grey", binwidth = 30) + 
-            facet_wrap(~name_gene, scale = "free_y") +
-            geom_vline(aes(xintercept = mean_obs), col="#0072B2") +
-            geom_text(aes(x = mean_obs, y = 0, label = paste0("Mean obs\n", mean_obs), vjust = -2, hjust=-0.2), col="#0072B2")
-figure
-setup.simulation
-``` 
-
-
-
-By comparing the ```setup.simulation``` object with the figures obtained above, you should notice that the observed mean of counts per gene is very close to the input parameter $\mu_{ij}$.</br>
-Also, notice that the greater the parameter $\alpha_{i}$, the narrower the distribution (and vice versa).
-
-Therefore, mu is the mean of distribution from which you are sampling your counts.
-And alpha is the dispersion of the distribution.
-
-  <u>d. Setup mu and alpha before counts generation</u>
-   
-*htrsim* gives you the possibility to setup your own parameters for negative binomial. </br>
-You can control the distribution from which each count $c_{ij}$ are sampled.
-
-
-```{r setup params}
-N_samples = 2 ## number of samples
-N_replicates = 2 ## Nb of replicates per samples
-N_genes = 3 # number of genes in your favorite organism
-
-### SAMPLE INFO (name + nb of replicates per sample)
-samples <-  list(name = paste0('sample', 1:N_samples), n_rep = N_replicates)
-
-
-### Defining alpha params for each gene
-alphaGene.set = stats::runif(0.2,120, n = N_genes) ## randomly drawn between 0.2 and 120 (n is matched by name, so min = 0.2 and max = 120)
-nameGene.set = paste0('gene', 1:N_genes)
-genes <-  list(name = nameGene.set , alpha = alphaGene.set) %>% as.data.frame()
-
-
-## from an input dataframe
-mu.dtf.colnames = samples$name
-mu.dtf.geneID = genes$name
-mu.dtf.init = data.frame(matrix(0, nrow = length(mu.dtf.geneID), ncol = length(mu.dtf.colnames)))
-colnames(mu.dtf.init) = mu.dtf.colnames
-mu.dtf <- mu.dtf.init %>% 
-                dplyr::mutate_all(., ~ runif(100, 2000, n = N_genes)) ## fill mu.dtf from uniform law
-mu.dtf <- mu.dtf %>% #optionnal
-                dplyr::mutate(gene_id = mu.dtf.geneID) %>% 
-                dplyr::select(gene_id ,everything()) 
-
-## Setup simulation
-source(file = "htrsim/setup_cntsGenerator.R")
-## with mu define from a dataframe
-setup.simulation1 <- setup_countGener(sample_names= samples$name, 
-                                      n_rep= samples$n_rep, 
-                                      gene_dispersion = genes$alpha, 
-                                      gene_names = genes$name, 
-                                      mu = mu.dtf)
-setup.simulation1
-
-### Defining mu params for each gene of each sample
-## from a statistical law
-mu.foo <- function(x) runif(n = x, min = 100, max = 1000) # x uniform draws between 100 and 1000
-## with mu define from function
-setup.simulation2 <- setup_countGener(sample_names= samples$name, 
-                                      n_rep= samples$n_rep, 
-                                      gene_dispersion = genes$alpha, 
-                                      gene_names = genes$name, 
-                                      mu = mu.foo)
-setup.simulation2
-```
-
-
-Above we randomly defined $\mu_{ij}$ and $\alpha_{i}$.</br>
-
-Defining mu and alpha value per gene for each sample is useful when you can infer them from experimental data.</br>
-[See rna-seq_public_library_investigation](https://gitbio.ens-lyon.fr/aduvermy/rna-seq_public_library_investigations)
-
-
-Use filter to understand the ```setup.simulation``` object.</br>
-
-```{r filter setup, echo=TRUE}
-setup.simulation1 %>% dplyr::filter(name_gene == "gene1")
-```
-
-You should notice that $\alpha$ is the same for a given gene in every sample.</br>
-But not $\mu$! </br>
-As defined in : <br> 
-$$
-c(ij) \sim {\sf Nbinom}(\mu_{ij}, \alpha_{i}) 
-$$
-
-## B. Performing complex design
-
-Now you understand how $\mu$ and $\alpha$ affect the distribution of counts per gene.<br>
-You also know how to use *htrsim* to build a library from a simple RNA-seq design.
-
-
-  <u>a. Setup design of simulation</u>
-  
-*Htrsim* was developed to build complex RNA-seq designs.
-By complex RNA-seq design I mean several libraries with several replicates, often with a different number of replicates per sample.
-
-```{r complex design, message=FALSE, warning=FALSE}
-N_samples <- 10
-max_N_replicates <- 5 ## maximum number of replicates per sample -> heterogeneous design
-N_genes <- 6000
-
-### SAMPLE INFO (name + nb of replicates per sample)
-## n_rep is drawn independently for each sample in 1..max_N_replicates to obtain a heterogeneous design
-samples <- list(name = paste0('sample', seq_len(N_samples)),
-                n_rep = sample(seq_len(max_N_replicates), N_samples, replace = TRUE))
-samples
-
-### Defining alpha params for each gene
-alphaGene.set <- runif(n = N_genes, min = 0.2, max = 120) ## randomly drawn between 0.2 and 120
-nameGene.set <- paste0('gene', seq_len(N_genes))
-genes <- data.frame(name = nameGene.set, alpha = alphaGene.set) ## one row per gene
-
-### Defining mu params for each gene of each sample
-## from an input dataframe
-mu.dtf.colnames = samples$name
-mu.dtf.geneID = genes$name
-mu.dtf.init = data.frame(matrix(0, nrow = length(mu.dtf.geneID), ncol = length(mu.dtf.colnames)))
-colnames(mu.dtf.init) = mu.dtf.colnames
-mu.dtf <- mu.dtf.init %>% 
-                      mutate_all(., ~ runif(100, 2000, n = N_genes)) ## fill mu.dtf from uniform law
-mu.dtf <- mu.dtf %>% 
-                  mutate(gene_id = mu.dtf.geneID) %>%  # reshape for convenience
-                  dplyr::select(gene_id ,everything())
-
-## Setup simulation
-source(file = "htrsim/setup_cntsGenerator.R")
-## with mu dataframe
-setup.simulation <- setup_countGener(sample_names= samples$name, 
-                                     n_rep= samples$n_rep, 
-                                     gene_dispersion = genes$alpha, 
-                                     gene_names = genes$name, 
-                                     mu = mu.dtf)
-source(file= "htrsim/counts_generator.R" )
-htrs <- generate_counts(setup.simulation)
-htrs %>% dplyr::filter(name_gene == "gene1")
-```
-
-About columns names:</br>
-- *sample1_1* reference the counts for sample 1 replicate 1</br>
-- *sample1_2* reference the counts for sample 1 replicate 2</br>
-- *sample2_1* reference the counts for sample 2 replicate 1</br>
-- ...</br>
-
-
-  <u>b. Setup Deseq2</u>
-
-Once you convert name_gene columns to rownames ```htrs``` can be used as input of DESEQ2.
-
-```{r input deseq 1, message=FALSE, warning=FALSE}
-## Reshape for DESEQ
-rownames(htrs) <- htrs$name_gene
-htrs.deseq = htrs %>% dplyr::select(-name_gene) # remove name gene column
-```
-
-But DESEQ2 also needs to understand your design. [DESeq2 tutorial](http://bioconductor.org/packages/devel/bioc/vignettes/DESeq2/inst/doc/DESeq2.html)<br>
-For this, we developed a function to convert ```setup.simulation``` into a conventional deseq2 design input.
-
-
-```{r input deseq 2, message=FALSE, warning=FALSE}
-## add information for each sample
-set.seed(101)
-samples$env <- c(rep("kcl", 5), rep("control", 5)) ## 5 x 2 environments
-samples$genotype <- sample(c("msn2d", "msn4D", "WT"),10, replace = TRUE)
-
-## SETUP DESEQ
-source(file = "htrsim/setup_deseq.R")
-setup.deseq <- setup_deseq(samples, htrs.deseq)
-setup.deseq %>% head()
-```
-
-  <u>c. Run DESeq2</u>
-
-```{r message=FALSE, warning=FALSE}
-#source(file = "htrsim/launch_deseq.R")
-htrs.deseq %>% dim()# htrs %>% select(-name_gene)
-setup.deseq %>% dim()
-#model.matrix(~genotype + env + genotype:env, setup.deseq) # check if a column is fully equal to 0 if yes -> lead to an error
-dds = DESeq2::DESeqDataSetFromMatrix(countData = htrs.deseq, 
-                                     colData = setup.deseq, 
-                                     design = ~ genotype + env + genotype:env)
-```
-
-   <u>d. alpha</u>
-
-```{r message=FALSE, warning=FALSE}
-dds <- DESeq2::estimateSizeFactors(dds)
-dds  <- DESeq2::estimateDispersions(dds)
-#dispersion_estimate <- dip
-dispersion_estimate <- DESeq2::dispersions(dds) 
-
-alpha.inference = data.frame(alpha = dispersion_estimate, from = "inference")
-alpha.input = data.frame(alpha = alphaGene.set, from = "input") 
-
-alpha.dtf <- rbind(alpha.input, alpha.inference)
-ggplot(alpha.dtf, aes(x=alpha)) + geom_density() + scale_y_log10() + facet_grid(~from)
-```
-
-Observed distributions are slightly different !<br>
-Distribution of alpha inferred by deseq does not follow a uniform distribution.<br>
-Whereas we produced alpha per gene from a uniform distribution.<br>
-
-<u>e. mu </u>
-
-```{r message=FALSE, warning=FALSE}
-
-mu_estimate <- dds@assays@data$mu
-mu_inference.vec = as.vector(mu_estimate)
-mu.inference = data.frame(mu = mu_inference.vec, from = "inference")
-
-mu_input.vec = as.vector(mu.dtf %>% dplyr::select(-gene_id)) %>% 
-                                            purrr::flatten() %>% 
-                                            BiocGenerics::unlist()
-mu.input = data.frame(mu = mu_input.vec, from = "input")
-
-mu.dtf <- BiocGenerics::rbind(mu.input, mu.inference)
-ggplot(mu.dtf, aes(x=mu)) + geom_density()  + facet_grid(~from)
-```
-
-
-Observed distributions are slightly different !<br>
-Distribution of $\mu_{ij}$ inferred by deseq does not follow a uniform distribution.<br>
-Whereas we produced mu per gene from a uniform distribution.<br>
-
-## C. BioProject PRJNA675209b as input 
-
-PRJNA675209b is an RNA-seq project in which 3 genotypes are studied in 2 environmental conditions.<br>
-Using the conventional RNA-seq procedure described [here](https://gitbio.ens-lyon.fr/aduvermy/rna-seq_public_library_investigations) and a Generalized Linear Model, it is possible to deduce $\mu$ and $\alpha$ from real counts per gene.
-Using this RNA-seq conventional procedure we defined $\mu$ and $\alpha$ from RNA-seq real table counts (PRJNA675209b).
-<br>
-
-By using $\mu_{ij}$ and $\alpha_{i}$ deduced from the PRJNA675209b count table as input parameters of ```htrsim```, we will simulate a count table ($c_{ij}$). 
-By using a Generalized Linear Model, we will infer $\mu_{ij}$ and $\alpha_{i}$ from the simulated $c_{ij}$.<br>
-Finally, we will compare the $\mu_{ij}$ and $\alpha_{i}$ deduced from real counts with those obtained from simulated counts.
-
-**By varying practical parameters (number of replicates, library size, ...) we aim to understand how to maximize the agreement between the $\mu_{ij}$ and $\alpha_{i}$ obtained from real and simulated counts.**
-
-1. Input for simulation
-
-```{r message=FALSE, warning=FALSE}
-## import alpha(i) for each genes
-alpha_params = readr::read_tsv(file="../../rna-seq_public_library_investigations/results/2022-03-03/estimate_dispersion.tsv")#, show_col_type=F)
-
-
-## import mu(ij) params for each gene & each sample
-mu_params = readr::read_tsv(file="../../rna-seq_public_library_investigations/results/2022-03-03/estimate_mu.tsv")#, show_col_type=F)
-## Defining sample names
-sample_names <- mu_params %>% 
-                select(-gene_id) %>% 
-                colnames() %>% 
-                purrr::map(., ~stringr::str_split(.,"_")[[1]][1:2] %>% 
-                                  BiocGenerics::paste(., collapse='_') )  %>% 
-                BiocGenerics::unlist() %>% BiocGenerics::unique() 
-
-## Mu is same for replicate 
-### case 1: choose 1  replicate 
-#mu_params <- mu_params %>% dplyr::select(., contains("rep1")) 
-## rename mu_params colnames to ensure corresponding with sample_names 
-#colnames(mu_params) <- sample_names
-### case 2: average replicates
-## average_rep(x): 1-column data frame named `x` = row means of the `mu_params` columns whose name contains `x`
-average_rep <- function(x) {
-    mu_params %>%
-      dplyr::select(dplyr::contains(x)) %>%
-      dplyr::mutate(!!x := rowMeans(.)) %>%
-      dplyr::select(dplyr::all_of(x))
-}
-mu_params <- sample_names %>% map(.x = ., .f = ~average_rep(.x))  %>% data.frame() 
-mu_params$gene_id <- alpha_params$gene_id
-
-## Defining number of genes:
-N_genes = length(alpha_params$gene_id)
-
-## Defining number of replicates:
-N_replicates = 2 ## homogeneous design / same as bioproject
-
-### Samples informations (name + nb of replicates per sample)
-samples <- list(name = sample_names,  n_rep = N_replicates) ## Nb of replicates per samples)
-
-### Defining alpha params for each gene
-alphaGene.set = alpha_params$dispersion
-nameGene.set = alpha_params$gene_id
-genes <-  list(name = nameGene.set, 
-               alpha = alphaGene.set) %>%
-                                  as.data.frame()
-```
-
-2. setup & process simulation
-
-```{r message=FALSE, warning=FALSE}
-source(file = "htrsim/setup_cntsGenerator.R")
-## Setup simulation from PRJNA675209b table counts
-setup.simulation <- setup_countGener(sample_names = samples$name,
-                                     n_rep= samples$n_rep,
-                                     gene_dispersion = genes$alpha, 
-                                     gene_names = genes$name,
-                                     mu = mu_params)
-## Generate counts
-source(file= "htrsim/counts_generator.R" )
-htrs <- generate_counts(setup.simulation)
-```
-
-3. Setup & Run DESeq2
-
-```{r message=FALSE, warning=FALSE}
-## reshape htrs for convenience
-rownames(htrs) <- htrs$name_gene
-htrs.deseq = htrs %>% 
-              dplyr::select(-name_gene) # remove name gene column
-
-
-## add information for each sample
-samples$env <- sample_names %>% 
-                    purrr::map(., ~stringr::str_split(.,"_")[[1]][2]) %>% 
-                    BiocGenerics::unlist() ## environment = 2nd "_"-separated field of each sample name
-samples$genotype <- sample_names %>% 
-                        purrr::map(., ~stringr::str_split(.,"_")[[1]][1]) %>% 
-                        BiocGenerics::unlist()
-
-## build input dataframe for deseq
-setup.deseq = samples %>% 
-                  as.data.frame() %>% 
-                  tidyr::uncount(n_rep, .id = "idx") %>% 
-                  tidyr::unite(name, name, idx , sep = "_")  
-rownames(setup.deseq) <- c() ## drop rownames
-
-
-#check homogeneity between countData column number & colData row number
-htrs.deseq %>% dim()
-setup.deseq %>% dim()
-
-# check if a column is fully equal to 0 if yes -> lead to an error
-#model.matrix(~genotype + env + genotype:env, setup.deseq) 
-dds = DESeq2::DESeqDataSetFromMatrix(countData = htrs.deseq,
-                                     colData = setup.deseq,
-                                     design = ~ genotype + env + genotype:env)
-```
-
-4. alpha
-
-```{r message=FALSE, warning=FALSE}
-
-## Dispersion inference
-dds <- DESeq2::estimateSizeFactors(dds)
-dds  <- DESeq2::estimateDispersions(dds)
-dispersion_estimate <- DESeq2::dispersions(dds) 
-
-## Build dataframes
-alpha.inference = data.frame(inference = dispersion_estimate)
-
-alpha.input = data.frame(input = alpha_params$dispersion,  
-                        name_gene = alpha_params$gene_id) 
-
-## Same dimension
-#alpha.input %>% dim
-#alpha.inference %>% dim
-
-## Merge & plot
-alpha.dtf <- BiocGenerics::cbind(alpha.input, alpha.inference)
-
-
-## Plot 2 by 2
-ggplot(alpha.dtf, aes(x=input, y = inference), na.rm = TRUE) + geom_point(alpha=0.4) 
-
-# Reshape and plot distribution
-alpha.dtf.reshape = alpha.dtf %>% 
-                      reshape2::melt( ., id=c("name_gene"), 
-                                      variable.name = "from", 
-                                      value.name = "alpha")
-                                    
-
-ggplot(alpha.dtf.reshape, aes(x=alpha), na.rm = TRUE) + geom_density() + scale_x_log10() + facet_grid(~from)
-```
-
-
-5. mu
-
-```{r}
-## Mu inference
-mu_estimate <- dds@assays@data$mu
-colnames(mu_estimate) <- htrs.deseq %>% colnames()
-mu_estimate = mu_estimate %>% as.data.frame()
-## Average replicates
-## average_rep(x): 1-column data frame named `x` = row means of the `mu_estimate` columns whose name contains `x`
-average_rep <- function(x) {
-    mu_estimate %>%
-      dplyr::select(dplyr::contains(x)) %>%
-      dplyr::mutate(!!x := rowMeans(.)) %>%
-      dplyr::select(dplyr::all_of(x))
-}
-
-mu_estimate <- sample_names %>% map(.x = ., .f = ~average_rep(.x))  %>% data.frame() 
-mu_inference.vec = mu_estimate %>% 
-                      purrr::flatten() %>% 
-                      BiocGenerics::unlist() %>% 
-                      as.numeric()
-mu.inference = data.frame(inference = mu_inference.vec, 
-                          env = rep(samples$env, each = N_genes),
-                          genotype = rep(samples$genotype, each = N_genes))
-
-## Mu used as input 
-mu_input.vec = mu_params %>% 
-                    select(-gene_id) %>%
-                    purrr::flatten() %>% 
-                    BiocGenerics::unlist() %>% 
-                    as.numeric()
-mu.input = data.frame(input = mu_input.vec, name_gene = mu_params$gene_id )
-
-#Same dimension
-#mu.inference %>% dim()
-#mu.input %>% dim()
-
-## Merged input & inference
-mu.dtf <- BiocGenerics::cbind(mu.input, mu.inference)
-
-## Plot 2 by 2
-ggplot(mu.dtf, aes(x=input, y = inference, col=genotype, shape=env), na.rm = TRUE) + geom_point(alpha=0.4) 
-
-# Reshape and plot distribution
-mu.dtf.reshape = mu.dtf %>% 
-                      reshape2::melt( ., id=c("name_gene", "env", "genotype"), 
-                                      variable.name = "from", 
-                                      value.name = "mu")
-                                    
-ggplot(mu.dtf.reshape, aes(x=mu), na.rm = TRUE) + geom_density() +
-  scale_x_log10() + facet_grid(~from)
-```
-
-6. Number of replicates effect
-
-Below, $\mu_{ij}$ and $\alpha_{i}$ deduced from the PRJNA675209b count table will be reused as input of the simulation.<br>
-By increasing the number of replicates per sample we hope to improve the fit between the $\mu_{ij}$ and $\alpha_{i}$ obtained from real and simulated counts.
-
-<br>
-Please, redo the simulation and the inference of $\mu_{ij}$ and $\alpha_{i}$ by increasing the number of replicates per sample.
-
-```{r message=FALSE, warning=FALSE}
-## See *C-1. Input for simulation* to define :
-# - mu_params
-# - alpha_params
-# - samples (see *C-3. Setup & Run DESeq2* to add information for each sample)
-
-
-
-# number of replicate per simulation
-replicate_number2simul = seq(2, 16, by=4)
-
-#init output
-alpha.inference = data.frame(inference = numeric(), 
-                             additional_params = factor())
-mu.inference = data.frame(inference = numeric(), 
-                          env = factor(),
-                          genotype = factor(),
-                          additional_params = factor(),
-                          name_gene = character())
-
-## Mu used as input 
-mu_input.vec = mu_params %>% 
-                    select(-gene_id) %>%
-                    purrr::flatten() %>% 
-                    BiocGenerics::unlist() %>% 
-                    as.numeric()
-mu.input = data.frame(input = mu_input.vec, 
-                      name_gene = mu_params$gene_id,
-                      env = rep(samples$env, each = N_genes),
-                     genotype = rep(samples$genotype, each = N_genes))
-
-
-alpha.input = data.frame(input = alpha_params$dispersion,  
-                        name_gene = alpha_params$gene_id) 
-
-
-
-
-for (N in replicate_number2simul){
-  
-    samples$n_rep = N ## modify nb of replicate per simulation
-    
-    ## Setup simulation
-    source(file = "htrsim/setup_cntsGenerator.R")
-    setup.simulation <- setup_countGener(sample_names= samples$name, 
-                                         n_rep= samples$n_rep, 
-                                         gene_dispersion = genes$alpha, 
-                                         gene_names = genes$name, 
-                                         mu = mu_params)
-    ## Generate counts from setup
-    source(file = "htrsim/counts_generator.R" )
-    htrs <- generate_counts(setup.simulation)
-    
-    ## reshape htrs for convenience
-    rownames(htrs) <- htrs$name_gene
-    htrs.deseq = htrs %>% dplyr::select(-name_gene) # remove name gene column
-    
-    
-    ## build input dataframe for deseq
-    setup.deseq = samples %>% 
-                  as.data.frame() %>% 
-                  tidyr::uncount(n_rep, .id = "idx") %>% 
-                  tidyr::unite(name, name, idx , sep = "_")  
-    rownames(setup.deseq) <- c() ## drop rownames
-    
-    ## run DESEQ
-    dds = DESeq2::DESeqDataSetFromMatrix(countData = htrs.deseq , 
-                                         colData = setup.deseq , 
-                                         design = ~ genotype + env + genotype:env)
-
-    # Build results
-    dds <- DESeq2::estimateSizeFactors(dds)
-    dds  <- DESeq2::estimateDispersions(dds)
-    dispersion_estimate <- DESeq2::dispersions(dds) 
-
-    
-    ################# Alpha ################
-    tmp = data.frame(inference = dispersion_estimate, 
-                     additional_params=N)
-    
-    alpha.inference = BiocGenerics::rbind(alpha.inference, tmp)
-    
-    ################## Mu  ################
-    mu_estimate <- dds@assays@data$mu  %>% data.frame() 
-    colnames(mu_estimate) <- htrs.deseq %>% colnames()
-    ## average_rep(x): 1-column data frame named `x` = row means of the `mu_estimate` columns whose name contains `x`
-    average_rep <- function(x) {
-          mu_estimate %>%
-            dplyr::select(dplyr::contains(x)) %>%
-            dplyr::mutate(!!x := rowMeans(.)) %>%
-            dplyr::select(dplyr::all_of(x))
-    }
-
-    mu_estimate <- sample_names %>% map(.x = ., .f = ~average_rep(.x))  %>% data.frame() 
-    mu_inference.vec = mu_estimate %>% 
-                      purrr::flatten() %>% 
-                      BiocGenerics::unlist() %>% 
-                      as.numeric()
-    tmp = data.frame(inference = mu_inference.vec, 
-                     env = rep(samples$env, each = N_genes),
-                     genotype = rep(samples$genotype, each = N_genes),
-                     additional_params=N,
-                     name_gene = rep(mu_params$gene_id, length(sample_names)))
-    mu.inference = BiocGenerics::rbind(mu.inference, tmp)
-    
-}
-
-```
-
-
-Now, you can compare  $\alpha_{i}$ from real and simulate counts.<br>
-
-
-```{r message=FALSE, warning=FALSE}
-## bind alpha used as input and alpha inferred
-alpha.dtf <- BiocGenerics::cbind(alpha.input, alpha.inference)
-
-## plot 2 by 2
-## the factor conversion is applied to alpha.dtf, the data frame that is actually plotted and melted below
-alpha.dtf$additional_params <- as.factor(alpha.dtf$additional_params)
-ggplot(alpha.dtf, na.rm = TRUE) + geom_point(aes(x=input, y=inference), alpha=0.02) + scale_y_log10() + scale_x_log10()  + facet_wrap(~additional_params, scales = 'free_y')
-
-
-
-# Reshape and plot distribution
-alpha.dtf.reshape = alpha.dtf %>% 
-                      reshape2::melt( ., id=c("name_gene", "additional_params"), 
-                                      variable.name = "from", 
-                                      value.name = "alpha")
-
-ggplot(alpha.dtf.reshape, aes(x=alpha), na.rm = TRUE) + geom_density() + scale_x_log10() + facet_grid(from~additional_params)
-
-
-```
-
-The number of replicates does not seem to have a significant effect on the inference of $\alpha_{i}$.<br>
-<br>
-Now, compare  $\mu_{ij}$ from real and simulate counts.<br>
-
-```{r message=FALSE, warning=FALSE}
-## Join input & inference
-dt1 <- data.table(mu.inference %>% group_by(additional_params), key =  c("name_gene", "env", "genotype")) 
-dt2 <- data.table(mu.input, key = c("name_gene", "env", "genotype"))
-mu.dtf = dt1[dt2,  nomatch=0, roll=1] 
-
-
-## Plot 2 by 2
-ggplot(mu.dtf, aes(x=input, y = inference), na.rm = TRUE) + geom_point(alpha=0.4) 
-
-## Reshape and plot distribution
-mu.dtf.reshape = mu.dtf %>% 
-                      reshape2::melt( ., id=c("name_gene", "env", "genotype", "additional_params"), 
-                                      variable.name = "from", 
-                                      value.name = "mu")
-                                    
-ggplot(mu.dtf.reshape, aes(x=mu), na.rm = TRUE) + geom_density() +
-  scale_x_log10() + facet_grid(from~additional_params)
-
-```
-
-**Increasing the number of replicates seems to improve the fit between the distribution of  $\mu_{ij}$, used as input, and the distribution obtained by inference.**
diff --git a/src/v1/htrsim/DESCRIPTION b/src/v1/htrsim/DESCRIPTION
deleted file mode 100644
index a5ca29cf36ef2e37f87a7c189fbd1de514eba326..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/DESCRIPTION
+++ /dev/null
@@ -1,31 +0,0 @@
-Package: htrsim
-Title: High-throughput RNA-seq simulation
-Version: 0.1
-Authors@R: person('Duvermy', 'Arnaud', email = 'arnaud.duvermy@ens-lyon.Fr', role = c('aut', 'cre'))
-Description: blabla.
-License: GPL-3
-Encoding: UTF-8
-Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.2
-Suggests: 
-    rmarkdown,
-    knitr
-VignetteBuilder: knitr
-Imports: 
-    stats,
-    DESeq2,
-    magrittr,
-    dplyr,
-    BiocGenerics,
-    tibble,
-    rlang,
-    stringr,
-    purrr,
-    data.table,
-    plyr,
-    reshape2,
-    tidyr,
-    S4Vectors,
-    readr,
-    testthat
-Config/testthat/edition: 3
diff --git a/src/v1/htrsim/NAMESPACE b/src/v1/htrsim/NAMESPACE
deleted file mode 100644
index b6f56d78a61bdd832958ac75acd66437379e5e6c..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/NAMESPACE
+++ /dev/null
@@ -1,26 +0,0 @@
-# Generated by roxygen2: do not edit by hand
-
-export(estim.alpha)
-export(estim.mu)
-export(estim.mu_beta)
-export(generate_counts)
-export(handle_except)
-export(htrsim)
-export(reshape_input2setup)
-export(rn_sim)
-export(run.deseq)
-export(setup_countGener)
-import(BiocGenerics)
-import(DESeq2)
-import(S4Vectors)
-import(data.table)
-import(dplyr)
-import(plyr)
-import(purrr)
-import(readr)
-import(reshape2)
-import(stats)
-import(stringr)
-import(tibble)
-import(tidyr)
-importFrom(rlang,.data)
diff --git a/src/v1/htrsim/R/generate_counts.R b/src/v1/htrsim/R/generate_counts.R
deleted file mode 100644
index 54ddafb463d7e6df95f8665748aad2ee6800dfc7..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/R/generate_counts.R
+++ /dev/null
@@ -1,58 +0,0 @@
-#' Sampling counts from Negative Binomial distribution
-#'
-#' @param mu mu_ij value
-#' @param alpha alpha_i value
-#' @param n_replicates number of replicates
-#' @param ... everything else
-#'
-#' @return vector of length n_replicates
-#' @export
-#'
-#' @examples
-rn_sim <- function(mu, alpha, n_replicates, ...){
-  simul<- rnbinom(mu=mu, size=alpha, n = n_replicates)
-  return(simul)
-}
-
-
-#' Simulate counts and convert to lovely deseq input
-#'
-#' @param setup_dtf Output from setup_cntsGenerator.R
-#' @param export Boolean
-#'
-#' @return dataframe with counts c_ij
-#' @export
-#' @import tidyr
-#' @import reshape2
-#' @import plyr
-#' @import BiocGenerics
-#' @import purrr
-#' @import readr
-#'
-#' @examples
-generate_counts <- function(setup_dtf, export = FALSE){
-  full_name <- NULL
-  name <- NULL
-  variable <- NULL
-
-
-  message("reading and processing counts per genes ...")
-
-  cnt.list = setup_dtf %>%
-                purrr::pmap(rn_sim)
-
-  message("reshaping to dataframe ...")
-
-  cnt.dtf <- cnt.list %>%
-              plyr::ldply(., rbind) %>%
-              BiocGenerics::cbind(setup_dtf %>% select(c("gene_id", "name"))) %>%
-              reshape2::melt(.,id=c('name','gene_id'),value.name = "counts") %>%
-              tidyr::unite(full_name, name, variable) %>%
-              tidyr::drop_na(counts) %>%
-              reshape2::dcast(., gene_id ~ full_name, value.var= "counts")
-
-  if (export == TRUE) readr::write_tsv(cnt.dtf, 'results/2022-03-03/estimate_dispersion.tsv')
-
-  return(cnt.dtf)
-}
-
diff --git a/src/v1/htrsim/R/htrsim_workflow.R b/src/v1/htrsim/R/htrsim_workflow.R
deleted file mode 100644
index 4d618e46b7cc49717986725067a27c3942d0c8f9..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/R/htrsim_workflow.R
+++ /dev/null
@@ -1,38 +0,0 @@
-
-
-#' HTRSIM workflow
-#'
-#' @param countData dataframe with actual count per gene
-#' @param bioDesign dataframe defining bioDesign
-#' @param N_replicates Number of replicate
-#'
-#' @return dataframe with simulated count per gene
-#' @export
-#'
-#' @examples
-htrsim <- function(countData, bioDesign, N_replicates){
-
-
-    # launch standard DESEQ2 analysis
-    dds = run.deseq(tabl_cnts = countData, bioDesign = bioDesign)
-
-    ## Model matrix per samples
-    mm <- model.matrix(~genotype + env + genotype:env, bioDesign)
-
-    ## Input estimation
-    res = estim.mu(dds, mm)
-    mu.input = res$mu
-    alpha.input = estim.alpha(dds)
-
-    # Setup simulation
-    input = reshape_input2setup(mu.dtf = mu.input, alpha.dtf = alpha.input, average_rep = FALSE)
-
-    setup.simulation <- setup_countGener(bioSample_id = input$bioSample_id,
-                                         n_rep = 1,
-                                         alpha = input$alpha,
-                                         gene_id = input$gene_id,
-                                         mu = input$mu)
-    # Simulate counts
-    htrs <- generate_counts(setup.simulation)
-    return(list(countDataSim = htrs, input = input, dds = dds))
-}
diff --git a/src/v1/htrsim/R/input_estimation.R b/src/v1/htrsim/R/input_estimation.R
deleted file mode 100644
index cfbe88b3807bf7cf5958b4f275c636c3572bbe7b..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/R/input_estimation.R
+++ /dev/null
@@ -1,182 +0,0 @@
-
-#' Estimate alpha_i
-#'
-#' @param dds DESEQ2 object
-#' @param export Boolean
-#'
-#' @return alpha_i per gene only for gene expressed c_ij != 0
-#' @export
-#' @import DESeq2
-#' @import stats
-#' @import dplyr
-#' @import tibble
-#' @import BiocGenerics
-#' @import S4Vectors
-#' @import readr
-#' @examples
-#' @importFrom rlang .data
-estim.alpha <- function(dds, export = FALSE){
-  gene_id <- NULL
-  expressed_gene_dispersion <- NULL
-  ###################      Estimate alpha per gene        ########################
-  #N.B: alpha = dispersion per gene
-  #dds  <- DESeq2::estimateDispersions(dds, quiet = F)
-  #dispersion estimation
-  dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-  dispersion_estimate = dds.mcols$dispersion
-  #dispersion_estimate <- DESeq2::dispersions(dds)
-
-  ## Shape and export
-  names(dispersion_estimate) <-  names(dds@rowRanges)
-
-  ## drop NA in dispersion estimate (link to unexpressed gene)
-  ### and convert to lovely dataframe
-  expressed_gene_dispersion <- dispersion_estimate[!is.na(dispersion_estimate)] %>%
-                                    data.frame() %>%
-                                    tibble::rownames_to_column() %>%
-                                    dplyr::rename("alpha" = .data$., gene_id = "rowname")
-
-  if (export == TRUE)   readr::write_tsv(expressed_gene_dispersion, 'results/2022-03-03/estimate_dispersion.tsv')
-
-  return(expressed_gene_dispersion)
-
-}
-
-#' Estimate mu_ij
-#'
-#' @param dds DESEQ2 object
-#' @param export Boolean
-#' @param mm a model matrix
-#'
-#' @return mu_ij only for gene expressed c_ij != 0
-#' @export
-#' @import stats
-#' @import dplyr
-#' @import tibble
-#' @import BiocGenerics
-#' @import S4Vectors
-#' @examples
-#' @importFrom rlang .data
-estim.mu <- function(dds, mm, epsilon = TRUE,  export = FALSE){
-
-
-  gene_id <- NULL
-  nb_sples = BiocGenerics::rownames(dds@colData) %>% length()
-  nb_genes =  BiocGenerics::rownames(dds@assays@data$counts) %>% length()
-  mm_epsi = rep(1, nb_sples)
-  names(mm_epsi) = 1 : nb_sples
-
-
-  dds.mcols = S4Vectors::mcols(dds,use.names=TRUE)
-  ## BETA
-  B0 <- dds.mcols$Intercept
-  B1 <- dds.mcols$genotype_msn2D_vs_wt
-  B2 <- dds.mcols$genotype_msn4D_vs_wt
-  B3 <- dds.mcols$env_kcl_vs_control
-  B4 <- dds.mcols$genotypemsn2D.envkcl
-  B5 <- dds.mcols$genotypemsn4D.envkcl
-
-  #print(max(B0, na.rm=TRUE))
-  #print(max(B1, na.rm=TRUE))
-  #print(max(B2, na.rm=TRUE))
-  #print(max(B3, na.rm=TRUE))
-  #print(max(B4, na.rm=TRUE))
-  #print(max(B5, na.rm = TRUE))
-
-  ## deviance = sigma2 -> estimate epsilon
-  deviance_i.sqrt = sqrt(dds.mcols$deviance)
-
-
-  beta.matrix = cbind(B0, B1,B2,B3,B4,B5) %>% as.matrix()
-  #p_ij = B0_i*mm1_j + B1_i*mm2_j + B3_i*mm3_j + B4_i*mm4_j + B5_i*mm5_j
-  p_ij = beta.matrix %*% t(mm)
-
-
-  if (epsilon == TRUE){
-    message("Epsilon : TRUE")
-    #epsilon_ij ~ N(0, deviance)
-    epsilon_ij = mm[,1] %>% map(., ~rnorm(deviance_i.sqrt, mean = 0, sd = deviance_i.sqrt ))  %>% data.frame() %>% as.matrix()
-    ## log_qij = p_ij + epsilon_ij
-    log_qij <- p_ij + epsilon_ij
-  }
-  else {
-    message("Epsilon : FALSE")
-    log_qij <- p_ij
-  }
-
-  ## s_j
-  s_j = dds$sizeFactor
-  mu_ij = s_j * 2^log_qij
-
-
-  #################     Estimate mu        #########################
-  mu_estimate <- dds@assays@data$mu
-  #dds@assays@data$mu %>% dim()
-  #mu_estimate %>% dim()
-  rownames(mu_ij) = BiocGenerics::rownames(dds@assays@data$counts)
-  ## drop NA in dispersion estimate (link to unexpressed gene)
-  ### and convert to lovely dataframe
-  mu_gene_express = mu_ij %>%
-    stats::na.omit() %>%
-    data.frame()
-  colnames(mu_gene_express) <- rownames(dds@colData)
-  mu_gene_express <- mu_gene_express %>%
-    tibble::rownames_to_column(var = "gene_id")
-
-
-
-
-  if (export == TRUE)  readr::write_tsv(mu_gene_express, 'results/2022-03-03/estimate_mu.tsv')
-
-
-  res = list(mu = mu_gene_express, beta.matrix = beta.matrix, deviance.sqrt = deviance_i.sqrt, dds.mcols = dds.mcols)
-  return(res)
-
-
-}
-
-
-#.xMm.foo <- function(b, m) return(b * m)
-#.epsilon.foo <- function(x) return(rnorm(mean = 0 ,sd = x, n = 1 ))
-#.epsilon_i <- function(dev_i) return(dev_i %>% map(., ~.epsilon.foo(.))%>% unlist())
-#.getMu_i <- function(s, qi) return(2^(qi))
-
-
-
-#' Estimate mu_ij
-#'
-#' @param dds DESEQ2 object
-#' @param export Boolean
-#'
-#' @return mu_ij only for gene expressed c_ij != 0
-#' @export
-#' @import stats
-#' @import dplyr
-#' @import tibble
-#' @import BiocGenerics
-#' @examples
-#' @importFrom rlang .data
-estim.mu_beta <- function(dds, export = FALSE){
-  gene_id <- NULL
-  #################     Estimate mu        #########################
-  mu_estimate <- dds@assays@data$mu
-  #dds@assays@data$mu %>% dim()
-  #mu_estimate %>% dim()
-  rownames(mu_estimate) = BiocGenerics::rownames(dds@assays@data$counts)
-  ## drop NA in dispersion estimate (link to unexpressed gene)
-  ### and convert to lovely dataframe
-  mu_gene_express = mu_estimate %>%
-                      stats::na.omit() %>%
-                      data.frame()
-
-  colnames(mu_gene_express) <- rownames(dds@colData)
-  mu_gene_express <- mu_gene_express %>%
-                          tibble::rownames_to_column(var = "gene_id")
-
-
-  if (export == TRUE)  write_tsv(mu_gene_express, 'results/2022-03-03/estimate_mu.tsv')
-
-  return(mu_gene_express)
-
-
-}
diff --git a/src/v1/htrsim/R/launch_deseq.R b/src/v1/htrsim/R/launch_deseq.R
deleted file mode 100644
index 1f5f266448f9001ea9012db636b598c832b0fd06..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/R/launch_deseq.R
+++ /dev/null
@@ -1,19 +0,0 @@
-
-#' Title
-#'
-#' @param tabl_cnts table containing counts per genes & samples
-#' @param bioDesign table describing bioDesgin of input
-#' @import DESeq2
-#' @return DESEQ2 object
-#' @export
-#'
-#' @examples
-run.deseq <- function(tabl_cnts, bioDesign ){
-
-  dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = ~ genotype + env + genotype:env )
-
-
-  dds <- DESeq2::DESeq(dds)
-  return(dds)
-
-  }
diff --git a/src/v1/htrsim/R/setup_cntsGenerator.R b/src/v1/htrsim/R/setup_cntsGenerator.R
deleted file mode 100644
index 7e18062410e5be2d05a487ffcf2791023249ac06..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/R/setup_cntsGenerator.R
+++ /dev/null
@@ -1,189 +0,0 @@
-#' Reshape input before building setup
-#'
-#' @param mu.dtf dataframe of mu_ij
-#' @param alpha.dtf dataframe of alpha_i
-#' @param average_rep bool
-#'
-#' @return
-#' @import purrr
-#' @import stringr
-#' @import dplyr
-#' @import BiocGenerics
-#' @export
-#'
-#' @examples
-reshape_input2setup <- function(mu.dtf, alpha.dtf,  average_rep = FALSE){
-  gene_id <- NULL
-
-  if(average_rep == TRUE){
-      ## Defining sample names
-      bioSample_id <- mu.dtf %>%
-        dplyr::select(-gene_id) %>%
-        BiocGenerics::colnames() %>%
-        purrr::map(., ~stringr::str_split(.,"_")[[1]][1:2] %>%
-                     BiocGenerics::paste(., collapse='_') )  %>%
-        BiocGenerics::unlist() %>% BiocGenerics::unique()
-      ############### Mu is equal for biosample replicate #############
-      ### case 1: choose 1  replicate
-      #mu_params <- mu_params %>% dplyr::select(., contains("rep1"))
-      ## rename mu_params colnames to ensure corresponding with sample_names
-      #colnames(mu_params) <- sample_names
-      ### case 2: average replicates
-      average_rep <- function(x, dtf) {
-        varname <- x
-        dtf %>%
-          dplyr::select(.,contains(x)) %>%
-          dplyr::mutate(!!varname := rowMeans(.)) %>%
-          dplyr::select(varname)
-      }
-      mu_ij <- bioSample_id %>% purrr::map(.x = ., .f = ~average_rep(.x, mu.dtf))  %>% data.frame()
-      mu_ij$gene_id <- alpha.dtf$gene_id
-  }
-
-  else {
-
-  bioSample_id <-  mu.dtf %>%
-    dplyr::select(-gene_id) %>%
-    BiocGenerics::colnames()
-
-  mu_ij = mu.dtf
-
-  }
-
-  return(list(alpha = alpha.dtf , mu = mu_ij, bioSample_id = bioSample_id, gene_id = alpha.dtf$gene_id))
-}
-
-
-
-#' Handle exception
-#'
-#' @param bioSample vector of id for each bioSample
-#' @param n_rep number of replicates
-#' @param gene_id vector of id for each gene
-#' @param alpha vector of alpha_i
-#'
-#' @return
-#' @export
-#'
-#' @examples
-handle_except <- function(bioSample, n_rep , gene_id , alpha){
-
-
-
-  if(is.null(bioSample)){
-    message("BioSample ID: NULL" )
-    bioSample = "my_first_lib"
-    message("Assuming only one library will be setup. A library named 'my_first_lib'")
-  }
-  else message("BioSample ID: ", "OK")
-
-  if(is.null(n_rep)){
-    message("N_rep: ", "NULL")
-    n_rep = 1
-    message("Number of replicates unspecified\nAssuming n_rep = 1")
-  }
-  else message("N_rep: ", "OK")
-
-  if(is.numeric(n_rep) && length(n_rep) == 1){
-    message(n_rep, " replicates per samples")
-    n_rep = rep(n_rep, length(bioSample))
-  }
-
-  if(is.null(gene_id)){
-    message("Gene_id: ", "NULL")
-  }
-  else message("Gene_id: ", "OK")
-
-  if(is.null(alpha)){
-    message("Alpha: ", "NULL")
-  }
-  else message("Alpha: ", "OK")
-
-
-  if(!is.null(n_rep) && length(bioSample) != length(n_rep)) stop("ERROR: unconsistent length between samples_names and n_rep")
-
-  if(!is.null(gene_id)  && !is.null(alpha)) {
-      if (is.data.frame(alpha)) if (length(gene_id) != dim(alpha)[1]) stop("ERROR: unconsistent length between gene_id and alpha")
-      if (is.vector(alpha)) if (length(gene_id) != length(alpha)) stop("ERROR: unconsistent length between gene_id and alpha")
-      else n_genes = length(alpha)
-  }
-  if( is.null(gene_id) && is.null(alpha) ){
-    message("Assuming n_genes = 3")
-    n_genes = 3
-    gene_id = paste0('gene', 1:n_genes)
-  }
-
-  if(is.null(gene_id)  && !is.null(alpha)) {
-    message("Built gene_id")
-    n_genes = length(alpha)
-    gene_id = paste0('gene', 1:n_genes)
-  }
-
-  if(!is.null(gene_id)  && is.null(alpha)) {
-    message("Alpha randomly defined from uniform law between 2 and 100")
-    n_genes = length(gene_id)
-    alpha = runif(0.2,120, n = n_genes) ## randomly defined between 2 and 100
-  }
-
-  if ( !exists("n_genes")) n_genes = length(gene_id)
-
-  my_list = list(bioSample = bioSample, rep = n_rep, n_g = n_genes,   alpha = alpha, gene_id = gene_id)
-  return(my_list)
-}
-
-
-
-
-#' Build setup for counts generator
-#'
-#' @param bioSample_id vector of id for each bioSample
-#' @param n_rep number of replicates
-#' @param gene_id vector of id for each gene
-#' @param alpha vector of alpha_i
-#' @param mu dataframe of mu_ij
-#' @import purrr
-#' @import data.table
-#' @return
-#' @export
-#'
-#' @examples
-setup_countGener <- function(bioSample_id = NULL, n_rep = NULL , gene_id = NULL , alpha = NULL, mu = NULL ){
-
-  message("\nSetup counts generator ...")
-  ######### HANDLE EXCEPTION #######
-  setup = handle_except(bioSample_id, n_rep , gene_id , alpha)
-  ######## HANDLE TYPE MU ##########
-  if(is.null(mu)) mu = .mu_generator # default function to generate mu
-
-  if(is.function(mu)) {  #mu = function
-    mu.set = mu(setup$n_g)
-    ######## BUILD AN INPUT DTF FOR count_generator ############
-    nBinom_params <- purrr::map2(.x= setup$bioSample, .y = setup$rep,
-                                 ~(list(name=.x, #sample_name
-                                        n_replicates = .y, # random int between 1 & max_N_replicates
-                                        gene_id = setup$gene_id,  # gene_id
-                                        mu = mu.set ,  #mu(ij)
-                                        alpha = setup$alpha))) %>%  # alpha(i)
-
-      data.table::rbindlist(.) %>% as.data.frame() ## convert to lovely dtf
-  }
-
-
-  if(is.data.frame(mu)) {  # mu = data.frame
-    mu.dtf = mu
-    ######## BUILD AN INPUT DTF FOR count_generator ############
-    nBinom_params <- purrr::map2(.x= setup$bioSample, .y = setup$rep,
-                                 ~(list(name=.x, #sample_name
-                                        n_replicates = .y, # number replicates
-                                        gene_id = setup$alpha$gene_id,  # gene_name
-                                        mu = mu.dtf %>% dplyr::select(all_of(.x)) %>% unlist() ,  #mu(ij)
-                                        alpha = setup$alpha$alpha))) %>%  # alpha(i)
-      data.table::rbindlist(.) %>% as.data.frame() ## convert to lovely dtf
-  }
-
-  return(nBinom_params)
-
-}
-
-
-.mu_generator <- function(x) return(runif(100,1000, n = x ))
diff --git a/src/v1/htrsim/devtools_history.R b/src/v1/htrsim/devtools_history.R
deleted file mode 100644
index 38b57d876b3f7dc9a71641af916eecdee46826d9..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/devtools_history.R
+++ /dev/null
@@ -1,18 +0,0 @@
-usethis::use_build_ignore("devtools_history.R")
-usethis::use_package('DESeq2')
-usethis::use_package('magrittr')
-usethis::use_package('stats')
-usethis::use_package('dplyr')
-usethis::use_package("BiocGenerics")
-usethis::use_package("tibble")
-usethis::use_package("stringr")
-usethis::use_package("purrr")
-usethis::use_package("data.table")
-usethis::use_package("plyr")
-usethis::use_package("reshape2")
-usethis::use_package("tidyr")
-usethis::use_package("S4Vectors")
-usethis::use_package("readr")
-usethis::use_package("testthat")
-usethis::use_test("name")
-devtools::load_all()
diff --git a/src/v1/htrsim/man/estim.alpha.Rd b/src/v1/htrsim/man/estim.alpha.Rd
deleted file mode 100644
index 24d1213e98bdced644c27e3d77c935cd6c356474..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/man/estim.alpha.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/input_estimation.R
-\name{estim.alpha}
-\alias{estim.alpha}
-\title{Estimate alpha_i}
-\usage{
-estim.alpha(dds, export = FALSE)
-}
-\arguments{
-\item{dds}{DESEQ2 object}
-
-\item{export}{Boolean}
-}
-\value{
-alpha_i per gene only for gene expressed c_ij != 0
-}
-\description{
-Estimate alpha_i
-}
diff --git a/src/v1/htrsim/man/estim.mu.Rd b/src/v1/htrsim/man/estim.mu.Rd
deleted file mode 100644
index 0a203bd2cb607a9f09a93496c7480dea6227ecba..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/man/estim.mu.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/input_estimation.R
-\name{estim.mu}
-\alias{estim.mu}
-\title{Estimate mu_ij}
-\usage{
-estim.mu(dds, mm, epsilon = TRUE, export = FALSE)
-}
-\arguments{
-\item{dds}{DESEQ2 object}
-
-\item{mm}{a model matrix}
-
-\item{export}{Boolean}
-}
-\value{
-mu_ij only for gene expressed c_ij != 0
-}
-\description{
-Estimate mu_ij
-}
diff --git a/src/v1/htrsim/man/generate_counts.Rd b/src/v1/htrsim/man/generate_counts.Rd
deleted file mode 100644
index 89cd640755fbfa945b50066db6b88702087777d8..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/man/generate_counts.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/generate_counts.R
-\name{generate_counts}
-\alias{generate_counts}
-\title{Simulate counts and convert to lovely deseq input}
-\usage{
-generate_counts(setup_dtf, export = FALSE)
-}
-\arguments{
-\item{setup_dtf}{Output from setup_cntsGenerator.R}
-
-\item{export}{Boolean}
-}
-\value{
-dataframe with counts c_ij
-}
-\description{
-Simulate counts and convert to lovely deseq input
-}
diff --git a/src/v1/htrsim/man/handle_except.Rd b/src/v1/htrsim/man/handle_except.Rd
deleted file mode 100644
index 083d5be4118f44d3f5afd1554d6bf65e72b20065..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/man/handle_except.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/setup_cntsGenerator.R
-\name{handle_except}
-\alias{handle_except}
-\title{Handle exception}
-\usage{
-handle_except(bioSample, n_rep, gene_id, alpha)
-}
-\arguments{
-\item{bioSample}{vector of id for each bioSample}
-
-\item{n_rep}{number of replicates}
-
-\item{gene_id}{vector of id for each gene}
-
-\item{alpha}{vector of alpha_i}
-}
-\value{
-
-}
-\description{
-Handle exception
-}
diff --git a/src/v1/htrsim/man/htrsim.Rd b/src/v1/htrsim/man/htrsim.Rd
deleted file mode 100644
index ae549a9582364950f2cac17380ac8183432a58c6..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/man/htrsim.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/htrsim_workflow.R
-\name{htrsim}
-\alias{htrsim}
-\title{HTRSIM workflow}
-\usage{
-htrsim(countData, bioDesign, N_replicates)
-}
-\arguments{
-\item{countData}{dataframe with actual count per gene}
-
-\item{bioDesign}{dataframe defining bioDesign}
-
-\item{N_replicates}{Number of replicate}
-}
-\value{
-dataframe with simulated count per gene
-}
-\description{
-HTRSIM workflow
-}
diff --git a/src/v1/htrsim/man/reshape_input2setup.Rd b/src/v1/htrsim/man/reshape_input2setup.Rd
deleted file mode 100644
index 27551e3cc004ed15674b38c1a21b13a6a1e7eeed..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/man/reshape_input2setup.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/setup_cntsGenerator.R
-\name{reshape_input2setup}
-\alias{reshape_input2setup}
-\title{Reshape input before building setup}
-\usage{
-reshape_input2setup(mu.dtf, alpha.dtf, average_rep = FALSE)
-}
-\arguments{
-\item{mu.dtf}{dataframe of mu_ij}
-
-\item{alpha.dtf}{dataframe of alpha_i}
-
-\item{average_rep}{bool}
-}
-\value{
-
-}
-\description{
-Reshape input before building setup
-}
diff --git a/src/v1/htrsim/man/rn_sim.Rd b/src/v1/htrsim/man/rn_sim.Rd
deleted file mode 100644
index 8915b7fe5b830ac9705043d736ed48f89eca22e1..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/man/rn_sim.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/generate_counts.R
-\name{rn_sim}
-\alias{rn_sim}
-\title{Sampling counts from Negative Binomial distribution}
-\usage{
-rn_sim(mu, alpha, n_replicates, ...)
-}
-\arguments{
-\item{mu}{mu_ij value}
-
-\item{alpha}{alpha_i value}
-
-\item{n_replicates}{number of replicates}
-
-\item{...}{everything else}
-}
-\value{
-vector of length n_replicates
-}
-\description{
-Sampling counts from Negative Binomial distribution
-}
diff --git a/src/v1/htrsim/man/run.deseq.Rd b/src/v1/htrsim/man/run.deseq.Rd
deleted file mode 100644
index dc4d447705e263948802c06c04d939912aa6a96b..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/man/run.deseq.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/launch_deseq.R
-\name{run.deseq}
-\alias{run.deseq}
-\title{Title}
-\usage{
-run.deseq(tabl_cnts, bioDesign)
-}
-\arguments{
-\item{tabl_cnts}{table containing counts per genes & samples}
-
-\item{bioDesign}{table describing bioDesgin of input}
-}
-\value{
-DESEQ2 object
-}
-\description{
-Title
-}
diff --git a/src/v1/htrsim/man/setup_countGener.Rd b/src/v1/htrsim/man/setup_countGener.Rd
deleted file mode 100644
index 4d6b277717a0b8d601ffcdb982cbded63a6f74fd..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/man/setup_countGener.Rd
+++ /dev/null
@@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/setup_cntsGenerator.R
-\name{setup_countGener}
-\alias{setup_countGener}
-\title{Build setup for counts generator}
-\usage{
-setup_countGener(
-  bioSample_id = NULL,
-  n_rep = NULL,
-  gene_id = NULL,
-  alpha = NULL,
-  mu = NULL
-)
-}
-\arguments{
-\item{bioSample_id}{vector of id for each bioSample}
-
-\item{n_rep}{number of replicates}
-
-\item{gene_id}{vector of id for each gene}
-
-\item{alpha}{vector of alpha_i}
-
-\item{mu}{dataframe of mu_ij}
-}
-\value{
-
-}
-\description{
-Build setup for counts generator
-}
diff --git a/src/v1/htrsim/tests/testthat.R b/src/v1/htrsim/tests/testthat.R
deleted file mode 100644
index 123067aff30524ce4dcbeab96bcf3843ca56a5af..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/tests/testthat.R
+++ /dev/null
@@ -1,4 +0,0 @@
-library(testthat)
-library(htrsim)
-
-test_check("htrsim")
diff --git a/src/v1/htrsim/tests/testthat/test-name.R b/src/v1/htrsim/tests/testthat/test-name.R
deleted file mode 100644
index a2c9ceeffc0b273837bd0340d39a4a16ab612f2b..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/tests/testthat/test-name.R
+++ /dev/null
@@ -1,29 +0,0 @@
-
-
-
-
-## manually created data expected
-dat1 <- list(names= c("name", "n_replicates", "gene_id", "mu", "alpha"), row.names = c(1,2,3), class = "data.frame" )
-dat2 <- list(names= c("name", "n_replicates", "gene_id", "mu", "alpha"), row.names = 1:200 , class = "data.frame" )
-dat3 <- list(names= c("name", "n_replicates", "gene_id", "mu", "alpha"), row.names = 1 , class = "data.frame" )
-dat4 <- dat1
-dat5 <- dat1
-dat6 <- dat3
-dat7 <- dat3
-dat8 <- 0.4
-dat9 <- dat1
-
-test_that("Setup counts generator", {
-  expect_equal(attributes(setup_countGener()), dat1 )
-  expect_equal(attributes(setup_countGener(gene_id = 1:200)), dat2 )
-  expect_equal(attributes(setup_countGener(gene_id = 0)), dat3 )
-  expect_equal(attributes(setup_countGener(n_rep = 0)), dat4 )
-  expect_equal(attributes(setup_countGener(bioSample_id = "lib1")), dat5 )
-  expect_equal(attributes(setup_countGener(bioSample_id = "lib1", gene_id = 0)), dat6 )
-  expect_equal(attributes(setup_countGener(bioSample_id = "lib1", gene_id = 0, alpha = 0.4)), dat7 )
-  expect_equal(setup_countGener(bioSample_id = "lib1", gene_id = 0, alpha = 0.4)
-                                  %>% select(alpha)
-                                      %>% as.numeric(), expected = dat8)
-  expect_equal(attributes(setup_countGener(bioSample_id = "lib1", alpha = c(0.4, 0.2, 0.3))), dat9)
-})
-
diff --git a/src/v1/htrsim/vignettes/comment-utiliser-mon-package.Rmd b/src/v1/htrsim/vignettes/comment-utiliser-mon-package.Rmd
deleted file mode 100644
index ba6fe984ce270bf42c77d67c357f576cd51628ca..0000000000000000000000000000000000000000
--- a/src/v1/htrsim/vignettes/comment-utiliser-mon-package.Rmd
+++ /dev/null
@@ -1,149 +0,0 @@
----
-title: "HTRSIM tutorial"
-output: rmarkdown::html_vignette
-vignette: >
-  %\VignetteIndexEntry{HTRSIM}
-  %\VignetteEngine{knitr::rmarkdown}
-  %\VignetteEncoding{UTF-8}
----
-
-<style>
-body {
-text-align: justify}
-</style>
-
-# A. Introduction
-
-
-A differential expression analysis uses a generalized linear model of the form:
-
-$$
-K_{ij} \sim {\sf NB}(\mu_{ij} ; \sigma_i)
-$$
-$$
-\mu_{ij} = s_jq_{ij}
-$$
-$$
-log_2(q_{ij}) = x_j*\beta_i
-$$
-where counts $K_{ij}$ for gene i, sample j are modeled using a Negative Binomial distribution with fitted mean $\mu_{ij}$ and a gene-specific dispersion parameter $\alpha_i$. 
-The fitted mean is composed of a sample-specific size factor $s_j$ and a parameter qij proportional to the expected true concentration of fragments for sample j. 
-The coefficients $\beta_i$ give the log2 fold changes for gene i for each column of the model matrix X. The sample-specific size factors can be replaced by gene-specific normalization factors for each sample using normalizationFactors.
-
-Basically, genes expression is understanding as a shake between a genotype effect, an environment effect and an interaction between G&E. The part of each effect can be modelized by a coefficient $\beta$. 
-Considering genes expression as a phenotype we can write:
-$$Phenotype = \beta_{G} * Genotype + \beta_{E}*Environment +  \beta_{G*E} * Genotype.Environment$$
-
-From a generalized  linear model we can try to quantify each effect : $\beta_{G}$, $\beta_{E}$ and $\beta_{G*E}$.
-Quantifying such coefficients will allow to evaluate the participation of each factors (Genotype, Environment and interaction Genotype/Environment) for each gene. Then, differencies between genes or conditions could be assessed.
-
-According to the DESEQ2 GLM, we can write: 
-$$
-log_2(\mu_{ij]}) = \beta_{G}*G + \beta_{E}*E + \beta_{G*E}*G.E + \beta_{0}
-$$
-
-
-# B. HTRSIM getting started
-
-  <u>a. Required</u>
-
-```{r setup, results='hide', message=FALSE, warning=FALSE}
-library(htrsim)
-library(tidyverse)
-```
-
- <u>b. Workflow</u> 
-
-
-```{r echo=FALSE, out.width='50%'}
-#knitr::include_graphics('img/schema_loop.jpg')
-```
-
-  <u>c. RNA-seq pipeline</u> 
-
-You can used your favorite pipeline to obtain table counts from real data.
-If you don't have any idea of how to obtain such table counts rendez-vous [at](https://gitbio.ens-lyon.fr/aduvermy/rna-seq_public_library_investigations)
-
-
-  <u> d. BioProject PRJNA675209b as input</u>
-
-To easily test *HTRSIM* we produced an usual table counts from BioProject PRJNA675209b.
-Take the time to clean up your table counts before using it as input of htrsim.
-
-```{r}
-fn = system.file("extdata/", "public_tablCnts.tsv", package = "htrsim")
-
-tabl_cnts <- read.table(file = fn, header = TRUE)
-rownames(tabl_cnts) <- tabl_cnts$gene_id
-tabl_cnts <- tabl_cnts %>% select(-gene_id)##suppr colonne GeneID
-tabl_cnts <- tabl_cnts %>% select(-gene_name) ##suppr colonne GeneName
-```
-
-```{r}
-fn = system.file("extdata/", "public_bioDesign.csv", package = "htrsim")
-
-bioDesign <- read.table(file = fn, header = TRUE, sep = ';')
-```
-
- <u> e. Launch HTRSIM</u>
-   
-```{r message=FALSE, warning=FALSE}
-
-#dds = run.deseq(tabl_cnts = tabl_cnts, bioDesign = bioDesign)
-
-#htrsim.results = htrsim(countData = tabl_cnts, bioDesign= bioDesign, N_replicates=1)
-
-
-
-```
-<u> f. Evaluate simulation </u>
-
-```{r}
-build_design2Deseq = function(htrs.tablCnts){
-  #samples = samples %>% data.frame()
-  sample = colnames(htrs.tablCnts %>% select(-gene_id))
-  #design.deseq <- list(samples = colnames(htrs.tablCnts))
-  genotype <- map(sample, ~str_split(., pattern = "_")[[1]][1]) %>% unlist()
-  env <-  map(sample, ~str_split(., pattern = "_")[[1]][2]) %>% unlist()
-  new_design = cbind(sample,genotype, env) %>% data.frame()
-  
-  return (new_design)
-}
-
-designSimu <- build_design2Deseq(htrsim.results$countDataSim)
-
-row.names(htrsim.results$countDataSim) = htrsim.results$countDataSim$gene_id
-htrsim.results$countDataSim = htrsim.results$countDataSim %>% select(-gene_id)
-
-htrsim.results$countDataSim %>% dim()
-designSimu %>% dim()
-#bioDesign$
-designSimu$env
-colnames(htrsim.results$countDataSim)
-model.matrix(~genotype + env + genotype:env, designSimu)
-simu_deseq = run.deseq(tabl_cnts = htrsim.results$countDataSim, designSimu)
-
-designSimu$genotype <- factor(x = bioDesign$genotype,levels = c('wt','msn2D', 'msn4D'))
-designSimu$env <- factor(x = bioDesign$env,levels = c('control', 'kcl'))
-
-  ## DESEQ standard analysis
-
-htrs.tablCnts <- htrsim.results$countDataSim %>% replace(is.na(.), 0)
-
-max(htrs.tablCnts)
-dds = DESeq2::DESeqDataSetFromMatrix( countData = round(htrs.tablCnts/10), colData = designSimu , design = ~ genotype + env + genotype:env)
-dds <- DESeq2::DESeq(dds)
-
-
-```
-
-
- <u> f. Visualize results</u>
-   
-```{r message=FALSE, warning=FALSE}
-
-devtools::load_all()
-
-
-
-```
diff --git a/src/v2/HTRSIM/.Rbuildignore b/src/v2/HTRSIM/.Rbuildignore
deleted file mode 100644
index 86257dd97eb4cbe110ad508b8de684421ee60f3c..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/.Rbuildignore
+++ /dev/null
@@ -1,3 +0,0 @@
-^HTRSIM\.Rproj$
-^\.Rproj\.user$
-^devtools_history\.R$
diff --git a/src/v2/HTRSIM/DESCRIPTION b/src/v2/HTRSIM/DESCRIPTION
deleted file mode 100644
index 5c93a758b217026790e10370a079ad22b0dae2f9..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/DESCRIPTION
+++ /dev/null
@@ -1,25 +0,0 @@
-Package: HTRSIM
-Title: RNAseq counts simulation
-Version: 0.0.0.9000
-Authors@R: 
-    person("Arnaud", "Duvermy", , "first.last@example.com", role = c("aut", "cre"),
-           comment = c(ORCID = "YOUR-ORCID-ID"))
-Description: RNAseq counts simulation.
-License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a
-    license
-Encoding: UTF-8
-Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.1.2
-Suggests: 
-    testthat (>= 3.0.0)
-Config/testthat/edition: 3
-Depends: 
-    tidyverse
-Imports: 
-    DESeq2,
-    furrr,
-    glmglrt,
-    MASS,
-    S4Vectors,
-    stats,
-    stringr
diff --git a/src/v2/HTRSIM/HTRSIM.Rproj b/src/v2/HTRSIM/HTRSIM.Rproj
deleted file mode 100644
index 69fafd4b6dddad27500cfc67efb9fb16e86a96bd..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/HTRSIM.Rproj
+++ /dev/null
@@ -1,22 +0,0 @@
-Version: 1.0
-
-RestoreWorkspace: No
-SaveWorkspace: No
-AlwaysSaveHistory: Default
-
-EnableCodeIndexing: Yes
-UseSpacesForTab: Yes
-NumSpacesForTab: 2
-Encoding: UTF-8
-
-RnwWeave: Sweave
-LaTeX: pdfLaTeX
-
-AutoAppendNewline: Yes
-StripTrailingWhitespace: Yes
-LineEndingConversion: Posix
-
-BuildType: Package
-PackageUseDevtools: Yes
-PackageInstallArgs: --no-multiarch --with-keep.source
-PackageRoxygenize: rd,collate,namespace
diff --git a/src/v2/HTRSIM/NAMESPACE b/src/v2/HTRSIM/NAMESPACE
deleted file mode 100644
index 2bf9a20a5078c45bb5487acd717da9ae5a25d839..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/NAMESPACE
+++ /dev/null
@@ -1,24 +0,0 @@
-# Generated by roxygen2: do not edit by hand
-
-export(ddsExtraction.viz)
-export(extractDistributionFromDDS)
-export(getBetaforSimulation)
-export(getDfComparison)
-export(getGenesDispersionsForSimulation)
-export(buildDesign2simulate)
-export(getK_ij)
-export(getLog_qij)
-export(getMu_ij)
-export(rnorm.distrib.beta)
-export(run.deseq)
-export(run.glm)
-export(reshapeGlmRes)
-import(DESeq2)
-import(dplyr)
-import(ggplot2)
-import(reshape2)
-import(stats)
-import(stringr)
-import(tidyverse)
-import(MASS)
-
diff --git a/src/v2/HTRSIM/R/countsGenerator.R b/src/v2/HTRSIM/R/countsGenerator.R
deleted file mode 100644
index 3af9bf322fa2b1ed25bc362bb51e1482909eba32..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/R/countsGenerator.R
+++ /dev/null
@@ -1,168 +0,0 @@
-#' Get beta_ij
-#'
-#' @param n_genes  an integer
-#' @param n_genotype A int.
-#' @param n_environment A int.
-#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE.
-#' @param model_matrix an output of stat::model.matrix()
-#' @param theta a float to control the noise introduce around betaG, betaE, betaGxE
-#' @import stringr
-#' @import base
-#' @import dplyr
-#' @import Rfast
-#' @import MASS
-#' @import purrr
-#' @return a dataframe with the gene dispersion for each samples
-#' @export
-#'
-#' @examples
-getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, n_environments = 2, beta.dtf, theta = 10 ){
-
-  x = beta.dtf %>% as.matrix()
-  fit.mvrnorm <- Rfast::mvnorm.mle(x)
-  x <- NULL
-  beta.matrix.tmp <- MASS::mvrnorm(n = n_genes,
-                                 mu = fit.mvrnorm$mu,
-                                 Sigma = fit.mvrnorm$sigma )
-
-  replicate_beta <- function(beta_vec, n, theta){
-    beta_vec.rep = rep(beta_vec, n)
-    beta_vec.rep + rnorm(length(beta_vec.rep), mean = 0, sd = abs(beta_vec/theta))
-  }
-
-  beta0 = beta.matrix.tmp[,1]
-  beta.matrix.tmp = purrr::map2(.x = c(2,3,4), .y =  c(n_genotypes-1,
-                                              n_environments-1,
-                                              (n_genotypes-1)*(n_environments-1)),
-                            ~ replicate_beta(beta.matrix.tmp[,.x], .y, theta) %>% matrix(ncol = .y)) %>%
-                    do.call(cbind, .)
-
-
-
-  beta.matrix = cbind(beta0, beta.matrix.tmp)
-  betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "")
-  betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "")
-  betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
-  matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
-
-  colnames(beta.matrix) = matrix.colnames
-  rownames(beta.matrix) = base::paste("gene", 1:(n_genes), sep = "")
-
-  return(beta.matrix)
-}
-
-
-
-#' Get log(q_ij)
-#'
-#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE.
-#' @param model_matrix an output of stat::model.matrix()
-#' @import stringr
-#' @import base
-#' @import dplyr
-#' @import
-#' @return a dataframe with the gene dispersion for each samples
-#' @export
-#'
-#' @examples
-getLog_qij <- function( beta.matrix.input , model_matrix){
-
-    log_qij = beta.matrix.input %*% t(model_matrix) ## j samples (n_genotypes * n_environments), i genes
-
-    return(log_qij )
-
-}
-
-
-
-#' Get mu_ij
-#'
-#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE.
-#' @param model_matrix an output of stat::model.matrix()
-#' @import stringr
-#' @import base
-#' @import dplyr
-#' @import
-#' @return a dataframe with the gene dispersion for each samples
-#' @export
-#'
-#' @examples
-getMu_ij <- function( log_qij.matrix, size_factor ){
-
-  mu_ij = size_factor * 2^log_qij.matrix ## size factor * log(qij)
-
-  return(mu_ij )
-
-}
-
-#' Get genes dispersion
-#'
-#' @param n_genes  an integer
-#' @param n_genotype A int.
-#' @param n_environment A int.
-#' @param dispersion.vec A vector of observed dispersion.
-#' @param dispUniform_btweenCondition logical
-#' @param model_matrix an output of stat::model.matrix()
-#' @import stringr
-#' @import base
-#' @import dplyr
-#' @import
-#' @return a dataframe with the gene dispersion for each samples
-#' @export
-#'
-#' @examples
-getGenesDispersionsForSimulation <- function( n_genes = 100, n_genotypes, n_environments, dispersion.vec ,dispUniform_btweenCondition = T, model_matrix ){
-
-
-   if (dispUniform_btweenCondition == T ) {
-          gene_dispersion.dtf = base::sample(  dds.extraction$gene_dispersion, replace = T, size = n_genes) %>% base::data.frame()
-          n_rep =  length(rownames(model_matrix))
-          gene_dispersion.dtf = gene_dispersion.dtf[,base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)]
-          rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
-          colnames(gene_dispersion.dtf) = rownames(model_matrix)
-
-  }
-
-  else {
-
-          replication_table = rownames(model_matrix) %>% stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table()
-          gene_dispersion.dtf = replication_table %>% purrr::map(., ~sample(  dispersion.vec, replace = T, size = n_genes) ) %>% data.frame()
-          gene_dispersion.dtf = gene_dispersion.dtf[,rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())]
-          colnames(gene_dispersion.dtf) = rownames(model_matrix)
-          rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
-
-  }
-
-  return(gene_dispersion.dtf %>% as.matrix)
-
-}
-
-
-
-
-
-#' Get K_ij : gene counts
-#'
-#' @param log_qij.matrix  a matrix of log_qij
-#' @param gene_disp.matrix a matrix of gene dispersion
-#' @import stats
-#' @return a dataframe with the gene dispersion for each samples
-#' @export
-#'
-#' @examples
-getK_ij <- function( mu_ij.matrix ,  gene_disp.matrix ){
-
-  n_genes =  nrow(mu_ij.matrix)
-  n_sples = ncol(mu_ij.matrix)
-  alpha_gene = 1/gene_disp.matrix
-  k_ij = stats::rnbinom(length(mu_ij.matrix), size = alpha_gene  , mu = mu_ij.matrix) %>% matrix(. , nrow = n_genes, ncol = n_sples )
-  k_ij[is.na(k_ij)] = 0
-
-  colnames(k_ij) = colnames(mu_ij.matrix)
-  rownames(k_ij) = rownames(mu_ij.matrix)
-
-  return(k_ij)
-
-}
-
-
diff --git a/src/v2/HTRSIM/R/designSimulationBuilder.R b/src/v2/HTRSIM/R/designSimulationBuilder.R
deleted file mode 100644
index f360907a290d7e746a33a5635367b476d953466f..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/R/designSimulationBuilder.R
+++ /dev/null
@@ -1,51 +0,0 @@
-
-
-#' Build a design dataframe
-#'
-#' @param n_genotype A int.
-#' @param n_environment A int.
-#' @param n_replicate A int.
-#' @param uniform_nb_rep logical
-#' @examples
-#' buildDesign2simulate(1000, 2, 30)
-#
-#' @return dataframe with n_genotype rows and 3 columns (sample_id, genotype, environment)
-#' @import tidyverse
-#' @import stats
-#'
-buildDesign2simulate <- function(n_genotype , n_environment, n_replicate , uniform_nb_rep = T ){
-
-
-    genotypes = base::paste("G", 0:(n_genotype-1), sep = "")
-    environments = base::paste("E", 0:(n_environment-1), sep = "")
-    sample_ids = as.vector(outer(genotypes, environments, paste, sep="_")) %>% sort()
-
-
-    sample_id.split =  sample_ids %>% str_split(., "_", simplify = T)
-    design_without_rep = base::list(sample_id = sample_ids ,
-                                    environment = sample_id.split[,2],
-                                    genotype =  sample_id.split[,1] ) %>% data.frame()
-
-
-    rows = c(1:nrow(design_without_rep))
-    if (uniform_nb_rep == T) times = rep(n_replicate, base::nrow(design_without_rep))
-    else { times = sample(1:n_replicate, base::nrow(design_without_rep), replace=T) ; message("uniform_nb_rep = FALSE \nn_replicate consider as a maximum number of replicates possible") }
-
-    design = design_without_rep %>%
-      dplyr::mutate(rep = times) %>%
-      dplyr::group_by(sample_id, genotype, environment) %>%
-      tidyr::expand(rep = seq(1:rep)) %>%
-      tidyr::unite(sample_id, sample_id, rep) %>%
-      dplyr::ungroup()
-
-    if (n_genotype > 1) design$genotype <- factor(x = design$genotype,levels = c("G0", unique(design$genotype)[-1]))
-    if (n_environment  > 1) design$environment <- factor(x = design$environment, levels = c( "E0", unique(design$environment)[-1]))
-
-    model_matrix = stats::model.matrix(~ genotype + environment + genotype:environment, design)
-    rownames(model_matrix) = design$sample_id
-
-    return( base::list(model_matrix = model_matrix,
-                    design2simulate = design) )
-}
-
-
diff --git a/src/v2/HTRSIM/R/evaluation.R b/src/v2/HTRSIM/R/evaluation.R
deleted file mode 100644
index 6f246c2c003a967ccffb1e6f297b0b174abb305a..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/R/evaluation.R
+++ /dev/null
@@ -1,76 +0,0 @@
-
-#' Get getDfComparison
-#'
-#' @param dds_simu.mcols  dds object obtain on simulation
-#' @param model_matrix a stats::model.matrix output
-#' @param  beta.actual.matrix.matrix a matrix of beta used as input for simulation
-#' @param threads
-#' @import stringr
-#' @import base
-#' @import dplyr
-#' @import reshape2
-#' @import DESeq2
-#' @import furrr
-#' @return a dataframe for beta comparison
-#' @export
-#'
-#' @examples
-getDfComparison <- function(dds_simu , model_matrix, beta.actual.matrix, threads = 4 ){
-
-
-    listBeta = DESeq2::resultsNames(dds_simu)
-    plan(multisession, workers = 4)
-    res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x)) %>% data.frame() %>% .$padj)
-    padj.matrix = do.call("cbind", res)
-
-
-    dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
-    dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
-                                        stringr::str_replace(., "_vs_G0", "") %>%
-                                        stringr::str_replace(., "_vs_E0", "") %>%
-                                        stringr::str_replace_all(., "_", "") %>%
-                                        stringr::str_replace(., "\\.", ":")
-
-    columnOfInterest = model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "")
-    #dds_simu.mcols[,columnOfInterest]
-
-    ## Get only column of interest
-    idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped)
-    beta.infered = dds_simu.mcols[,idx_cols]
-
-    ## homogeneize column names & rownames
-    idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames())
-    beta.actual.matrix = beta.actual.matrix[,idx_cols]
-    colnames(beta.infered) = base::colnames(beta.actual.matrix)
-    colnames(padj.matrix) = base::colnames(beta.actual.matrix)
-    rownames(padj.matrix) = base::rownames(beta.actual.matrix)
-
-    beta.infer.long = beta.infered %>% data.frame() %>%
-                              tibble::rownames_to_column(., var = "gene_id") %>%
-                              dplyr::mutate(origin = "Inference") %>%
-                              reshape2::melt(., value.name = "value", variable.name= "beta")
-    beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>%
-                              tibble::rownames_to_column(., var = "gene_id") %>%
-                              dplyr::mutate(origin = "Actual") %>%
-                              reshape2::melt(., value.name = "value", variable.name= "beta")
-    padj.matrix.long = padj.matrix  %>% data.frame() %>%
-                              tibble::rownames_to_column(., var = "gene_id") %>%
-                              dplyr::mutate(origin = "padj") %>%
-                              reshape2::melt(., value.name = "value", variable.name= "beta")
-
-    beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long)
-    #beta.merged.long$beta %>% unique()
-
-    beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when(
-      str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
-      str_detect(beta, "genotypeG\\d+$") ~ "G",
-      str_detect(beta, "environmentE\\d+$") ~ "E",
-      str_detect(beta, "Intercept$") ~ "Intercept")
-    )
-
-
-    beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(.,  gene_id + beta + type ~ origin)
-    beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE"))
-
-    return(beta.merged.long.reshape2)
-}
diff --git a/src/v2/HTRSIM/R/modelFitting.R b/src/v2/HTRSIM/R/modelFitting.R
deleted file mode 100644
index 6918c11c074e9d339e4be3ab5602878c20de3fef..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/R/modelFitting.R
+++ /dev/null
@@ -1,128 +0,0 @@
-#' Launch wald test
-#'
-#' @param model.res output of glm.nb
-#' @param term index of the coeff to test
-#' @param threshold HO : coeff < threshold
-#' @param initvec vector of 0
-#' @import glmglrt
-#' @return a numeric pvalue
-#' @export
-#'
-#' @examples
-test_wald <- function(model.res, term, threshold, initvec){
-  constrast_vec = replace(initvec, term , 1)
-  model.res$coefficients <- abs(model.res$coefficients)
-  wald.test.pvalue = glmglrt::p_value_contrast(model.res, contrast = constrast_vec ,  alternative = "greater", method = "Wald", H0 = threshold)
-  return(wald.test.pvalue %>% as.numeric())
-}
-
-
-
-#' Launch MASS:GLM.NB
-#'
-#' @param gene_count a row of kij simulated table
-#' @param gene_name name of the gene
-#' @import dplyr
-#' @import stringr
-#' @import stats
-#' @import purrr
-#' @import furrr
-#' @return a dtf
-#' @export
-#'
-#' @examples
-reshapeGlmRes <- function(fit, gene_name, threshold  ,  error_bool = F){
-    if (error_bool == T){ ## error while fiting model
-      res = list(Inference = NA, pval = NA, beta = NA, gene_id = paste("gene", i, sep = ""), type = NA, dispersion = NA) %>%
-            data.frame()
-    }
-    else{ ## success to fit model
-
-      ### wald test ###
-      message(paste("WALD test:\n HO:", '|Beta| <' , threshold, sep = ' ' ))
-      vecofzero = rep(0, length(coef(fit)))
-      list_pval = 1:length(coef(fit)) %>% purrr::map(.x = .,  ~test_wald(model.res = fit, term= .x ,  threshold , initvec = vecofzero) ) %>% unlist()
-
-
-
-      ###
-      res = coef(summary(fit))[,c(1,4)] %>%
-              data.frame() %>%
-              dplyr::rename(.,  Inference = "Estimate") %>%
-              dplyr::mutate(beta = stringr::str_remove_all(rownames(.), "[//(//)]")) %>%
-              dplyr::mutate(beta = stringr::str_replace(beta, ":", ".")) %>%
-              dplyr::mutate(pval = list_pval) %>%
-              dplyr::select(-"Pr...z..")
-
-      rownames(res) <- NULL
-      res = res  %>%
-              dplyr::mutate(gene_id = gene_name ) %>%
-              dplyr::mutate(type = dplyr::case_when(
-                stringr::str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
-                stringr::str_detect(beta, "genotypeG\\d+$") ~ "G",
-                stringr::str_detect(beta, "environmentE\\d+$") ~ "E",
-                stringr::str_detect(beta, "Intercept$") ~ "Intercept")) %>%
-              dplyr::mutate(dispersion = fit$theta)
-      #stats::summary.glm(fit)$dispersion
-    }
-    return(res)
-}
-
-
-
-
-#' Launch MASS:GLM.NB
-#'
-#' @param gene_count a row of kij simulated table
-#' @param i index of the gene
-#' @param  design.dtf a dtf of the design simulated
-#' @import MASS
-#' @return a dtf
-#' @export
-#'
-#' @examples
-run.glm <- function(gene_count, i, design.dtf, threshold = 0) {
-  y = gene_count
-  genotype = design2simulate$design2simulate$genotype
-  environment = design2simulate$design2simulate$environment
-
-  message("Fitting model ...")
-
-  df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame()
-  rownames(df_gene_i) <- NULL
-  #print(i)
-  tryCatch({
-
-    fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = df_gene_i, link = log)
-    return(reshapeGlmRes(fit, i, threshold))
-  },
-
-  error = function(cnd){
-    return(reshapeGlmRes(NULL, i, threshold  , error_bool = T))
-}
-)
-}
-
-
-
-#' Launch Deseq
-#'
-#' @param tabl_cnts table containing counts per genes & samples
-#' @param bioDesign table describing bioDesgin of input
-#' @import DESeq2
-#' @return DESEQ2 object
-#' @export
-#'
-#' @examples
-run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment){
-
-
-  dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model  )
-  dds <- DESeq2::DESeq(dds)
-  return(dds)
-
-}
-
-
-
-
diff --git a/src/v2/HTRSIM/R/ublicsTableCountsExtractor.R b/src/v2/HTRSIM/R/ublicsTableCountsExtractor.R
deleted file mode 100644
index cc6826296fac648d6eca8414d38c58738a8807c3..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/R/ublicsTableCountsExtractor.R
+++ /dev/null
@@ -1,30 +0,0 @@
-#' Extract beta distribution from DESEQ2 object
-#'
-#' @param dds_obj a DESEQ2 object
-#' @import S4vectors
-#' @return a list containing 1- mean and sd of BetaG 2- mean and sd of BetaE 3- mean and sd of BetaGE 5- mean and sd of gene dispersion
-#' @export
-#'
-#' @examples
-extractDistributionFromDDS <-  function(dds_obj){
-  ## Beta
-  dds.mcols = S4Vectors::mcols(dds, use.names=TRUE)
-  beta0 <- dds.mcols$Intercept
-  betaG <- dds.mcols$genotype_RM11_vs_GSY147
-  betaE <- dds.mcols$environment_treated_vs_untreated
-  betaGE <- dds.mcols$genotypeRM11.environmenttreated
-  beta.dtf = cbind(beta0,betaG,betaE,betaGE) %>% as.data.frame() %>% drop_na()
-
-  ## Dispersion
-  gene_disp = dds.mcols$dispersion %>% na.omit()
-
-
-  return(list(beta = beta.dtf,
-              gene_dispersion = gene_disp,
-              beta0.mean = mean(beta.dtf$beta0,  na.rm = T), beta0.sd = sd(beta.dtf$beta0,  na.rm = T) ,
-              betaG.mean = mean(beta.dtf$betaG,  na.rm = T), betaG.sd = sd(beta.dtf$betaG,  na.rm = T) ,
-              betaE.mean = mean(beta.dtf$betaE, na.rm = T)  , betaE.sd = sd(beta.dtf$betaE,  na.rm = T) ,
-              betaGE.mean =  mean(beta.dtf$betaGE, na.rm = T)  , betaGE.sd = sd(beta.dtf$betaGE,  na.rm = T),
-              gene_disp.mean = mean(gene_disp, na.rm = T) ,  gene_disp.sd = sd(gene_disp, na.rm = T)))
-
-}
diff --git a/src/v2/HTRSIM/R/visualization.R b/src/v2/HTRSIM/R/visualization.R
deleted file mode 100644
index 47c1e445ad8cacf55f311acfcd836754d2c3809f..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/R/visualization.R
+++ /dev/null
@@ -1,65 +0,0 @@
-
-
-
-#' Sampling value from a known Gaussian distribution
-#'
-#' @param beta_type  a string (beta0, betaG, betaE, betaGE)
-#' @param mean.distrib  the mean of the distribution (dbl)
-#' @param sd.distrib  the standard deviation of the distribution (dbl)
-#' @param n an integer defining the number of value sampling
-#' @import dplyr
-#' @import base
-#' @return a dataframe
-#' @export
-#'
-#' @examples
-rnorm.distrib.beta <- function(parameter, mean.distrib , sd.distrib , n = 1000 ){
-
-  rnorm.distrib.dtf = rnorm(1000, mean = mean.distrib, sd = sd.distrib) %>%
-    base::data.frame() %>%
-    dplyr::mutate( parameter = parameter) %>%
-    dplyr::rename(., value = ".") %>%
-    dplyr::mutate(sampling_from = paste("rnorm(mean = ", signif(mean.distrib, 3), ", sd=" , signif(sd.distrib, 3), ")", sep = ""))
-
-
-  return(rnorm.distrib.dtf)
-}
-
-
-#' Vizualize the output of extractDistributionFromDDS
-#'
-#' @param dds.extraction  output of extractDistributionFromDDS
-#' @import ggplot2
-#' @import base
-#' @import dplyr
-#' @return a plot
-#' @export
-#'
-#' @examples
-ddsExtraction.viz <- function(dds.extraction){
-    beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter")
-
-    alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
-                          dplyr::rename(., value = ".") %>%
-                          dplyr::mutate(parameter = "dispersion")
-
-    dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long)
-
-    rnorm.distrib.beta0 = rnorm.distrib.beta(parameter = "beta0", mean.distrib = dds.extraction$beta0.mean, sd = dds.extraction$beta0.sd )
-    rnorm.distrib.betaG = rnorm.distrib.beta(parameter = "betaG", mean.distrib = dds.extraction$betaG.mean, sd = dds.extraction$betaG.sd )
-    rnorm.distrib.betaE = rnorm.distrib.beta(parameter = "betaE", mean.distrib = dds.extraction$betaE.mean, sd = dds.extraction$betaE.sd )
-    rnorm.distrib.betaGE = rnorm.distrib.beta(parameter = "betaGE", mean.distrib = dds.extraction$betaGE.mean, sd = dds.extraction$betaGE.sd )
-    rnorm.distrib.alpha = rnorm.distrib.beta(parameter = "dispersion", mean.distrib = dds.extraction$gene_disp.mean, sd = dds.extraction$gene_disp.sd )
-
-    dtf.rnorm.param = base::rbind(rnorm.distrib.beta0, rnorm.distrib.betaG, rnorm.distrib.betaE, rnorm.distrib.betaGE, rnorm.distrib.alpha)
-
-    p = ggplot(dtf.params_obs, aes(x= value)) +
-      geom_histogram(aes(y=..density..), colour="black", fill="white")+
-      geom_density(data = dtf.rnorm.param, aes(x =  value,  fill= sampling_from), alpha=.4) + facet_grid(~parameter)
-
-
-
-
-    return(p)
-
-}
diff --git a/src/v2/HTRSIM/devtools_history.R b/src/v2/HTRSIM/devtools_history.R
deleted file mode 100644
index 9ad37d1dbbceb46a1b30e98d8129245ca7c390d4..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/devtools_history.R
+++ /dev/null
@@ -1,9 +0,0 @@
-usethis::use_build_ignore("devtools_history.R")
-usethis::use_package('tidyverse', type = "depends")
-usethis::use_package('stats')
-usethis::use_package("stringr")
-usethis::use_package("MASS")
-usethis::use_package("DESeq2")
-usethis::use_package("furrr")
-usethis::use_package("glmglrt")
-
diff --git a/src/v2/HTRSIM/man/buildDesign2simulate.Rd b/src/v2/HTRSIM/man/buildDesign2simulate.Rd
deleted file mode 100644
index e3c699cab1267b6ad3749c6bf656306562d77498..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/buildDesign2simulate.Rd
+++ /dev/null
@@ -1,31 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/designSimulationBuilder.R
-\name{buildDesign2simulate}
-\alias{buildDesign2simulate}
-\title{Build a design dataframe}
-\usage{
-buildDesign2simulate(
-  n_genotype,
-  n_environment,
-  n_replicate,
-  uniform_nb_rep = T
-)
-}
-\arguments{
-\item{n_genotype}{A int.}
-
-\item{n_environment}{A int.}
-
-\item{n_replicate}{A int.}
-
-\item{uniform_nb_rep}{logical}
-}
-\value{
-dataframe with n_genotype rows and 3 columns (sample_id, genotype, environment)
-}
-\description{
-Build a design dataframe
-}
-\examples{
-buildDesign2simulate(1000, 2, 30)
-}
diff --git a/src/v2/HTRSIM/man/ddsExtraction.viz.Rd b/src/v2/HTRSIM/man/ddsExtraction.viz.Rd
deleted file mode 100644
index 02be5edfc93af9d654f7abecbca9ada9acced010..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/ddsExtraction.viz.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/visualization.R
-\name{ddsExtraction.viz}
-\alias{ddsExtraction.viz}
-\title{Vizualize the output of extractDistributionFromDDS}
-\usage{
-ddsExtraction.viz(dds.extraction)
-}
-\arguments{
-\item{dds.extraction}{output of extractDistributionFromDDS}
-}
-\value{
-a plot
-}
-\description{
-Vizualize the output of extractDistributionFromDDS
-}
diff --git a/src/v2/HTRSIM/man/extractDistributionFromDDS.Rd b/src/v2/HTRSIM/man/extractDistributionFromDDS.Rd
deleted file mode 100644
index fcfc012a692b9a83732c6cfa9ee9ecefca01d28f..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/extractDistributionFromDDS.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ublicsTableCountsExtractor.R
-\name{extractDistributionFromDDS}
-\alias{extractDistributionFromDDS}
-\title{Extract beta distribution from DESEQ2 object}
-\usage{
-extractDistributionFromDDS(dds_obj)
-}
-\arguments{
-\item{dds_obj}{a DESEQ2 object}
-}
-\value{
-a list containing 1- mean and sd of BetaG 2- mean and sd of BetaE 3- mean and sd of BetaGE 5- mean and sd of gene dispersion
-}
-\description{
-Extract beta distribution from DESEQ2 object
-}
diff --git a/src/v2/HTRSIM/man/getBetaforSimulation.Rd b/src/v2/HTRSIM/man/getBetaforSimulation.Rd
deleted file mode 100644
index 1fdf6fc83fde79926de3079f1903312e49d7be13..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/getBetaforSimulation.Rd
+++ /dev/null
@@ -1,33 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/countsGenerator.R
-\name{getBetaforSimulation}
-\alias{getBetaforSimulation}
-\title{Get beta_ij}
-\usage{
-getBetaforSimulation(
-  n_genes = 100,
-  n_genotypes = 20,
-  n_environments = 2,
-  beta.dtf,
-  threshold = 15
-)
-}
-\arguments{
-\item{n_genes}{an integer}
-
-\item{beta.dtf}{a dtf of beta0,betaG, betaE, betaGE.}
-
-\item{threshold}{an integer)}
-
-\item{n_genotype}{A int.}
-
-\item{n_environment}{A int.}
-
-\item{model_matrix}{an output of stat::model.matrix()}
-}
-\value{
-a dataframe with the gene dispersion for each samples
-}
-\description{
-Get beta_ij
-}
diff --git a/src/v2/HTRSIM/man/getDfComparison.Rd b/src/v2/HTRSIM/man/getDfComparison.Rd
deleted file mode 100644
index 7f80baa5c2b067cd669806f9843dd6f019138133..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/getDfComparison.Rd
+++ /dev/null
@@ -1,21 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/evaluation.R
-\name{getDfComparison}
-\alias{getDfComparison}
-\title{Get getDfComparison}
-\usage{
-getDfComparison(dds_simu, model_matrix, beta.actual.matrix)
-}
-\arguments{
-\item{model_matrix}{a stats::model.matrix output}
-
-\item{dds_simu.mcols}{dds object obtain on simulation}
-
-\item{beta.actual.matrix.matrix}{a matrix of beta used as input for simulation}
-}
-\value{
-a dataframe for beta comparison
-}
-\description{
-Get getDfComparison
-}
diff --git a/src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd b/src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd
deleted file mode 100644
index e2b8cff4990f51e5a88ef4c1d4cbd147ee327732..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd
+++ /dev/null
@@ -1,34 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/countsGenerator.R
-\name{getGenesDispersionsForSimulation}
-\alias{getGenesDispersionsForSimulation}
-\title{Get genes dispersion}
-\usage{
-getGenesDispersionsForSimulation(
-  n_genes = 100,
-  n_genotypes,
-  n_environments,
-  dispersion.vec,
-  dispUniform_btweenCondition = T,
-  model_matrix
-)
-}
-\arguments{
-\item{n_genes}{an integer}
-
-\item{dispersion.vec}{A vector of observed dispersion.}
-
-\item{dispUniform_btweenCondition}{logical}
-
-\item{model_matrix}{an output of stat::model.matrix()}
-
-\item{n_genotype}{A int.}
-
-\item{n_environment}{A int.}
-}
-\value{
-a dataframe with the gene dispersion for each samples
-}
-\description{
-Get genes dispersion
-}
diff --git a/src/v2/HTRSIM/man/getK_ij.Rd b/src/v2/HTRSIM/man/getK_ij.Rd
deleted file mode 100644
index 08c5b701ed619a394fad10959f80c191a9e695bc..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/getK_ij.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/countsGenerator.R
-\name{getK_ij}
-\alias{getK_ij}
-\title{Get K_ij : gene counts}
-\usage{
-getK_ij(mu_ij.matrix, gene_disp.matrix)
-}
-\arguments{
-\item{gene_disp.matrix}{a matrix of gene dispersion}
-
-\item{log_qij.matrix}{a matrix of log_qij}
-}
-\value{
-a dataframe with the gene dispersion for each samples
-}
-\description{
-Get K_ij : gene counts
-}
diff --git a/src/v2/HTRSIM/man/getLog_qij.Rd b/src/v2/HTRSIM/man/getLog_qij.Rd
deleted file mode 100644
index 9bbc605df29dfe5ce660d01d4ed3f6f2c3374c64..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/getLog_qij.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/countsGenerator.R
-\name{getLog_qij}
-\alias{getLog_qij}
-\title{Get log(q_ij)}
-\usage{
-getLog_qij(beta.matrix.input, model_matrix)
-}
-\arguments{
-\item{model_matrix}{an output of stat::model.matrix()}
-
-\item{beta.dtf}{a dtf of beta0,betaG, betaE, betaGE.}
-}
-\value{
-a dataframe with the gene dispersion for each samples
-}
-\description{
-Get log(q_ij)
-}
diff --git a/src/v2/HTRSIM/man/getMu_ij.Rd b/src/v2/HTRSIM/man/getMu_ij.Rd
deleted file mode 100644
index e4b7949b3050993396664062336b2c56b14d7b5e..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/getMu_ij.Rd
+++ /dev/null
@@ -1,19 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/countsGenerator.R
-\name{getMu_ij}
-\alias{getMu_ij}
-\title{Get mu_ij}
-\usage{
-getMu_ij(log_qij.matrix, size_factor)
-}
-\arguments{
-\item{beta.dtf}{a dtf of beta0,betaG, betaE, betaGE.}
-
-\item{model_matrix}{an output of stat::model.matrix()}
-}
-\value{
-a dataframe with the gene dispersion for each samples
-}
-\description{
-Get mu_ij
-}
diff --git a/src/v2/HTRSIM/man/rnorm.distrib.beta.Rd b/src/v2/HTRSIM/man/rnorm.distrib.beta.Rd
deleted file mode 100644
index 63a174c2c1506939784abd3e43a79353e551d18b..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/rnorm.distrib.beta.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/visualization.R
-\name{rnorm.distrib.beta}
-\alias{rnorm.distrib.beta}
-\title{Sampling value from a known Gaussian distribution}
-\usage{
-rnorm.distrib.beta(parameter, mean.distrib, sd.distrib, n = 1000)
-}
-\arguments{
-\item{mean.distrib}{the mean of the distribution (dbl)}
-
-\item{sd.distrib}{the standard deviation of the distribution (dbl)}
-
-\item{n}{an integer defining the number of value sampling}
-
-\item{beta_type}{a string (beta0, betaG, betaE, betaGE)}
-}
-\value{
-a dataframe
-}
-\description{
-Sampling value from a known Gaussian distribution
-}
diff --git a/src/v2/HTRSIM/man/run.deseq.Rd b/src/v2/HTRSIM/man/run.deseq.Rd
deleted file mode 100644
index 02d00fce58c0517f7bd9b0e8af9dfbfc07bb882e..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/man/run.deseq.Rd
+++ /dev/null
@@ -1,23 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/ublicsTableCountsExtractor.R
-\name{run.deseq}
-\alias{run.deseq}
-\title{Launch Deseq}
-\usage{
-run.deseq(
-  tabl_cnts,
-  bioDesign,
-  model = ~genotype + environment + genotype:environment
-)
-}
-\arguments{
-\item{tabl_cnts}{table containing counts per genes & samples}
-
-\item{bioDesign}{table describing bioDesgin of input}
-}
-\value{
-DESEQ2 object
-}
-\description{
-Launch Deseq
-}
diff --git a/src/v2/HTRSIM/tests/testthat.R b/src/v2/HTRSIM/tests/testthat.R
deleted file mode 100644
index 16b641e1107965b2dee5f779339fd83fa45c4460..0000000000000000000000000000000000000000
--- a/src/v2/HTRSIM/tests/testthat.R
+++ /dev/null
@@ -1,4 +0,0 @@
-library(testthat)
-library(HTRSIM)
-
-test_check("HTRSIM")
diff --git a/src/v3/HTRsim/DESCRIPTION b/src/v3/HTRsim/DESCRIPTION
deleted file mode 100644
index 5a3903a93cd822f12f3c062cd2af4b239d958bf1..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/DESCRIPTION
+++ /dev/null
@@ -1,23 +0,0 @@
-Package: HTRsim
-Title: Hightroughtput RNA-seq simulation
-Version: 0.3
-Authors@R: person('Duvermy', 'Arnaud', email = 'arnaud.duvermy@ens-lyon.Fr', role = c('aut', 'cre'))
-Description: Hightroughtput RNA-seq simulation.
-License: GPL-3
-Encoding: UTF-8
-Roxygen: list(markdown = TRUE)
-RoxygenNote: 7.2.2
-Depends: 
-    tidyverse
-Imports: 
-    base,
-    DESeq2,
-    dplyr,
-    MASS,
-    purrr,
-    Rfast,
-    S4Vectors,
-    stats,
-    stringr,
-    tidyr,
-    utils
diff --git a/src/v3/HTRsim/HTRsim.Rproj b/src/v3/HTRsim/HTRsim.Rproj
deleted file mode 100644
index 69fafd4b6dddad27500cfc67efb9fb16e86a96bd..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/HTRsim.Rproj
+++ /dev/null
@@ -1,22 +0,0 @@
-Version: 1.0
-
-RestoreWorkspace: No
-SaveWorkspace: No
-AlwaysSaveHistory: Default
-
-EnableCodeIndexing: Yes
-UseSpacesForTab: Yes
-NumSpacesForTab: 2
-Encoding: UTF-8
-
-RnwWeave: Sweave
-LaTeX: pdfLaTeX
-
-AutoAppendNewline: Yes
-StripTrailingWhitespace: Yes
-LineEndingConversion: Posix
-
-BuildType: Package
-PackageUseDevtools: Yes
-PackageInstallArgs: --no-multiarch --with-keep.source
-PackageRoxygenize: rd,collate,namespace
diff --git a/src/v3/HTRsim/NAMESPACE b/src/v3/HTRsim/NAMESPACE
deleted file mode 100644
index 50d60c735b973b837a1a5a00326f5f1a796d4b76..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/NAMESPACE
+++ /dev/null
@@ -1,31 +0,0 @@
-# Generated by roxygen2: do not edit by hand
-
-export(ddsExtraction)
-export(getBetaforSimulation)
-export(getCountTable)
-export(getGenesDispersions)
-export(getLog_qij)
-export(getModelMatrix)
-export(getMu_ij)
-export(getPublicMvnormFit)
-export(getReplicationDesign)
-export(get_kij)
-export(loadObservedValues)
-export(loadPubliCounTable)
-export(loadPublicDesign)
-export(mvnormFitting)
-export(publicDDS_extraction)
-export(random_replication)
-export(rnaMock)
-export(run.deseq)
-export(uniform_replication)
-import(DESeq2)
-import(MASS)
-import(Rfast)
-import(S4Vectors)
-import(dplyr)
-import(purrr)
-import(stats)
-import(stringr)
-import(tidyr)
-import(utils)
diff --git a/src/v3/HTRsim/R/countsGenerator.R b/src/v3/HTRsim/R/countsGenerator.R
deleted file mode 100644
index 5736beb5bfe3a4ec9ee31be66f8ac6099fe84ba3..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/R/countsGenerator.R
+++ /dev/null
@@ -1,294 +0,0 @@
-#' Get beta_ij
-#'
-#' @param n_genes  an integer
-#' @param n_genotype A int.
-#' @param mvrnormFit_list an object fit mvronorm
-#' @param fixIntercept
-#' @param fixBetaE
-#' @param n_clusters
-#' @import MASS
-#' @import dplyr
-#' @import purrr
-#' @return a dataframe
-#' @export
-#'
-#' @examples
-getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, mvrnormFit_list, fixIntercept = TRUE, fixBetaE = TRUE, n_clusters = 5) {
-  ## -- Sampling from mvnorm -- ##
-  list_idxFit <- sample(1:n_clusters, replace = TRUE, size = n_genes)
-  f <- purrr::map(.x = list_idxFit, ~ MASS::mvrnorm(
-    n = n_genotypes,
-    mu = mvrnormFit_list[[.x]]$mu,
-    Sigma = mvrnormFit_list[[.x]]$sigma
-  ))
-  beta.matrix <- do.call("rbind", f)
-  ## -- Build lovely dataframe & annotations -- ##
-  genes_vec <- base::paste("gene", 1:n_genes, sep = "")
-  genotype_vec <- base::paste("G", 0:(n_genotypes - 1), sep = "")
-  genotype <- genotype_vec %>% rep(time = n_genes)
-  gene_id <- rep(genes_vec, each = n_genotypes)
-  idx_mvrnom <- rep(list_idxFit, each = n_genotypes) ## -- saving the mvnorm distribution idx
-  beta.dtf <- beta.matrix %>% data.frame()
-  colnames(beta.dtf) <- c("(Intercept)", "betaG", "betaE", "betaGE")
-  res <- cbind(gene_id, genotype, beta.dtf, idx_mvrnom)
-  ## fixing Genotype and GxE effects to 0 for G0 (reference)
-  ## -> hide in (Intercept)
-  #res[res$genotype == "G0", c("betaG", "betaGE")] <- 0
-  if (fixIntercept) {
-    res <- res %>%
-      dplyr::group_by(gene_id) %>%
-      dplyr::mutate("(Intercept)" = mean(`(Intercept)`))
-  }
-  if (fixBetaE) {
-    res <- res %>%
-      dplyr::group_by(gene_id) %>%
-      dplyr::mutate(betaE = mean(betaE))
-  }
-
-  # -- log
-  futile.logger::flog.info("Beta: ok")
-
-  return(res)
-}
-
-#' Get model matrix
-#'
-#' @param n_environments an integer
-#' @return a matrix
-#' @export
-#'
-#' @examples
-getModelMatrix <- function(n_environments = 2) {
-  environment_vec <- base::paste("E", 0:(n_environments - 1), sep = "")
-  ########################################
-  m <- c(1, 1, 0, 0, 1, 1, 1, 1)
-  model.matrix <- matrix(data = m, ncol = 2, byrow = F)
-  colnames(model.matrix) <- environment_vec
-  rownames(model.matrix) <- c("(Intercept)", "betaG", "betaE", "betaGE")
-  return(model.matrix)
-}
-
-
-#' Get log(q_ij)
-#'
-#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE.
-#' @param model.matx an output of stat::model.matrix()
-#' @return a dataframe
-#' @export
-#'
-#' @examples
-getLog_qij <- function(beta.dtf, model.matx) {
-  beta.matx <- beta.dtf[, c("(Intercept)", "betaG", "betaE", "betaGE")] %>% as.matrix()
-  log_qij.matx <- beta.matx %*% model.matx ## j samples, i genes
-  ### Some reshaping ###
-  log_qij.dtf <- log_qij.matx %>% data.frame()
-  annotations <- beta.dtf[, c("gene_id", "genotype")]
-  log_qij.dtf <- cbind(annotations, log_qij.dtf)
-
-   # -- log
-  futile.logger::flog.info("log(q_ij): ok")
-
-  return(log_qij.dtf)
-}
-
-#' Get mu_ij
-#'
-#' @param log_qij.dtf a dtf
-#' @param size_factor a scalar
-#' @return a matrix
-#' @export
-#'
-#' @examples
-getMu_ij <- function(log_qij.dtf, size_factor) {
-  log_qij.matx <- log_qij.dtf[, c("E0", "E1")] %>% as.matrix()
-
-  mu_ij.matx <- size_factor * 2^log_qij.matx ## size factor * log(qij)
-
-  mu_ij.dtf <- mu_ij.matx %>% data.frame()
-  annotations <- log_qij.dtf[, c("gene_id", "genotype")]
-  mu_ij.dtf <- cbind(annotations, mu_ij.dtf)
-  mu_ij.matx <- mu_ij.dtf %>%
-    reshape2::melt(.,
-      id.vars = c("gene_id", "genotype"),
-      value.name = "mu_ij", variable.name = "environment"
-    ) %>%
-    reshape2::dcast(., gene_id ~ genotype + environment, value.var = "mu_ij") %>%
-    column_to_rownames("gene_id") %>%
-    as.matrix()
-
-  # -- log
-  futile.logger::flog.info("mu_ij: ok")
-
-  return(mu_ij.matx)
-}
-
-#' Get genes dispersion
-#'
-#' @param n_genes  an integer
-#' @param n_genotype A int.
-#' @param n_environment A int.
-#' @param dispersion.vec A vector of observed dispersion.
-#' @param dispUniform_btweenCondition logical
-#' @param model_matrix an output of stat::model.matrix()
-#' @import stringr
-#' @import purrr
-#' @return a dataframe with the gene dispersion for each samples
-#' @export
-#'
-#' @examples
-getGenesDispersions <- function(n_genes, sample_id_list,
-                                dispersion.vec, dispUniform_btweenCondition = T) {
-  if (dispUniform_btweenCondition == T) {
-    # --Get dispersion for each genes
-
-    gene_dispersion.dtf <- base::sample(dispersion.vec,
-      replace = T,
-      size = n_genes
-    ) %>% base::data.frame()
-    n_rep <- length(sample_id_list)
-    gene_dispersion.dtf <- gene_dispersion.dtf[, base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)]
-    rownames(gene_dispersion.dtf) <- base::paste("gene", 1:(n_genes), sep = "")
-    colnames(gene_dispersion.dtf) <- sample_id_list
-  } else {
-    # -- Get dispersion for each genes within each conditions
-
-    replication_table <- sample_ids %>%
-      stringr::str_replace(., pattern = "_[0-9]+", "") %>%
-      table()
-    gene_dispersion.dtf <- replication_table %>%
-      purrr::map(., ~ sample(dispersion.vec, replace = T, size = n_genes)) %>%
-      data.frame()
-    gene_dispersion.dtf <- gene_dispersion.dtf[, rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())]
-    colnames(gene_dispersion.dtf) <- sample_ids
-    rownames(gene_dispersion.dtf) <- base::paste("gene", 1:(n_genes), sep = "")
-  }
-  gene_dispersion.mtx <- gene_dispersion.dtf %>% as.matrix()
-  gene_dispersion.mtx <- gene_dispersion.mtx[order(row.names(gene_dispersion.mtx)), ]
-  
-  # -- log
-  futile.logger::flog.info("dispersion: ok")
-
-  return(gene_dispersion.mtx)
-}
-
-
-#' Get K_ij : gene counts
-#'
-#' @param mu_ij.matx  a matrix of mu_ij
-#' @param dispersion.matx a matrix of gene dispersion
-#' @param n_genes a matrix of gene dispersion
-#' @param sample_id_list list of sample_ids
-#' @param idx_replicat
-#' @import stats
-#' @return a matrix with counts per genes and samples
-#' @export
-#'
-#' @examples
-get_kij <- function(mu_ij.matx, dispersion.matx, n_genes,
-                    sample_id_list, idx_replicat) {
-  n_sples <- length(sample_id_list)
-  alpha_gene <- 1 / dispersion.matx
-  k_ij <- stats::rnbinom(length(mu_ij.matx),
-    size = alpha_gene,
-    mu = mu_ij.matx
-  ) %>%
-    matrix(., nrow = n_genes, ncol = n_sples)
-  k_ij[is.na(k_ij)] <- 0
-  colnames(k_ij) <- base::paste(sample_id_list, idx_replicat, sep = "_")
-  rownames(k_ij) <- rownames(mu_ij.matx)
-  return(k_ij)
-}
-
-
-#' Get count table
-#'
-#' @param mu_ij.matx  a matrix
-#' @param dispersion.matx a matrix
-#' @param n_genes number of genes
-#' @param n_genotypes number of genotypes
-#' @param n_environments number of environments
-#' @param sample_id_list vector of sample ids
-#' @param maxN max number of replicate
-#' @param uniformNumberOfReplicates bool
-#' @import purrr
-#' @return a matrix
-#' @export
-#'
-#' @examples
-getCountTable <- function(mu_ij.matx, dispersion.matx,
-                          n_genes, n_genotypes,
-                          n_environments = 2, sample_id_list,
-                          replication.matx) {
-  # -- log
-  futile.logger::flog.info("k_ij ~ NegativBinomial( mu_ij, dispersion_ij )")
-  ### Iterate on each replicates
-  max_n_replicates <- dim(replication.matx)[1]
-  kij.simu.list <- purrr::map(
-    .x = 1:max_n_replicates,
-    .f = ~ get_kij(mu_ij.matx[, replication.matx[.x, ]],
-      dispersion.matx[, replication.matx[.x, ]],
-      n_genes = n_genes,
-      sample_id_list[replication.matx[.x, ]],
-      .x
-    )
-  )
-  tableCounts.simulated <- do.call(cbind, kij.simu.list)
-
-  # -- log
-  futile.logger::flog.info("k_ij : ok\n")
-
-  return(tableCounts.simulated)
-}
-
-
-#' get matrix of replication
-#'
-#' @param maxN  a integer : number of replicates
-#' @param n_samples an integer : number of samples
-#' @return a matrix of 0 and 1
-#' @export
-#'
-#' @examples
-uniform_replication <- function(maxN, n_samples) {
-  return(rep(TRUE, time = maxN) %>%
-    rep(., each = n_samples) %>%
-    matrix(ncol = n_samples))
-}
-
-
-#' get matrix of replication
-#'
-#' @param maxN  a integer : number of replicates
-#' @param n_samples an integer : number of samples
-#' @import purrr
-#' @return a matrix of 0 and 1
-#' @export
-#'
-#' @examples
-random_replication <- function(maxN, n_samples) {
-  replicating <- function(maxN) {
-    return(sample(x = c(TRUE, FALSE), size = maxN, replace = T))
-  }
-  res <- purrr::map(1:n_samples, ~ replicating(maxN - 1))
-  rep_table <- do.call(cbind, res)
-  rep_table <- rbind(rep(TRUE, times = n_samples), rep_table)
-  return(rep_table)
-}
-
-#' get matrix of replication
-#'
-#' @param maxN  a integer : number of replicates
-#' @param n_genotypes an integer : number of genotypes
-#' @param n_environments
-#' @param uniformNumberOfReplicates bool
-#' @return a matrix of 0 and 1
-#' @export
-#'
-#' @examples
-getReplicationDesign <- function(maxN, n_genotypes, n_environments = 2,
-                                 uniformNumberOfReplicates = T) {
-  nb_sample <- n_genotypes * n_environments
-  if (uniformNumberOfReplicates == T) rep.matrix <- uniform_replication(maxN, nb_sample)
-  if (uniformNumberOfReplicates == F) rep.matrix <- random_replication(maxN, nb_sample)
-  return(rep.matrix)
-}
diff --git a/src/v3/HTRsim/R/dds_manipulations.R b/src/v3/HTRsim/R/dds_manipulations.R
deleted file mode 100644
index 4ac353f30d371aa7754aabcd4fbf76e096dd4bdc..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/R/dds_manipulations.R
+++ /dev/null
@@ -1,100 +0,0 @@
-#' Launch Deseq
-#'
-#' @param tabl_cnts table containing counts per genes & samples
-#' @param bioDesign table describing bioDesgin of input
-#' @import DESeq2
-#' @import dplyr
-#' @return DESEQ2 object
-#' @export
-#'
-#' @examples
-fit_deseq <- function(tabl_cnts, bioDesign) {
-    model <- ~ genotype + environment + genotype:environment
-    tabl_cnts <- tabl_cnts %>% as.data.frame()
-    tabl_cnts <- tabl_cnts %>%
-        dplyr::mutate(
-            across(where(is.double), as.integer)
-        )
-    dds <- DESeq2::DESeqDataSetFromMatrix(
-        countData = round(tabl_cnts),
-        colData = bioDesign,
-        design = model
-    )
-    dds <- DESeq2::DESeq(dds, quiet = TRUE)
-    return(dds)
-}
-
-
-#' Get deseq results
-#'
-#' @param dds_obj  dds object obtain on simulation
-#' @import S4Vectors
-#' @import dplyr
-#' @import tibble
-#' @return a dataframe of dispersion obtained with deseq2
-#' @export
-#'
-#' @examples
-getDispersionFromDDS <- function(dds_obj) {
-    disp <- S4Vectors::mcols(dds_obj)$dispersion
-    gene_id <- rownames(S4Vectors::mcols(dds_obj))
-    names(disp) <- gene_id
-    disp.dtf <- disp %>%
-        data.frame() %>%
-        dplyr::rename(., dispersion.estimate = .) %>%
-        tibble::rownames_to_column("gene_id")
-    return(disp.dtf)
-}
-
-
-#' Get deseq results
-#'
-#' @param dds_obj  dds object obtain on simulation
-#' @param threads
-#' @import stringr
-#' @import dplyr
-#' @import future
-#' @import DESeq2
-#' @import furrr
-#' @return a dataframe of beta obtained with deseq2
-#' @export
-#'
-#' @examples
-getCoefficientsFromDds <- function(dds_obj, threads = 4) { # nolint
-    listBeta <- DESeq2::resultsNames(dds_obj)
-    future::plan(multisession, workers = threads)
-    res <- listBeta %>% furrr::future_map(
-        .x = .,
-        ~ DESeq2::results(dds_obj,
-            contrast = list(.x),
-            # lfcThreshold = threshold, /!\ statistic & pvalue resestimate later
-            # altHypothesis = altH,
-            tidy = TRUE
-        ) %>%
-            dplyr::select(-baseMean) %>%
-            dplyr::mutate(term = .x) %>%
-            dplyr::rename(
-                estimate = log2FoldChange,
-                std.error = lfcSE,
-                statistic = stat,
-                p.value = pvalue,
-                gene_id = row
-            ),
-        .options = furrr_options(seed = TRUE)
-    )
-    deseq_inference <- do.call("rbind", res)
-    deseq_inference.dtf <- deseq_inference %>%
-        dplyr::mutate(
-            term = term %>%
-                stringr::str_replace("Intercept", "(Intercept)") %>%
-                stringr::str_replace("_vs_G0", "") %>%
-                stringr::str_replace("_vs_E0", "") %>%
-                stringr::str_replace_all("_", "") %>%
-                stringr::str_replace("\\.", ":")
-        )
-
-    deseq_inference.dtf <- deseq_inference.dtf %>%
-        select(gene_id, estimate, std.error, term)
-
-    return(deseq_inference.dtf)
-}
\ No newline at end of file
diff --git a/src/v3/HTRsim/R/embeddedfiles_manipulations.R b/src/v3/HTRsim/R/embeddedfiles_manipulations.R
deleted file mode 100644
index 3fc98918441c8938cbf0187a43eb1d10ffdb4ad1..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/R/embeddedfiles_manipulations.R
+++ /dev/null
@@ -1,103 +0,0 @@
-#' Extract beta distribution from DESEQ2 object
-#'
-#' @param dds_obj a DESEQ2 object
-#' @import S4Vectors
-#' @import tidyr
-#' @import stats
-#' @return a list containing 1- mean and sd of BetaG 2- mean and sd of BetaE 3- mean and sd of BetaGE 5- mean and sd of gene dispersion
-#' @export
-#'
-#' @examples
-extraction_embeddedDds <- function(dds_obj) {
-  ## Beta
-  dds.mcols <- S4Vectors::mcols(dds_obj, use.names = TRUE)
-  Intercept <- dds.mcols$Intercept
-  betaG <- dds.mcols$genotype_RM11_vs_GSY147
-  betaE <- dds.mcols$environment_treated_vs_untreated
-  betaGE <- dds.mcols$genotypeRM11.environmenttreated
-  beta.dtf <- cbind(Intercept, betaG, betaE, betaGE) %>%
-    as.data.frame() %>%
-    tidyr::drop_na()
-  colnames(beta.dtf) <- c("(Intercept)", "betaG", "betaE", "betaGE")
-
-  ## Dispersion
-  gene_disp <- dds.mcols$dispersion %>% stats::na.omit()
-
-
-  return(list(
-    beta = beta.dtf,
-    gene_dispersion = gene_disp
-  ))
-}
-
-#' Extract beta distribution from DESEQ2 object
-#'
-#' @import dplyr
-#' @import utils
-#' @return output of dds extraction
-#' @export
-#'
-#' @examples
-embedded_CounTable2observedValues <- function() {
-  tabl_cnts <- loadEmbedded_CounTable()
-  bioDesign <- loadEmbedded_design()
-  ## Launch DESEQ2
-  dds <- fit_deseq(tabl_cnts, bioDesign = bioDesign)
-  ## Extract
-  dds.extraction <- extraction_embeddedDds(dds_obj = dds)
-
-  return(dds.extraction)
-}
-
-
-
-#' Load beta dtf embedded in package
-#'
-#' @return an object dds.Extraction
-#' @export
-#'
-#' @examples
-loadEmbedded_ObservedValues <- function() {
-  ## Import public beta observed
-  fn <- system.file("extdata/", "SRP217588_YM_observedParams.rds", package = "HTRsim")
-  dds.extraction <- readRDS(file = fn)
-  return(dds.extraction)
-}
-
-#' Load public counts table
-#'
-#' @import dplyr
-#' @import utils
-#' @return dataframe
-#' @export
-#'
-#' @examples
-loadEmbedded_CounTable <- function() {
-  ## Import public counts table
-  fn <- system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRsim")
-  tabl_cnts <- utils::read.table(file = fn, header = TRUE)
-  rownames(tabl_cnts) <- tabl_cnts$gene_id
-  tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id) ## suppr colonne GeneID
-  tabl_cnts <- tabl_cnts[order(tabl_cnts %>% rownames()), ]
-  tabl_cnts <- tabl_cnts %>% dplyr::select(!matches("ru_rm_5"))
-  return(tabl_cnts)
-}
-
-#' Load public design
-#'
-#' @import dplyr
-#' @import utils
-#' @return dataframe
-#' @export
-#'
-#' @examples
-loadEmbedded_design <- function() {
-  ## DESIGN
-  fn <- system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRsim")
-  bioDesign <- utils::read.table(file = fn, header = TRUE, sep = ";")
-  ## defining reference
-  bioDesign$genotype <- factor(x = bioDesign$genotype, levels = c("GSY147", "RM11"))
-  bioDesign$environment <- factor(x = bioDesign$environment, levels = c("untreated", "treated"))
-  bioDesign <- bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5"))
-  return(bioDesign)
-}
diff --git a/src/v3/HTRsim/R/evaluation.R b/src/v3/HTRsim/R/evaluation.R
deleted file mode 100644
index 1b5ea22d80da74d10ede3c438ad26010f07302ac..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/R/evaluation.R
+++ /dev/null
@@ -1,168 +0,0 @@
-#' Get prediction results
-#'
-#' @param inference.dtf
-#' @param threshold
-#' @param alphaRisk
-#' @param postInferenceSelection
-#' @param altH
-#' @param pvalCorrection
-#' @import dplyr
-#' @import HTRfit
-#' @import rstatix
-#' @return a dataframe to compare actual and infered betas with annotations
-#' @export
-#'
-#' @examples
-getPrediction <- function(inference.dtf, threshold = 0, alphaRisk = 0.05, altH = "greaterAbs", pvalCorrection = TRUE, postInferenceSelection = FALSE) {
-    ###### embedded functions ###########
-    labelling_gene <- function(altHy, coeff, threshold, proba, alphaRisk, postInferenceSelection = F) {
-        if (postInferenceSelection) {
-            if (altHy == "greaterAbs") label <- ifelse((abs(coeff) > threshold & proba < alphaRisk), "DE", "nonDE")
-            if (altHy == "greater") label <- ifelse((coeff > threshold & proba < alphaRisk), "DE", "nonDE")
-            if (altHy == "lowerAbs") label <- ifelse((abs(coeff) < threshold & proba < alphaRisk), "DE", "nonDE")
-        } else {
-            label <- ifelse(proba < alphaRisk, "DE", "nonDE")
-        }
-        return(label)
-    }
-    ############# WALD TEST #######
-    inference.dtf <- inference.dtf %>%
-        dplyr::mutate(
-            statistic =
-                dplyr::case_when(
-                    postInferenceSelection == TRUE ~
-                        HTRfit::getStatisticWaldTest(
-                            estimate = estimate, stdError = std.error,
-                            threshold = 0, altHypothesis = altH
-                        ),
-                    postInferenceSelection == FALSE ~
-                        HTRfit::getStatisticWaldTest(
-                            estimate = estimate, stdError = std.error,
-                            threshold = threshold, altHypothesis = altH
-                        )
-                )
-        ) %>%
-        dplyr::mutate(
-            p.value = HTRfit::wald_test(w = statistic, altHypothesis = altH)
-        ) %>%
-        rstatix::adjust_pvalue(p.col = "p.value", method = "fdr", output.col = "padj")
-
-
-    ############# PREDICTION #############
-    inference.dtf <- inference.dtf %>%
-        dplyr::mutate(
-            prediction.label =
-                dplyr::case_when(
-                    pvalCorrection == TRUE ~
-                        labelling_gene(altH, estimate, threshold, padj, alphaRisk, postInferenceSelection),
-                    pvalCorrection == FALSE ~
-                        labelling_gene(altH, estimate, threshold, p.value, alphaRisk, postInferenceSelection),
-                )
-        )
-    return(inference.dtf)
-}
-
-#' Get expectated results
-#'
-#' @param actual.dtf
-#' @param threshold
-#' @param altH
-#' @param toEval
-#' @import dplyr
-#' @import reshape2
-#' @return a dataframe to compare actual and infered betas with annotations
-#' @export
-#'
-#' @examples
-getExpectation <- function(actual.dtf, toEval = "glm" ,threshold = 0, altH = "greaterAbs") {
-    stopifnot(toEval %in% c("glm", "glm_mixte"))
-    if (toEval == "glm") {
-    actual.reshaped <- reshapeActualDtf_glm(actual.dtf)
-    ####### ACTUAL LABEL #########
-    actual.reshaped.annot <- actual.reshaped %>%
-        dplyr::mutate(
-            actual.label =
-                dplyr::if_else(abs(actual.value) < threshold,
-                    "nonDE", "DE"
-                )
-        )
-    }
-    if (toEval == "glm_mixte") actual.reshaped.annot <- reshapeActualDtf_glm_mixte(actual.dtf)
-    return(actual.reshaped.annot)
-}
-
-
-#' Get deseq results
-#'
-#' @param inference.dtf a obj output of [deseq, glm, glmm]
-#' @param actual.dtf parameters used during simulation
-#' @import dplyr
-#' @import data.table
-#' @return a dataframe to compare actual and infered betas
-#' @export
-#'
-#' @examples
-getComparison <- function(actual.dtf, inference.dtf) {
-    actual2join.dtf <- data.table::data.table(actual.dtf, key = c("gene_id", "term"))
-    inference2join.dtf <- data.table::data.table(inference.dtf, key = c("gene_id", "term"))
-    comparison.dtf <- actual2join.dtf[inference2join.dtf]
-
-    ########## COMPARING ACTUAL & PREDICTION ######
-    comparison.dtf <- comparison.dtf %>% dplyr::mutate(
-        annotation =
-            dplyr::case_when(
-                (actual.label == "DE" & prediction.label == "DE") ~ "TRUE",
-                (actual.label == "nonDE" & prediction.label == "nonDE") ~ "TRUE",
-                TRUE ~ "FALSE"
-            )
-    )
-
-    return(comparison.dtf %>% as.data.frame())
-}
-
-
-# -- facilities functions
-reshapeActualDtf_glm <- function(actual.dtf) {
-    actual.dtf <- actual.dtf %>%
-        reshape2::melt(
-            id = c("gene_id", "genotype", "idx_mvrnom"),
-            value.name = "value", variable.name = "beta"
-        ) %>%
-        dplyr::group_by(gene_id, beta) %>%
-        dplyr::mutate(
-            actual.value =
-                dplyr::if_else(beta %in% c("(Intercept)", "betaE"),
-                    mean(value), value
-                )
-        ) %>%
-        ungroup() %>%
-        dplyr::select(-value) %>%
-        dplyr::mutate(
-            term =
-                dplyr::case_when(
-                    beta == "(Intercept)" ~ "(Intercept)",
-                    beta == "betaG" ~ paste("genotype", genotype, sep = ""),
-                    beta == "betaE" ~ paste("environment", "E1", sep = ""),
-                    beta == "betaGE" ~ paste("genotype", genotype, ":environmentE1", sep = "")
-                )
-        )
-    actual.dtf <- actual.dtf %>% dplyr::select(-genotype)
-    actual_undup.dtf <- actual.dtf[!duplicated(actual.dtf), ]
-    return(actual_undup.dtf)
-}
-
-
-reshapeActualDtf_glm_mixte <- function(actual.dtf) {
-    actual.dtf <- actual.dtf %>%
-        dplyr::group_by(gene_id) %>%
-        dplyr::summarise(
-            tmp = mean(`(Intercept)` + betaG),
-            environmentE1 = mean(betaE + betaGE),
-            "sd__(Intercept)" = sd(`(Intercept)` + betaG),
-            sd__environmentE1 = sd(betaGE + betaE),
-            "cor__(Intercept).environmentE1" = cor((betaGE + betaE), (`(Intercept)` + betaG))
-        ) %>%
-        dplyr::rename("(Intercept)" = tmp) %>%
-        reshape2::melt(id = "gene_id", value.name = "actual.value", variable.name = "term")
-    return(actual.dtf)
-}
diff --git a/src/v3/HTRsim/R/multinormDistrib_manipulations.R b/src/v3/HTRsim/R/multinormDistrib_manipulations.R
deleted file mode 100644
index 956d1df7e1c9091893130925c7014b852d9b11c6..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/R/multinormDistrib_manipulations.R
+++ /dev/null
@@ -1,47 +0,0 @@
-#' Fit mvnorm
-#' @import Rfast
-#' @return output of Rfast::mvnorm.mle
-#' @export
-#'
-#' @examples
-mvnormFitting <- function(beta.dtf) {
-    beta.matx <- beta.dtf %>% as.matrix()
-    fit.mvnorm <- Rfast::mvnorm.mle(beta.matx)
-    return(fit.mvnorm)
-}
-
-#' get fit mvnorm from public dataset
-#'
-#' @return output of Rfast::mvnorm.mle
-#' @export
-#'
-#' @examples
-getPublicMvnormFit <- function() {
-    ##### Range of observed value #########
-    dds.extraction <- embedded_CounTable2observedValues()
-    beta_observed.dtf <- dds.extraction$beta
-    list_fit.mvnorm <- getListMvnormFit(beta_observed.dtf)
-    return(list_fit.mvnorm)
-}
-
-
-#' get fit mvnorm list
-#'
-#' @param beta.dtf
-#' @param n_genes
-#' @import stats
-#' @import purrr
-#' @import dplyr
-#' @return list of Rfast::mvnorm.mle
-#' @export
-#'
-#' @examples
-getListMvnormFit <- function(beta.dtf, n_clusters = 5) {
-    dtf2cluster <- beta.dtf %>% dplyr::select(c("betaG", "betaGE"))
-    cluster_kmean <- stats::kmeans(dtf2cluster, n_clusters)$cluster
-    list_fit.mvnorm <- purrr::map(
-        .x = 1:n_clusters,
-        ~ mvnormFitting(beta.dtf[cluster_kmean == .x, ])
-    )
-    return(list_fit.mvnorm)
-}
diff --git a/src/v3/HTRsim/R/utils.R b/src/v3/HTRsim/R/utils.R
deleted file mode 100644
index afff9880c16b13eba732dee85ee9362f2d8f3c8d..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/R/utils.R
+++ /dev/null
@@ -1,68 +0,0 @@
-
-
-#' build Venn diagramm
-#'
-#' @param liste_genes
-#' @import ggVennDiagram
-#' @import ggplot2
-#' @return graph venn driagramm
-#' @export
-#'
-#' @examples
-buildVennDiag <- function(liste_gene) {
-    venn <- ggVennDiagram::Venn(liste_gene)
-    data <- ggVennDiagram::process_data(venn)
-    p <- ggplot() +
-        # 1. region count layer
-        geom_sf(aes(fill = id), data = venn_region(data)) +
-        # 2. set edge layer
-        geom_sf(color = "#000000", size = 0.3, data = venn_setedge(data), show.legend = FALSE) +
-        # 3. set label layer
-        geom_sf_text(aes(label = name), data = venn_setlabel(data)) +
-        # 4. region label layer
-        geom_sf_label(
-            aes(
-                label = scales::percent(count / sum(count))
-            ),
-            data = venn_region(data)
-        ) +
-        theme_void() +
-        theme(legend.position = "none")
-
-    return(p)
-}
-
-#' build Venn diagramm
-#'
-#' @param comparisonDTF
-#' @param title
-#' @import dplyr
-#' @import gridExtra
-#' @return graph venn driagramm
-#' @export
-#'
-#' @examples
-getVennDiagramm <- function(comparisonDTF, title = "") {
-    comparisonDTF <- comparisonDTF %>% dplyr::mutate(label = paste(gene_id, term, sep = "_"))
-    coeff_DE <- comparisonDTF %>%
-        dplyr::filter(actual.label == "DE") %>%
-        .$label
-    coeff_nonDE <- comparisonDTF %>%
-        dplyr::filter(actual.label == "nonDE") %>%
-        .$label
-    pred_DE <- comparisonDTF %>%
-        dplyr::filter(prediction.label == "DE") %>%
-        .$label
-    pred_nonDE <- comparisonDTF %>%
-        dplyr::filter(prediction.label == "nonDE") %>%
-        .$label
-    gene_list_DE <- list(DE = coeff_DE, prediction = pred_DE)
-    p <- buildVennDiag(gene_list_DE)
-    p1 <- p + scale_fill_manual(values = c("#00688B", "#528B8B", "#DCDCDC"))
-    gene_list_nonDE <- list(nonDE = coeff_nonDE, prediction = pred_nonDE)
-    p <- buildVennDiag(gene_list_nonDE)
-    p2 <- p + scale_fill_manual(values = c("#458B00", "#9BCD9B", "#DCDCDC"))
-
-    return(gridExtra::arrangeGrob(p1, p2, nrow = 2, top = title))
-}
-
diff --git a/src/v3/HTRsim/R/workflow.R b/src/v3/HTRsim/R/workflow.R
deleted file mode 100644
index 93d3227fced877b9c8bfba09605e6ddfbf466dd1..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/R/workflow.R
+++ /dev/null
@@ -1,147 +0,0 @@
-#' get design
-#' @param count_table
-#' @import stringr
-#' @import dplyr
-#' @return dataframe
-#' @export
-#'
-#' @examples
-summariseDesign <- function(count_table) {
-  sample_id_list <- colnames(count_table)
-  experimental_design <- sample_id_list %>%
-    stringr::str_split(pattern = "_", simplify = TRUE) %>%
-    .[, c(1, 2)] %>%
-    data.frame()
-  colnames(experimental_design) <- c("genotype", "environment")
-  experimental_design <- experimental_design %>%
-    dplyr::mutate(sample_id = sample_id_list) %>%
-    dplyr::select("sample_id", "genotype", "environment")
-  return(experimental_design)
-}
-
-
-
-#' perform all workflow
-#'
-#' @param n_genes number of genes
-#' @param n_genotypes number of genotypes
-#' @param n_environments number of env
-#' @param sequencing_factor
-#' @param dds.extraction output of dds.extraction
-#' @param max_n_replicates  max number of replicates
-#' @param uniformNumberOfReplicates boolean
-#' @param uniformDispersion boolean
-#' @param n_clusters
-#' @param fixBetaE
-#' @param fixIntercept
-#' @return list
-#' @export
-#'
-#' @examples
-rnaMock <- function(n_genes,
-                    n_genotypes,
-                    n_environments = 2,
-                    max_n_replicates,
-                    sequencing_factor = 2,
-                    uniformNumberOfReplicates = T,
-                    uniformDispersion = T,
-                    dds.extraction = loadEmbedded_ObservedValues(),
-                    n_clusters = 5,
-                    fixBetaE = T,
-                    fixIntercept = T) {
-
-  # -log
-  log.simulation(n_genes, 
-                  n_genotypes, 
-                  max_n_replicates, 
-                  sequencing_factor,
-                  uniformNumberOfReplicates, 
-                  uniformDispersion,
-                  fixBetaE,
-                  fixIntercept,
-                  n_clusters)
-
-  ## Fit mvnorm ##
-  list_fit.mvnorm <- getListMvnormFit(dds.extraction$beta, n_clusters)
-
-  ## -- add higher env effect 
-  list_fit.mvnorm = map(1:n_clusters, function(.x) {list_fit.mvnorm[[.x]]$mu[3] = list_fit.mvnorm[[.x]]$mu[3] + runif(1, -20, 20); list_fit.mvnorm[[.x]]} )
-
-
-  ##### Ground truth ######
-  beta.actual <- getBetaforSimulation(
-    n_genes,
-    n_genotypes,
-    list_fit.mvnorm,
-    n_clusters = n_clusters,
-    fixIntercept = fixIntercept,
-    fixBetaE = fixBetaE
-  )
-
-
-  ##### build input for simulation ####
-  model.matx <- getModelMatrix()
-  log_qij <- getLog_qij(beta.actual, model.matx)
-  mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
-  sample_ids <- colnames(mu_ij)
-  gene_dispersion.vec <- dds.extraction$gene_dispersion
-  dispersion.matrix <- getGenesDispersions(n_genes,
-    sample_ids,
-    dispersion.vec = gene_dispersion.vec,
-    uniformDispersion
-  )
-
-  ##### Design replicates ######
-  designReplication.matx <- getReplicationDesign(
-    max_n_replicates,
-    n_genotypes,
-    n_environments,
-    uniformNumberOfReplicates
-  )
-
-  ##### build counts table ####
-  countTable <- getCountTable(mu_ij, dispersion.matrix,
-    n_genes, n_genotypes,
-    sample_id_list = sample_ids,
-    replication.matx = designReplication.matx
-  )
-
-  design <- summariseDesign(countTable)
-  actualParam <- list(
-    dispersion = dispersion.matrix,
-    beta = beta.actual, listMvrnom = list_fit.mvnorm
-  )
-  return(list(
-    design = design, countTable = countTable,
-    actualParameters = actualParam
-  ))
-}
-
-
-
-
-
-
-log.simulation <- function(n_genes,
-                           n_genotypes,
-                           sequencing_factor,
-                           max_n_replicates,
-                           uniformNumberOfReplicates,
-                           uniformDispersion,
-                           fixBetaE,
-                           fixIntercept,
-                           n_clusters) {
-  embelishment_top <- "\n*       -----  SETUP  -----      *\n"
-  string0 <- "\n#   - Number of genes: %d"
-  string1 <- "\n#   - Number of genotypes: %d"
-  string2 <- "\n#   - Maximum number of replicates: %d"
-  string3 <- "\n#   - Number of replicates uniform: %s"
-  string4 <- "\n#   - Dispersion uniform: %s"
-  string5 <- "\n#   - Intercept fixed: %s"
-  string6 <- "\n#   - BetaE fixed: %s"
-  string7 <- "\n#   - Number of clusters: %s\n"
-  string <- paste(string0, string1, string2, string3, string4, string5, string6, string7, sep = "")
-  embelishment_bottom <- "\n*     -----------------------    *\n"
-  whole_string <- paste(embelishment_top, string, embelishment_bottom, sep = "")
-  futile.logger::flog.info(whole_string, n_genes, n_genotypes, max_n_replicates, uniformNumberOfReplicates, uniformDispersion, fixIntercept, fixBetaE, n_clusters)
-}
diff --git a/src/v3/HTRsim/devtools_history.R b/src/v3/HTRsim/devtools_history.R
deleted file mode 100644
index 3a5e0849a17bb7139f0957ef4a4cf4fe6515e059..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/devtools_history.R
+++ /dev/null
@@ -1,21 +0,0 @@
-usethis::use_build_ignore("devtools_history.R")
-usethis::use_package('tidyverse', type = "depends")
-usethis::use_package('stats')
-usethis::use_package('tidyr')
-usethis::use_package("stringr")
-usethis::use_package("S4Vectors")
-usethis::use_package("DESeq2")
-usethis::use_package("MASS")
-usethis::use_package("purrr")
-usethis::use_package("base")
-usethis::use_package("dplyr")
-usethis::use_package("utils")
-usethis::use_package("Rfast")
-usethis::use_package("data.table")
-usethis::use_package("HTRfit")
-usethis::use_package("rstatix")
-usethis::use_package("ggplot2")
-usethis::use_package("ggVennDiagram")
-usethis::use_package("gridExtra")
-usethis::use_package("glmmTMB")
-
diff --git a/src/v3/HTRsim/man/extractionDDS.Rd b/src/v3/HTRsim/man/extractionDDS.Rd
deleted file mode 100644
index e17f40544216d42c1c903ebf47ed2e18fe80ea73..0000000000000000000000000000000000000000
--- a/src/v3/HTRsim/man/extractionDDS.Rd
+++ /dev/null
@@ -1,17 +0,0 @@
-% Generated by roxygen2: do not edit by hand
-% Please edit documentation in R/extractionFromDDS.R
-\name{extractionDDS}
-\alias{extractionDDS}
-\title{Extract beta distribution from DESEQ2 object}
-\usage{
-extractionDDS(dds_obj)
-}
-\arguments{
-\item{dds_obj}{a DESEQ2 object}
-}
-\value{
-a list containing 1- mean and sd of BetaG 2- mean and sd of BetaE 3- mean and sd of BetaGE 5- mean and sd of gene dispersion
-}
-\description{
-Extract beta distribution from DESEQ2 object
-}
diff --git a/src/v4/HTRSIM/dev/0-dev_history.Rmd b/src/v4/HTRSIM/dev/0-dev_history.Rmd
deleted file mode 100644
index f010b99db76da9217d6b006585c30f5ca26a1483..0000000000000000000000000000000000000000
--- a/src/v4/HTRSIM/dev/0-dev_history.Rmd
+++ /dev/null
@@ -1,83 +0,0 @@
----
-title: "Development actions history"
-output: html_document
-editor_options: 
-  chunk_output_type: console
----
-
-All commands that you use to use when developing packages...
-
-# First time just after creating the project
-
-- Fill the following chunk to create the DESCRIPTION of your package
-
-```{r description, eval=FALSE}
-# Describe your package
-fusen::fill_description(
-  pkg = here::here(),
-  fields = list(
-    Title = "HTRSIM",
-    Description = "To do.",
-    `Authors@R` = c(
-      person("Arnaud", "DUVERMY", email = "aduvermy@ens-lyon1.fr", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-1565-9313")),
-      person(given = "ThinkR", role = "cph")
-    )
-  )
-)
-# Define License with use_*_license()
-usethis::use_mit_license("Arnaud Duvermy")
-```
-
-# Set extra sources of documentation
-
-```{r, eval=FALSE}
-# Install a first time
-remotes::install_local()
-# README
-usethis::use_readme_rmd()
-# NEWS
-#usethis::use_news_md()
-```
-
-**From now, you will need to "inflate" your package at least once to be able to use the following commands. Let's go to your flat template, and come back here later if/when needed.**
-
-
-# Package development tools
-## Use once
-
-```{r, eval=FALSE}
-# Already run
-# Pipe
-#usethis::use_pipe()
-
-# package-level documentation
-#usethis::use_package_doc()
-
-# Add new flat template
-#fusen::add_flat_template("add")
-```
-
-## Use everytime needed
-
-```{r}
-# Simulate package installation
-pkgload::load_all()
-
-# Generate documentation and deal with dependencies
-attachment::att_amend_desc()
-
-# Check the package
-devtools::check()
-```
-
-# Share the package
-
-```{r}
-# set and try pkgdown documentation website
-usethis::use_pkgdown()
-pkgdown::build_site()
-
-# build the tar.gz with vignettes to share with others
-devtools::build(vignettes = TRUE)
-```
-
diff --git a/src/v4/HTRSIM/dev/flat_full.Rmd b/src/v4/HTRSIM/dev/flat_full.Rmd
deleted file mode 100644
index f42b3e0ba7ebb41c608c75acc282d5e701402cfe..0000000000000000000000000000000000000000
--- a/src/v4/HTRSIM/dev/flat_full.Rmd
+++ /dev/null
@@ -1,480 +0,0 @@
----
-title: "flat_full.Rmd for working package"
-output: html_document
-editor_options: 
-  chunk_output_type: console
----
-
-<!-- Run this 'development' chunk -->
-<!-- Store every call to library() that you need to explore your functions -->
-
-```{r development, include=FALSE}
-library(testthat)
-```
-
-<!--
- You need to run the 'description' chunk in the '0-dev_history.Rmd' file before continuing your code there.
-
-If it is the first time you use {fusen}, after 'description', you can directly run the last chunk of the present file with inflate() inside.
---> 
-
-```{r development-load}
-# Load already included functions if relevant
-pkgload::load_all(export_all = FALSE)
-```
-
-
-# Initialize variable to simulate
-
-<!--
-Create a chunk for the core of the function
-
-- The chunk needs to be named `function` at least
-- It contains the code of a documented function
-- The chunk can also be named `function-my_median` to make it easily
-findable in your Rmd
-- Let the `@examples` part empty, and use the next `examples` chunk instead to present reproducible examples
-
-After inflating the template
-
--  This function code will automatically be added in a new file in the "R/" directory
--->
-
-```{r function-init_variable, filename = "simulation_initialization"}
-#' init variable
-#'
-#' @param list_var either c() or output of init_variable
-#' @param name variable name
-#' @param mu either a numeric value or a numeric vector (of length = level)
-#' @param sd either numeric value  or NA
-#' @param level numeric value to specify the number of level to simulate
-#'
-#' @return
-#' a list with initialized variables 
-#' @export
-#'
-#' @examples
-init_variable <- function(list_var = c(), name = "my_variable", mu = c(2,3), sd = NA, level = NA){
-  
-  # avoid space in variable name
-  name <- gsub(" ", "_", name, fixed = TRUE)
-  
-  # only mu specified by user => set level param
-  if (is.na(level) & is.na(sd)) level = length(mu)
-  
-  ## Avoid unexplained errors
-  inputs_checking(list_var, name, mu, sd, level)
-  # -- init new var
-  list_var[[name]] = fillInVariable(name,  mu, sd, level)
-  return(list_var)
-}
-
-#' Core of the init variable not exported
-#' @inheritParams init_variable
-#' 
-inputs_checking <- function(list_var, name, mu, sd, level){
-  stopifnot(name != "")
-  stopifnot(is.character(name))
-  stopifnot(is.numeric(mu))
-  stopifnot(is.numeric(sd) | is.na(sd))
-  stopifnot(is.numeric(level)) 
-  stopifnot(length(level) == 1)
-  stopifnot(level >= 2)
-  
-  if (!is.null(list_var)) { 
-    msg_e = "Non conformable list_var parameter.\n list_var have to be set as an init_var output or init as c()"
-    if(!is.list(list_var)) stop(msg_e)
-  }
-  if (length(mu) > 1) {
-      stopifnot(length(mu) == level) 
-  }
-  
-  if (is.na(sd)) { 
-    if(level != length(mu)) {
-    msg_e = "sd was specified as NA. mu should have the same length as level number\n"
-    stop(msg_e)
-    }
-  }
-  # -- Variable already init
-  nameNotInlistVar= identical(which(alreadyInitVariable(list_var, name)), integer(0))
-  if(isFALSE(nameNotInlistVar)) { message(paste(name , "is already initialized in list_var.\nWill be updated", sep = " ")) }
-  return(NULL)
-}
-
-
-
-#' Core of the inputs_checking not exported
-#' @inheritParams init_variable
-#' @param new_var_name string specifying new variable to initialized 
-alreadyInitVariable <- function(list_var, new_var_name){
-  if (is.null(list_var)){ #if list_var = c()
-    return(FALSE)
-  }
-  var_name_in_list = attributes(list_var)$names
-  return( var_name_in_list == new_var_name)
-}
-
-
-#' Core of the init_variable not exported
-#' @inheritParams init_variable
-fillInVariable <- function(name, mu, sd, level){
-  
-  if (length(mu) > 1 | is.na(sd)){  ## Effects given by user
-    level = length(mu)
-    l_labels = paste(name, 1:level, sep = '')
-    l_betaEffects =  mu
-    column_names = c(paste("label", name, sep = "_"), name)
-    sub_obj = build_subObjReturn2User( level, metaData = l_labels ,
-                                       effectsGivenByUser = l_betaEffects, 
-                                       column_names )
-  }
-  ## Effects will be simulated using mvrnorm
-  else sub_obj = as.data.frame(list(mu = mu, sd = sd, level = level))
-
-  return(sub_obj)  
-}
-
-#' Core of the init_variable not exported
-#' @param col_names column names to use
-#' @param effectsGivenByUser list of effect given by user
-#' @param metaData list of labels
-#' @param level numeric value to specify the number of level to simulate
-build_subObjReturn2User <- function(level, metaData, effectsGivenByUser, col_names){
-    sub_obj = list(level = level)
-    data = cbind(metaData, effectsGivenByUser) %>% as.data.frame()
-    colnames(data) <- col_names
-    var_name <- tail(col_names, n=1)
-    data[ ,var_name] <- as.numeric(data[ ,var_name])
-    sub_obj$data = data
-    return(sub_obj)
-}
-```
-
-<!--
-Create a chunk with an example of use for your function
-
-- The chunk needs to be named `examples` at least
-- It contains working examples of your function
-- The chunk is better be named `examples-my_median` to be handled
-correctly when inflated as a vignette
-
-After inflating the template
-
--  This example will automatically be added in the '@examples' part of our function above in the "R/" directory
-- This example will automatically be added in the vignette created from this Rmd template
--->
-
-```{r examples-init_variable}
-l_variable2simulate =  init_variable( name = "genotype", mu = 2, sd = 3, level = 1000) %>% 
-                        init_variable(name = "environment", mu = c(2, 3) )
-```
-
-
-<!--
-Create a chunk with a test of use for your function
-
-- The chunk needs to be named `tests` at least
-- It contains working tests of your function
-- The chunk is better be named `tests-my_median` to be handled
-correctly when inflated as a vignette
-
-After inflating the template
-
--  This test code will automatically be added in the "tests/testthat/" directory
--->
-
-```{r tests-init_variable}
-test_that("inputs_checking tests", {
-  expect_equal(inputs_checking(c(), "varA", 2 , 3, 2), NULL) # everything performed well
-  expect_equal(inputs_checking(c(), "varA", mu = c(2, 3, 4, 6), sd = NA, level = 4), NULL) # everything performed well
-  expect_equal(inputs_checking(c(), "varA", mu = c(2, 3, 4, 6), sd = 8, level = 4), NULL) # everything performed well
-  expect_error(inputs_checking(c(), "varA", c(3, 2, 3) , NA, 2)) # length mu != level
-  expect_error(inputs_checking(c(), "varA", "nonNum_MU" , 3, 4))
-  expect_error(inputs_checking(c(), "varA", 2 , "nonNum_SD", 2))
-  expect_error(inputs_checking(c(), "varA", 2 , 3, "nonNum_LEVEL"))
-  expect_error(inputs_checking(c(), 33, 2 , 3, 2)) # NAME is not characters
-  expect_error(inputs_checking(c(), "", 2 , 3, 2)) # NAME is == "" 
-  expect_error(inputs_checking("nonList", "varA", 2 , 3, 2)) 
-  expected_list <- list(varA = list(mu = 2, sd = 3, level = 1000) %>% as.data.frame())
-  expect_message(inputs_checking(expected_list, "varA", 2 , 3, 2)) 
-
-
-})
-
-test_that("build_subObjReturn2User tests", {
-  expected_list <- list(level = 2, data = list("label_varA" = c("Aa", "Ab") , "varA" = c(1,6)) %>% as.data.frame())
-  expect_equal(build_subObjReturn2User(level = 2, metaData = c("Aa", "Ab"), effectsGivenByUser = c(1,6), col_names = c("label_varA", "varA")), expected_list)
-  expected_list <- list(level = 2, data = list("label_varA" = c("A1", "A2", "A1", "A2") , 
-                                               "label_varB" = c("B2", "B2", "B3", "B3")  ,"effects" = c(1,6, 1, 6)) %>% data.frame())
-  colnames(expected_list$data)[3] <- "varA:varB" 
-  expect_equal(build_subObjReturn2User(level = 2, 
-                                          metaData = list(A = c("A1", "A2", "A1", "A2"), B = c("B2", "B2", "B3", "B3")) %>% as.data.frame(), 
-                                          effectsGivenByUser = c(1,6), col_names = c("label_varA", "label_varB", "varA:varB")), expected_list)
-})
-
-test_that("already_init_var tests", {
-  l_var <- list(varA = list(mu = 2, sd = 3, level = 100) %>% as.data.frame(), varB = list(mu = 3, sd = 1, level = 3) %>% as.data.frame() )
-  expect_equal(alreadyInitVariable(l_var, 'new_var'), c(FALSE, FALSE) )
-  expect_equal(alreadyInitVariable(l_var, 'varA'), c(TRUE, FALSE))
-  expect_equal(alreadyInitVariable(l_var, 'varB'), c(FALSE, TRUE) )
-  expect_equal(alreadyInitVariable(c(), 'new_var'), FALSE )
-})
-
-
-test_that("fillInVariable tests", {
-  expected_df <- list(mu = 8, sd = 2, level = 20) %>% as.data.frame()
-  expect_equal(fillInVariable('varA', 8, 2, 20), expected_df )
-  
-  expected_df <- list(level = 3, data = list(label_varA = c('varA1',"varA2","varA3"), varA = c(1,3,4)) %>% as.data.frame())
-  expect_equal(fillInVariable('varA', c(1, 3, 4), NA, 3), expected_df )
-  
-})
-
-
-test_that("init_variable test", {
-  expected_list <- list(varA = list(mu = 2, sd = 3, level = 1000) %>% as.data.frame())
-  expect_equal(init_variable( name = "varA", mu = 2, sd = 3, level = 1000), expected_list)
-  expected_list <- list(varA = list(level = 2, data = list(label_varA = c("varA1", "varA2"), varA = c(2,3)) %>% as.data.frame()))
-  expect_equal(init_variable(name = "varA", mu = c(2, 3) ), expected_list)
-  
-})
-
-```
-
-
-# Add interaction to simulate
-
-<!--
-Create a chunk for the core of the function
-
-- The chunk needs to be named `function` at least
-- It contains the code of a documented function
-- The chunk can also be named `function-my_median` to make it easily
-findable in your Rmd
-- Let the `@examples` part empty, and use the next `examples` chunk instead to present reproducible examples
-
-After inflating the template
-
--  This function code will automatically be added in a new file in the "R/" directory
--->
-
-```{r function-add_interaction, filename = "simulation_initialization"}
-
-#' init variable
-#'
-#' @inheritParams init_variable
-#' @param between_var vector of variable (already initialized) in interaction
-#'
-#' @return
-#' a list with initialized interaction 
-#' @export
-#'
-#' @examples
-add_interaction <- function(list_var , between_var, mu, sd){
-  name_interaction =  paste(between_var, collapse = ":")
-  check_input2interaction(name_interaction, list_var , between_var, mu, sd)
-  interactionCombinations = getNumberOfCombinationsInInteraction(list_var, between_var)
-  list_var$interactions[[name_interaction]] = fillInInteraction(list_var,between_var, mu ,sd , level = interactionCombinations )
-  return(list_var)
-  
-} 
-
-#' Core of the inputs_checking not exported
-#' @inheritParams add_interaction
-#' @param name_interaction string specifying the name of the interaction (example : "varA:varB") 
-check_input2interaction <- function( name_interaction, list_var ,between_var, mu , sd ){
-  bool_checkInteractionValidity <- function(between_var, listVar){
-      nb_varInInteraction = length(between_var)
-      stopifnot( nb_varInInteraction > 1)
-      existingVar_nb = getListVar(list_var) %in% between_var %>% sum()
-      if (existingVar_nb != nb_varInInteraction ){
-        return(FALSE)
-      } 
-      else return(TRUE)
-  }
-  bool_valid_interaction = bool_checkInteractionValidity(between_var, list_var)
-  if(!bool_valid_interaction) {
-    stop("At least one variable in between_var is not declared\nVariable not init cannot be used in an interaction")
-  }
-  
-  requestedNumberOfValues = getNumberOfCombinationsInInteraction(list_var, between_var)
-  if(is.na(sd) & requestedNumberOfValues != length(mu) ){
-      msg_e = "sd was specified as NA. mu should have the same length as possible interactions number\n"
-      msg_e2 = paste(requestedNumberOfValues, "interactions values are requested")
-      stop(paste(msg_e, msg_e2))
-
-  }
-  level = requestedNumberOfValues
-  inputs_checking(list_var$interactions, name_interaction, mu, sd, level)
-}
-
-#' Core of the add_interaction not exported
-#' @inheritParams add_interaction
-getNumberOfCombinationsInInteraction <- function(list_var, between){
-      levelInlistVar = getGivenAttribute(list_var, "level") %>% unlist()
-      n_combinations = prod(levelInlistVar[between]) 
-      return(n_combinations)
-}
-
-#' Core of the add_interaction not exported
-#' @inheritParams add_interaction
-fillInInteraction <- function(list_var, between, mu, sd, level){
-  if (length(mu) > 1 | is.na(sd)){ ## Effects given by user
-    
-      l_levels = getGivenAttribute(list_var, "level") %>% unlist()
-      l_levelsOfInterest = l_levels[between]
-      l_labels_varOfInterest = getLabels(l_variables2labelized = between, l_nb_label = l_levelsOfInterest ) 
-      
-      grid_combination = getGridCombination(l_labels_varOfInterest)
-      n_combinations = dim(grid_combination)[1]
-      column_names = c( paste("label", between, sep = "_"), paste(between, collapse = ":"))
-      sub_dtf = build_subObjReturn2User(level = n_combinations,
-                                           metaData = grid_combination, effectsGivenByUser = mu, 
-                                           col_names = column_names )
-  }
-  ## Effects simulated using mvrnorm
-  else sub_dtf = list(mu = mu, sd = sd, level = level) %>% as.data.frame()
-  return(sub_dtf)
-}
-
-
-
-############ GLOBAL FUNCTIONS ################
-#' GLOBAL function 
-#' @inheritParams add_interaction
-getListVar <- function(input) attributes(input)$names
-
-
-#' GLOBAL function 
-#' @inheritParams add_interaction
-#' @param attribute string of an attribute to get back in all occurrence of the list
-getGivenAttribute <- function(list_var, attribute){
-  l = lapply(list_var, FUN = function(var) var[[attribute]]) 
-  l_withoutNull = l[!vapply(l, is.null, logical(1))]
-  return(l_withoutNull)
-}
-
-
-#' GLOBAL function 
-#' @param l_variables2labelized list of variable
-#' @param l_nb_label list of numeric representing the nb of level per variable
-getLabels <- function(l_variables2labelized, l_nb_label ){
-    getVarNameLabel <- function(name, level) {
-        list_label = paste(name, 1:level, sep = "")
-        return(list_label)
-    }
-    listLabels = lapply(l_variables2labelized, FUN = function(var) getVarNameLabel(var, l_nb_label[var]))
-    return(listLabels)
-}
-
-#' GLOBAL function 
-#' @param l_labels list of label per variable
-getGridCombination <- function(l_labels){
-  return(expand.grid(l_labels) %>% as.data.frame() )
-}
-```
-
-<!--
-Create a chunk with an example of use for your function
-
-- The chunk needs to be named `examples` at least
-- It contains working examples of your function
-- The chunk is better be named `examples-my_median` to be handled
-correctly when inflated as a vignette
-
-After inflating the template
-
--  This example will automatically be added in the '@examples' part of our function above in the "R/" directory
-- This example will automatically be added in the vignette created from this Rmd template
--->
-
-```{r examples-add_interaction}
-l_variable2simulate =  init_variable( name = "genotype", mu = 2, sd = 3, level = 2) %>% 
-                        init_variable(name = "environment", mu = c(2, 3) )  %>%
-                          add_interaction(between_var = c("genotype", "environment"), mu = c(1,2,3,4), sd = NA) 
-```
-
-
-<!--
-Create a chunk with a test of use for your function
-
-- The chunk needs to be named `tests` at least
-- It contains working tests of your function
-- The chunk is better be named `tests-my_median` to be handled
-correctly when inflated as a vignette
-
-After inflating the template
-
--  This test code will automatically be added in the "tests/testthat/" directory
--->
-
-```{r tests-add_interaction}
-
-test_that("add_interaction test", {
-  
-  expect_error(add_interaction(c() , "interaction", 2, 3))
-  expect_error(add_interaction(c() , c("unkownVarA", "unknownVarB"), 8, 3)) # 
-  
-  init_var_list <- list(varA = list(mu = 2, sd = 3, level = 2) %>% as.data.frame(), 
-                     varB = list(level = 2, data = list(label_varB = c("varB1", "varB2"), varB = c(2,3)) %>% as.data.frame()))
-  
-  ########## interaction effect to simulate  #########
-  expected_list <- list(varA = list(mu = 2, sd = 3, level = 2) %>% as.data.frame(), 
-                        varB = list(level = 2, 
-                                    data = list(label_varB = c("varB1","varB2"), varB = c(2,3)) %>% data.frame()), 
-                        interactions = list("varA:varB" = list(mu = 8, sd = 3, level = 4) %>% as.data.frame()))
-  expect_equal(add_interaction(init_var_list , c("varA", "varB"), 8, sd = 3), expected_list) # 
-  
-  ########## interaction given by user #########
-  expected_list <- list(varA = list(mu = 2, sd = 3, level = 2) %>% as.data.frame(), 
-                        varB = list(level = 2, 
-                                    data = list(label_varB = c("varB1","varB2"), varB = c(2,3)) %>% data.frame()), 
-                        interactions = list("varA:varB" = list(level = 4, 
-                                                              data = list(label_varA = factor(c("varA1","varA2","varA1","varA2")), 
-                                                                          label_varB =  factor(c("varB1","varB1","varB2","varB2")),
-                                                                          "varA:varB" = c(1,2,3,4)) %>% data.frame())))
-  colnames(expected_list$interactions$`varA:varB`$data) = c("label_varA", "label_varB", "varA:varB")
-  expect_equal(add_interaction(init_var_list , c("varA", "varB"), c(1,2,3,4), sd = NA), expected_list)
-  
-
-    ########## triple interactions #########
-    init_var_list <- list(varA = list(mu = 2, sd = 3, level = 2) %>% as.data.frame(), 
-                          varB = list(mu = 1, sd = 9, level = 2) %>% as.data.frame(),
-                          varC = list(level = 2, data = list(label_varB = c("varB1", "varB2"), varB = c(2,3)) %>% as.data.frame()))
-    expected_list <- list(varA = list(mu = 2, sd = 3, level = 2) %>% as.data.frame(),
-                          varB = list(mu = 1, sd = 9, level = 2) %>% as.data.frame(), 
-                          varC = list(level = 2, 
-                                    data = list(label_varB = c("varB1","varB2"), varB = c(2,3)) %>% data.frame()), 
-                        interactions = list("varA:varB:varC" = list(mu = 8, sd = 3, level = 8) %>% as.data.frame()))
-
-    expect_equal(add_interaction(init_var_list , c("varA", "varB", "varC"), 8, 3), expected_list)
-
-})
-
-```
-
-
-<!--
-# There can be development actions
-
-Create a chunk with 'development' actions
-
-- The chunk needs to be named `development` or `dev`
-- It contains functions that are used for package development only
-- Note that you may want to store most of these functions in the 0-dev_history.Rmd file
-
-These are only included in the present flat template file, their content will not be part of the package anywhere else.
--->
-
-```{r development-inflate, eval=FALSE}
-# Keep eval=FALSE to avoid infinite loop in case you hit the knit button
-# Execute in the console directly
-fusen::inflate(flat_file = "dev/flat_full.Rmd", vignette_name = "Get started")
-```
-
-
-```{r development, include=FALSE}
-library(covr)
-covr::package_coverage()
-# Dans un R ou le package n'est pas loader !!
-```
diff --git a/src/v4/HTRSIM/dev/flat_full_bis.Rmd b/src/v4/HTRSIM/dev/flat_full_bis.Rmd
deleted file mode 100644
index 74e04aeb221c405bc62b4df6a68ce49022e033ef..0000000000000000000000000000000000000000
--- a/src/v4/HTRSIM/dev/flat_full_bis.Rmd
+++ /dev/null
@@ -1,7277 +0,0 @@
----
-title: "flat_full.Rmd for working package"
-output: html_document
-editor_options: 
-  chunk_output_type: console
----
-
-<!-- Run this 'development' chunk -->
-<!-- Store every call to library() that you need to explore your functions -->
-
-```{r development, include=FALSE}
-library(testthat)
-```
-
-<!--
- You need to run the 'description' chunk in the '0-dev_history.Rmd' file before continuing your code there.
-
-If it is the first time you use {fusen}, after 'description', you can directly run the last chunk of the present file with inflate() inside.
---> 
-
-```{r development-load}
-# Load already included functions if relevant
-pkgload::load_all(export_all = FALSE)
-```
-
-
-```{r function-utils, filename = "utils"}
-#' Join two data frames using data.table
-#'
-#' @param d1 Data frame 1
-#' @param d2 Data frame 2
-#' @param k1 Key columns for data frame 1
-#' @param k2 Key columns for data frame 2
-#' @importFrom data.table data.table
-#' @return Joined data frame
-#' @export
-#'
-#' @examples
-#'
-#' # Example usage:
-#' df1 <- data.frame(id = 1:5, value = letters[1:5])
-#' df2 <- data.frame(id = 1:5, category = LETTERS[1:5])
-#' join_dtf(df1, df2, "id", "id")
-join_dtf <- function(d1, d2, k1, k2) {
-  d1.dt_table <- data.table::data.table(d1, key = k1)
-  d2.dt_table <- data.table::data.table(d2, key = k2)
-  dt_joined <- d1.dt_table[d2.dt_table, allow.cartesian = TRUE]
-  return(dt_joined %>% as.data.frame())
-}
-
-
-
-#' Clean Variable Name
-#'
-#' This function removes digits, spaces, and special characters from a variable name.
-#' If any of these are present, they will be replaced with an underscore '_'.
-#'
-#' @param name The input variable name to be cleaned.
-#' @return The cleaned variable name without digits, spaces, or special characters.
-#'
-#' @details
-#' This function will check the input variable name for the presence of digits,
-#' spaces, and special characters. If any of these are found, they will be removed
-#' from the variable name and replaced with an underscore '_'. Additionally, it will
-#' check if the cleaned name is not one of the reserved names "interactions" or
-#' "correlations" which are not allowed as variable names.
-#' @export
-#' @examples
-#' clean_variable_name("my_var,:&$àà(-i abl23 e_na__ç^me ")
-clean_variable_name <- function(name){
-      message("Variable name should not contain digits, spaces, or special characters.\nIf any of these are present, they will be removed from the variable name.")
-      # avoid space in variable name
-      name <- gsub(" ", "_", name, fixed = TRUE)
-      # avoid digit in variable name
-      name <-  gsub("[0-9]", "", name)
-      # avoid special character in variable name
-      name <-  gsub("[[:punct:]]", "", name)
-  
-      forbidden_names <- c("interactions", "correlations")
-      if (name %in% forbidden_names) {
-        forbidden_str <- paste(forbidden_names, collapse = " and ")
-        stop(forbidden_str, "cannot be used as variable name")
-      }
-      return(name)
-    
-}
-
-
-#' Get Setting Table
-#'
-#' Create a table of experimental settings.
-#'
-#' This function takes various experimental parameters and returns a data frame
-#' that represents the experimental settings.
-#'
-#' @param n_genes Number of genes in the experiment.
-#' @param max_replicates Maximum number of replicates for each gene.
-#' @param min_replicates Minimum number of replicates for each gene.
-#' @param lib_size  total number of reads
-#'
-#' @return A data frame containing the experimental settings with their corresponding values.
-#' @export
-getSettingsTable <- function(n_genes, max_replicates, min_replicates, lib_size ){
-  
-  settings_df <- data.frame(parameters = c("# genes", "Max # replicates", "Min # replicates", "Library size" ),
-                            values = c(n_genes, max_replicates, min_replicates, lib_size))
-  rownames(settings_df) <- NULL
-  
-  return(settings_df)
-}
-
-```
-
-
-```{r test-dataFromUser}
-# Test unitaires pour la fonction join_dtf
-test_that("join_dtf réalise la jointure correctement", {
-  # Création de données de test
-  df1 <- data.frame(id = 1:5, value = letters[1:5])
-  df2 <- data.frame(id = 1:5, category = LETTERS[1:5])
-  
-  # Exécution de la fonction
-  result <- HTRSIM::join_dtf(df1, df2, "id", "id")
-  
-  # Vérification des résultats
-  expect_true(is.data.frame(result))
-  expect_equal(nrow(result), 5)
-  expect_equal(ncol(result), 3)
-  expect_equal(names(result), c("id", "value", "category"))
-  expect_true(all.equal(result$id, df1$id))
-  expect_true(all.equal(result$id, df2$id))
-})
-
-
-test_that("clean_variable_name correctly removes digits, spaces, and special characters", {
-  expect_equal(clean_variable_name("my variable name"), "myvariablename")
-  expect_equal(clean_variable_name("variable_1"), "variable")
-  expect_equal(clean_variable_name("^spec(ial#chars! "), "specialchars")
-})
-
-test_that("clean_variable_name handles reserved names properly", {
-  expect_error(clean_variable_name("interactions"))
-  expect_error(clean_variable_name("correlations"))
-})
-```
-
-
-```{r function-init_variable, filename = "simulation_initialization"}
-#' Initialize variable
-#'
-#' @param list_var Either c() or output of init_variable
-#' @param name Variable name
-#' @param mu Either a numeric value or a numeric vector (of length = level)
-#' @param sd Either numeric value or NA
-#' @param level Numeric value to specify the number of levels to simulate
-#'
-#' @return
-#' A list with initialized variables
-#' @export
-#'
-#' @examples
-init_variable <- function(list_var = c(), name = "myVariable", mu = c(2,3), sd = NA, level = NA) {
-  
-  name <- clean_variable_name(name)
-  
-  # Only mu specified by user => set level param
-  if (is.na(level) && is.na(sd)) {
-    level <- length(mu)
-  }
-  
-  # Validate inputs
-  inputs_checking(list_var, name, mu, sd, level)
-  
-  if (endsWithDigit(name)) {
-    warning("Names ending with digits are not allowed. They will be removed from the variable name.")
-    name <- removeDigitsAtEnd(name)
-  }
-  
-  # Initialize new variable
-  list_var[[name]] <- fillInVariable(name, mu, sd, level)
-  
-  return(list_var)
-}
-
-
-
-#' Check if a string ends with a digit
-#'
-#' This function checks whether a given string ends with a digit.
-#'
-#' @param string The input string to be checked
-#' @return \code{TRUE} if the string ends with a digit, \code{FALSE} otherwise
-#' @export
-#' @examples
-#' endsWithDigit("abc123")  # Output: TRUE
-#' endsWithDigit("xyz")     # Output: FALSE
-endsWithDigit <- function(string) {
-  lastChar <- substring(string, nchar(string))
-  return(grepl("[0-9]", lastChar))
-}
-
-#' Remove digits at the end of a string
-#'
-#' This function removes any digits occurring at the end of a given string.
-#'
-#' @param string The input string from which digits are to be removed
-#' @return The modified string with digits removed from the end
-#' @export
-#' @examples
-#' removeDigitsAtEnd("abc123")  # Output: "abc"
-#' removeDigitsAtEnd("xyz")     # Output: "xyz"
-removeDigitsAtEnd <- function(string) {
-  return(gsub("\\d+$", "", string))
-}
-
-
-#' Check Input Parameters
-#'
-#' This function checks the validity of the input parameters for initializing a variable.
-#' It ensures that the necessary conditions are met for the input parameters.
-#'
-#' @param list_var List containing the variables to be initialized.
-#' @param name Name of the variable.
-#' @param mu Mean of the variable.
-#' @param sd Standard deviation of the variable (optional).
-#' @param level Number of levels for categorical variables.
-#' 
-#' @return NULL
-#' @export
-#'
-#' @examples
-#' inputs_checking(list_var = c(), name = "var1", mu = 0, sd = 1, level = 2)
-inputs_checking <- function(list_var, name, mu, sd, level) {
-  stopifnot(name != "")
-  stopifnot(is.character(name))
-  stopifnot(is.numeric(mu))
-  stopifnot(is.numeric(sd) | is.na(sd))
-  stopifnot(is.numeric(level))
-  stopifnot(length(level) == 1)
-  stopifnot(level >= 2)
-
-  if (!is.null(list_var)) {
-    error_msg <- "Non conformable list_var parameter.\nlist_var must be set as an init_var output or initialized as c()"
-    if (!is.list(list_var)) {
-      stop(error_msg)
-    }
-  }
-
-  if (length(mu) > 1) {
-    stopifnot(length(mu) == level)
-  }
-
-  if (is.na(sd)) {
-    if (level != length(mu)) {
-      stop("sd was specified as NA. mu should have the same length as the number of levels\n")
-    }
-  }
-
-  # Check if variable is already initialized
-  name_not_in_list_var <- identical(which(already_init_variable(list_var, name)), integer(0))
-  if (!name_not_in_list_var) {
-    message(paste(name, "is already initialized in list_var.\nIt will be updated", sep = " "))
-  }
-
-  return(NULL)
-}
-
-
-#' Check if Variable is Already Initialized
-#'
-#' This function checks if a variable is already initialized in the variable list.
-#'
-#' @param list_var A list object representing the variable list.
-#' @param new_var_name A character string specifying the name of the new variable.
-#'
-#' @return TRUE if the variable is already initialized, FALSE otherwise.
-#' @export
-#'
-#' @examples
-#' my_list <- list(var1 = 1, var2 = 2, var3 = 3)
-#' already_initialized <- already_init_variable(list_var = my_list, new_var_name = "myVariable")
-already_init_variable <- function(list_var, new_var_name) {
-  if (is.null(list_var)) {
-    return(FALSE)
-  }
-  
-  var_names <- names(list_var)
-  return(new_var_name %in% var_names)
-}
-
-#' Fill in Variable
-#'
-#' This function fills in a variable with simulated data based on the provided parameters.
-#'
-#' @param name The name of the variable.
-#' @param mu A numeric value or a numeric vector (of length = level) representing the mean.
-#' @param sd A numeric value representing the standard deviation, or NA if not applicable.
-#' @param level A numeric value specifying the number of levels to simulate.
-#'
-#' @return A data frame or a list containing the simulated data for the variable.
-#' @export
-#'
-#' @examples
-#' variable_data <- fillInVariable(name = "myVariable", mu = c(2, 3), sd = NA, level = 2)
-#' 
-fillInVariable <- function(name, mu, sd, level) {
-  
-  if (length(mu) > 1 | is.na(sd)) {  # Effects given by user
-    level <- length(mu)
-    l_labels <- paste(name, 1:level, sep = '')
-    l_betaEffects <- mu
-    column_names <- c(paste("label", name, sep = "_"), name)
-    sub_obj <- build_sub_obj_return_to_user(level, metaData = l_labels,
-                                       effectsGivenByUser = l_betaEffects,
-                                       column_names)
-  } else {
-    sub_obj <- as.data.frame(list(mu = mu, sd = sd, level = level))
-  }
-  
-  return(sub_obj)  
-}
-
-#' Build Sub Object to Return to User
-#'
-#' This function builds the sub-object to be returned to the user.
-#'
-#' @param level A numeric value specifying the number of levels.
-#' @param metaData A list of labels.
-#' @param effectsGivenByUser A list of effects given by the user.
-#' @param col_names A character vector specifying the column names to use.
-#' @importFrom utils tail
-
-#'
-#' @return A list with the sub-object details.
-build_sub_obj_return_to_user <- function(level, metaData, effectsGivenByUser, col_names) {
-  sub_obj <- list(level = level)
-  data <- cbind(metaData, effectsGivenByUser) %>% as.data.frame()
-  colnames(data) <- col_names
-  var_name <- utils::tail(col_names, n = 1)
-  data[, var_name] <- as.numeric(data[, var_name])
-  sub_obj$data <- data
-  return(sub_obj)
-}
-
-
-#' Add interaction
-#'
-#' @param list_var A list of variables (already initialized)
-#' @param between_var A vector of variable names to include in the interaction
-#' @param mu Either a numeric value or a numeric vector (of length = level)
-#' @param sd Either numeric value or NA
-#'
-#' @return
-#' A list with initialized interaction
-#' @export
-#'
-#' @examples
-add_interaction <- function(list_var, between_var, mu, sd = NA) {
-  name_interaction <- paste(between_var, collapse = ":")
-  check_input2interaction(name_interaction, list_var, between_var, mu, sd)
-  
-  # Check the number of variables in the interaction
-  if (length(between_var) > 3) {
-    stop("Cannot initialize an interaction with more than 3 variables.")
-  }
-  
-  interactionCombinations <- getNumberOfCombinationsInInteraction(list_var, between_var)
-  list_var$interactions[[name_interaction]] <- fillInInteraction(list_var, between_var, mu, sd, interactionCombinations)
-  return(list_var)
-}
-
-#' Check input for interaction
-#'
-#' @param name_interaction String specifying the name of the interaction (example: "varA:varB")
-#' @param list_var A list of variables (already initialized)
-#' @param between_var A vector of variable names to include in the interaction
-#' @param mu Either a numeric value or a numeric vector (of length = level)
-#' @param sd Either numeric value or NA
-#'
-#' @return
-#' NULL (throws an error if the input is invalid)
-#' @export
-check_input2interaction <- function(name_interaction, list_var, between_var, mu, sd) {
-  # Check if variables in between_var are declared and initialized
-  bool_checkInteractionValidity <- function(between_var, list_var) {
-    nb_varInInteraction <- length(unique(between_var))
-    stopifnot(nb_varInInteraction > 1)
-    existingVar_nb <- getListVar(list_var) %in% between_var %>% sum()
-    if (existingVar_nb != nb_varInInteraction) {
-      return(FALSE)
-    } else {
-      return(TRUE)
-    }
-  }
-  
-  bool_valid_interaction <- bool_checkInteractionValidity(between_var, list_var)
-  if (!bool_valid_interaction) {
-    stop("At least one variable in between_var is not declared. Variable not initialized cannot be used in an interaction.")
-  }
-  
-  requestedNumberOfValues <- getNumberOfCombinationsInInteraction(list_var, between_var)
-  if (is.na(sd) && requestedNumberOfValues != length(mu)) {
-    msg_e <- "sd was specified as NA. mu should have the same length as the possible number of interactions:\n"
-    msg_e2 <- paste(requestedNumberOfValues, "interaction values are requested.")
-    stop(paste(msg_e, msg_e2))
-  }
-  
-  level <- requestedNumberOfValues
-  inputs_checking(list_var$interactions, name_interaction, mu, sd, level)
-}
-
-#' Get the number of combinations in an interaction
-#'
-#' @param list_var A list of variables (already initialized)
-#' @param between A vector of variable names to include in the interaction
-#'
-#' @return
-#' The number of combinations in the interaction
-#' @export
-getNumberOfCombinationsInInteraction <- function(list_var, between) {
-  levelInlistVar <- getGivenAttribute(list_var, "level") %>% unlist()
-  n_combinations <- prod(levelInlistVar[between]) 
-  return(n_combinations)
-}
-
-#' getGridCombination
-#'
-#' Generates all possible combinations of labels.
-#'
-#' @param l_labels List of label vectors
-#'
-#' @return A data frame with all possible combinations of labels
-#' @export
-#'
-#' @examples
-#' l_labels <- list(
-#'   c("A", "B", "C"),
-#'   c("X", "Y")
-#' )
-#' getGridCombination(l_labels)
-getGridCombination <- function(l_labels) {
-  grid <- expand.grid(l_labels)
-  colnames(grid) <- paste("label", seq_along(l_labels), sep = "_")
-  return(grid)
-}
-
-
-
-#' Get grid combination from list_var
-#'
-#' @param list_var A list of variables (already initialized)
-#'
-#' @return
-#' The grid combination between variable in list_var
-#' @export
-generateGridCombination_fromListVar <- function (list_var){
-  l_levels <- getGivenAttribute(list_var, "level") %>% unlist()
-  vars <- names(l_levels)
-  l_levels <- l_levels[vars]
-  l_labels <- getLabels(l_variables2labelized = vars, l_nb_label = l_levels)
-  gridComb <- getGridCombination(l_labels)
-  colnames(gridComb) <- paste("label", vars, sep = "_")
-  return(gridComb)
-}
-
-
-#' Fill in interaction
-#'
-#' @param list_var A list of variables (already initialized)
-#' @param between A vector of variable names to include in the interaction
-#' @param mu Either a numeric value or a numeric vector (of length = level)
-#' @param sd Either numeric value or NA
-#' @param level Number of interactions
-#'
-#' @return
-#' A data frame with the filled-in interaction values
-#' @export
-fillInInteraction <- function(list_var, between, mu, sd, level) {
-  if (length(mu) > 1 || is.na(sd)) {
-    l_levels <- getGivenAttribute(list_var, "level") %>% unlist()
-    l_levelsOfInterest <- l_levels[between]
-    l_labels_varOfInterest <- getLabels(l_variables2labelized = between, l_nb_label = l_levelsOfInterest ) 
-    
-    grid_combination <- getGridCombination(l_labels_varOfInterest)
-    n_combinations <- dim(grid_combination)[1]
-    column_names <- c(paste("label", between, sep = "_"), paste(between, collapse = ":"))
-    sub_dtf <- build_sub_obj_return_to_user(level = n_combinations,
-                                            metaData = grid_combination,
-                                            effectsGivenByUser = mu, 
-                                            col_names = column_names)
-  } else {
-    sub_dtf <- list(mu = mu, sd = sd, level = level) %>% as.data.frame()
-  }
-  
-  return(sub_dtf)
-}
-
-#' Get the list of variable names
-#'
-#' @param input R list, e.g., output of init_variable
-#'
-#' @return
-#' A character vector with the names of variables
-getListVar <- function(input) attributes(input)$names
-
-#' Get a given attribute from a list of variables
-#'
-#' @param list_var A list of variables (already initialized)
-#' @param attribute A string specifying the attribute to retrieve in all occurrences of the list
-#'
-#' @return
-#' A list without NULL values
-getGivenAttribute <- function(list_var, attribute) {
-  l <- lapply(list_var, FUN = function(var) var[[attribute]]) 
-  l_withoutNull <- l[!vapply(l, is.null, logical(1))]
-  return(l_withoutNull)
-}
-
-
-#' @return
-#' A list of labels per variable
-#' Get labels for variables
-#'
-#' @param l_variables2labelized A list of variables
-#' @param l_nb_label A list of numeric values representing the number of levels per variable
-#'
-#' @return
-#' A list of labels per variable
-getLabels <- function(l_variables2labelized, l_nb_label) {
-  getVarNameLabel <- function(name, level) {
-    list_label <- paste(name, 1:level, sep = "")
-    return(list_label)
-  }
-  
-  listLabels <- Map(getVarNameLabel, l_variables2labelized, l_nb_label)
-  return(listLabels)
-}
-
-```
-
-
-```{r tests-init_variable}
-
-test_that("endsWithDigit returns the correct result", {
-  expect_true(endsWithDigit("abc123"))
-  expect_false(endsWithDigit("xyz"))
-})
-
-test_that("removeDigitsAtEnd removes digits at the end of a string", {
-  expect_equal(removeDigitsAtEnd("abc123"), "abc")
-  expect_equal(removeDigitsAtEnd("xyz"), "xyz")
-})
-
-
-test_that("init_variable initializes a variable correctly", {
-  # Test case 1: Initialize a variable with default parameters
-  list_var <- init_variable()
-  expect_true("myVariable" %in% names(list_var))
-  expect_equal(nrow(list_var$myVariable$data), 2)
-  
-  # Test case 2: Initialize a variable with custom parameters
-  list_var <- init_variable(name = "custom_variable", mu = c(1, 2, 3), sd = 0.5, level = 3)
-  expect_true("customvariable" %in% names(list_var))
-  expect_equal(nrow(list_var$customvariable$data), 3)
-})
-
-test_that("inputs_checking performs input validation", {
-  
-  # Test case 1: Invalid inputs - sd is NA but mu has unique values
-  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = 2, sd = NA, level = 2))
-  
-  # Test case 2: Invalid inputs - empty name
-  expect_error(inputs_checking(list_var = c(), name = "", mu = 2, sd = NA, level = 2))
-  
-  # Test case 3: Invalid inputs - non-numeric mu
-  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = "invalid", sd = NA, level = 2))
-  
-  # Test case 4: Invalid inputs - non-numeric sd
-  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = 2, sd = "invalid", level = 2))
-  
-  # Test case 5: Invalid inputs - level less than 2
-  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = 2, sd = NA, level = 1))
-  
-  # Test case 6: Invalid inputs - mu and level have different lengths
-  expect_error(inputs_checking(list_var = c(), name = "myVariable", mu = c(1, 2, 3), sd = NA, level = 2))
-  
-  # Test case 7: Valid inputs
-  expect_silent(inputs_checking(list_var = c(), name = "myVariable", mu = c(1, 2, 3), sd = NA, level = 3))
-})
-
-
-
-test_that("already_init_variable checks if a variable is already initialized", {
-  list_var <- init_variable()
-  
-  # Test case 1: Variable not initialized
-  list_var <- init_variable(name = "custom_variable", mu = c(2, 3), sd = NA, level = 2)
-  expect_true(already_init_variable(list_var, "customvariable"))
-  
-  # Test case 2: Variable already initialized 
-  expect_false(already_init_variable(list_var, "myVariable"))
-  
-})
-
-test_that("fillInVariable fills in variable correctly", {
-  # Test case 1: Effects given by user
-  sub_obj <- fillInVariable("myVariable", c(1, 2, 3), NA, NA)
-  expect_equal(sub_obj$level, 3)
-  expect_equal(ncol(sub_obj$data), 2)
-  
-  # Test case 2: Effects simulated using mvrnorm
-  sub_obj <- fillInVariable("myVariable", 2, 0.5, 3)
-  expect_equal(sub_obj$level, 3)
-  expect_equal(sub_obj$sd, 0.5)
-  expect_equal(sub_obj$mu, 2)
-})
-
-test_that("build_sub_obj_return_to_user returns the expected output", {
-  level <- 3
-  metaData <- paste("label", 1:level, sep = "_")
-  effectsGivenByUser <- c(2, 3, 4)
-  col_names <- c("metadata", "effects")
-  
-  result <- build_sub_obj_return_to_user(level, metaData, effectsGivenByUser, col_names)
-  
-  expect_equal(result$level, level)
-  expect_identical(result$data$metadata, metaData)
-  expect_identical(result$data$effects, effectsGivenByUser)
-  
-  
-})
-
-test_that("generateGridCombination_fromListVar returns expected output", {
-  result <- generateGridCombination_fromListVar(init_variable())
-  expect <- data.frame(label_myVariable = c("myVariable1", "myVariable2"))
-  expect_equal(nrow(result), nrow(expect))
-  expect_equal(ncol(result), ncol(expect))
-})
-
-test_that("add_interaction adds an interaction between variables", {
-  list_var <- init_variable(name = "varA", mu = 1, sd = 1, level = 2)
-  list_var <- init_variable(list_var, name = "varB", mu = 2, sd = 1, level = 3)
-  list_var <- add_interaction(list_var, between_var = c("varA", "varB"), mu = 0.5, sd = 3)
-  expect_true("varA:varB" %in% names(list_var$interactions))
-})
-
-test_that("add_interaction throws an error for invalid variables", {
-  list_var <- init_variable(name = "varA", mu = 1, sd = 1, level = 2)
-  expect_error(add_interaction(list_var, between_var = c("varA", "varB"), mu = 0.5, sd = NA))
-})
-
-
-test_that("getNumberOfCombinationsInInteraction calculates the number of combinations", {
-  list_var <- init_variable(name = "varA", mu = 1, sd = 1, level = 2)
-  list_var <- init_variable(list_var, name = "varB", mu = 2, sd = 1, level = 3)
-  expect_equal(getNumberOfCombinationsInInteraction(list_var, c("varA", "varB")), 6)
-})
-
-test_that("getLabels generates labels for variables", {
-  labels <- getLabels(c("varA", "varB"), c(2, 3))
-  expect_equal(length(labels), 2)
-  expect_equal(length(labels[[1]]), 2)
-  expect_equal(length(labels[[2]]), 3)
-})
-
-test_that("getGridCombination generates a grid of combinations", {
-  labels <- list(A = c("A1", "A2"), B = c("B1", "B2", "B3"))
-  grid_combination <- getGridCombination(labels)
-  expect_equal(dim(grid_combination), c(6, 2))
-})
-
-```
-
-```{r function-mvrnorm, filename = "datafrommvrnorm_manipulations" }
-#' getInput2mvrnorm
-#'
-#' @inheritParams init_variable
-#'
-#' @return
-#' a list that can be used as input for MASS::mvrnorm
-#' @export
-#'
-#' @examples
-#' list_var <- init_variable(name = "my_var", mu = 0, sd = 2, level = 3)
-#' getInput2mvrnorm(list_var)
-getInput2mvrnorm <- function(list_var){
-  # -- pick up sd provided by user
-  variable_standard_dev <- getGivenAttribute(list_var, attribute = "sd") %>% unlist()
-  interaction_standard_dev <- getGivenAttribute(list_var$interactions, attribute = "sd") %>% unlist()
-  list_stdev_2covmatx <- c(variable_standard_dev, interaction_standard_dev)
-  if (is.null(list_stdev_2covmatx)) ## NO SD provided
-    return(list(mu = NULL, covMatrix = NULL))
-
-  # - COV matrix
-  covar_userProvided = getGivenAttribute(list_var$correlations, "covar")
-  covMatrix <- getCovarianceMatrix(list_stdev_2covmatx, covar_userProvided)
-
-  # -- MU
-  variable_mu <- getGivenAttribute(list_var, attribute = "mu") %>% unlist()
-  interaction_mu <- getGivenAttribute(list_var$interactions, attribute = "mu") %>% unlist()
-  list_mu <- c(variable_mu, interaction_mu)
-
-  return(list(mu = list_mu, covMatrix = covMatrix))
-
-}
-
-
-#' getCovarianceMatrix 
-#' @param list_stdev standard deviation list
-#' @param list_covar covariance list
-#' 
-#' @return
-#' covariance matrix
-#' @export
-#'
-#' @examples
-#' vector_sd <- c(1,2, 3)
-#' names(vector_sd) <- c("varA", "varB", "varC")
-#' vector_covar <- c(8, 12, 24)
-#' names(vector_covar) <- c("varA.varB", "varA.varC", "varB.varC")
-#' covMatrix <- getCovarianceMatrix(vector_sd, vector_covar)
-getCovarianceMatrix <- function(list_stdev, list_covar){
-  # -- cov(A, A) = sd(A)^2
-  diag_cov <- list_stdev^2
-  dimension <- length(diag_cov)
-  covariance_matrix <- matrix(0,nrow = dimension, ncol = dimension)
-  diag(covariance_matrix) <- diag_cov
-  colnames(covariance_matrix) <- paste("label", names(diag_cov), sep = "_")
-  rownames(covariance_matrix) <- paste("label", names(diag_cov), sep = "_")
-  names_covaration <- names(list_covar)
-
-  ###### -- utils -- #####
-  convertDF <- function(name, value){
-    ret <- data.frame(value)
-    colnames(ret) <- name
-    ret
-  }
-
-  ## -- needed to use reduce after ;)
-  l_covarUserDf <- lapply(names_covaration, function(n_cov) convertDF(n_cov, list_covar[n_cov] ))
-  covariance_matrix2ret <- Reduce(fillInCovarMatrice, x = l_covarUserDf, init =  covariance_matrix)
-  covariance_matrix2ret
-}
-
-
-#' Fill in Covariance Matrix
-#'
-#' This function updates the covariance matrix with the specified covariance value between two variables.
-#'
-#' @param covarMatrice The input covariance matrix.
-#' @param covar A data frame containing the covariance value between two variables.
-#' @return The updated covariance matrix with the specified covariance value filled in.
-#' @export
-#' @examples
-#' covarMat <- matrix(0, nrow = 3, ncol = 3)
-#' colnames(covarMat) <- c("label_varA", "label_varB", "label_varC")
-#' rownames(covarMat) <- c("label_varA", "label_varB", "label_varC")
-#' covarValue <- data.frame("varA.varB" = 0.5)
-#' fillInCovarMatrice(covarMatrice = covarMat, covar = covarValue)
-fillInCovarMatrice <- function(covarMatrice, covar){
-  varsInCovar <- strsplit(colnames(covar), split = "[.]") %>% unlist()
-  index_matrix <- paste("label",varsInCovar, sep  = "_")
-  covar_value <- covar[1,1]
-  covarMatrice[index_matrix[1], index_matrix[2]] <- covar_value
-  covarMatrice[index_matrix[2], index_matrix[1]] <- covar_value
-  return(covarMatrice)
-}
-
-
-#' Check if a matrix is positive definite
-#' This function checks whether a given matrix is positive definite, i.e., all of its eigenvalues are positive.
-#' @param mat The matrix to be checked.
-#' @return A logical value indicating whether the matrix is positive definite.
-#' @export
-#' @examples
-#' # Create a positive definite matrix
-#' mat1 <- matrix(c(4, 2, 2, 3), nrow = 2)
-#' is_positive_definite(mat1)
-#' # Expected output: TRUE
-#'
-#' # Create a non-positive definite matrix
-#' mat2 <- matrix(c(4, 2, 2, -3), nrow = 2)
-#' is_positive_definite(mat2)
-#' # Expected output: FALSE
-#'
-#' # Check an empty matrix
-#' mat3 <- matrix(nrow = 0, ncol = 0)
-#' is_positive_definite(mat3)
-#' # Expected output: TRUE
-#'
-#' @export
-is_positive_definite <- function(mat) {
-  if (nrow(mat) == 0 && ncol(mat) == 0) return(TRUE)
-  eigenvalues <- eigen(mat)$values
-  all(eigenvalues > 0)
-}
-
-
-
-#' getGeneMetadata
-#'
-#' @inheritParams init_variable
-#' @param n_genes Number of genes to simulate
-#'
-#' @return
-#' metadata matrix
-#' 
-#' @export
-#'
-#' @examples
-#' list_var <- init_variable()
-#' metadata <- getGeneMetadata(list_var, n_genes = 10)
-getGeneMetadata <- function(list_var, n_genes) {
-  metaData <- generateGridCombination_fromListVar(list_var)
-  n_combinations <- dim(metaData)[1]
-  genes_vec <- base::paste("gene", 1:n_genes, sep = "")
-  geneID <- rep(genes_vec, each = n_combinations)
-  metaData <- cbind(geneID, metaData)
-  
-  return(metaData)
-}
-
-
-#' getDataFromMvrnorm
-#'
-#' @inheritParams init_variable 
-#' @param input2mvrnorm list with mu and covariance matrix, output of getInput2mvrnorm
-#' @param n_genes Number of genes to simulate
-#' 
-#' @return
-#' data simulated from multivariate normal distribution
-#' 
-#' @export
-#'
-#' @examples
-#' list_var <- init_variable()
-#' input <- getInput2mvrnorm(list_var)
-#' simulated_data <- getDataFromMvrnorm(list_var, input, n_genes = 10)
-getDataFromMvrnorm <- function(list_var, input2mvrnorm, n_genes = 1) {
-  if (is.null(input2mvrnorm$covMatrix))
-    return(list())
-  
-  metaData <- getGeneMetadata(list_var, n_genes)
-  n_tirages <- dim(metaData)[1]
-  
-  mtx_mvrnormSamplings <- samplingFromMvrnorm(n_samplings = n_tirages, 
-                                             l_mu = input2mvrnorm$mu, matx_cov = input2mvrnorm$covMatrix)
-  
-  dataFromMvrnorm <- cbind(metaData, mtx_mvrnormSamplings)
-  
-  return(list(dataFromMvrnorm))
-}
-
-
-#' getDataFromMvrnorm
-#'
-#' @param n_samplings number of samplings using mvrnorm
-#' @param l_mu vector of mu
-#' @param matx_cov covariance matrix
-#'
-#' @return
-#' samples generated from multivariate normal distribution
-#' 
-#' @export
-#'
-#' @examples
-#' n <- 100
-#' mu <- c(0, 0)
-#' covMatrix <- matrix(c(1, 0.5, 0.5, 1), ncol = 2)
-#' samples <- samplingFromMvrnorm(n_samplings = n, l_mu = mu, matx_cov = covMatrix)
-samplingFromMvrnorm <- function(n_samplings, l_mu, matx_cov) {
-  mvrnormSamp <-  MASS::mvrnorm(n = n_samplings, mu = l_mu, Sigma = matx_cov, empirical = TRUE)
-  
-  return(mvrnormSamp)
-}
-
-```
-
-```{r  tests-mvrnorm}
-test_that("getInput2mvrnorm returns the correct list", {
-  list_var <- init_variable()
-  input <- getInput2mvrnorm(list_var)
-  expect_is(input, "list")
-  expect_true("mu" %in% names(input))
-  expect_true("covMatrix" %in% names(input))
-})
-
-
-test_that("fillInCovarMatrice returns the correct matrix", {
-  covarMat <- matrix(0, nrow = 3, ncol = 3)
-  colnames(covarMat) <- c("label_varA", "label_varB", "label_varC")
-  rownames(covarMat) <- c("label_varA", "label_varB", "label_varC")
-  covarValue <- data.frame("varA.varB" = 18)
-  matrice <- fillInCovarMatrice(covarMatrice = covarMat, covar = covarValue)
-  
-  expected_matrice <- matrix(0, nrow = 3, ncol = 3)
-  colnames(expected_matrice) <- c("label_varA", "label_varB", "label_varC")
-  rownames(expected_matrice) <- c("label_varA", "label_varB", "label_varC")
-  expected_matrice["label_varA", "label_varB"] <- 18
-  expected_matrice["label_varB", "label_varA"] <- 18
-  expect_identical(matrice, expected_matrice)
-})
-
-test_that("getCovarianceMatrix returns the correct covariance matrix", {
-  vector_sd <- c(1,2, 3)
-  names(vector_sd) <- c("varA", "varB", "varC")
-  vector_covar <- c(8, 12, 24)
-  names(vector_covar) <- c("varA.varB", "varA.varC", "varB.varC")
-  covMatrix <- getCovarianceMatrix(vector_sd, vector_covar)
-  
-  expect_is(covMatrix, "matrix")
-  expect_equal(dim(covMatrix), c(3, 3))
-  expected_matrix <- matrix(c(1,8,12,8,4,24, 12,24,9), nrow = 3,  byrow = T)
-  rownames(expected_matrix) <- c("label_varA", "label_varB", "label_varC")
-  colnames(expected_matrix) <- c("label_varA", "label_varB", "label_varC")
-  expect_equal(expected_matrix, covMatrix)
-})
-
-test_that("getGeneMetadata returns the correct metadata", {
-  list_var <- init_variable()
-  n_genes <- 10
-  metadata <- getGeneMetadata(list_var, n_genes)
-  expect_is(metadata, "data.frame")
-  expect_equal(colnames(metadata), c("geneID", paste("label", (attributes(list_var)$names), sep ="_")))
-  expect_equal(nrow(metadata), n_genes * list_var$myVariable$level)
-})
-
-test_that("getDataFromMvrnorm returns the correct data", {
-  list_var <- init_variable(name = "varA", mu = 1, sd = 4, level = 3) %>% init_variable("varB", mu = 2, sd = 1, level = 2)
-  input <- getInput2mvrnorm(list_var)
-  n_genes <- 10
-  n_samplings <- n_genes * (list_var$varA$level ) * (list_var$varB$level )
-  data <- getDataFromMvrnorm(list_var, input, n_genes)
-  expect_is(data, "list")
-  expect_equal(length(data), 1)
-  expect_is(data[[1]], "data.frame")
-  expect_equal(nrow(data[[1]]), n_samplings)
-  
-})
-
-test_that("getDataFromMvrnomr returns empty list",{
-  list_var <- init_variable()
-  input <- getInput2mvrnorm(list_var)
-  n_genes <- 10
-  n_samplings <- n_genes * (list_var$varA$level ) * (list_var$varB$level )
-  data <- getDataFromMvrnorm(list_var, input, n_genes)
-  expect_is(data, "list")
-  expect_equal(data, list())
-})
-
-test_that("samplingFromMvrnorm returns the correct sampling", {
-  n_samplings <- 100
-  l_mu <- c(1, 2)
-  matx_cov <- matrix(c(1, 0.5, 0.5, 1), ncol = 2)
-  sampling <- samplingFromMvrnorm(n_samplings, l_mu, matx_cov)
-  
-  expect_is(sampling, "matrix")
-  expect_equal(dim(sampling), c(n_samplings, length(l_mu)))
-})
-
-
-```
-
-```{r function-dataFromUser, filename = "datafromUser_manipulations"}
-
-#' Get data from user
-#'
-#'
-#' @param list_var A list of variables (already initialized)
-#' @return A list of data to join
-#' @export
-#'
-#' @examples
-#' getDataFromUser(init_variable())
-getDataFromUser <- function(list_var) {
-  variable_data2join <- getGivenAttribute(list_var, "data")
-  id_var2join <- names(variable_data2join)
-  interaction_data2join <- getGivenAttribute(list_var$interactions, "data")
-  id_interaction2join <- names(interaction_data2join)
-  
-  data2join <- list(variable_data2join, interaction_data2join) %>%
-    unlist(recursive = FALSE)
-  id2join <- c(id_var2join, id_interaction2join)
-  l_data2join <- lapply(id2join, function(id) data2join[[id]])
-  
-  return(l_data2join)
-}
-
-```
-
-```{r test-dataFromUser}
-# Test unitaires pour la fonction join_dtf
-test_that("join_dtf réalise la jointure correctement", {
-  # Création de données de test
-  df1 <- data.frame(id = 1:5, value = letters[1:5])
-  df2 <- data.frame(id = 1:5, category = LETTERS[1:5])
-  
-  # Exécution de la fonction
-  result <- join_dtf(df1, df2, "id", "id")
-  
-  # Vérification des résultats
-  expect_true(is.data.frame(result))
-  expect_equal(nrow(result), 5)
-  expect_equal(ncol(result), 3)
-  expect_equal(names(result), c("id", "value", "category"))
-  expect_true(all.equal(result$id, df1$id))
-  expect_true(all.equal(result$id, df2$id))
-})
-
-
-# Test unitaires pour la fonction getDataFromUser
-test_that("getDataFromUser renvoie les données appropriées", {
-  # Exécution de la fonction
-  list_var <- init_variable()
-  list_var <- init_variable(list_var, "second_var")
-  result <- getDataFromUser(list_var)
-  
-  # Vérification des résultats
-  expect_true(is.list(result))
-  expect_equal(length(result), 2)
-  expect_true(all(sapply(result, is.data.frame)))
-  expect_equal(names(result[[1]]), c("label_myVariable", "myVariable"))
-})
-```
-
-```{r function-setCorrelation, filename =  "setCorrelation"}
-
-#' Compute Covariation from Correlation and Standard Deviations
-#'
-#' This function computes the covariation between two variables (A and B) given their correlation and standard deviations.
-#'
-#' @param corr_AB The correlation coefficient between variables A and B.
-#' @param sd_A The standard deviation of variable A.
-#' @param sd_B The standard deviation of variable B.
-#'
-#' @return The covariation between variables A and B.
-#' @export
-#' @examples
-#' corr <- 0.7
-#' sd_A <- 3
-#' sd_B <- 4
-#' compute_covariation(corr, sd_A, sd_B)
-compute_covariation <- function(corr_AB, sd_A, sd_B) {
-  cov_AB <- corr_AB * sd_A * sd_B
-  return(cov_AB)
-}
-
-
-#' Get Standard Deviations for Variables in Correlation
-#'
-#' This function extracts the standard deviations for the variables involved in the correlation.
-#'
-#' @param list_var A list containing the variables and their attributes.
-#' @param between_var A character vector containing the names of the variables involved in the correlation.
-#'
-#' @return A numeric vector containing the standard deviations for the variables in the correlation.
-#' @export
-#' @examples
-#' list_var <- init_variable(name = "varA", mu = 0, sd = 5, level = 3) %>%
-#'          init_variable(name = "varB", mu = 0, sd = 25, level = 3)
-#' between_var <- c("varA", "varB")
-#' getStandardDeviationInCorrelation(list_var, between_var)
-getStandardDeviationInCorrelation <- function(list_var, between_var){
-  for (var in between_var) sd_List <- getGivenAttribute(list_var, "sd")
-  for (var in between_var) sd_ListFromInteraction <- getGivenAttribute(list_var$interactions, "sd")
-  sd_List <- c(sd_List, sd_ListFromInteraction)
-  return(unname(unlist(sd_List[between_var])))
-}
-
-
-
-#' Set Correlation between Variables
-#'
-#' Set the correlation between two or more variables in a simulation.
-#'
-#' @param list_var A list containing the variables used in the simulation, initialized using \code{\link{init_variable}}.
-#' @param between_var Character vector specifying the names of the variables to set the correlation between.
-#' @param corr Numeric value specifying the desired correlation between the variables.
-#'
-#' @return Updated \code{list_var} with the specified correlation set between the variables.
-#'
-#' @details The function checks if the variables specified in \code{between_var} are declared and initialized in the \code{list_var}. It also ensures that at least two variables with provided standard deviation are required to set a correlation in the simulation.
-#' The specified correlation value must be within the range (-1, 1). The function computes the corresponding covariance between the variables based on the specified correlation and standard deviations.
-#' The correlation information is then added to the \code{list_var} in the form of a data frame containing the correlation value and the corresponding covariance value.
-#' @export
-#' @examples
-#' list_var <- init_variable(name = "varA", mu = 0, sd = 5, level = 3) %>%
-#'             init_variable(name = "varB", mu = 0, sd = 25, level = 3)
-#' list_var <- set_correlation(list_var, between_var = c("varA", "varB"), corr = 0.7)
-set_correlation <- function(list_var, between_var, corr) {
-
-  # Check if variables in between_var are declared and initialized
-  bool_checkBetweenVarValidity <- function(between_var, list_var) {
-    nb_varInCorrelation <- length(unique(between_var))
-    stopifnot(nb_varInCorrelation > 1)
-    # -- check also for interaction
-    varInitialized <- c(getListVar(list_var), getListVar(list_var$interactions))
-    existingVar_nb <- varInitialized  %in% between_var %>% sum()
-    if (existingVar_nb != nb_varInCorrelation) {
-      return(FALSE)
-    } else {
-      return(TRUE)
-    }
-  }
-  
-  name_correlation <- paste(between_var, collapse = ".")
-  bool_valid_corr <- bool_checkBetweenVarValidity(between_var, list_var)
-  if (!bool_valid_corr) {
-    stop("At least one variable in between_var is not declared. Variable not initialized cannot be used in a correlation.")
-  }
-  
-  vec_standardDev <- getStandardDeviationInCorrelation(list_var, between_var)
-  if (length(vec_standardDev) < 2) {
-    stop("Exactly two variables with provided standard deviation are required to set a correlation in simulation.")
-  }
-  # Validate the specified correlation value to be within the range [-1, 1]
-  if (corr < -1 || corr > 1) {
-    stop("Invalid correlation value. Correlation must be in the range [-1, 1].")
-  }
-  
-  name_interaction <- paste(between_var, collapse = ":")
-  corr <- data.frame(cor = corr, covar = compute_covariation(corr, vec_standardDev[1], vec_standardDev[2] ))
-  list_var$correlations[[name_correlation]] <- corr
-  return(list_var)
-}
-
-
-```
-
-```{r  test-setcorrelation}
-
-test_that("compute_covariation returns the correct covariation", {
-  # Test case 1: Positive correlation
-  corr <- 0.7
-  sd_A <- 3
-  sd_B <- 4
-  expected_cov <- corr * sd_A * sd_B
-  actual_cov <- compute_covariation(corr, sd_A, sd_B)
-  expect_equal(actual_cov, expected_cov)
-
-  # Test case 2: Negative correlation
-  corr <- -0.5
-  sd_A <- 2.5
-  sd_B <- 3.5
-  expected_cov <- corr * sd_A * sd_B
-  actual_cov <- compute_covariation(corr, sd_A, sd_B)
-  expect_equal(actual_cov, expected_cov)
-
-  # Test case 3: Zero correlation
-  corr <- 0
-  sd_A <- 1
-  sd_B <- 2
-  expected_cov <- corr * sd_A * sd_B
-  actual_cov <- compute_covariation(corr, sd_A, sd_B)
-  expect_equal(actual_cov, expected_cov)
-})
-
-
-# Unit tests for getStandardDeviationInCorrelation
-test_that("getStandardDeviationInCorrelation returns correct standard deviations", {
-  
-  # Initialize list_var
-  list_var <- init_variable(name = "varA", mu = 0, sd = 5, level = 3) %>%
-              init_variable(name = "varB", mu = 0, sd = 25, level = 3)
-  
-  # Test case 1: Two variables correlation
-  between_var_1 <- c("varA", "varB")
-  sd_expected_1 <- c(5, 25)
-  sd_result_1 <- getStandardDeviationInCorrelation(list_var, between_var_1)
-  expect_equal(sd_result_1, sd_expected_1)
-  
-})
-
-
-
-test_that("set_correlation sets the correlation between variables correctly", {
-  # Initialize variables in the list_var
-  list_var <- init_variable(name = "varA", mu = 0, sd = 5, level = 3) %>%
-              init_variable(name = "varB", mu = 0, sd = 25, level = 3)
-
-  # Test setting correlation between varA and varB
-  list_var <- set_correlation(list_var, between_var = c("varA", "varB"), corr = 0.7)
-  
-  corr_result <- list_var$correlations$varA.varB$cor
-  covar_result <- list_var$correlations$varA.varB$covar
-  expect_equal(corr_result, 0.7)
-  expect_equal(covar_result, 87.5)
-
-  # Test setting correlation between varA and varC (should raise an error)
-  expect_error(set_correlation(list_var, between_var = c("varA", "varC"), corr = 0.8),
-               "At least one variable in between_var is not declared. Variable not initialized cannot be used in a correlation.")
-
-  # Test setting correlation with invalid correlation value
-  expect_error(set_correlation(list_var, between_var = c("varA", "varB"), corr = 1.5))
-
-  # Test setting correlation with less than 2 variables with provided standard deviation
-  expect_error(set_correlation(list_var, between_var = c("varA"), corr = 0.7))
-})
-
-
-```
-
-```{r function-simulation , filename = "simulation"}
-#' Build the coefficient table used as simulation input
-#'
-#' Gathers the data returned by \code{getDataFromMvrnorm} and
-#' \code{getDataFromUser} and merges them with \code{getCoefficients}.
-#'
-#' @param list_var A list of variables (already initialized)
-#' @param n_genes Number of genes to simulate (default: 1)
-#' @param input2mvrnorm Input forwarded to \code{getDataFromMvrnorm}. When
-#'   \code{NULL} (default) it is built with \code{getInput2mvrnorm(list_var)}
-#' @return The data frame returned by \code{getCoefficients}
-#' @export
-#' @examples
-#' # Example usage
-#' list_var <- init_variable()
-#' getInput2simulation(list_var, n_genes = 10)
-getInput2simulation <- function(list_var, n_genes = 1, input2mvrnorm = NULL) {
-
-  # Derive the mvrnorm specification from list_var unless the caller gave one
-  if (is.null(input2mvrnorm)) {
-    input2mvrnorm <- getInput2mvrnorm(list_var)
-  }
-
-  simulated_part <- getDataFromMvrnorm(list_var, input2mvrnorm, n_genes)
-  user_part <- getDataFromUser(list_var)
-  getCoefficients(list_var, simulated_part, user_part, n_genes)
-}
-
-#' getCoefficients
-#'
-#' Joins the per-variable tables into a single coefficient data frame.
-#'
-#' The elements of \code{l_dataFromMvrnorm} and \code{l_dataFromUser} are merged
-#' pairwise with \code{join_dtf}; the join keys are the columns of the
-#' right-hand table whose name contains "label". Columns of the result whose
-#' name contains "label_" are converted to factors.
-#'
-#' @param list_var A list of variables (already initialized)
-#' @param l_dataFromMvrnorm Data from the `getDataFromMvrnorm` function. When
-#'   empty, the table returned by `getGeneMetadata(list_var, n_genes)` is used
-#'   as the starting point of the join.
-#' @param l_dataFromUser Data from the `getDataFromUser` function (may be empty).
-#' @param n_genes The number of genes.
-#' @export
-#' @return A dataframe containing the coefficients.
-#' @examples
-#' # Example usage
-#' list_var <- init_variable()
-#' n_genes <- 3
-#' input2mvrnorm <- getInput2mvrnorm(list_var)
-#' l_dataFromMvrnorm <- getDataFromMvrnorm(list_var, input2mvrnorm, n_genes)
-#' l_dataFromUser <- getDataFromUser(list_var)
-#' getCoefficients(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes = n_genes)
-getCoefficients <- function(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes) {
-  # Without simulated data, start the join from the gene metadata
-  if (length(l_dataFromMvrnorm) == 0) {
-    l_dataFromMvrnorm <- list(getGeneMetadata(list_var, n_genes))
-  }
-  l_df2join <- c(l_dataFromMvrnorm, l_dataFromUser)
-
-  # Join two tables on the "label" columns found in the right-hand one
-  join_on_labels <- function(d1, d2) {
-    keys <- grep(pattern = "label", colnames(d2), value = TRUE)
-    join_dtf(d1, d2, k1 = keys, k2 = keys)
-  }
-  df_coef <- as.data.frame(Reduce(join_on_labels, l_df2join))
-
-  # List-style assignment behaves the same for one or many columns; the guard
-  # keeps the zero-column case a no-op instead of an error.
-  idx_column2factor <- grep(pattern = "label_", colnames(df_coef))
-  if (length(idx_column2factor) > 0) {
-    df_coef[idx_column2factor] <- lapply(df_coef[idx_column2factor], as.factor)
-  }
-
-  return(df_coef)
-}
-
-
-#' Get the log_qij values from the coefficient data frame.
-#'
-#' \code{log_qij} is the row-wise sum of every numeric column of
-#' \code{dtf_coef}; \code{NA} values are ignored in the sum.
-#'
-#' @param dtf_coef The coefficient data frame.
-#' @return The coefficient data frame with log_qij column added.
-#' @export
-getLog_qij <- function(dtf_coef) {
-  # vapply always returns a logical vector, even for a zero-column data frame
-  # (sapply would return an empty list there and break the subsetting)
-  is_numeric_column <- vapply(dtf_coef, is.numeric, logical(1))
-  dtf_coef$log_qij <- rowSums(dtf_coef[is_numeric_column], na.rm = TRUE)
-  return(dtf_coef)
-}
-
-
-#' Calculate mu_ij values from log_qij and the basal expression
-#'
-#' Adds two columns to the coefficient data frame:
-#' \code{log_qij_scaled = log_qij + basalExpr} and
-#' \code{mu_ij = exp(log_qij_scaled)}.
-#'
-#' @param dtf_coef Coefficient data frame containing the \code{log_qij} and
-#'   \code{basalExpr} columns
-#'
-#' @return Coefficient data frame with additional \code{log_qij_scaled} and
-#'   \code{mu_ij} columns
-#'
-#' @examples
-#' list_var <- init_variable()
-#' N_GENES <- 5
-#' dtf_coef <- getInput2simulation(list_var, N_GENES)
-#' dtf_coef <- getLog_qij(dtf_coef)
-#' dtf_coef <- addBasalExpression(dtf_coef, N_GENES, c(10, 20, 0))
-#' getMu_ij(dtf_coef)
-#' @export
-getMu_ij <- function(dtf_coef) {
-  # Fail with an explicit message instead of a cryptic replacement-length error
-  missing_columns <- setdiff(c("log_qij", "basalExpr"), colnames(dtf_coef))
-  if (length(missing_columns) > 0) {
-    stop("Missing column(s) in dtf_coef: ", paste(missing_columns, collapse = ", "))
-  }
-
-  # [[ ]] avoids the partial name matching that `$` performs on data frames
-  dtf_coef$log_qij_scaled <- dtf_coef[["log_qij"]] + dtf_coef[["basalExpr"]]
-  dtf_coef$mu_ij <- exp(dtf_coef$log_qij_scaled)
-  return(dtf_coef)
-}
-
-#' getMu_ij_matrix
-#'
-#' Reshapes the long coefficient table into a gene x sample matrix of
-#' \code{mu_ij} values. Rows are the \code{geneID} values; columns are the
-#' combinations of the columns whose name contains "label", sorted by name.
-#' Combinations without a value are filled with 0.
-#'
-#' @param dtf_coef A dataframe containing the coefficients, with \code{geneID},
-#'   \code{mu_ij} and at least one "label" column.
-#' @importFrom reshape2 dcast
-#' @importFrom stats as.formula
-#' @export
-#' @return A Mu_ij matrix.
-getMu_ij_matrix <- function(dtf_coef) {
-  column_names <- colnames(dtf_coef)
-  l_var <- column_names[grepl(pattern = "label", column_names)]
-  if (length(l_var) == 0) stop("no variable label detected")
-
-  # geneID ~ label_a + label_b + ...
-  str_formula <- paste("geneID", paste(l_var, collapse = " + "), sep = " ~ ")
-  dtf_Muij <- reshape2::dcast(dtf_coef,
-                              formula = stats::as.formula(str_formula),
-                              value.var = "mu_ij", drop = FALSE)
-  dtf_Muij[is.na(dtf_Muij)] <- 0
-
-  # drop = FALSE keeps the column (and its name) when there is a single sample
-  mtx_Muij <- as.matrix(data.frame(dtf_Muij[, -1, drop = FALSE],
-                                   row.names = dtf_Muij[, 1]))
-  mtx_Muij <- mtx_Muij[, order(colnames(mtx_Muij)), drop = FALSE]
-  return(mtx_Muij)
-}
-
-#' getSubCountsTable
-#'
-#' Draws negative binomial counts for the samples flagged in
-#' \code{l_bool_replication}. Column names of the result are the selected
-#' sample names suffixed with \code{_<replicateID>}.
-#'
-#' @param matx_Muij The Mu_ij matrix.
-#' @param matx_dispersion The dispersion matrix.
-#' @param replicateID The replication identifier.
-#' @param l_bool_replication A boolean vector indicating the replicates.
-#' @importFrom stats rnbinom
-#'
-#' @return A subcounts table, or \code{NULL} when no sample is selected.
-getSubCountsTable <- function(matx_Muij, matx_dispersion, replicateID, l_bool_replication) {
-  # Nothing to simulate when no sample is flagged for this replicate
-  if (!any(l_bool_replication)) {
-    return(NULL)
-  }
-
-  mu_selected <- matx_Muij[, l_bool_replication, drop = FALSE]
-  dispersion_selected <- matx_dispersion[, l_bool_replication, drop = FALSE]
-  n_genes <- nrow(mu_selected)
-  n_samples <- ncol(mu_selected)
-
-  draws <- stats::rnbinom(n_genes * n_samples,
-                          size = dispersion_selected,
-                          mu = mu_selected)
-  matx_kij <- matrix(draws, nrow = n_genes, ncol = n_samples)
-  # Draws that came back as NA are reported as zero counts
-  matx_kij[is.na(matx_kij)] <- 0
-
-  colnames(matx_kij) <- paste(colnames(mu_selected), replicateID, sep = "_")
-  rownames(matx_kij) <- rownames(mu_selected)
-  return(matx_kij)
-}
-
-
-```
-
-```{r test-simulation}
-
-
-# Test case 1: Check if the function returns a data frame
-test_that("getInput2simulation returns a data frame", {
-  list_var <- init_variable()
-  result <- getInput2simulation(list_var)
-  # expect_is() is deprecated in testthat 3e; expect_s3_class() is its replacement
-  expect_s3_class(result, "data.frame")
-  expected <- data.frame(geneID = c("gene1", "gene1"),
-                         label_myVariable = as.factor(c("myVariable1", "myVariable2")),
-                         myVariable = c(2, 3))
-  expect_equal(result, expected)
-})
-
-# Test for getCoefficients function
-test_that("getCoefficients returns the correct output", {
-  # Create dummy data
-  n_genes <- 3
-  list_var = init_variable()
-  # Call the function
-  coefficients <- getCoefficients(list_var, list(), list(), n_genes)
-  
-  # Check the output
-  expect_equal(nrow(coefficients), n_genes*list_var$myVariable$level)
-  expect_equal(colnames(coefficients), c("geneID", "label_myVariable")) 
-})
-
-# Test for getMu_ij_matrix function
-test_that("getMu_ij_matrix returns the correct output", {
-  # Create a dummy coefficients dataframe
-  dtf_coef <- data.frame(geneID = c("Gene1", "Gene1", "Gene1"),
-                         label_varA = c("A1", "A2", "A3"),
-                         label_varB = c("B1", "B2", "B3"),
-                         mu_ij = c(1, 2, 3))
-  
-  # Call the function
-  mu_matrix <- getMu_ij_matrix(dtf_coef)
-  # Check the output
-  expect_equal(dim(mu_matrix), c(1, 9)) 
-  
-})
-
-# Test for getSubCountsTable function
-test_that("getSubCountsTable returns the correct output", {
-  # Create dummy data
-  l_genes <- c("gene1", "gene2", "gene3")
-  matx_Muij <- as.matrix(data.frame(sple1 = c(1, 3, 4), sple2 = c(2, 0, 9), sple3 = c(1, 69, 2)))
-  rownames(matx_Muij) <- l_genes
-  matx_dispersion <- matrix(0.5, nrow = 3, ncol = 3)
-  replicateID <- 1
-  l_bool_replication <- c(TRUE, FALSE, TRUE)
-
-  # Call the function (the replicate identifier is passed through the variable)
-  subcounts_table <- getSubCountsTable(matx_Muij, matx_dispersion, replicateID, l_bool_replication)
-
-  # Check the output: only the flagged samples, suffixed with the replicate ID
-  expect_equal(dim(subcounts_table), c(3, 2))
-  expect_equal(rownames(subcounts_table), l_genes)
-  expect_equal(colnames(subcounts_table), c("sple1_1", "sple3_1"))
-})
-
-
-```
-
-
-```{r function-simulation2 , filename = "simulation2"}
-
-#' getReplicationMatrix
-#'
-#' Builds a logical matrix with \code{maxN} rows and one column per sample.
-#' The first \code{minN} entries of each column are \code{TRUE}; each of the
-#' remaining \code{maxN - minN} entries is drawn at random, then every column
-#' is sorted so that its \code{TRUE} values come first.
-#'
-#' @param minN Minimum number of replicates for each sample
-#' @param maxN Maximum number of replicates for each sample
-#' @param n_samples Number of samples (at least 1)
-#' @export
-#' @return A replication matrix indicating which samples are replicated
-getReplicationMatrix <- function(minN, maxN, n_samples) {
-  # seq_len() is used below; 1:n_samples would yield c(1, 0) for zero samples
-  if (n_samples < 1) {
-    stop("n_samples must be at least 1.")
-  }
-
-  # Guaranteed replicates: minN rows of TRUE for every sample
-  l_replication_minimum <- lapply(seq_len(n_samples),
-                                  FUN = function(i) rep(TRUE, times = minN))
-
-  # Optional replicates: one independent draw per sample. The draws stay
-  # sample by sample so results for a given seed are unchanged.
-  l_replication_random <- lapply(seq_len(n_samples),
-                                 FUN = function(i) sample(x = c(TRUE, FALSE), size = maxN - minN, replace = TRUE))
-
-  matx_replication_minimum <- do.call(cbind, l_replication_minimum)
-  matx_replication_random <- do.call(cbind, l_replication_random)
-  matx_replication <- rbind(matx_replication_minimum, matx_replication_random)
-
-  # Put TRUE first in each column; matrix() restores the shape because
-  # apply() returns a plain vector when maxN is 1
-  matx_replication <- matrix(apply(matx_replication, 2, sort, decreasing = TRUE),
-                             nrow = maxN)
-
-  return(matx_replication)
-}
-
-#' getCountsTable
-#'
-#' Calls \code{getSubCountsTable} once per row of the replication matrix, using
-#' the row index as replicate identifier, and binds the results column-wise.
-#'
-#' @param matx_Muij Matrix of mean expression values for each gene and sample
-#' @param matx_dispersion Matrix of dispersion values for each gene and sample
-#' @param matx_bool_replication Replication matrix indicating which samples are replicated
-#'
-#' @return A counts table containing simulated read counts for each gene and sample
-getCountsTable <- function(matx_Muij, matx_dispersion, matx_bool_replication) {
-  max_replicates <- nrow(matx_bool_replication)
-
-  # seq_len() keeps the loop empty for a zero-row replication matrix
-  l_countsTable <- lapply(seq_len(max_replicates), function(i) {
-    getSubCountsTable(matx_Muij, matx_dispersion, i, matx_bool_replication[i, ])
-  })
-
-  # cbind() ignores the NULL returned for a replicate without any sample
-  countsTable <- do.call(cbind, l_countsTable)
-
-  return(as.data.frame(countsTable))
-}
-
-#' getDispersionMatrix
-#'
-#' Repeats the per-gene dispersion vector across every sample returned by
-#' \code{getSampleID(list_var)}. Rows are named \code{gene1 ... gene<n_genes>}
-#' and columns carry the sample IDs.
-#'
-#' @param list_var A list of variables (already initialized)
-#' @param n_genes Number of genes
-#' @param dispersion Vector of dispersion values, one per gene
-#' @export
-#'
-#' @return A matrix of dispersion values for each gene and sample
-getDispersionMatrix <- function(list_var, n_genes, dispersion = stats::runif(n_genes, min = 0, max = 1000)) {
-  if (length(dispersion) != n_genes) {
-    stop("dispersion must contain exactly one value per gene.")
-  }
-  l_geneID <- paste0("gene", seq_len(n_genes))
-  l_sampleID <- getSampleID(list_var)
-  n_samples <- length(l_sampleID)
-
-  # Building the matrix directly keeps two dimensions even for a single
-  # sample, where column-subsetting a data frame would collapse to a vector
-  matx_dispersion <- matrix(rep(dispersion, times = n_samples),
-                            nrow = n_genes, ncol = n_samples,
-                            dimnames = list(l_geneID, l_sampleID))
-
-  return(matx_dispersion)
-}
-
-
-
-
-
-#' Replicate rows of a data frame by group
-#'
-#' Replicates the rows of a data frame based on a grouping variable and replication counts for each group.
-#' Replication counts are matched to the groups in their order of first
-#' appearance in \code{df[[group_var]]}. The \code{sampleID} column of the
-#' result is suffixed with \code{_<k>}, where \code{k} numbers the copies of
-#' each original row (the column is created if it does not exist).
-#'
-#' @param df Data frame to replicate
-#' @param group_var Name of the grouping variable in the data frame
-#' @param rep_list Vector of replication counts, one per distinct group
-#' @return Data frame with replicated rows
-#' @examples
-#' df <- data.frame(group = c("A", "B"), value = c(1, 2))
-#' .replicateByGroup(df, "group", c(2, 3))
-#'
-#' @export
-.replicateByGroup <- function(df, group_var, rep_list) {
-  # Work on character values: indexing rep_list with a factor would use its
-  # integer codes (level order) instead of the group names
-  l_group_var <- as.character(df[[group_var]])
-  group_levels <- unique(l_group_var)
-  if (length(rep_list) != length(group_levels)) {
-    stop("rep_list must provide one replication count per group.")
-  }
-  names(rep_list) <- group_levels
-  group_indices <- unname(rep_list[l_group_var])
-  replicated_indices <- rep(seq_len(nrow(df)), times = group_indices)
-  # drop = FALSE keeps a data frame even when df has a single column
-  replicated_df <- df[replicated_indices, , drop = FALSE]
-  suffix_sampleID <- sequence(group_indices)
-  replicated_df[["sampleID"]] <- paste(replicated_df[["sampleID"]], suffix_sampleID, sep = "_")
-  rownames(replicated_df) <- NULL
-  return(replicated_df)
-}
-
-
-
-#' Replicate rows of a data frame
-#'
-#' Repeats every row of \code{df} \code{n} times, keeping the copies of a row
-#' next to each other, and resets the row names.
-#'
-#' @param df Data frame to replicate
-#' @param n Replication factor for each row
-#' @return Data frame with replicated rows
-#' @export
-#' @examples
-#' df <- data.frame(a = 1:3, b = letters[1:3])
-#' .replicateRows(df, 2)
-#'
-.replicateRows <- function(df, n) {
-  row_ids <- seq_len(nrow(df))
-  out <- df[rep(row_ids, each = n), , drop = FALSE]
-  rownames(out) <- NULL
-  out
-}
-
-#' Get sample metadata
-#'
-#' Generates sample metadata from the grid of variable combinations and the
-#' replication matrix: one row per replicate of each sample, with the
-#' \code{label_} prefix removed from the column names.
-#'
-#' @param list_var A list of variables (already initialized)
-#' @param n_genes Number of genes (not used by this function)
-#' @param replicationMatrix Replication matrix; the number of replicates of
-#'   each sample is taken from its column sums
-#' @return Data frame of sample metadata
-#' @importFrom data.table setorderv
-#' @export
-#' @examples
-#' list_var <- init_variable()
-#' n_genes <- 10
-#' replicationMatrix <- generateReplicationMatrix(list_var ,2, 3)
-#' getSampleMetadata(list_var, n_genes,  replicationMatrix)
-getSampleMetadata <- function(list_var, n_genes, replicationMatrix) {
-  l_sampleIDs <- getSampleID(list_var)
-  metaData <- generateGridCombination_fromListVar(list_var)
-  # Order on character values, then restore factors.
-  # NOTE(review): presumably this makes the rows line up with the sorted IDs
-  # returned by getSampleID() - confirm.
-  metaData[] <- lapply(metaData, as.character)
-  data.table::setorderv(metaData, cols = colnames(metaData))
-  metaData[] <- lapply(metaData, as.factor)
-  # sampleID is kept as character so groups are matched by name when replicating
-  metaData$sampleID <- as.character(l_sampleIDs)
-  rep_list <- colSums(replicationMatrix)
-  sampleMetadata <- .replicateByGroup(metaData, "sampleID", rep_list)
-  # Strip only the leading "label_" prefix; gsub() without an anchor would
-  # also remove the token from the middle of a variable name
-  colnames(sampleMetadata) <- sub("^label_", "", colnames(sampleMetadata))
-  return(sampleMetadata)
-}
-
-
-#' getSampleID
-#'
-#' Builds one identifier per row of the grid returned by
-#' \code{generateGridCombination_fromListVar} by pasting the row values
-#' together with "_".
-#'
-#' @param list_var A list of variables (already initialized)
-#' @export
-#' @return A sorted vector of sample IDs
-getSampleID <- function(list_var) {
-  grid <- generateGridCombination_fromListVar(list_var)
-  ids <- unname(apply(grid, 1, paste, collapse = "_"))
-  sort(ids)
-}
-
-
-```
-
-```{r test-simulations}
-
-test_that("getReplicationMatrix returns the correct replication matrix", {
-  minN <- 2
-  maxN <- 4
-  n_samples <- 3
-  expected <- matrix(c(TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, TRUE, FALSE, TRUE, TRUE, TRUE, FALSE), nrow = maxN)
-  
-  set.seed(123)
-  result <- getReplicationMatrix(minN, maxN, n_samples)
-  
-  expect_equal(result, expected)
-})
-
-test_that("getSampleID returns the correct list of sampleID", {
-  # The default variable yields one ID per level
-  expect_equal(getSampleID(init_variable()), c("myVariable1", "myVariable2"))
-})
-
-# Create a test case for getMu_ij
-test_that("getMu_ij returns the correct output", {
-  # Create a sample coefficient data frame
-  dtf_coef <- data.frame(
-    log_qij = c(1, 9, 0.1),
-    basalExpr = c(2, 3, 4)
-  )
-
-  # Call the getMu_ij function
-  result <- getMu_ij(dtf_coef)
-
-  # Check if the mu_ij column is added
-  expect_true("mu_ij" %in% colnames(result))
-
-  # Check the values: mu_ij = exp(log_qij + basalExpr)
-  expect_equal(result$log_qij_scaled, c(3, 12, 4.1))
-  expect_equal(result$mu_ij, exp(c(3, 12, 4.1)))
-})
-
-
-# Create a test case for getLog_qij
-test_that("getLog_qij returns the correct output", {
-  # Create a sample coefficient data frame
-  dtf_coef <- data.frame(
-    beta1 = c(1.2, 2.3, 3.4),
-    beta2 = c(0.5, 1.0, 1.5),
-    non_numeric = c("a", "b", "c")
-  )
-
-  # Call the getLog_qij function
-  result <- getLog_qij(dtf_coef)
-
-  # Check if the log_qij column is added
-  expect_true("log_qij" %in% colnames(result))
-
-  # Check the values of log_qij
-  expected_log_qij <- c(1.7, 3.3, 4.9)
-  expect_equal(result$log_qij, expected_log_qij)
-})
-
-test_that("getCountsTable returns the correct counts table", {
-  gene_ids <- c("gene1", "gene2")
-  sample_ids <- c("sample1", "sample2", "sample3")
-  mat_mu_ij <- matrix(c(1, 2, 3, 4, 5, 6), ncol = 3, byrow = TRUE,
-                      dimnames = list(gene_ids, sample_ids))
-  mat_disp <- matrix(c(0.3, 0.3, 0.3, 0.5, 0.5, 0.5), ncol = 3, byrow = TRUE,
-                     dimnames = list(gene_ids, sample_ids))
-  # Three replicates, every sample present in each of them
-  mat_repl <- matrix(TRUE, nrow = 3, ncol = 3)
-
-  # Counts are random draws, so only the shape and the labels are asserted
-  expected_colnames <- c("sample1_1", "sample2_1", "sample3_1", "sample1_2",
-                         "sample2_2", "sample3_2", "sample1_3", "sample2_3", "sample3_3")
-
-  set.seed(123)
-  result <- getCountsTable(mat_mu_ij, mat_disp, mat_repl)
-
-  expect_true(is.data.frame(result))
-  expect_equal(dim(result), c(2, 9))
-  expect_equal(colnames(result), expected_colnames)
-  expect_equal(rownames(result), gene_ids)
-
-})
-
-
-
-test_that("getSampleMetadata returns expected output", {
-  # Set up input variables
-  list_var <- init_variable()
-  n_genes <- 3
-  replicationMatrix <- matrix(TRUE, nrow = 2, ncol = 2)
-
-  # Run the function
-  result <- getSampleMetadata(list_var, n_genes, replicationMatrix)
-  
-  # Define expected output
-  expected_colnames <- c("myVariable", "sampleID")
-  expect_equal(colnames(result), expected_colnames)
-  
-  # Check the output class
-  expect_true(is.data.frame(result))
-  
-  # check nrow output
-  expect_equal(nrow(result), 4)
-
-})
-
-
-test_that(".replicateByGroup returns the correct output", {
-  df <- data.frame(group = c("A", "B"), value = c(1, 2))
-  result <- .replicateByGroup(df, "group", c(2, 3))
-
-  # df has no sampleID column, so the IDs consist of the replicate suffix only
-  expected <- data.frame(group = c("A", "A", "B", "B", "B"),
-                         value = c(1, 1, 2, 2, 2),
-                         sampleID = c("_1", "_2", "_1", "_2", "_3"))
-  expect_equal(result, expected)
-
-})
-
-
-test_that("getDispersionMatrix returns the correct dispersion matrix", {
-  n_genes = 3
-  list_var = init_variable()
-  dispersion <- 1:3
-  expected <- matrix(1:3,byrow = F, nrow = 3, ncol = 2)
-  rownames(expected) <- c("gene1", "gene2", "gene3")
-  colnames(expected) <- c("myVariable1", "myVariable2")
-  result <- getDispersionMatrix(list_var, n_genes, dispersion )
-  expect_equal(result, expected)
-})
-
-
-
-```
-
-
-```{r function-mock , filename = "mock-rnaSeq" }
-
-#' Check the validity of the dispersion matrix
-#'
-#' Compares the number of columns of the dispersion matrix with the number of
-#' columns of the replication matrix. Rows are not compared.
-#'
-#' @param matx_dispersion Dispersion matrix
-#' @param matx_bool_replication Replication matrix
-#' @return TRUE if both matrices have the same number of columns, FALSE otherwise
-#' @export
-#' @examples
-#' matx_dispersion <- matrix(1:12, nrow = 3, ncol = 4)
-#' matx_bool_replication <- matrix(TRUE, nrow = 3, ncol = 4)
-#' .isDispersionMatrixValid(matx_dispersion, matx_bool_replication)
-.isDispersionMatrixValid <- function(matx_dispersion, matx_bool_replication) {
-  ncol_replication <- dim(matx_bool_replication)[2]
-  ncol_dispersion <- dim(matx_dispersion)[2]
-  if (ncol_replication == ncol_dispersion) {
-    return(TRUE)
-  }
-  return(FALSE)
-}
-
-#' Generate count table
-#'
-#' Draws one negative binomial count per cell of the replicated mu_ij matrix,
-#' using the matching cell of the replicated dispersion matrix as \code{size}.
-#' The result carries the row and column names of \code{mu_ij_matx_rep}; draws
-#' that come back as \code{NA} are set to 0.
-#'
-#' @param mu_ij_matx_rep Replicated mu_ij matrix
-#' @param matx_dispersion_rep Replicated dispersion matrix
-#' @return Count table
-#' @export
-#' @examples
-#' mu_ij_matx_rep <- matrix(1:12, nrow = 3, ncol = 4)
-#' matx_dispersion_rep <- matrix(1:12, nrow = 3, ncol = 4)
-#' generateCountTable(mu_ij_matx_rep, matx_dispersion_rep)
-generateCountTable <- function(mu_ij_matx_rep, matx_dispersion_rep) {
-  message("k_ij ~ Nbinom(mu_ij, dispersion)")
-  matx_dim <- dim(mu_ij_matx_rep)
-  n_genes <- matx_dim[1]
-  n_samples <- matx_dim[2]
-
-  draws <- rnbinom(n_genes * n_samples,
-                   size = matx_dispersion_rep,
-                   mu = mu_ij_matx_rep)
-  mat_countsTable <- matrix(draws, nrow = n_genes, ncol = n_samples)
-  rownames(mat_countsTable) <- rownames(mu_ij_matx_rep)
-  colnames(mat_countsTable) <- colnames(mu_ij_matx_rep)
-  mat_countsTable[is.na(mat_countsTable)] <- 0
-  return(mat_countsTable)
-}
-
-
-#' Perform RNA-seq simulation
-#'
-#' Simulates RNA-seq data based on the input variables: builds the mu_ij
-#' matrix, draws one dispersion value per gene, replicates both matrices
-#' according to a random replication matrix and samples the counts.
-#'
-#' @param list_var List of input variables
-#' @param n_genes Number of genes
-#' @param min_replicates Minimum replication count
-#' @param max_replicates Maximum replication count
-#' @param sequencing_depth Sequencing depth. When not \code{NULL}, the counts
-#'   are rescaled with \code{scaleCountsTable}
-#' @param basal_expression base expression gene
-#' @param dispersion User-provided dispersion vector (optional). Gene
-#'   dispersions are sampled with replacement from its valid values
-#' @return List with elements \code{settings}, \code{init}, \code{groundTruth}
-#'   (effects and per-gene dispersion), \code{counts} and \code{metadata}
-#' @export
-#' @examples
-#' mock_rnaseq(list_var = init_variable(), 
-#'              n_genes = 1000, min_replicates = 2,   
-#'               max_replicates = 4)
-mock_rnaseq <- function(list_var, n_genes, min_replicates, max_replicates, sequencing_depth = NULL,  
-                        basal_expression = 0 , dispersion = stats::runif(n_genes, min = 0, max = 1000) ) {
-  
-  ## -- get my effect
-  df_inputSimulation <- getInput2simulation(list_var, n_genes)
-  ## -- add column logQij
-  df_inputSimulation <- getLog_qij(df_inputSimulation)
-  df_inputSimulation <- addBasalExpression(df_inputSimulation, n_genes, basal_expression)
-  df_inputSimulation <- getMu_ij(df_inputSimulation)
-  
-  message("Building mu_ij matrix")
-  ## -- matrix
-  matx_Muij <- getMu_ij_matrix(df_inputSimulation)
-  matx_bool_replication <- generateReplicationMatrix(list_var, min_replicates, max_replicates)
-  mu_ij_matx_rep <- .replicateMatrix(matx_Muij, matx_bool_replication)
-  
-  dispersion <- getValidDispersion(dispersion)
-  ## Draw by index: sample(x, ...) interprets a single number x >= 1 as 1:x,
-  ## which would replace a unique user-supplied dispersion by random integers
-  idx_dispersion <- sample.int(length(dispersion), size = n_genes, replace = TRUE)
-  genes_dispersion <- dispersion[idx_dispersion]
-  matx_dispersion <- getDispersionMatrix(list_var, n_genes, genes_dispersion)
-  l_geneID <- paste0("gene", seq_len(n_genes))
-  names(genes_dispersion) <- l_geneID
-  
-  ## same order as mu_ij_matx_rep
-  ## (drop = FALSE keeps a matrix when there is a single gene or sample)
-  matx_dispersion <- matx_dispersion[order(row.names(matx_dispersion)), , drop = FALSE]
-  matx_dispersion_rep <- .replicateMatrix(matx_dispersion, matx_bool_replication)
-  matx_countsTable <- generateCountTable(mu_ij_matx_rep, matx_dispersion_rep)
-
-  message("Counts simulation: Done")
-  
-  dtf_countsTable <- as.data.frame(matx_countsTable)
-  if (!is.null(sequencing_depth)) {
-    message("Scaling count table according to sequencing depth.")
-    dtf_countsTable <- scaleCountsTable(dtf_countsTable, sequencing_depth)
-  }
-  
-  metaData <- getSampleMetadata(list_var, n_genes, matx_bool_replication)
-  ## total number of counts over all samples
-  libSize <- sum(colSums(dtf_countsTable))
-  settings_df <- getSettingsTable(n_genes, min_replicates, max_replicates, libSize)
-    
-  list2ret <- list(
-    settings = settings_df,
-    init = list_var, 
-    groundTruth = list(effects = df_inputSimulation, gene_dispersion = genes_dispersion),
-    counts = dtf_countsTable,
-    metadata = metaData)
-  return(list2ret)
-}
-
-
-
-
-#' Validate and Filter Dispersion Values
-#'
-#' This function takes an input vector and keeps only the values usable as dispersion.
-#'
-#' @param input_vector A vector to be validated.
-#' @return A numeric vector containing the strictly positive values of the input.
-#' @details The function checks whether the input is a vector, converts it to numeric
-#' (conversion warnings are suppressed) and removes the elements that could not be
-#' converted. It then removes every value that is not strictly positive, i.e. negative
-#' values and zeros. If the resulting vector has a length of zero, an error is thrown.
-#' @examples
-#' getValidDispersion(c(0.5, 1.2, -0.3, "invalid", 0.8))
-#' @export
-getValidDispersion <- function(input_vector) {
-  # Verify if it's a vector
-  if (!is.vector(input_vector)) {
-    stop("dispersion param is not a vector.")
-  }
-
-  # Elements that cannot be read as numbers become NA and are dropped below
-  input_vector <- suppressWarnings(as.numeric(input_vector))
-
-  is_missing <- is.na(input_vector)
-  if (any(is_missing)) {
-    message("Non-numeric elements were removed from the dispersion vector")
-    input_vector <- input_vector[!is_missing]
-  }
-
-  # Only strictly positive values are kept, so zeros are removed as well
-  is_positive <- input_vector > 0
-  if (!all(is_positive)) {
-    message("Non-positive values were removed from the dispersion vector")
-    input_vector <- input_vector[is_positive]
-  }
-
-  if (length(input_vector) == 0) stop("Invalid dispersion values provided.")
-
-  return(input_vector)
-}
-
-
-#' Generate replication matrix
-#'
-#' Counts the samples described by \code{list_var} and delegates to
-#' \code{getReplicationMatrix}. When the bounds are supplied in the wrong
-#' order they are swapped, and two messages report it.
-#'
-#' @param list_var A list of variables (already initialized); the number of
-#'   samples is the number of IDs returned by \code{getSampleID}
-#' @param min_replicates Minimum replication count
-#' @param max_replicates Maximum replication count
-#' @return Replication matrix
-#' @export
-#' @examples
-#' list_var = init_variable()
-#' generateReplicationMatrix(list_var, min_replicates = 2, max_replicates = 4)
-generateReplicationMatrix <- function(list_var, min_replicates, max_replicates) {
-  bounds_reversed <- min_replicates > max_replicates
-  if (bounds_reversed) {
-    message("min_replicates > max_replicates have been supplied")
-    message("Automatic reversing")
-    swapped_min <- max_replicates
-    max_replicates <- min_replicates
-    min_replicates <- swapped_min
-  }
-  n_samples <- length(getSampleID(list_var))
-  getReplicationMatrix(min_replicates, max_replicates, n_samples = n_samples)
-}
-
-#' Replicate matrix
-#'
-#' Repeats each column of \code{matrix} as many times as there are
-#' \code{TRUE} values in the matching column of \code{replication_matrix}.
-#' Each copy gets the original column name suffixed with \code{_<k>}, where
-#' \code{k} is the rank of the copy.
-#'
-#' @param matrix Matrix to replicate
-#' @param replication_matrix Replication matrix
-#' @return Replicated matrix
-#' @export
-#' @examples
-#' matrix <- matrix(1:9, nrow = 3, ncol = 3)
-#' replication_matrix <- matrix(TRUE, nrow = 3, ncol = 3)
-#' .replicateMatrix(matrix, replication_matrix)
-.replicateMatrix <- function(matrix, replication_matrix) {
-  n_rep_per_column <- colSums(replication_matrix)
-  column_ids <- rep(seq_len(ncol(matrix)), times = n_rep_per_column)
-  out <- matrix[, column_ids, drop = FALSE]
-  colnames(out) <- paste(colnames(out), sequence(n_rep_per_column), sep = "_")
-  out
-}
-
-```
-
-```{r test-hiddenFunction}
-
-# Test case: Valid input vector with numeric and positive values
-test_that("Valid input vector with numeric and positive values", {
-  input_vector <- c(0.5, 1.2, 0.8)
-  result <- getValidDispersion(input_vector)
-  expect_identical(result, input_vector)
-})
-
-# Test case: Valid input vector with numeric, positive, and negative values
-test_that("Valid input vector with numeric, positive, and negative values", {
-  input_vector <- c(0.5, -0.3, 1.2, 0.8)
-  result <- getValidDispersion(input_vector)
-  expect_identical(result, c(0.5, 1.2, 0.8))
-})
-
-# Test case: Valid input vector with non-numeric elements
-test_that("Valid input vector with non-numeric elements", {
-  input_vector <- c(0.5, "invalid", 0.8)
-  result <- getValidDispersion(input_vector)
-  expect_identical(result, c(0.5, 0.8))
-})
-
-# Test case: Empty input vector
-test_that("Empty input vector", {
-  input_vector <- numeric(0)
-  expect_error(getValidDispersion(input_vector), "Invalid dispersion values provided.")
-})
-
-# Test case: unique value in vector
-test_that("unique value in vector", {
-  input_vector <- 5
-  expect_equal(getValidDispersion(input_vector), 5)
-})
-
-# Test case: All negative values
-test_that("All negative values", {
-  input_vector <- c(-0.5, -1.2, -0.8)
-  expect_error(getValidDispersion(input_vector), "Invalid dispersion values provided.")
-})
-
-
-# Test for .isDispersionMatrixValid
-test_that(".isDispersionMatrixValid returns TRUE for valid dimensions", {
-  matx_dispersion <- matrix(1:6, nrow = 2, ncol = 3)
-  matx_bool_replication <- matrix(TRUE, nrow = 2, ncol = 3)
-  expect_true(.isDispersionMatrixValid(matx_dispersion, matx_bool_replication))
-})
-
-test_that(".isDispersionMatrixValid returns FALSE for invalid dimensions", {
-  # Two columns of dispersion against three columns of replication
-  matx_dispersion <- matrix(1:4, nrow = 2, ncol = 2)
-  matx_bool_replication <- matrix(TRUE, nrow = 2, ncol = 3)
-  expect_false(.isDispersionMatrixValid(matx_dispersion, matx_bool_replication))
-})
-
-# Test for generateCountTable
-test_that("generateCountTable generates count table with correct dimensions", {
-  mu_ij_matx_rep <- matrix(1:6, nrow = 2, ncol = 3)
-  matx_dispersion_rep <- matrix(1:6, nrow = 2, ncol = 3)
-  count_table <- generateCountTable(mu_ij_matx_rep, matx_dispersion_rep)
-  expect_equal(dim(count_table), c(2, 3))
-})
-
-
-
-# .replicateMatrix: a 3 x 3 input with an all-TRUE 3 x 3 replication matrix
-# is expected to give a 3 x 9 result
-test_that(".replicateMatrix replicates matrix correctly", {
-  input_matrix <- matrix(1:9, nrow = 3, ncol = 3)
-  replication_flags <- matrix(TRUE, nrow = 3, ncol = 3)
-  expect_equal(dim(.replicateMatrix(input_matrix, replication_flags)), c(3, 9))
-})
-
-```
-
-```{r  test-mock}
-
-# Test for mock_rnaseq
-#test_that("mock_rnaseq returns expected output", {
-  # Set up input variables
-#  list_var <- NULL
-#  n_genes <- 3
-#  min_replicates <- 2
-#  max_replicates <- 4
-#  df_inputSimulation <- data.frame(gene_id = 1:3, coef_value = c(0.5, 0.3, 0.2))
-#  matx_dispersion <- matrix(1:9, nrow = 3, ncol = 3)
-
-  # Run the function
-#  expect_error(mock_rnaseq(list_var, n_genes, min_replicates, max_replicates, df_inputSimulation, 
-#                           matx_dispersion))
-  
-  
-  #list_var <- init_variable(name = "my_var", mu = c(10, 20), level = 2 )
-  #n_genes <- 10
-  #min_replicates <- 2
-  #max_replicates <- 4
-  #scaling_factor <- 1
-  #df_inputSimulation <- getInput2simulation(list_var, n_genes)
-  #dispersion <- getDispersionMatrix(list_var, n_genes, c(1000, 1000, 1000, 1000, 1000, 1, 1, 1, 1, 1))
-  #mock_rnaseq(list_var, n_genes, min_replicates, 
-  #            max_replicates, 
-  #            df_inputSimulation, dispersion)
-  #ERROR
-#})
-
-
-# generateReplicationMatrix: default variable with 2 to 4 replicates is
-# expected to give a 4 x 2 matrix
-test_that("generateReplicationMatrix generates replication matrix correctly", {
-  list_var <- init_variable()
-  replication_flags <- generateReplicationMatrix(list_var, min_replicates = 2, max_replicates = 4)
-  expect_equal(dim(replication_flags), c(4, 2))
-})
-
-```
-
-
-```{r  function-preparingData , filename = "prepare_data2fit"}
-
-#' Convert count matrix to long data frame
-#'
-#' Converts a count matrix to a long data frame: the row names of the matrix
-#' are stored in the identifier column, then every other column is melted into
-#' a "sampleID" / value pair.
-#'
-#' @param countMatrix Count matrix whose row names identify the rows to keep
-#'   apart in the long format
-#' @param value_name Name for the value column (default "kij")
-#' @param id_vars Name for the id column (default "geneID")
-#' @return Long data frame with the columns \code{id_vars}, "sampleID"
-#'   (character) and \code{value_name}
-#' @importFrom reshape2 melt
-#' @export
-#' @examples
-#' list_var <- init_variable()
-#' mock_data <- mock_rnaseq(list_var, n_genes = 3, 2, 2)
-#' countMatrix_2longDtf(mock_data$counts)
-countMatrix_2longDtf <- function(countMatrix, value_name = "kij", id_vars = "geneID") {
-  countMatrix <- as.data.frame(countMatrix)
-  # Store the row names under the requested identifier column. Writing them to
-  # a hard-coded "geneID" column makes melt() fail for any other `id_vars`.
-  countMatrix[[id_vars]] <- rownames(countMatrix)
-  dtf_countLong <- reshape2::melt(countMatrix, id.vars = id_vars, variable.name = "sampleID",
-                                  value.name = value_name)
-  # melt() returns the former column names as a factor
-  dtf_countLong$sampleID <- as.character(dtf_countLong$sampleID)
-  return(dtf_countLong)
-}
-
-#' Get column name with sampleID
-#'
-#' Returns the name of the first metadata column that contains the sample ID
-#' found in the first row of the long count data frame.
-#'
-#' @param dtf_countsLong Long data frame of counts with a "sampleID" column
-#' @param metadata Metadata data frame
-#' @return Column name with sampleID, or \code{NA} when no metadata column
-#'   contains the sample ID
-#' @export
-#' @examples
-#' list_var <- init_variable()
-#' mock_data <- mock_rnaseq(list_var, n_genes = 3, 2,2, 2)
-#' dtf_countLong <- countMatrix_2longDtf(mock_data$counts)
-#' .getColumnWithSampleID(dtf_countLong, mock_data$metadata)
-.getColumnWithSampleID <- function(dtf_countsLong, metadata) {
-  example_spleID <- as.character(dtf_countsLong[1, "sampleID"])
-  # Nothing to look up (e.g. empty count table)
-  if (length(example_spleID) != 1 || is.na(example_spleID)) {
-    return(NA)
-  }
-  # Scan every column, not only the last one, and compare the values
-  # literally so that IDs containing regex metacharacters match as written.
-  for (indice_col in seq_len(ncol(metadata))) {
-    if (example_spleID %in% as.character(metadata[[indice_col]])) {
-      return(colnames(metadata)[indice_col])
-    }
-  }
-  NA  # SampleID does not correspond between countMatrix and metadata
-}
-
-#' Prepare data for fitting
-#'
-#' Optionally normalizes the count matrix, reshapes it to a long format and
-#' joins the result with the metadata on the sample identifier.
-#'
-#' @param countMatrix Count matrix
-#' @param metadata Metadata data frame
-#' @param normalization A boolean value. If \code{TRUE} (default), median ratio
-#'                      normalization is applied to the count matrix before
-#'                      reshaping. Any other value leaves the counts untouched.
-#' @param response_name String used as name of the value column in the long
-#'                      data frame, i.e. the modeled variable (default : "kij")
-#' @param groupID String naming the column checked for missing values after
-#'                the join (default : "geneID")
-#' @return Data frame for fitting
-#' @export
-#' @examples
-#'  list_var <- init_variable()
-#'  mock_data <- mock_rnaseq(list_var, n_genes = 3, 2,2, 2)
-#'  data2fit <- prepareData2fit(mock_data$counts, mock_data$metadata)
-prepareData2fit <- function(countMatrix, metadata, normalization = TRUE , response_name = "kij", groupID = "geneID" ) {
-
-  ## -- correct for size differences between samples
-  if (isTRUE(normalization)) {
-    message("INFO: Median ratio normalization.")
-    countMatrix <- medianRatioNormalization(countMatrix)
-  }
-
-  ## -- wide to long, then locate the metadata column holding the sample IDs
-  long_counts <- countMatrix_2longDtf(countMatrix, response_name)
-  join_key <- .getColumnWithSampleID(long_counts, metadata)
-  if (is.na(join_key)) {
-    stop("SampleIDs do not seem to correspond between countMatrix and metadata")
-  }
-
-  ## -- attach the metadata to every count
-  joined <- join_dtf(long_counts, metadata, k1 = "sampleID", k2 = join_key)
-  if (any(is.na(joined[[groupID]]))) {
-    warning("Something went wrong. NA introduced in the geneID column. Check the coherence between countMatrix and metadata.")
-  }
-  joined
-}
-
-
-
-#' Apply Median Ratio Normalization to a Counts Matrix
-#'
-#' This function performs median ratio normalization on a counts matrix to
-#' adjust for differences in sequencing depth across samples.
-#'
-#' @param countsMatrix A counts matrix where rows represent genes and columns
-#'                     represent samples.
-#'
-#' @return A normalized counts matrix after applying median ratio normalization.
-#'
-#' @details The counts are log-transformed and the mean log count of each gene
-#' (its log geometric mean) is computed across samples. Genes whose log
-#' geometric mean is not finite (for instance genes with at least one zero
-#' count) are left out of the reference. For every sample, the median of the
-#' differences between its log counts and the gene log geometric means is
-#' taken; the counts of the sample are divided by the exponential of that
-#' median. An error is raised when no gene can serve as reference.
-#' 
-#' @importFrom stats median
-#'
-#' @examples
-#' counts <- matrix(c(100, 200, 300, 1000, 1500, 2500), ncol = 2)
-#' normalized_counts <- medianRatioNormalization(counts)
-#'
-#' @export
-medianRatioNormalization <- function(countsMatrix) {
-  log_data <- log(countsMatrix)
-  average_log <- rowMeans(log_data)
-  
-  # Only genes with a finite log geometric mean can serve as reference
-  idx2keep <- is.finite(average_log)
-  if (!any(idx2keep))
-    stop("Every gene contains at least one zero, cannot compute log geometric means")
-  
-  # drop = FALSE keeps the matrix shape when a single gene remains
-  ratio_data <- sweep(log_data[idx2keep, , drop = FALSE], 1, average_log[idx2keep], "-")
-  sample_medians <- apply(ratio_data, 2, stats::median)
-  
-  # One scaling factor per sample (column)
-  scaling_factors <- exp(sample_medians)
-  countsMatrix_normalized <- sweep(countsMatrix, 2, scaling_factors, "/")
-  
-  return(countsMatrix_normalized)
-}
-
-
-```
-
-```{r  test-prepareData2fit}
-
-
-# countMatrix_2longDtf: column types and identifiers of the long data frame
-test_that("countMatrix_2longDtf converts count matrix to long data frame", {
-  # Simulated counts for 3 genes
-  mock_data <- mock_rnaseq(init_variable(), n_genes = 3, 2,2, 1)
-  long_counts <- countMatrix_2longDtf(mock_data$counts)
-  # Identifier columns are character, counts are numeric
-  expect_true(is.character(long_counts$sampleID))
-  expect_true(is.character(long_counts$geneID))
-  expect_true(is.numeric(long_counts$kij))
-  # Every gene and every sample is present
-  expected_genes <- c("gene1", "gene2", "gene3")
-  expected_samples <- c("myVariable1_1", "myVariable1_2",
-                        "myVariable2_1", "myVariable2_2")
-  expect_equal(unique(long_counts$geneID), expected_genes)
-  expect_equal(unique(long_counts$sampleID), expected_samples)
-})
-
-# .getColumnWithSampleID: the metadata column holding the sample IDs is found
-test_that("getColumnWithSampleID returns column name with sampleID", {
-  # Simulated counts and their metadata
-  mock_data <- mock_rnaseq(init_variable(), n_genes = 3, 2,2, 2)
-  long_counts <- countMatrix_2longDtf(mock_data$counts)
-
-  # The metadata of the simulation stores the IDs in "sampleID"
-  found_column <- .getColumnWithSampleID(long_counts, mock_data$metadata)
-  expect_identical(found_column, "sampleID")
-})
-
-# prepareData2fit: column types and identifiers of the joined data frame
-test_that("prepareData2fit prepares data for fitting", {
-  # Simulated counts and their metadata
-  mock_data <- mock_rnaseq(init_variable(), n_genes = 3, 2,2, 2)
-
-  prepared <- prepareData2fit(mock_data$counts, mock_data$metadata)
-
-  # Identifier columns are character, counts are numeric
-  expect_true(is.character(prepared$sampleID))
-  expect_true(is.character(prepared$geneID))
-  expect_true(is.numeric(prepared$kij))
-  # Every gene and every sample is present
-  expected_genes <- c("gene1", "gene2", "gene3")
-  expected_samples <- c("myVariable1_1", "myVariable1_2",
-                        "myVariable2_1", "myVariable2_2")
-  expect_equal(unique(prepared$geneID), expected_genes)
-  expect_equal(unique(prepared$sampleID), expected_samples)
-})
-
-
-
-
-
-# Test case 1: strictly positive counts are rescaled sample by sample
-test_that("Median ratio normalization works for positive counts", {
-  raw_counts <- matrix(c(100, 200, 300, 1000, 1500, 2500), ncol = 2)
-  expected_counts <- matrix(c(288.6751 , 577.3503 , 866.0254 , 346.4102, 519.6152, 866.0254), ncol = 2)
-
-  expect_equal(medianRatioNormalization(raw_counts), expected_counts, tolerance = 1e-4)
-})
-
-# Test case 2: every gene has a zero in the first sample
-test_that("Median ratio normalization return error for zero counts", {
-  raw_counts <- matrix(c(0, 0, 0, 1000, 1500, 2500), ncol = 2)
-  expect_error(medianRatioNormalization(raw_counts))
-})
-
-
-# Test case 3: every count is zero
-test_that("Throws an error when all-zero genes are encountered", {
-  raw_counts <- matrix(c(0, 0, 0, 0, 0, 0), ncol = 2)
-  expect_error(medianRatioNormalization(raw_counts))
-})
-
-
-```
-
-```{r functionFitModel, filename = "fitModel"}
-#' Check if Data is Valid for Model Fitting
-#'
-#' Verifies that every variable named in the model formula is a column of the
-#' data to fit. The first variable of the formula that is missing is reported
-#' in the error message.
-#'
-#' @param data2fit The data frame or tibble containing the variables to be used for model fitting.
-#' @param formula The formula specifying the model to be fitted.
-#'
-#' @return \code{TRUE} if all the variables required in the formula are present in \code{data2fit}, otherwise an error is raised.
-#'
-#' @examples
-#' data(iris)
-#' formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-#' isValidInput2fit(iris, formula) # Returns TRUE if all required variables are present
-#' @keywords internal
-#' @export
-isValidInput2fit <- function(data2fit, formula){
-  formula_vars <- all.vars(formula)
-  missing_vars <- formula_vars[!(formula_vars %in% colnames(data2fit))]
-  # Report the first missing variable, in formula order
-  if (length(missing_vars) > 0) {
-    stop(paste("Variable", missing_vars[1],  "not found"))
-  }
-  TRUE
-}
-
-
-#' Drop Random Effects from a Formula
-#'
-#' This function removes every random-effect term from a formula. A term is
-#' treated as a random effect when its label contains a vertical bar ('|').
-#' If all terms are random effects, the right-hand side of the formula is
-#' replaced by an intercept. A formula without any random effect, including an
-#' intercept-only formula such as \code{y ~ 1}, is returned unchanged.
-#'
-#' @param form The formula from which random effects should be dropped.
-#'
-#' @return A modified formula with all random effects dropped.
-#'
-#' @examples
-#' # Create a formula with random effects
-#' formula <- y ~ x1 + (1 | group) + (1 | subject)
-#' # Drop both random effects, leaving y ~ x1
-#' modified_formula <- drop_randfx(formula)
-#'
-#' @importFrom stats terms
-#' @importFrom stats update
-#' @importFrom stats drop.terms
-#'
-#' @export
-drop_randfx <- function(form) {
-  form.t <- stats::terms(form)
-  dr <- grepl("|", labels(form.t), fixed = TRUE)
-  # Checked first: `dr` is empty for an intercept-only formula, in which case
-  # all(dr) is TRUE as well but there is nothing to drop.
-  if (!any(dr)) {
-    return(form)
-  }
-  # Only random effects: keep the response and fall back to an intercept
-  if (all(dr)) {
-    return(stats::update(form, . ~ 1))
-  }
-  form.td <- stats::drop.terms(form.t, which(dr))
-  form.u <- stats::update(form, form.td)
-  form.u
-}
-
-
-
-#' Check if a Model Matrix is Full Rank
-#'
-#' Random effects are first removed from the formula, then the model matrix is
-#' built from the metadata. The eigenvalues of the crossproduct of the model
-#' matrix are computed; the matrix is considered full rank when the first
-#' eigenvalue is positive and the absolute ratio of the last eigenvalue to the
-#' first one exceeds 1e-13.
-#'
-#' This function is inspired by a similar function found in the Limma package.
-#'
-#' @param metadata The metadata used to create the model matrix.
-#' @param formula The formula used to specify the model matrix.
-#'
-#' @return \code{TRUE} if the model matrix is full rank. An error is raised
-#'   otherwise.
-#'
-#' @examples
-#' metadata <- data.frame(x = rnorm(10), y = rnorm(10))
-#' formula <- y ~ x
-#' is_fullrank(metadata, formula)
-#'
-#'
-#' @importFrom stats model.matrix
-#' @export
-is_fullrank <- function(metadata, formula) {
-  ## fixed effects only
-  fixed_formula <- drop_randfx(formula)
-  design <- stats::model.matrix(fixed_formula, data = metadata)
-  ## eigenvalues are returned in decreasing order
-  eigen_values <- eigen(crossprod(design), symmetric = TRUE, only.values = TRUE)$values
-  largest <- eigen_values[1]
-  smallest <- eigen_values[length(eigen_values)]
-  full_rank <- largest > 0 && abs(smallest / largest) > 1e-13
-
-  if (!full_rank) {
-    stop("The model matrix is not full rank, so the model cannot be fit as specified. One or more variables or interaction terms in the design formula are linear combinations of the others and must be removed.")
-  }
-
-  TRUE
-}
-
-
-
-
-
-#' Fit a model with glmmTMB.
-#'
-#' Calls glmmTMB::glmmTMB, replaces the frame stored in the model by the data
-#' passed in, and writes the \code{family} and \code{control} arguments found
-#' in \code{...} back into the stored call.
-#'
-#' @param formula Formula specifying the model formula
-#' @param data Data frame containing the data
-#' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function
-#' @return Fitted model object. Errors raised by glmmTMB are not caught here.
-#' @export
-#' @examples
-#' .fitModel(formula = mpg ~ cyl + disp, data = mtcars)
-.fitModel <- function(formula, data, ...) {
-  fit <- glmmTMB::glmmTMB(formula, ..., data = data)
-  fit$frame <- data
-  ## family / control given in ... => stored in the call to avoid errors in
-  ## a future update of the model
-  extra_args <- list(...)
-  for (arg_name in c("family", "control")) {
-    arg_value <- extra_args[[arg_name]]
-    if (!is.null(arg_value)) {
-      fit$call[[arg_name]] <- arg_value
-    }
-  }
-  fit
-}
-
-
-
-#' Subset the data to one group and fit the model on it.
-#'
-#' Keeps the rows of \code{data} whose \code{group_by} value equals
-#' \code{group} and passes them to .fitModel.
-#'
-#' @param group The specific group to fit the model for
-#' @param group_by Column name in data representing the grouping variable
-#' @param formula Formula specifying the model formula
-#' @param data Data frame containing the data
-#' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function
-#' @return Fitted model object. Errors raised while fitting are not caught here.
-#' @export
-#' @examples
-#' .subsetData_andfit(group = "setosa", group_by = "Species", 
-#'                  formula = Sepal.Length ~ Sepal.Width + Petal.Length, 
-#'                  data = iris )
-.subsetData_andfit <- function(group, group_by, formula, data, ...) {
-  in_group <- data[[group_by]] == group
-  group_data <- data[in_group, ]
-  .fitModel(formula, group_data, ...)
-}
-
-
-
-#' Launch the model fitting process for a specific group.
-#'
-#' Fits the model on the rows of one group through .subsetData_andfit.
-#' Warnings raised during the fit are reported with message() and muffled, so
-#' the fit carries on. An error is reported with message() and the function
-#' then returns NULL.
-#'
-#' @param group The specific group to fit the model for
-#' @param group_by Column name in data representing the grouping variable
-#' @param formula Formula specifying the model formula
-#' @param data Data frame containing the data
-#' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function
-#' @return Fitted model object, or NULL if there was an error
-#' @export
-#' @examples
-#' launchFit(group = "setosa", group_by = "Species", 
-#'            formula = Sepal.Length ~ Sepal.Width + Petal.Length, 
-#'            data = iris )
-launchFit <- function(group, group_by, formula, data, ...) {
-  # Report the warning, then resume the fit where it was raised
-  report_warning <- function(w) {
-    message(paste(Sys.time(), "warning for group", group, ":", conditionMessage(w)))
-    invokeRestart("muffleWarning")
-  }
-  # Report the error and give up on this group
-  report_error <- function(e) {
-    message(paste(Sys.time(), "error for group", group, ":", conditionMessage(e)))
-    NULL
-  }
-  tryCatch(
-    withCallingHandlers(
-      .subsetData_andfit(group, group_by, formula, data, ...),
-      warning = report_warning
-    ),
-    error = report_error
-  )
-}
-
-
-#' Fit models in parallel for each group using a socket cluster.
-#'
-#' Starts a cluster with parallel::makeCluster, exports the fitting helpers to
-#' the workers and runs launchFit on every group with parallel::parLapply.
-#' The cluster is stopped when the function exits, whether it succeeded or not.
-#'
-#' @param groups Vector of unique group values
-#' @param group_by Column name in data representing the grouping variable
-#' @param formula Formula specifying the model formula
-#' @param data Data frame containing the data
-#' @param n.cores The number of CPU cores to use for parallel processing.
-#'  If set to NULL (default), the number of available CPU cores will be automatically detected
-#'  (a single core is used when the detection fails).
-#' @param log_file File receiving the output of the workers (default : log.txt)
-#' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function
-#' @return List, named after the groups, of fitted model objects or NULL for any errors
-#' @importFrom stats setNames
-#' @export
-#' @examples
-#' .parallel_fit(group_by = "Species", "setosa", 
-#'                formula = Sepal.Length ~ Sepal.Width + Petal.Length, 
-#'                data = iris, n.cores = 1, log_file = "log.txt" )
-.parallel_fit <- function(groups, group_by, formula, data, n.cores = NULL, log_file = "log.txt",  ...) {
-  if (is.null(n.cores)) n.cores <- parallel::detectCores()
-  # detectCores() returns NA when the number of cores cannot be determined
-  if (is.na(n.cores)) n.cores <- 1L
-  
-  clust <- parallel::makeCluster(n.cores, outfile = log_file)
-  # Release the workers even if the export or one of the fits fails
-  on.exit(parallel::stopCluster(clust), add = TRUE)
-  parallel::clusterExport(clust, c(".subsetData_andfit", ".fitModel"),  envir=environment())
-  results_fit <- parallel::parLapply(clust, X = stats::setNames(groups, groups), fun = launchFit, 
-                      group_by = group_by, formula = formula, data = data, ...)
-  return(results_fit)
-}
-
-#' Fit one model per group, in parallel.
-#'
-#' Checks the inputs (variables of the formula present in the data, model
-#' matrix of full rank, grouping column present in the data), then fits the
-#' model on every unique value of the grouping column through .parallel_fit.
-#'
-#' @param formula Formula specifying the model formula
-#' @param data Data frame containing the data
-#' @param group_by Column name in data representing the grouping variable
-#' @param n.cores The number of CPU cores to use for parallel processing.
-#'               If set to NULL (default), the number of available CPU cores will be automatically detected.
-#' @param log_file File path to save the log messages (default : log.txt)
-#' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function
-#' @return List of fitted model objects or NULL for any errors
-#' @export
-#' @examples
-#' fitModelParallel(formula = Sepal.Length ~ Sepal.Width + Petal.Length, 
-#'                  data = iris, group_by = "Species", n.cores = 1) 
-fitModelParallel <- function(formula, data, group_by, n.cores = NULL, log_file = "log.txt", ...) {
-  
-  ## Some verification
-  isValidInput2fit(data, formula)
-  is_fullrank(data, formula)
-  # Without this check an unknown grouping column gives no group at all and
-  # an empty list is returned silently.
-  if (!all(group_by %in% colnames(data))) {
-    stop(paste("Variable", group_by, "not found"))
-  }
-  
-  groups <- unique(data[[group_by]])
-  # Fit models in parallel and capture the results
-  results <- .parallel_fit(groups, group_by, formula, data, n.cores, log_file, ...)
-  return(results)
-}
-
-
-```
-
-
-```{r  test-fitData}
-
-
-# All the variables of the formula are columns of iris
-test_that("isValidInput2fit returns TRUE for valid data", {
-  data(iris)
-  valid_formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  expect_true(isValidInput2fit(iris, valid_formula))
-})
-
-# A variable absent from the data is reported by name
-test_that("isValidInput2fit raises an error for missing variable", {
-  data(iris)
-  broken_formula <- Sepal.Length ~ Sepal.Width + NonExistentVariable
-  expect_error(isValidInput2fit(iris, broken_formula), "Variable NonExistentVariable not found")
-})
-
-# .fitModel: a default fit, a formula with an unknown variable, and the
-# storage of `family` / `control` in the call of the returned model
-test_that(".fitModel returns a fitted model object", {
-  data(mtcars)
-  formula <- mpg ~ cyl + disp
-  fitted_model <- suppressWarnings(.fitModel(formula, mtcars))
-  #expect_warning(.fitModel(formula, mtcars))
-  expect_s3_class(fitted_model, "glmmTMB")
-  
-  # Test with invalid formula (invalid_var is not a column of mtcars)
-  invalid_formula <- mpg ~ cyl + disp + invalid_var
-  expect_error(.fitModel(invalid_formula, mtcars))
-  
-  
-   # Additional parameters: 
-   #change family + formula
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length + (1 | Species)
-  fitted_models <- suppressWarnings(.fitModel(formula = formula, 
-                                                    data = iris, 
-                                                    family = glmmTMB::nbinom1(link = "log") ))
-  # the family object itself, not an unevaluated expression, is stored
-  expect_s3_class(fitted_models$call$family, "family")
-  expect_equal(fitted_models$call$formula, formula)
-  #change control settings
-  fitted_models <- suppressWarnings(.fitModel(formula = formula, 
-                                                    data = iris, 
-                                                    family = glmmTMB::nbinom1(link = "log"), 
-                                                control = glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,
-                                                                                               eval.max=1e3))))
-  expect_equal(fitted_models$call$control,  glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,eval.max=1e3)))
-  
-  
-  
-})
-
-
-# Mixed formula: only the fixed effect is kept
-test_that("Drop random effects from formula", {
-  mixed_formula <- y ~ x1 + (1 | group) + (1 | subject)
-  expect_equal(deparse(drop_randfx(mixed_formula)), "y ~ x1")
-})
-
-# Fixed effects only: the formula comes back as given
-test_that("Keep formula with no random effects unchanged", {
-  fixed_formula <- y ~ x1 + x2
-  expect_equal(deparse(drop_randfx(fixed_formula)), "y ~ x1 + x2")
-})
-
-# Random effects only: the right-hand side falls back to an intercept
-test_that("Drop all random effects to intercept", {
-  random_formula <- ~ (1 | group) + (1 | subject)
-  expect_equal(deparse(drop_randfx(random_formula)), ". ~ 1")
-})
-
-
-# A single continuous covariate gives a full-rank model matrix
-test_that("Identify full-rank model matrix", {
-  design_data <- data.frame(x = rnorm(10), y = rnorm(10))
-  expect_true(is_fullrank(design_data, y ~ x))
-})
-
-# x, w and z split the rows identically, so their columns are collinear
-test_that("Detect rank-deficient model matrix and throw error", {
-  design_data <- data.frame(
-    x = factor(rep(c("xA", "xB"), each = 5)),
-    w = factor(rep(c("wA", "wB"), each = 5)),
-    z = factor(rep(c("zA", "zB"), each = 5)),
-    y = rnorm(10)
-  )
-  expect_error(
-    is_fullrank(design_data, y ~ x + w + z + y:w),
-    regexp = "The model matrix is not full rank, so the model cannot be fit as specified."
-  )
-})
-
-# Same collinear design, with a random effect added to the formula
-test_that("Detect rank-deficient model matrix and throw error (with random eff)", {
-  design_data <- data.frame(
-    x = factor(rep(c("xA", "xB"), each = 5)),
-    w = factor(rep(c("wA", "wB"), each = 5)),
-    z = factor(rep(c("zA", "zB"), each = 5)),
-    y = rnorm(10)
-  )
-  expect_error(
-    is_fullrank(design_data, y ~ x + w + z + y:w + (1 | w)),
-    regexp = "The model matrix is not full rank, so the model cannot be fit as specified."
-  )
-})
-
-# One fixed effect plus a random effect: the fixed part is full rank
-test_that("Identify full-rank model matrix (with random eff)", {
-  design_data <- data.frame(
-    x = factor(rep(c("xA", "xB"), each = 5)),
-    w = factor(rep(c("wA", "wB"), each = 5)),
-    z = factor(rep(c("zA", "zB"), each = 5)),
-    y = rnorm(10)
-  )
-  expect_true(is_fullrank(design_data, y ~ x + (1 | w)))
-})
-
-#test_that(".fitMixteModel returns a fitted mixed-effects model object or NULL if there was an error", {
-#  data(mtcars)
-#  formula <- mpg ~ cyl + disp + (1|gear)
-#  fitted_model <- .fitMixteModel(formula, mtcars)
-  # Add appropriate expectations for the fitted mixed-effects model object
-  
-  # Test with invalid formula
-#  invalid_formula <- formula + "invalid"
-#  fitted_model_error <- .fitMixteModel(invalid_formula, mtcars)
-#  expect_null(fitted_model_error)
-#})
-
-# .subsetData_andfit: fit on the "setosa" rows of iris, failure on an invalid
-# call, and storage of `family` / `control` in the call of the returned model
-test_that(".subsetData_andfit returns a glmTMB obj", {
-  data(iris)
-  group <- "setosa"
-  group_by <- "Species"
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  fitted_model <- .subsetData_andfit(group, group_by, formula, iris)
-  expect_s3_class(fitted_model, "glmmTMB")
-
-  # Test with invalid formula
-  # NOTE(review): mtcars is passed here although group_by is "Species"; the
-  # call is expected to fail either way - confirm iris was not intended.
-  invalid_formula <- Sepal.Length ~ Sepal.Width + Petal.Length +  invalid_var
-  expect_error(.subsetData_andfit(group, group_by, invalid_formula, mtcars))
-  
-  
-    # Additional parameters: 
-   #change family + formula
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length + (1 | Species)
-  fitted_models <- suppressWarnings(.subsetData_andfit(group,
-                                                       group_by,
-                                                       formula = formula, 
-                                                        data = iris, 
-                                                        family = glmmTMB::nbinom1(link = "log") ))
-  expect_s3_class(fitted_models$call$family, "family")
-  expect_equal(fitted_models$call$formula, formula)
-  #change control settings
-  fitted_models <- suppressWarnings(.subsetData_andfit(group,
-                                                       group_by,
-                                                       formula = formula, 
-                                                        data = iris, 
-                                                    family = glmmTMB::nbinom1(link = "log"), 
-                                                control = glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,
-                                                                                               eval.max=1e3))))
-  expect_equal(fitted_models$call$control,  glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,eval.max=1e3)))
-  
-})
-
-# launchFit: fit for one group, message emitted when the fit fails, and
-# storage of `family` / `control` in the call of the returned model
-# NOTE(review): group_by is "Group", which is a column of neither mtcars nor
-# iris, so the subset passed to the fit looks empty - confirm the intended
-# grouping column.
-test_that("launchFit handles warnings and errors during the fitting process", {
-  data(mtcars)
-  group <- "Group1"
-  group_by <- "Group"
-  formula <- mpg ~ cyl + disp
-  fitted_model <- suppressWarnings(launchFit(group, group_by, formula, mtcars))
-  expect_s3_class(fitted_model, "glmmTMB")
-
-  # Test with invalid formula (variables of iris used on mtcars)
-  # launchFit is called unqualified, like everywhere else in these tests
-  invalid_formula <- Sepal.Length ~ Sepal.Width + Petal.Length 
-  output_msg <- capture_message( launchFit(group, group_by, invalid_formula, mtcars))
-  expect_match(output_msg$message, ".* error for group Group1 : object 'Sepal.Length' not found")
-  
-  
-  # Additional parameters: 
-   #change family + formula
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  fitted_models <- suppressWarnings(launchFit(formula = formula, 
-                                                    data = iris, 
-                                                    group_by = group_by, 
-                                                    group = "setosa",
-                                                    family = glmmTMB::nbinom1(link = "log") ))
-  expect_s3_class(fitted_models$call$family, "family")
-  expect_equal(fitted_models$call$formula, formula)
-  #change control settings
-  fitted_models <- suppressWarnings(launchFit(formula = formula, 
-                                                    data = iris, 
-                                                    group_by = group_by, 
-                                                    group = "setosa",
-                                                     family = glmmTMB::nbinom1(link = "log"), 
-                                                control = glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,
-                                                                                               eval.max=1e3))))
-  expect_equal(fitted_models$call$control,  glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,eval.max=1e3)))
-})
-
-# .parallel_fit: one fitted model per group, NULL for every group when the
-# formula cannot be fitted, and forwarding of `family` / `control`
-# NOTE(review): every call passes log_file = "log.txt", so the test appears to
-# write that file in the working directory - confirm it is cleaned up.
-test_that(".parallel_fit returns a list of fitted model objects or NULL for any errors", {
-  data(iris)
-  groups <- unique(iris$Species)
-  group_by <- "Species"
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  fitted_models <- .parallel_fit(groups, group_by, formula, iris, log_file = "log.txt", n.cores = 1)
-  expect_s3_class(fitted_models$setosa, "glmmTMB")
-  expect_length(fitted_models, length(groups))
-
-  # Test with invalid formula (none of its variables is a column of iris)
-  invalid_formula <- blabla ~ cyl + disp 
-  result <- suppressWarnings(.parallel_fit(groups, group_by, invalid_formula,  
-                                           iris, log_file = "log.txt",  n.cores = 1))
-  expect_equal(result, expected = list(setosa = NULL, versicolor = NULL, virginica = NULL))
-  
-  
-   # Additional parameters: 
-   #change family + formula
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  fitted_models <- suppressWarnings(.parallel_fit(formula = formula, 
-                                                    data = iris, 
-                                                    group_by = group_by, 
-                                                    groups = "setosa",
-                                                    log_file = "log.txt",
-                                                    n.cores = 1,
-                                                    family = glmmTMB::nbinom1(link = "log") ))
-  expect_s3_class(fitted_models$setosa$call$family, "family")
-  expect_equal(fitted_models$setosa$call$formula, formula)
-  #change control settings
-  fitted_models <- suppressWarnings(.parallel_fit(formula = formula, 
-                                                    data = iris, 
-                                                    group_by = group_by, 
-                                                    groups = "setosa",
-                                                    log_file = "log.txt", 
-                                                    family = glmmTMB::nbinom1(link = "log"),
-                                                    n.cores = 1,
-                                                    control = glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,
-                                                                                               eval.max=1e3))))
-  expect_equal(fitted_models$setosa$call$control,  glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,eval.max=1e3)))
-})
-
-# fitModelParallel: one fitted model per group, an error raised by the input
-# checks for an invalid formula, and forwarding of `family` / `control`
-test_that("fitModelParallel fits models in parallel for each group and returns a list of fitted model objects or NULL for any errors", {
-  data(iris)
-  groups <- unique(iris$Species)
-  group_by <- "Species"
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  #is.numeric(iris)
-  #iris <- data.frame(lapply(iris, function(y) if(is.numeric(y)) round(y, 0) else y)) 
-  fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1)
-  expect_s3_class(fitted_models$setosa, "glmmTMB")
-  expect_length(fitted_models, length(groups))
-  
-  # none of the variables of this formula is a column of iris
-  invalid_formula <- blabla ~ cyl + disp 
-  expect_error(fitModelParallel(invalid_formula, iris,  group_by ,log_file = "log.txt",  n.cores = 1))
-  
-   # Additional parameters: 
-   #change family + formula
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  fitted_models <- suppressWarnings(fitModelParallel(formula = formula, 
-                                                     data = iris, 
-                                                     group_by = group_by, 
-                                                      n.cores = 1,
-                                                      family = glmmTMB::nbinom1(link = "log") ))
-  expect_s3_class(fitted_models$setosa$call$family, "family")
-  expect_equal(fitted_models$setosa$call$formula, formula)
-  #change control settings
-  fitted_models <- suppressWarnings(fitModelParallel(formula = formula, 
-                                                     data = iris, 
-                                                     group_by = group_by, 
-                                                      n.cores = 1,
-                                                     family = glmmTMB::nbinom1(link = "log"), 
-                                                control = glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,
-                                                                                               eval.max=1e3))))
-  expect_equal(fitted_models$setosa$call$control,  glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3,eval.max=1e3)))
-
-})
-
-```
-
-
-```{r functionUpdateFitModel, filename = "updateFitModel"}
-
-
-#' Update GLMNB models in parallel.
-#'
-#' Checks the new formula against the model frame of the first successfully
-#' fitted model of \code{l_tmb}, then refits every model of the list with that
-#' formula through \code{\link{.parallel_update}}.
-#'
-#' @param formula Formula for the updated GLMNB models.
-#' @param l_tmb List of GLMNB objects. Entries may be \code{NULL} (failed fits).
-#' @param n.cores Number of cores to use for parallel processing. If NULL, the function will use all available cores.
-#' @param log_file File path for the log output.
-#' @param ... Additional arguments forwarded to the model update (e.g. \code{family}, \code{control}).
-#' @export
-#' @return A list of updated GLMNB models (\code{NULL} for the models whose update failed).
-#'
-#' @examples
-#' data(iris)
-#' groups <- unique(iris$Species)
-#' group_by <- "Species"
-#' formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-#' fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1)
-#' new_formula <- Sepal.Length ~ Sepal.Width 
-#' results <- updateParallel(new_formula, fitted_models, n.cores = 1)
-updateParallel <- function(formula, l_tmb, n.cores = NULL, log_file = "log.txt", ...) {
-  # A failed fit is stored as NULL: validate against the first usable model
-  # instead of blindly reading the frame of the first list element.
-  is_fitted <- !vapply(l_tmb, is.null, logical(1))
-  if (!any(is_fitted)) {
-    stop("l_tmb does not contain any fitted model to update.", call. = FALSE)
-  }
-  reference_frame <- l_tmb[[which(is_fitted)[1]]]$frame
-
-  isValidInput2fit(reference_frame, formula)
-  is_fullrank(reference_frame, formula)
-
-  # Fit models update in parallel and capture the results
-  .parallel_update(formula, l_tmb, n.cores, log_file, ...)
-}
-
-
-#' Internal function to update GLMNB models in parallel.
-#'
-#' This function is used internally by \code{\link{updateParallel}}. It starts
-#' a cluster, exports \code{launchUpdate} and \code{fitUpdate} to the workers
-#' and applies \code{launchUpdate} to every element of \code{l_tmb}.
-#'
-#' @param formula Formula for the updated GLMNB models.
-#' @param l_tmb List of GLMNB objects.
-#' @param n.cores Number of cores to use for parallel processing. If NULL, the value of \code{parallel::detectCores()} is used.
-#' @param log_file File path for the log output (passed as \code{outfile} when the cluster is created).
-#' @param ... Additional arguments forwarded to \code{launchUpdate}.
-#' @export
-#' @return A list of updated GLMNB models.
-#' @examples
-#' data(iris)
-#' groups <- unique(iris$Species)
-#' group_by <- "Species"
-#' formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-#' fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1)
-#' new_formula <- Sepal.Length ~ Sepal.Width 
-#' results <- .parallel_update(new_formula, fitted_models, n.cores = 1)
-.parallel_update <- function(formula, l_tmb, n.cores = NULL, log_file = "log.txt",  ...) {
-  if (is.null(n.cores)) n.cores <- parallel::detectCores()
-  clust <- parallel::makeCluster(n.cores, outfile = log_file)
-  # Always release the workers, including when the export or the parallel
-  # apply stops with an error.
-  on.exit(parallel::stopCluster(clust), add = TRUE)
-  parallel::clusterExport(clust, c("launchUpdate", "fitUpdate"), envir = environment())
-  parallel::parLapply(clust, X = l_tmb, fun = launchUpdate, formula = formula, ...)
-}
-
-
-#' Refit a GLMNB model with a new formula.
-#'
-#' Calls \code{stats::update} on the model, puts the model frame of the
-#' original object back on the refit and, when \code{family} or \code{control}
-#' are supplied through \code{...}, stores their evaluated values in the call
-#' of the refit to avoid an error in a future update.
-#'
-#' @param glmnb_obj A GLMNB object to be updated.
-#' @param formula Formula for the updated GLMNB model.
-#' @param ... Additional arguments forwarded to \code{stats::update}.
-#' @export
-#' @return An updated GLMNB model.
-#'
-#' @examples
-#' data(iris)
-#' groups <- unique(iris$Species)
-#' group_by <- "Species"
-#' formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-#' fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1)
-#' new_formula <- Sepal.Length ~ Sepal.Width 
-#' updated_model <- fitUpdate(fitted_models[[1]], new_formula)
-fitUpdate <- function(glmnb_obj, formula, ...) {
-  # NOTE(review): the original frame is restored on the refit, presumably so
-  # that later updates can still use variables absent from `formula` - confirm.
-  original_frame <- glmnb_obj$frame
-  refit <- stats::update(glmnb_obj, formula, ...)
-  refit$frame <- original_frame
-
-  extras <- list(...)
-
-  ## family in ... => avoid error in future update
-  family_value <- extras[["family"]]
-  if (!is.null(family_value)) {
-    refit$call$family <- family_value
-  }
-
-  ## control in ... => avoid error in future update
-  control_value <- extras[["control"]]
-  if (!is.null(control_value)) {
-    refit$call$control <- control_value
-  }
-
-  refit
-}
-
-
-#' Run the update of a GLMNB model and log what goes wrong.
-#'
-#' Wraps \code{fitUpdate}: every warning is reported with \code{message} and
-#' muffled, every error is reported with \code{message} and turned into a
-#' \code{NULL} result.
-#'
-#' @param glmnb_obj A GLMNB object to be updated.
-#' @param formula Formula for the updated GLMNB model.
-#' @param ... Additional arguments forwarded to \code{fitUpdate}.
-#' @export
-#' @return An updated GLMNB model or NULL if an error occurs.
-#'
-#' @examples
-#' data(iris)
-#' groups <- unique(iris$Species)
-#' group_by <- "Species"
-#' formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-#' fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1)
-#' new_formula <- Sepal.Length ~ Sepal.Width 
-#' updated_model <- launchUpdate(fitted_models[[1]], new_formula)
-launchUpdate <- function(glmnb_obj, formula, ...) {
-  # NOTE(review): the label is the deparsed expression supplied by the caller,
-  # which is not necessarily the name of the element in the model list.
-  group <- deparse(substitute(glmnb_obj))
-
-  # Report the warning, then let the update carry on
-  on_warning <- function(w) {
-    message(paste(Sys.time(), "warning for group", group, ":", conditionMessage(w)))
-    invokeRestart("muffleWarning")
-  }
-
-  # Report the error and give up on this model
-  on_error <- function(e) {
-    message(paste(Sys.time(), "error for group", group, ":", conditionMessage(e)))
-    NULL
-  }
-
-  tryCatch(
-    withCallingHandlers(fitUpdate(glmnb_obj, formula, ...), warning = on_warning),
-    error = on_error
-  )
-}
-
-```
-
-
-```{r  test-updateFit}
-# Test updateParallel function
-test_that("updateParallel function returns correct results", {
-  # Fit one model per species, then update all of them with a simpler formula
-  data(iris)
-  group_by <- "Species"
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1)
-  new_formula <- Sepal.Length ~ Sepal.Width
-  results <- updateParallel(new_formula, fitted_models, n.cores = 1)
-  expect_type(results, "list")
-  expect_length(results, length(fitted_models))
-  expect_s3_class(results$setosa, "glmmTMB")
-
-  # A formula using an unknown variable must raise an error
-  new_formula <- Sepal.Length ~ blabla
-  expect_error(updateParallel(new_formula, fitted_models, n.cores = 1))
-
-  # Additional parameters:
-  # a new family must be stored in the call of the updated model
-  new_formula <- Sepal.Length ~ Sepal.Width
-  updated_model <- suppressWarnings(updateParallel(l_tmb = fitted_models,
-                                                   formula = new_formula,
-                                                   n.cores = 1,
-                                                   family = glmmTMB::nbinom1(link = "log")))
-  expect_s3_class(updated_model$setosa$call$family, "family")
-  expect_equal(updated_model$setosa$call$formula, new_formula)
-  # new control settings must be stored in the call as well
-  updated_model <- suppressWarnings(updateParallel(l_tmb = fitted_models,
-                                                   formula = new_formula,
-                                                   family = glmmTMB::nbinom1(link = "log"),
-                                                   n.cores = 1,
-                                                   control = glmmTMB::glmmTMBControl(optCtrl = list(iter.max = 1e3,
-                                                                                                    eval.max = 1e3))))
-  expect_equal(updated_model$setosa$call$control, glmmTMB::glmmTMBControl(optCtrl = list(iter.max = 1e3, eval.max = 1e3)))
-
-  # Update an updated model
-  updated_updated_model <- suppressWarnings(updateParallel(l_tmb = updated_model,
-                                                           formula = new_formula,
-                                                           n.cores = 1,
-                                                           family = glmmTMB::ziGamma(link = "inverse")))
-  expect_s3_class(updated_updated_model$setosa$call$family, "family")
-})
-
-# Test .parallel_update function
-test_that(".parallel_update function returns correct results", {
-  # Fit one model per species, then update all of them with a simpler formula
-  data(iris)
-  group_by <- "Species"
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1)
-  new_formula <- Sepal.Length ~ Sepal.Width
-  results <- .parallel_update(new_formula, fitted_models, n.cores = 1)
-  expect_type(results, "list")
-  expect_length(results, length(fitted_models))
-  expect_s3_class(results$setosa, "glmmTMB")
-
-  # No upfront validation here: an invalid formula yields NULL entries, not an error
-  new_formula <- Sepal.Length ~ blabla
-  results <- .parallel_update(new_formula, fitted_models, n.cores = 1)
-  expect_type(results, "list")
-  expect_length(results, length(fitted_models))
-  expect_null(results$setosa)
-
-  # Additional parameters:
-  # a new family must be stored in the call of the updated model
-  new_formula <- Sepal.Length ~ Sepal.Width
-  updated_model <- suppressWarnings(.parallel_update(l_tmb = fitted_models,
-                                                     formula = new_formula,
-                                                     n.cores = 1,
-                                                     family = glmmTMB::nbinom1(link = "log")))
-  expect_s3_class(updated_model$setosa$call$family, "family")
-  expect_equal(updated_model$setosa$call$formula, new_formula)
-  # new control settings must be stored in the call as well
-  updated_model <- suppressWarnings(.parallel_update(l_tmb = fitted_models,
-                                                     formula = new_formula,
-                                                     n.cores = 1,
-                                                     family = glmmTMB::nbinom1(link = "log"),
-                                                     control = glmmTMB::glmmTMBControl(optCtrl = list(iter.max = 1e3,
-                                                                                                      eval.max = 1e3))))
-  expect_equal(updated_model$setosa$call$control, glmmTMB::glmmTMBControl(optCtrl = list(iter.max = 1e3, eval.max = 1e3)))
-})
-
-# Test fitUpdate function
-test_that("fitUpdate function returns correct results", {
-  # Fit one model per species and update the first one
-  data(iris)
-  group_by <- "Species"
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1)
-  new_formula <- Sepal.Length ~ Sepal.Width
-
-  updated_model <- fitUpdate(fitted_models[[1]], new_formula)
-  expect_s3_class(updated_model, "glmmTMB")
-
-  # Additional parameters:
-  # a new family must be stored in the call of the updated model
-  updated_model <- suppressWarnings(fitUpdate(fitted_models[[1]], new_formula,
-                                              family = glmmTMB::nbinom1(link = "log")))
-  expect_s3_class(updated_model$call$family, "family")
-  expect_equal(updated_model$call$formula, new_formula)
-  # new control settings must be stored in the call as well
-  updated_model <- suppressWarnings(fitUpdate(fitted_models[[1]],
-                                              new_formula,
-                                              family = glmmTMB::nbinom1(link = "log"),
-                                              control = glmmTMB::glmmTMBControl(optCtrl = list(iter.max = 1e3,
-                                                                                               eval.max = 1e3))))
-  expect_equal(updated_model$call$control, glmmTMB::glmmTMBControl(optCtrl = list(iter.max = 1e3, eval.max = 1e3)))
-
-})
-
-
-# Test launchUpdate function
-test_that("launchUpdate function returns correct results", {
-  # Fit one model per species and update the first one
-  data(iris)
-  group_by <- "Species"
-  formula <- Sepal.Length ~ Sepal.Width + Petal.Length
-  fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1)
-  new_formula <- Sepal.Length ~ Sepal.Width
-  updated_model <- launchUpdate(fitted_models[[1]], new_formula)
-  expect_s3_class(updated_model, "glmmTMB")
-  # Additional parameters:
-  # a new family must be stored in the call of the updated model
-  updated_model <- launchUpdate(fitted_models[[1]], new_formula, family = glmmTMB::nbinom1(link = "log"))
-  expect_s3_class(updated_model$call$family, "family")
-  expect_equal(updated_model$call$formula, new_formula)
-  # new control settings (another optimizer) must be stored in the call as well
-  updated_model <- launchUpdate(fitted_models[[1]], new_formula, family = glmmTMB::nbinom1(link = "log"),
-                                control = glmmTMB::glmmTMBControl(optimizer = optim, optArgs = list(method = "BFGS")))
-  expect_equal(updated_model$call$control, glmmTMB::glmmTMBControl(optimizer = optim, optArgs = list(method = "BFGS")))
-
-})
-
-```
-
-```{r functionTidyGLM, filename = "tidy_glmmTMB"}
-
-
-#' Extract Fixed Effects from a GLMMTMB Model Summary
-#'
-#' This function extracts fixed effects from the summary of a glmmTMB model.
-#' Every non-NULL coefficient table of the summary is converted to a data
-#' frame, its columns are renamed with \code{renameColumns} and all tables are
-#' stacked into one data frame.
-#'
-#' @param x A glmmTMB model object.
-#' @return A dataframe containing the fixed effects and their corresponding statistics,
-#'   with the extra columns \code{term}, \code{component} and \code{effect} (always "fixed").
-#' @export
-#' @examples
-#'
-#' model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length, data = iris)
-#' fixed_effects <- extract_fixed_effect(model)
-extract_fixed_effect <- function(x) {
-  coef_tables <- summary(x)$coefficients
-  tidy_tables <- lapply(coef_tables, function(coef_table) {
-    if (is.null(coef_table)) return(NULL)
-    coef_df <- data.frame(coef_table)
-    coef_df$term <- removeDuplicatedWord(rownames(coef_df))
-    rownames(coef_df) <- NULL
-    renameColumns(coef_df)
-  })
-
-  ss_df <- do.call(rbind, tidy_tables)
-  # The component is read from the row names of the stacked table: it is the
-  # part that precedes the first "." (or the whole row name when there is none).
-  ss_df$component <- vapply(
-    rownames(ss_df),
-    function(row_name) strsplit(row_name, split = "[.]")[[1]][1],
-    character(1),
-    USE.NAMES = FALSE
-  )
-  ss_df$effect <- "fixed"
-  rownames(ss_df) <- NULL
-  ss_df
-}
-
-
-
-#' Extract Tidy Summary of glmmTMB Model
-#'
-#' This function extracts a tidy summary of the fixed effects
-#' (\code{extract_fixed_effect}) and random effects (\code{extract_ran_pars})
-#' from a glmmTMB model and binds them together in a data frame. Missing
-#' columns are filled with NA, rows made only of NA are dropped and the
-#' columns are returned in the order ID, effect, component, group, term,
-#' estimate, std.error, statistic, p.value.
-#'
-#' @param glm_TMB A glmmTMB model object, or NULL.
-#' @param ID An identifier to be included in the output data frame (column \code{ID}).
-#' @return A data frame containing a tidy summary of the fixed and random effects from the glmmTMB model,
-#'   or NULL when \code{glm_TMB} is NULL.
-#' @export
-#' @examples
-#'
-#' model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length, data = iris)
-#' tidy_summary <- getTidyGlmmTMB(glm_TMB = model, ID = "Model1")
-getTidyGlmmTMB <- function(glm_TMB, ID){
-  # Nothing to summarise for a missing model
-  if(is.null(glm_TMB)) return(NULL)
-  # Give both tables the same set of columns before stacking them
-  df1 <- extract_fixed_effect(glm_TMB)
-  df1 <- build_missingColumn_with_na(df1)
-  df2 <- extract_ran_pars(glm_TMB)
-  df2 <- build_missingColumn_with_na(df2)
-  df_2ret <- rbind(df1, df2)
-  # Cells that compare equal to the string "NaN" are replaced by NA
-  df_2ret[df_2ret == "NaN"] <- NA
-  # NOTE(review): presumably also removes the placeholder row produced when the
-  # model has no random effect - confirm before changing the order of these steps.
-  df_2ret <- df_2ret[rowSums(!is.na(df_2ret)) > 0, ] # drop rows full of NA
-  # ID is added after the NA filter so it cannot keep an empty row alive
-  df_2ret$ID <- ID
-  df_2ret <- reorderColumns(df_2ret,  
-                            c("ID","effect", "component", "group", "term", 
-                              "estimate", "std.error", "statistic", "p.value"))
-  return(df_2ret)
-}
-
-
-
-#' Extract Tidy Summary of Multiple glmmTMB Models
-#'
-#' This function takes a list of glmmTMB models and extracts a tidy summary of the fixed and random effects from each model. It then combines the results into a single data frame.
-#' A single glmmTMB object is also accepted; its rows get the ID "glmmTMB".
-#'
-#' @param l_tmb A named list of glmmTMB model objects, or a single glmmTMB model object.
-#'   The names of the list are used as IDs.
-#' @return A data frame containing a tidy summary of the fixed and random effects from all glmmTMB models in the list.
-#' @export
-#' @examples
-#' model1 <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1 | Species), data = iris)
-#' model2 <- glmmTMB::glmmTMB(Petal.Length ~ Sepal.Length + Sepal.Width + (1 | Species), data = iris)
-#' model_list <- list(Model1 = model1, Model2 = model2)
-#' tidy_summary <- tidy_tmb(model_list)
-tidy_tmb <- function(l_tmb) {
-  # inherits() also recognises objects carrying additional classes
-  if (inherits(l_tmb, "glmmTMB")) return(getTidyGlmmTMB(l_tmb, "glmmTMB"))
-  l_tidyRes <- lapply(names(l_tmb), function(ID) getTidyGlmmTMB(l_tmb[[ID]], ID))
-  do.call("rbind", l_tidyRes)
-}
-  
-
-#' Add the Expected Columns That a DataFrame Lacks
-#'
-#' Every name of \code{l_columns} that is not already a column of \code{df} is
-#' appended to it as a column filled with NA. Existing columns are left
-#' untouched.
-#'
-#' @param df The input DataFrame.
-#' @param l_columns A character vector specifying the column names to be present in the DataFrame.
-#' @return A DataFrame with missing columns added and filled with NA values.
-#' @export
-#' @examples
-#'
-#' df <- data.frame(effect = "fixed", term = "Sepal.Length", estimate = 0.7)
-#' df_with_na <- build_missingColumn_with_na(df)
-build_missingColumn_with_na <- function(df, l_columns = c("effect", "component", "group", 
-                                                          "term", "estimate", "std.error", "statistic", "p.value")) {
-  # Add the missing columns one by one, in the order given by l_columns
-  for (absent_col in setdiff(l_columns, colnames(df))) {
-    df[[absent_col]] <- NA
-  }
-  df
-}
-
-#' Collapse Immediately Repeated Text in Strings
-#'
-#' Replaces every chunk of text that is immediately followed by one or more
-#' copies of itself with a single copy ("hellohello" becomes "hello").
-#' The pattern is not limited to whole words: a doubled single character is
-#' collapsed as well, so "hello" becomes "helo".
-#'
-#' @param strings A character vector containing strings with potential duplicated words.
-#' @return A character vector with duplicated words removed from each string.
-#' @export
-#' @examples
-#'
-#' words <- c("hellohello", "worldworld", "programmingprogramming", "R isis great")
-#' cleaned_words <- removeDuplicatedWord(words)
-removeDuplicatedWord <- function(strings) {
-  # A captured chunk followed by at least one back-reference to itself
-  repeated_chunk <- "(.*)\\1+"
-  gsub(pattern = repeated_chunk, replacement = "\\1", x = strings, perl = TRUE)
-}
-
-
-
-
-#' Convert Correlation Matrix to Data Frame
-#'
-#' This function converts a correlation matrix into a data frame containing
-#' one row per pair of variables: the value read from the lower triangle of
-#' the matrix and a term named \code{cor__<column name>.<row name>}.
-#' A matrix with a single row has no pair and yields a data frame with zero rows.
-#'
-#' @param corr_matrix A correlation matrix to be converted.
-#' @return A data frame with the correlation values (\code{estimate}) and corresponding interaction names (\code{term}).
-#' @export
-#' @examples
-#' mat <- matrix(c(1, 0.7, 0.5, 0.7, 
-#'                  1, 0.3, 0.5, 0.3, 1), 
-#'                  nrow = 3, 
-#'                  dimnames = list(c("A", "B", "C"), 
-#'                                  c("A", "B", "C")))
-#' correlation_matrix_2df(mat)
-correlation_matrix_2df <- function(corr_matrix) {
-  col_names <- removeDuplicatedWord(colnames(corr_matrix))
-  row_names <- removeDuplicatedWord(rownames(corr_matrix))
-
-  # (row, col) positions of the lower triangle, listed column by column, i.e.
-  # in the same order as corr_matrix[lower.tri(corr_matrix)]
-  lower_idx <- which(lower.tri(corr_matrix), arr.ind = TRUE)
-
-  if (nrow(lower_idx) == 0) {
-    # No pair of variables: return an empty table instead of looping on 1:0
-    ret <- data.frame(estimate = numeric(0))
-    ret$term <- character(0)
-    return(ret)
-  }
-
-  ret <- data.frame(estimate = corr_matrix[lower_idx])
-  ret$term <- paste0("cor__", col_names[lower_idx[, "col"]], ".", row_names[lower_idx[, "row"]])
-  ret
-}
-
-#' Tidy the Variance-Covariance Components of One Model Component
-#'
-#' For every grouping factor found in \code{var_cor}, builds a data frame with
-#' the standard deviations (terms prefixed with "sd_") and, when the
-#' correlation matrix of that group has more than one row, the pairwise
-#' correlations returned by \code{correlation_matrix_2df}.
-#'
-#' @param var_cor A variance-covariance object from the glmmTMB model.
-#' @param elt A character indicating the type of effect, either "cond" or "zi".
-#'   It is copied as is into the \code{component} column.
-#' @return A list with one data frame per grouping factor. Each data frame has
-#'   the columns estimate, term, component, effect ("ran_pars") and group.
-#' @export
-#' @examples
-#' model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1|Species), 
-#'                            data = iris, family = gaussian)
-#' var_cor <- summary(model)$varcor$cond
-#' ran_pars_l <- wrapper_var_cor(var_cor, "cond")
-wrapper_var_cor <- function(var_cor, elt) {
-  lapply(names(var_cor), function(group) {
-    group_attr <- attributes(var_cor[[group]])
-
-    ## -- standard dev
-    sd_df <- data.frame(estimate = group_attr$stddev)
-    sd_df$term <- paste0("sd_", removeDuplicatedWord(rownames(sd_df)))
-
-    ## -- correlation (only reported when the group has several random terms)
-    cor_mat <- group_attr$correlation
-    out <- if (nrow(cor_mat) > 1) {
-      rbind(sd_df, correlation_matrix_2df(cor_mat))
-    } else {
-      sd_df
-    }
-
-    out$component <- elt
-    out$effect <- "ran_pars"
-    out$group <- group
-    rownames(out) <- NULL
-    out
-  })
-}
-
-
-#' Extract Random Parameters from a glmmTMB Model
-#'
-#' Reads the \code{varcor} element of the model summary for the "cond" and
-#' "zi" components, tidies each of them with \code{wrapper_var_cor} and stacks
-#' the resulting tables (conditional component first).
-#'
-#' @param x A glmmTMB model object.
-#' @return A data frame containing the random parameters and their estimates.
-#' @export
-#' @examples
-#' model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1|Species), data = iris, 
-#'          family = gaussian)
-#' random_params <- extract_ran_pars(model)
-extract_ran_pars <- function(x) {
-  varcor <- summary(x)$varcor
-
-  # One stacked table per model component
-  stack_component <- function(elt) {
-    do.call("rbind", wrapper_var_cor(varcor[[elt]], elt))
-  }
-
-  rbind(stack_component("cond"), stack_component("zi"))
-}
-
-
-#' Rename Columns in a Data Frame
-#'
-#' Renames the columns listed in \code{old_names} to the name found at the
-#' same position in \code{new_names}. The pairs are processed one after the
-#' other; an old name that is not a column of \code{df} triggers a warning and
-#' is skipped.
-#'
-#' @param df A data frame.
-#' @param old_names A character vector containing the old column names to be replaced.
-#' @param new_names A character vector containing the corresponding new column names.
-#'   Must have the same length as \code{old_names}.
-#' @return The data frame with renamed columns.
-#' @export
-#' @examples
-#' df <- data.frame(Estimate = c(1.5, 2.0, 3.2),
-#'                  Std..Error = c(0.1, 0.3, 0.2),
-#'                  z.value = c(3.75, 6.67, 4.90),
-#'                  Pr...z.. = c(0.001, 0.0001, 0.002))
-#'
-#' renamed_df <- renameColumns(df, old_names = c("Estimate", "Std..Error", "z.value", "Pr...z.."),
-#'                               new_names = c("estimate", "std.error", "statistic", "p.value"))
-#'
-renameColumns <- function(df, old_names  = c("Estimate","Std..Error", "z.value", "Pr...z.."), 
-                           new_names = c("estimate","std.error", "statistic", "p.value")) {
-  stopifnot(length(old_names) == length(new_names))
-
-  for (idx in seq_along(old_names)) {
-    from <- old_names[idx]
-
-    if (!(from %in% names(df))) {
-      warning(paste("Column", from, "not found in the data frame. Skipping renaming."))
-      next
-    }
-
-    names(df)[names(df) == from] <- new_names[idx]
-  }
-
-  df
-}
-
-
-
-#' Reorder the columns of a dataframe
-#'
-#' Returns the columns of \code{df} named in \code{columnOrder}, in that order.
-#' The result stays a dataframe even when a single column is requested.
-#'
-#' @param df The input dataframe.
-#' @param columnOrder A vector specifying the desired order of columns.
-#'
-#' @return A dataframe with columns reordered according to the specified column order.
-#' @export
-#' @examples
-#' # Example dataframe
-#' df <- data.frame(A = 1:3, B = 4:6, C = 7:9)
-#'
-#' # Define the desired column order
-#' columnOrder <- c("B", "C", "A")
-#'
-#' # Reorder the columns of the dataframe
-#' df <- reorderColumns(df, columnOrder)
-reorderColumns <- function(df, columnOrder) {
-  df[, columnOrder, drop = FALSE]
-}
-
-```
-
-
-```{r  test-tidyGLM}
-
-test_that("extract_fixed_effect returns the correct results for glmmTMB models", {
-  data(iris)
-  # Build a glmmTMB model on the iris data (example)
-  model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1|Species), data = iris)
-  
-  # Call extract_fixed_effect on the model
-  result <- extract_fixed_effect(model)
-  
-  # Check the expected results: one "fixed"/"cond" row per coefficient
-  expect_s3_class(result, "data.frame")
-  expect_equal(result$effect, c("fixed", "fixed", "fixed"))
-  expect_equal(result$component , c("cond", "cond", "cond"))
-  expect_equal(result$term , c("(Intercept)", "Sepal.Width", "Petal.Length"))
-  
-})
-
-
-test_that("getTidyGlmmTMB returns the correct results for glmmTMB models", {
-  data(iris)
-  # Build a glmmTMB model without random effect on the iris data (example)
-  model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length, data = iris)
-  tidy_summary <- getTidyGlmmTMB(glm_TMB = model, ID = "Model1")
-  
-  # Check the expected results: only the three fixed-effect rows, all tagged with the ID
-  expect_s3_class(tidy_summary, "data.frame")
-  expect_equal(tidy_summary$effect, c("fixed", "fixed", "fixed"))
-  expect_equal(tidy_summary$component , c("cond", "cond", "cond"))
-  expect_equal(tidy_summary$term , c("(Intercept)", "Sepal.Width", "Petal.Length"))
-  expect_equal(tidy_summary$ID , c("Model1", "Model1", "Model1"))
-
-  # A NULL model (failed fit) yields NULL
-  tidy_summary <- getTidyGlmmTMB(glm_TMB = NULL, ID = "Model1")
-  expect_equal(tidy_summary, NULL)
-})
-
-
-# Missing columns are appended after the existing ones, in the order of the default l_columns
-test_that("build_missingColumn_with_na returns the correct results", {
-  df <- data.frame(effect = "fixed", term = "Sepal.Length", estimate = 0.7)
-  df_with_na <- build_missingColumn_with_na(df)
-  expected_df <- data.frame(effect = "fixed",
-                            term = "Sepal.Length",
-                            estimate = 0.7,
-                            component = NA,
-                            group = NA,
-                            std.error = NA,
-                            statistic = NA,
-                            p.value  = NA)
-    
-  expect_equal(df_with_na, expected_df)
-})
-
-
-# Repeated chunks are collapsed both for whole strings and inside a longer string
-test_that("removeDuplicatedWord returns expected output", {
-  words <- c("hellohello", "worldworld", "programmingprogramming", "R isis great")
-  cleaned_words <- removeDuplicatedWord(words)
-  expect_equal(cleaned_words, c("hello", "world", "programming", "R is great"))
-})
-
-
-
-# A symmetric 3 x 3 matrix yields one row per pair of variables
-test_that("correlation_matrix_2df returns expected output",{
-
-  mat <- matrix(c(1, 0.7, 0.5, 0.7, 1, 0.3, 0.5, 0.3, 1), nrow = 3, dimnames = list(c("A", "B", "C"), c("A", "B", "C")))
-  df_corr <- correlation_matrix_2df(mat)
-  df_expected <- data.frame(estimate = c(0.7, 0.5, 0.3),
-                            term = c("cor__A.B", "cor__A.C", "cor__B.C"))
-  expect_equal(df_corr, df_expected)
-})
-
-
-
-test_that("wrapper_var_cor returns expected output",{
-  data(iris)
-  model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1|Species), data = iris, family = gaussian)
-  var_cor <- summary(model)$varcor$cond
-  # `elt` is the model component ("cond" or "zi"), not the grouping factor:
-  # the grouping factor is read from the names of var_cor.
-  ran_pars_l <- wrapper_var_cor(var_cor, "cond")
-  expected_l <- list(data.frame(estimate = 0.4978083, term = "sd_(Intercept)",
-                                component = "cond", effect = "ran_pars", group = "Species"))
-  expect_equal(ran_pars_l, expected_l, tolerance = 0.0000001)
-})
-
-
-# A single random intercept yields one "sd_" row attached to the conditional component
-test_that("extract_ran_pars returns expected output",{
-  model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1|Species), 
-                            data = iris, family = gaussian)
-  random_params <- extract_ran_pars(model)
-  
-  expected_df = data.frame(estimate = 0.4978083, term = "sd_(Intercept)", 
-                               component = "cond", effect = "ran_pars", group = "Species")
-  expect_equal(random_params , expected_df, tolerance = 0.0000001) 
-})
-
-
-# Column names change, the shape of the data frame does not
-test_that("renameColumns returns expected output",{
-  df <- data.frame(Estimate = c(1.5, 2.0, 3.2),
-                  Std..Error = c(0.1, 0.3, 0.2),
-                  z.value = c(3.75, 6.67, 4.90),
-                  Pr...z.. = c(0.001, 0.0001, 0.002))
-
-  new_colnames <- c("estimate", "std.error", "statistic", "p.value")
-  renamed_df <- renameColumns(df, old_names = c("Estimate", "Std..Error", "z.value", "Pr...z.."),
-                               new_names = new_colnames)
-  expect_equal(colnames(renamed_df),c("estimate", "std.error", "statistic", "p.value"))
-  expect_equal(dim(renamed_df), dim(df))
-})
-    
-
-# Columns come back in the requested order, the shape of the data frame does not change
-test_that("reorderColumns returns expected output",{
-    df <- data.frame(A = 1:3, B = 4:6, C = 7:9)
-    # Define the desired column order
-    columnOrder <- c("B", "C", "A")
-    # Reorder the columns of the dataframe
-    df_reorder <- reorderColumns(df, columnOrder)
-    expect_equal(colnames(df_reorder), columnOrder)
-    expect_equal(dim(df_reorder), dim(df))
-
-})
-
-
-test_that("tidy_tmb returns expected output",{
-  # Two models sharing the same random intercept: rows are tagged with the list names
-  model1 <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1 | Species), data = iris)
-  model2 <- glmmTMB::glmmTMB(Petal.Length ~ Sepal.Length + Sepal.Width + (1 | Species), data = iris)
-  model_list <- list(Model1 = model1, Model2 = model2)
-  result <- tidy_tmb(model_list)
-  expect_equal(unique(result$ID), c("Model1", "Model2"))
-  expect_equal(unique(result$effect), c("fixed", "ran_pars"))
-  expect_equal(unique(result$component), "cond")
-  expect_equal(unique(result$term), c("(Intercept)", "Sepal.Width", "Petal.Length", "sd_(Intercept)", "Sepal.Length"))
-  expect_true("estimate" %in% colnames(result))
-  expect_true("std.error" %in% colnames(result))
-  expect_true("statistic" %in% colnames(result))
-  expect_true("p.value" %in% colnames(result))
-  
-  
-  # zi component: an intercept-only zero-inflation formula adds exactly one row
-  model2 <- glmmTMB::glmmTMB(Petal.Length ~ Sepal.Length + Sepal.Width + (1 | Species), data = iris, ziformula = ~1)
-  model_list <- list(Model1 = model1, Model2 = model2)
-  result_withZi <- tidy_tmb(model_list)
-  expect_equal(dim(result_withZi)[1], dim(result)[1] + 1 )
-  expect_equal(unique(result_withZi$component), c("cond", "zi"))
-
-   ## single glmmTMB object passed directly instead of a list
-  model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1|Species), data = iris)
-  result <- tidy_tmb(model)
-  expect_true("effect" %in% colnames(result))
-  expect_true("component" %in% colnames(result))
-  expect_true("group" %in% colnames(result))
-  expect_true("term" %in% colnames(result))
-  expect_true("estimate" %in% colnames(result))
-  expect_true("std.error" %in% colnames(result))
-  expect_true("statistic" %in% colnames(result))
-  expect_true("p.value" %in% colnames(result))
-})
-```
-
-
-```{r functionGlanceGLM, filename = "glance_tmb"}
-
-#' Extracts the summary statistics from a list of glmmTMB models.
-#'
-#' This function takes a list of glmmTMB models and extracts the summary statistics (AIC, BIC, logLik, deviance,
-#' df.resid, and dispersion) for each model and returns them as a single DataFrame.
-#' The statistics of each model are computed by \code{getGlance}; the names of
-#' the list are used to label the rows.
-#'
-#' @param l_tmb A list of glmmTMB models or a unique glmmTMB obj model
-#' @return A DataFrame with the summary statistics for all the glmmTMB models in the list.
-#' @export
-#' @importFrom stats setNames
-#' @examples
-#' data(iris)
-#' models <-  fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length, 
-#'                            group_by = "Species",n.cores = 1, data = iris)
-#' result <- glance_tmb(models)
-glance_tmb <- function(l_tmb) {
-  # inherits() also recognises objects carrying additional classes
-  if (inherits(l_tmb, "glmmTMB")) return(getGlance(l_tmb))
-  l_group <- names(l_tmb)
-  l_glance <- lapply(stats::setNames(l_group, l_group), function(group) getGlance(l_tmb[[group]]))
-  do.call("rbind", l_glance)
-}
-
-
-#' Extracts the summary statistics from a single glmmTMB model.
-#'
-#' This function takes a single glmmTMB model and extracts the summary statistics (AIC, BIC, logLik, deviance,
-#' df.resid, and dispersion) from the model and returns them as a DataFrame.
-#'
-#' @param x A glmmTMB model.
-#' @return A DataFrame with the summary statistics for the glmmTMB model.
-#' @export
-#'
-#' @examples
-#' data(mtcars)
-#' model <- glmmTMB::glmmTMB(mpg ~ wt + (1|cyl), data = mtcars)
-#' getGlance(model)
-getGlance <- function(x){
-  if (is.null(x)) return(NULL)
-  ret <- data.frame(t(summary(x)$AICtab))
-  ret$dispersion <- glmmTMB::sigma(x)
-  ret
-}
-
-
-```
-
-
-```{r testGlanceGLM }
-
-test_that("glance_tmb returns the summary statistics for multiple models", {
-  data(iris)
-  models <-  fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length, group_by = "Species",n.cores = 1, data = iris)
-  result <- glance_tmb(models)
-  expect_true("AIC" %in% colnames(result))
-  expect_true("BIC" %in% colnames(result))
-  expect_true("logLik" %in% colnames(result))
-  expect_true("deviance" %in% colnames(result))
-  expect_true("df.resid" %in% colnames(result))
-  expect_true("dispersion" %in% colnames(result))
-  expect_true(sum(c("setosa","versicolor", "virginica") %in% rownames(result)) == 3) 
-  
-  ## unique obect in list 
-  model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1|Species), data = iris)
-  result <- glance_tmb(model)
-  expect_true("AIC" %in% colnames(result))
-  expect_true("BIC" %in% colnames(result))
-  expect_true("logLik" %in% colnames(result))
-  expect_true("deviance" %in% colnames(result))
-  expect_true("df.resid" %in% colnames(result))
-  expect_true("dispersion" %in% colnames(result))
-
-})
-
-test_that("getGlance returns the summary statistics for a single model", {
-  model <- glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length + (1|Species), data = iris)
-  result <- getGlance(model)
-  expect_true("AIC" %in% colnames(result))
-  expect_true("BIC" %in% colnames(result))
-  expect_true("logLik" %in% colnames(result))
-  expect_true("deviance" %in% colnames(result))
-  expect_true("df.resid" %in% colnames(result))
-  expect_true("dispersion" %in% colnames(result))
-})
-```
-
-
-```{r functionPlotMetrics, filename = "plot_metrics"}
-
-#' Subset the glance DataFrame based on selected variables.
-#'
-#' This function subsets the glance DataFrame to keep only the specified variables.
-#'
-#' @param glance_df The glance DataFrame to subset.
-#' @param focus A character vector of variable names to keep, including "AIC", "BIC", "logLik", "deviance",
-#' "df.resid", and "dispersion".
-#' @return A subsetted glance DataFrame with only the selected variables.
-#' @export
-#'
-#' @examples
-#' data(iris)
-#' models <-  fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length, 
-#'                        group_by = "Species",n.cores = 1, data = iris)
-#' glance_df <- glance_tmb(models)
-#' glance_df$group_id <- rownames(glance_df)
-#' subset_glance(glance_df, c("AIC", "BIC"))
-subset_glance <- function(glance_df, focus){
-  idx_existing_column <- focus %in% c("AIC", "BIC", "logLik", "deviance" ,"df.resid", "dispersion" )
-  if(sum(!idx_existing_column) > 0) warning(paste(focus[!idx_existing_column], ": does not exist\n"))
-  focus <- focus[idx_existing_column]
-  if (identical(focus, character(0)))
-    stop(paste0("Please select at least one variable to focus on : ", 
-                "AIC, BIC, logLik, deviance, df.resid, dispersion" ))
-  glance_df <- glance_df[ , c("group_id", focus)]
-  return(glance_df)
-}
-
-
-#' Plot Metrics for Generalized Linear Mixed Models (GLMM)
-#'
-#' This function generates a density plot of the specified metrics for the given
-#' list of generalized linear mixed models (GLMMs).
-#'
-#' @param l_tmb A list of GLMM objects to extract metrics from.
-#' @param focus A character vector specifying the metrics to focus on. Possible
-#'   values include "AIC", "BIC", "logLik", "deviance", "df.resid", and
-#'   "dispersion". If \code{NULL}, all available metrics will be plotted.
-#'
-#' @return A ggplot object displaying density plots for the specified metrics.
-#'
-#' @importFrom reshape2 melt
-#' @importFrom ggplot2 aes facet_wrap geom_density theme_bw theme ggtitle
-#'
-#' @export
-#'
-#' @examples
-#' models_list <-  fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length, 
-#'                      group_by = "Species",n.cores = 1, data = iris)
-#' metrics_plot(models_list, focus = c("AIC", "BIC", "deviance"))
-metrics_plot <- function(l_tmb, focus = NULL) {
-  glance_df <- glance_tmb(l_tmb)
-  glance_df$group_id <- rownames(glance_df)
-  if (!is.null(focus)) {
-    glance_df <- subset_glance(glance_df, focus)
-  }
-  long_glance_df <- reshape2::melt(glance_df, variable.name = "metric")
-  p <- ggplot2::ggplot(long_glance_df) +
-    ggplot2::geom_density(ggplot2::aes(x = value, col = metric, fill = metric), alpha = 0.4) +
-    ggplot2::facet_wrap(~metric, scales = "free") +
-    ggplot2::theme_bw() +
-    ggplot2::theme(legend.position = 'null') + 
-    ggplot2::ggtitle("Metrics plot")
-  return(p)
-}
-
-
-```
-
-```{r testPlotMetrics }
-
-
-test_that("subset_glance subsets the glance DataFrame correctly", {
-  data(iris)
-  models <-  fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length, group_by = "Species",n.cores = 1, data = iris)
-  glance_df <- glance_tmb(models)
-  glance_df$group_id <- rownames(glance_df)
-  result <- subset_glance(glance_df, c("AIC", "BIC"))
-  expect_true("AIC" %in% colnames(result))
-  expect_true("BIC" %in% colnames(result))
-  expect_true("group_id" %in% colnames(result))
-  expect_true(sum(c("setosa","versicolor", "virginica") %in% rownames(result)) == 3) 
-})
-
-
-
-
-test_that("metrics_plot returns a ggplot object", {
-  
-  data(iris)
-  l_glmTMB <- list(
-        setosa = glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length, 
-                     data = subset(iris, Species == "setosa")),
-        versicolor = glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length, 
-                         data = subset(iris, Species == "versicolor")),
-        virginica = glmmTMB::glmmTMB(Sepal.Length ~ Sepal.Width + Petal.Length, 
-                          data = subset(iris, Species == "virginica"))
-  )
-  p <- metrics_plot(l_glmTMB)
-  expect_true(inherits(p, "gg"))
-
-})
-
-
-```
-
-
-
-
-
-
-
-```{r functionEvalDispersion, filename = "evaluateDispersion"}
-
-#' Evaluate Dispersion Comparison
-#'
-#' Compares dispersion values between two data frames containing dispersion information.
-#'
-#' @param TMB_dispersion_df A data frame containing dispersion values from TMB.
-#' @param DESEQ_dispersion_df A data frame containing dispersion values from DESeq2.
-#' @param color2use vector of color use for points coloration
-#'
-#' @return A list containing a dispersion plot and a data frame with dispersion comparison.
-#' @importFrom ggplot2 scale_color_manual
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' disp_comparison <- evaluateDispersion(TMB_dispersion_df, DESEQ_dispersion_df, "red")
-#' plot_dispersion <- disp_comparison$disp_plot
-#' comparison_df <- disp_comparison$data
-#' }
-evaluateDispersion <- function(TMB_dispersion_df, DESEQ_dispersion_df, color2use) {
-  disp_comparison_dtf <- rbind(TMB_dispersion_df, DESEQ_dispersion_df)
-  disp_plot <- dispersion_plot(disp_comparison_dtf, col = "from") + ggplot2::scale_color_manual(values = color2use)
-
-  return(list(disp_plot = disp_plot, data = disp_comparison_dtf))
-}
-
-
-#' Get Dispersion Comparison
-#'
-#' Compares inferred dispersion values with actual dispersion values.
-#'
-#' @param inferred_dispersion A data frame containing inferred dispersion values.
-#' @param actual_dispersion A numeric vector containing actual dispersion values.
-#'
-#' @return A data frame comparing actual and inferred dispersion values.
-#' 
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' dispersion_comparison <- getDispersionComparison(inferred_disp, actual_disp)
-#' print(dispersion_comparison)
-#' }
-getDispersionComparison <- function(inferred_dispersion, actual_dispersion) {
-  actual_disp <- data.frame(actual_dispersion = actual_dispersion)
-  actual_disp$geneID <- rownames(actual_disp)
-  rownames(actual_disp) <- NULL
-  disp_comparison <- join_dtf(actual_disp, inferred_dispersion, "geneID", "geneID")
-  return(disp_comparison)
-}
-
-
-#' Extract DESeq2 Dispersion Values
-#'
-#' Extracts inferred dispersion values from a DESeq2 wrapped object.
-#'
-#' @param deseq_wrapped A DESeq2 wrapped object containing dispersion values.
-#'
-#' @return A data frame containing inferred dispersion values.
-#' 
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' dispersion_df <- extractDESeqDispersion(deseq2_object)
-#' print(dispersion_df)
-#' }
-extractDESeqDispersion <- function(deseq_wrapped) {
-  inferred_dispersion <- data.frame(inferred_dispersion = deseq_wrapped$dispersion)
-  inferred_dispersion$geneID <- rownames(inferred_dispersion)
-  rownames(inferred_dispersion) <- NULL
-  return(inferred_dispersion)
-}
-
-
-#' Extract TMB Dispersion Values
-#'
-#' Extracts inferred dispersion values from a TMB result object.
-#'
-#' @param l_tmb A TMB result object containing dispersion values.
-#'
-#' @return A data frame containing inferred dispersion values.
-#' 
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' dispersion_df <- extractTMBDispersion(tmb_result)
-#' print(dispersion_df)
-#' }
-extractTMBDispersion <- function(l_tmb) {
-  glanceRes <- glance_tmb(l_tmb)
-  inferred_dispersion <- data.frame(inferred_dispersion = glanceRes$dispersion)
-  inferred_dispersion$geneID <- rownames(glanceRes)
-  rownames(inferred_dispersion) <- NULL
-  return(inferred_dispersion)
-}
-
-
-
-#' Dispersion Evaluation Plot
-#'
-#' Creates a scatter plot to evaluate the dispersion values between actual and inferred dispersions.
-#'
-#' @param eval_dispersion A data frame containing actual and inferred dispersion values.
-#' @param ... Additional arguments to be passed to the ggplot2::aes function.
-#' @importFrom ggplot2 ggplot geom_point aes geom_abline theme_bw ggtitle scale_x_log10 scale_y_log10
-#' @return A ggplot2 scatter plot.
-#' 
-#' @export
-#'
-#' @examples
-#' \dontrun{
-#' disp_plot <- dispersion_plot(disp_comparison_dtf, col = "from")
-#' print(disp_plot)
-#' }
-dispersion_plot <- function(eval_dispersion, ...) {
-
-  args <- lapply(list(...), function(x) if (!is.null(x)) ggplot2::sym(x))
-
-  p <- ggplot2::ggplot(eval_dispersion) +
-    ggplot2::geom_point(ggplot2::aes(x = actual_dispersion, y = inferred_dispersion, !!!args), size = 3, alpha = 0.6) +
-    ggplot2::geom_abline(intercept = 0, slope = 1, lty = 3, col = 'red', linewidth = 1) +
-    ggplot2::theme_bw() +
-    ggplot2::ggtitle("Dispersion evaluation") +
-    ggplot2::scale_x_log10() +
-    ggplot2::scale_y_log10()
-
-  return(p)
-}
-
-
-
-```
-
-```{r testPlotMetrics }
-
-
-# Example data
-
-
-# Tests
-test_that("dispersion_plot function works correctly", {
-  eval_disp <- data.frame(
-    actual_dispersion = c(0.1, 0.2, 0.3),
-    inferred_dispersion = c(0.12, 0.18, 0.28),
-    from = c("HTRfit", "HTRfit", "DESeq2")
-  )
-  disp_plot <- dispersion_plot(eval_disp, col = "from")
-  expect_s3_class(disp_plot, "gg")
-})
-
-test_that("extractTMBDispersion function extracts dispersion correctly", {
-   N_GENES = 100
-  MAX_REPLICATES = 5
-  MIN_REPLICATES = 5
-  input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3)
-  mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                         min_replicates = MIN_REPLICATES, max_replicates = MAX_REPLICATES)
-  data2fit <- prepareData2fit(countMatrix = mock_data$counts, metadata =  mock_data$metadata)
-  l_res <- fitModelParallel(formula = kij ~ varA,
-                          data = data2fit, group_by = "geneID",
-                          family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-  extracted_disp <- extractTMBDispersion(l_res)
-  expect_identical(colnames(extracted_disp), c("inferred_dispersion", "geneID"))
-})
-
-test_that("extractDESeqDispersion function extracts dispersion correctly", {
-   N_GENES = 100
-  MAX_REPLICATES = 5
-  MIN_REPLICATES = 5
-  input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3)
-  mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                         min_replicates = MIN_REPLICATES, max_replicates = MAX_REPLICATES)
-  dds <- DESeq2::DESeqDataSetFromMatrix(
-      countData = round(mock_data$counts),
-      colData = mock_data$metadata,
-      design = ~ varA)
-  dds <- DESeq2::DESeq(dds, quiet = TRUE)
-  deseq_wrapped = wrapper_DESeq2(dds, 2, "greaterAbs")
-  
-  extracted_disp <- extractDESeqDispersion(deseq_wrapped)
-  expect_identical(colnames(extracted_disp), c("inferred_dispersion", "geneID"))
-})
-
-test_that("getDispersionComparison function works correctly", {
-   N_GENES = 100
-  MAX_REPLICATES = 5
-  MIN_REPLICATES = 5
-  input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3)
-  mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                         min_replicates = MIN_REPLICATES, max_replicates = MAX_REPLICATES)
-  data2fit <- prepareData2fit(countMatrix = mock_data$counts, metadata =  mock_data$metadata)
-  l_res <- fitModelParallel(formula = kij ~ varA,
-                          data = data2fit, group_by = "geneID",
-                          family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-  
-  tmb_disp_inferred <- extractTMBDispersion(l_res)
-    
-  comparison <- getDispersionComparison(tmb_disp_inferred, mock_data$groundTruth$gene_dispersion)
-  expect_identical(colnames(comparison), c("actual_dispersion",  "geneID", "inferred_dispersion"))
-})
-
-test_that("evaluateDispersion function works correctly", {
-   N_GENES = 100
-  MAX_REPLICATES = 5
-  MIN_REPLICATES = 5
-  input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3)
-  mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                         min_replicates = MIN_REPLICATES, max_replicates = MAX_REPLICATES)
-  data2fit <- prepareData2fit(countMatrix = mock_data$counts, metadata =  mock_data$metadata)
-  l_res <- fitModelParallel(formula = kij ~ varA,
-                          data = data2fit, group_by = "geneID",
-                          family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-  dds <- DESeq2::DESeqDataSetFromMatrix(
-      countData = round(mock_data$counts),
-      colData = mock_data$metadata,
-      design = ~ varA)
-  dds <- DESeq2::DESeq(dds, quiet = TRUE)
-  deseq_wrapped = wrapper_DESeq2(dds, 2, "greaterAbs")
-  
-  tmb_disp_inferred <- extractTMBDispersion(l_res)
-  TMB_dispersion_df <- getDispersionComparison(tmb_disp_inferred, mock_data$groundTruth$gene_dispersion)
-  TMB_dispersion_df$from <- 'HTRfit'
-  DESEQ_disp_inferred <- extractDESeqDispersion(deseq_wrapped)
-  DESEQ_dispersion_df <- getDispersionComparison(DESEQ_disp_inferred , mock_data$groundTruth$gene_dispersion)
-  DESEQ_dispersion_df$from <- 'DESeq2'
-    
-  eval_disp <- evaluateDispersion(TMB_dispersion_df, DESEQ_dispersion_df, c("red", "blue"))
-  expect_identical(names(eval_disp), c("disp_plot", "data"))
-})
-
-
-  
-```
-
-
-
-
-
-```{r function-seqDepth, filename =  "scalingSequencingDepth"}
-
-#' Scale Counts Table
-#'
-#' This function scales a counts table based on the expected sequencing depth per sample.
-#'
-#' @param countsTable A counts table containing raw read counts.
-#' @param seq_depth  sequencing depth vector
-#' @return A scaled counts table.
-#'
-#' @export
-#' @examples
-#' mock_data <- list(counts = matrix(c(10, 20, 30, 20, 30, 10, 10, 20, 20, 20, 30, 10), ncol = 4))
-#' scaled_counts <- scaleCountsTable(countsTable = mock_data$counts, 1000000)
-#'
-scaleCountsTable <- function(countsTable, seq_depth){
-  seq_depth_simu <- colSums(countsTable)
-
-  if (length(seq_depth) > length(seq_depth_simu))
-    message("INFO: The length of the sequencing_depth vector exceeds the number of samples. Only the first N values will be utilized.")
-  if (length(seq_depth) < length(seq_depth_simu))
-    message("INFO: The length of the sequencing_depth vector is shorter than the number of samples. Values will be recycled.")
-
-  scalingDepth_factor <- suppressWarnings(seq_depth/seq_depth_simu)
-  counts_scaled <- as.data.frame(sweep(as.matrix(countsTable), 2,  scalingDepth_factor, "*"))
-  return(counts_scaled)
-}
-
-
-
-
-```
-
-```{r  test-scalingSequencingDepth}
-
-# Test case 1: Scaling with valid min_seq_depth and max_seq_depth
-test_that("Valid scaling of counts table", {
-      # Test data
-      mock_data <- list(counts = matrix(c(10, 20, 30, 20, 30, 10, 10, 20, 20, 20, 30, 10), ncol = 4))
-      # Test function
-      scaled_counts <- scaleCountsTable(countsTable = mock_data$counts, 115000)
-      
-      # Expected scaled counts
-      expected_scaled_counts <- matrix(c(5000, 10000, 15000, 10000, 15000, 5000, 
-                                         5000, 10000, 10000, 10000, 15000, 5000), ncol = 4)
-      
-      # Check if the scaled counts match the expected scaled counts
-      expect_true(all(colSums(scaled_counts) ==  115000))
-
-})
-
-```
-
-
-
-```{r function-geneExpressionScaling, filename =  "scalingGeneExpression"}
-
-
-
-
-#' Get bin expression for a data frame.
-#'
-#' This function divides the values of a specified column in a data frame into \code{n_bins} bins of equal width.
-#' The bin labels are then added as a new column in the data frame.
-#'
-#' @param dtf_coef A data frame containing the values to be binned.
-#' @param n_bins The number of bins to create.
-#' 
-#' @return A data frame with an additional column named \code{binExpression}, containing the bin labels.
-#' @export
-#' @examples
-#' dtf <- data.frame(mu_ij = c(10, 20, 30, 15, 25, 35, 40, 5, 12, 22))
-#' dtf_with_bins <- getBinExpression(dtf, n_bins = 3)
-#' 
-getBinExpression <- function(dtf_coef, n_bins){
-      col2bin <- "mu_ij"
-      bin_labels <- cut(dtf_coef[[col2bin]], n_bins, labels = paste("BinExpression", 1:n_bins, sep = "_"))
-      dtf_coef$binExpression <-  bin_labels     
-      return(dtf_coef)
-}
-
-
-
-
-#' Generate BE data.
-#' 
-#' This function generates BE data for a given number of genes, in a vector of BE values.
-#' 
-#' @param n_genes The number of genes to generate BE data for.
-#' @param basal_expression a numeric vector from which sample BE for eacg genes
-#' 
-#' @return A data frame containing gene IDs, BE values
-#' 
-#' @examples
-#' generate_BE(n_genes = 100, 10)
-#' 
-#' @export
-generate_BE <- function(n_genes, basal_expression) {
-  ## --avoid bug if one value in basal_expr
-  pool2sample <- c(basal_expression, basal_expression)
-  BE <- sample(x = pool2sample, size = n_genes, replace = T)
-  l_geneID <- base::paste("gene", 1:n_genes, sep = "")
-  ret <- list(geneID = l_geneID, basalExpr = BE) %>% as.data.frame()
-  return(ret)
-}
-
-
-
-#' Compute basal expresion for gene expression based on the coefficients data frame.
-#'
-#' This function takes the coefficients data frame \code{dtf_coef} and computes
-#' basal expression for gene expression. The scaling factors are generated 
-#' using the function \code{generate_BE}.
-#'
-#' @param dtf_coef A data frame containing the coefficients for gene expression.
-#' @param n_genes number of genes in simulation
-#' @param basal_expression
-#'
-#' @return A modified data frame \code{dtf_coef} with an additional column containing
-#'         the scaling factors for gene expression.
-#' @export
-#' @examples 
-#' list_var <- init_variable()
-#' N_GENES <- 5
-#' dtf_coef <- getInput2simulation(list_var, N_GENES)
-#' dtf_coef <- getLog_qij(dtf_coef)
-#' addBasalExpression(dtf_coef, N_GENES, 1)
-addBasalExpression <- function(dtf_coef, n_genes, basal_expression){
-    BE_df  <-  generate_BE(n_genes, basal_expression )
-    dtf_coef <- join_dtf(dtf_coef, BE_df, "geneID", "geneID")
-    return(dtf_coef) 
-}
-
-
-
-
-```
-
-```{r  test-geneExpressionScaling}
-
-test_that("generate_BE returns correct number of genes", {
-  be_data <- generate_BE(n_genes = 100, 1)
-  expect_equal(nrow(be_data), 100)
-})
-
-
-test_that("generate_BE returns BE values within specified vector", {
-  BE_vec <- c(1, 2, 33, 0.4)
-  be_data <- generate_BE(n_genes = 100, BE_vec)
-  expect_true(all(be_data$basalExpr %in% BE_vec))
-})
-
-
-test_that("Test for addbasalExpre function",{
-  
-  list_var <- init_variable()
-  N_GENES <- 5
-  dtf_coef <- getInput2simulation(list_var, N_GENES)
-  dtf_coef <- getLog_qij(dtf_coef)
-
-  # Test the function
-  dtf_coef_with_BE <- addBasalExpression(dtf_coef, N_GENES, 5)
-
-  # Check if the output is a data frame
-  expect_true(is.data.frame(dtf_coef_with_BE))
-
-  # Check if the number of rows is equal to number of row in dtf_coef
-  expect_equal(nrow(dtf_coef_with_BE), nrow(dtf_coef))
-  
-  # Check if the number of rows is equal to number of row in dtf_coef +1
-  expect_equal(ncol(dtf_coef_with_BE), ncol(dtf_coef)+1)
-  
-  # Check if the data frame has a new column "BE"
-  expect_true("basalExpr" %in% colnames(dtf_coef_with_BE))
-  
-  # Check if the values in the "BE" column are numeric
-  expect_true(all(is.numeric(dtf_coef_with_BE$basalExpr)))
-
-})
-
-
-# Test 1: Check if the function returns the correct number of bins
-test_that("getBinExpression returns the correct number of bins", {
-  dtf <- data.frame(mu_ij = c(10, 20, 30, 15, 25, 35, 40, 5, 12, 22))
-  n_bins <- 3
-  dtf_with_bins <- getBinExpression(dtf, n_bins)
-  expect_equal(nrow(dtf_with_bins), nrow(dtf), label = "Number of rows should remain the same")
-  expect_equal(ncol(dtf_with_bins), ncol(dtf) + 1, label = "Number of columns should increase by 1")
-})
-
-# Test 2: Check if the function adds the binExpression column correctly
-test_that("getBinExpression adds the binExpression column correctly", {
-  dtf <- data.frame(mu_ij = c(10, 20, 30, 15, 25, 35, 40, 5, 12, 22))
-  n_bins <- 3
-  dtf_with_bins <- getBinExpression(dtf, n_bins)
-  expected_bins <- c("BinExpression_1", "BinExpression_2", "BinExpression_3", "BinExpression_1", "BinExpression_2",
-                     "BinExpression_3", "BinExpression_3", "BinExpression_1", "BinExpression_1", "BinExpression_2")
-  expect_equal(dtf_with_bins$binExpression, factor(expected_bins))
-})
-
-# Test 3: Check if the function handles negative values correctly
-test_that("getBinExpression handles negative values correctly", {
-  dtf <- data.frame(mu_ij = c(10, -20, 30, -15, 25, 35, -40, 5, 12, -22))
-  n_bins <- 4
-  dtf_with_bins <- getBinExpression(dtf, n_bins)
-  expected_bins <- c("BinExpression_3", "BinExpression_2", "BinExpression_4", "BinExpression_2", "BinExpression_4",
-                     "BinExpression_4", "BinExpression_1", "BinExpression_3", "BinExpression_3", "BinExpression_1")
-  expect_equal(dtf_with_bins$binExpression, factor(expected_bins))
-})
-
-
-
-```
-
-
-
-```{r functionActualMainFixEff, filename =  "actualMainFixEffects" }
-
-#' Calculate average values by group
-#'
-#' @param data The input data frame
-#' @param column The name of the target variable
-#' @param group_by The names of the grouping variables
-#' @importFrom data.table setDT tstrsplit
-#' @importFrom rlang :=
-#' @return A data frame with average values calculated by group
-#' @export
-averageByGroup <- function(data, column, group_by) {
-  group_values <- split(data[[column]], data[group_by])
-  mean_values <- sapply(group_values, mean)
-  result <- data.frame(Group = names(mean_values), logQij_mean = mean_values)
-  data.table::setDT(result)[, {{ group_by }} := data.table::tstrsplit(Group, "[.]")]
-  result <- subset(as.data.frame(result), select = -Group)
-  return(result)
-}
-
-#' Convert specified columns to factor
-#'
-#' @param data The input data frame
-#' @param columns The column names to be converted to factors
-#' @return The modified data frame with specified columns converted to factors
-#' @export
-convert2Factor <- function(data, columns) {
-  if (is.character(columns)) {
-    columns <- match(columns, colnames(data))
-  }
-
-  if (length(columns) > 1) data[, columns] <- lapply(data[, columns], as.factor )
-  else data[, columns] <- as.factor(data[, columns])
-  return(data)
-}
-
-#' Subset Fixed Effect Inferred Terms
-#'
-#' This function subsets the tidy TMB object to extract the fixed effect inferred terms
-#' along with their categorization into interaction and non-interaction terms.
-#'
-#' @param tidy_tmb The tidy TMB object containing the inferred terms.
-#'
-#' @return A list with two elements:
-#' \describe{
-#'   \item{fixed_term}{A list with two components - \code{nonInteraction} and \code{interaction},
-#'   containing the names of the fixed effect inferred terms categorized as non-interaction and interaction terms, respectively.}
-#'   \item{data}{A data frame containing the subset of tidy_tmb that contains the fixed effect inferred terms.}
-#' }
-#' @export
-#' @examples
-#' input_var_list <- init_variable()
-#' mock_data <- mock_rnaseq(input_var_list, 10, 2, 2)
-#' getData2computeActualFixEffect(mock_data$groundTruth$effect)
-#' data2fit = prepareData2fit(countMatrix = mock_data$counts, metadata =  mock_data$metadata )
-#' #-- fit data
-#' resFit <- fitModelParallel(formula = kij ~ myVariable   ,
-#'                            data = data2fit, group_by = "geneID",
-#'                            family = glmmTMB::nbinom2(link = "log"), n.cores = 1) 
-#' tidy_tmb <- tidy_tmb(resFit)
-#' subsetFixEffectInferred(tidy_tmb)
-subsetFixEffectInferred <- function(tidy_tmb){
-  fixed_tidy <- tidy_tmb[tidy_tmb$effect == "fixed",]
-  l_term <- unique(fixed_tidy$term)
-  l_term <- l_term[!l_term %in% c("(Intercept)", NA)]
-  index_interaction <- grepl(x = l_term, ":")
-  l_term_nonInteraction <- l_term[!index_interaction]
-  l_term_interaction <- l_term[index_interaction]
-  l_term2ret <- list(nonInteraction = l_term_nonInteraction, interaction = l_term_interaction )
-  return(list(fixed_term = l_term2ret, data = fixed_tidy))
-}
-
-
-#' Get data for calculating actual values
-#'
-#' @param groundTruth The ground truth data frame
-#' @return A list containing required data for calculating actual values
-#' @export
-#' @examples
-#' input_var_list <- init_variable()
-#' mock_data <- mock_rnaseq(input_var_list, 10, 2, 2)
-#' getData2computeActualFixEffect(mock_data$groundTruth$effect)
-getData2computeActualFixEffect <- function(groundTruth){
-  col_names <- colnames(groundTruth)
-  categorical_vars <- col_names[grepl(col_names, pattern = "label_")]
-  average_gt <- averageByGroup(groundTruth, "log_qij_scaled", c("geneID", categorical_vars))
-  average_gt <- convert2Factor(data = average_gt, columns = categorical_vars )
-  return(list(categorical_vars = categorical_vars, data = average_gt))
-}
-
-
-#' Get the intercept dataframe
-#'
-#' @param fixeEff_dataActual The input list containing  the categorical variables and the data 
-#' @return The intercept dataframe
-#' @export
-getActualIntercept <- function(fixeEff_dataActual) {
-  ## -- split list
-  data<- fixeEff_dataActual$data
-  categorical_vars <- fixeEff_dataActual$categorical_vars
-
-  if (length(categorical_vars) == 1){
-    l_labels <- list()
-    l_labels[[categorical_vars]] <- levels(data[, categorical_vars])
-
-  } else l_labels <- lapply(data[, categorical_vars], levels)
-  index_ref <- sapply(categorical_vars, function(var) data[[var]] == l_labels[[var]][1])
-  index_ref <- rowSums(index_ref) == dim(index_ref)[2]
-  df_intercept <- data[index_ref, ]
-  df_intercept$term <- "(Intercept)"
-  colnames(df_intercept)[colnames(df_intercept) == "logQij_mean"] <- "actual"
-  df_intercept$description <- "(Intercept)"
-
-  index2keep <- !colnames(df_intercept) %in% categorical_vars
-  df_intercept <- df_intercept[,index2keep]
-
-  return(df_intercept)
-}
-
-
-#' Generate actual values for a given term
-#'
-#' @param term The term for which actual values are calculated
-#' @param df_actualIntercept The intercept dataframe
-#' @param dataActual The average ground truth dataframe
-#' @param categorical_vars The names of the categorical variables
-#' @return The data frame with actual values for the given term
-#' @export
-generateActualForMainFixEff <- function(term , df_actualIntercept , dataActual  , categorical_vars){
-  
-  computeActualValueForMainFixEff <- function(df_actualIntercept, df_term) {
-        df_term$actual <- df_term$logQij_mean - df_actualIntercept$actual
-        return(subset(df_term, select = -c(logQij_mean)))
-  }
-  
-  df_term <- subsetByTermLabel(dataActual, categorical_vars , term  )
-  df_term <- computeActualValueForMainFixEff(df_actualIntercept, df_term)
-  df_term$description <- gsub("\\d+$", "", term)
-  return(df_term)
-}
-
-
-
-#' subset data By Term Label
-#'
-#'
-#' Get a subset of the data based on a specific term label in the categorical variables.
-#'
-#' @param data The data frame to subset
-#' @param categorical_vars The categorical variables to consider
-#' @param term_label The term label to search for
-#' @return A subset of the data frame containing rows where the categorical variables match the specified term label
-#' @export
-#'
-#' @examples
-#' # Create a data frame
-#' my_data <- data.frame(color = c("red", "blue", "green", "red"),
-#'                       size = c("small", "medium", "large", "medium"),
-#'                       shape = c("circle", "square", "triangle", "circle"))
-#' my_data[] <- lapply(my_data, as.factor)
-#'
-#' # Get the subset for the term "medium" in the "size" variable
-#' subsetByTermLabel(my_data, "size", "medium")
-#' # Output: A data frame with rows where "size" is "medium"
-#'
-#' # Get the subset for the term "red" in the "color" variable
-#' subsetByTermLabel(my_data, "color", "red")
-#' # Output: A data frame with rows where "color" is "red"
-subsetByTermLabel <- function(data, categorical_vars, term_label ) {
-  if (length(categorical_vars) == 1) {
-    l_labels <- list()
-    l_labels[[categorical_vars]] <- levels(data[, categorical_vars])
-  } else {
-    l_labels <- lapply(data[, categorical_vars], levels)
-  }
-
-  term_variable <- findAttribute(term_label, l_labels)
-  if(is.null(term_variable)) stop("term_label not in 'data'")
-
-  index_ref <- sapply(categorical_vars, function(var) {
-    if (var == term_variable) {
-      data[[var]] == term_label
-    } else {
-      data[[var]] == l_labels[[var]][1]
-    }
-  })
-
-  index_ref <- rowSums(index_ref) == dim(index_ref)[2]
-  df_term <- data[index_ref, ]
-  df_term$term <- term_label
-  return(df_term)
-}
-
-#' Find Attribute
-#'
-#' Find the attribute containing the specified term in a given list.
-#'
-#' @param term The term to search for
-#' @param list The list to search within
-#' @return The attribute containing the term, or NULL if the term is not found in any attribute
-#' @export
-#'
-#' @examples
-#' # Create a list
-#' my_list <- list(color = c("red", "blue", "green"),
-#'                 size = c("small", "medium", "large"),
-#'                 shape = c("circle", "square", "triangle"))
-#'
-#' # Find the attribute containing "medium"
-#' findAttribute("medium", my_list)
-findAttribute <- function(term, list) {
-  for (attr in names(list)) {
-    if (term %in% list[[attr]]) {
-      return(attr)
-    }
-  }
-  return(NULL)  # If the term is not found in any attribute
-}
-
-#' Get actual values for non-interaction terms
-#'
-#' Builds the actual values of every requested term with
-#' \code{generateActualForMainFixEff} and stacks them in one data frame.
-#'
-#' @param l_term list of terms to compute
-#' @param fixeEff_dataActual A list with the elements \code{categorical_vars} and \code{data}
-#' @param df_actualIntercept The data frame containing the actual intercept values
-#' @return A data frame with actual values for non-interaction terms, without
-#'   the columns named in \code{categorical_vars}
-#' @export
-getActualMainFixEff <- function( l_term , fixeEff_dataActual , df_actualIntercept  ){
-  vars_categorical <- fixeEff_dataActual$categorical_vars
-  ground_truth <- fixeEff_dataActual$data
-  ## -- one data frame per term
-  actualForTerm <- function(term) {
-    generateActualForMainFixEff(term, df_actualIntercept,
-                                ground_truth, vars_categorical)
-  }
-  stacked <- do.call("rbind", lapply(l_term, actualForTerm))
-  ## -- keep every column that is not a categorical variable
-  stacked[, !colnames(stacked) %in% vars_categorical]
-}
-
-
-
-
-
-```
-
-```{r test-actualMainFixEff}
-
-test_that("Test for subsetFixEffectInferred function", {
-  # Prepare the test data
-  input_var_list <- init_variable(name = "varA", mu = c(1,2,3), level = 3) %>%
-                    init_variable(name = "varB", mu = c(2,-6), level = 2) %>%
-                    add_interaction(between_var = c("varA", "varB"), mu = 1, sd = 3)
-
-  mock_data <- mock_rnaseq(input_var_list, 10, 2, 2)
-  # The result is not used by the assertions below. The element is spelled
-  # `effects` in full instead of relying on `$` partial matching.
-  getData2computeActualFixEffect(mock_data$groundTruth$effects)
-  # FALSE rather than F: F is an ordinary variable that can be reassigned
-  data2fit <- prepareData2fit(countMatrix = mock_data$counts, metadata = mock_data$metadata, normalization = FALSE)
-
-  # Fit data
-  resFit <- fitModelParallel(formula = kij ~ varA + varB + varA:varB,
-                             data = data2fit, group_by = "geneID",
-                             family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-  # Named differently from the tidy_tmb() function to avoid shadowing it
-  tidy_res <- tidy_tmb(resFit)
-
-  # Test the subsetFixEffectInferred function
-  result <- subsetFixEffectInferred(tidy_res)
-  # Define expected output
-  expected_nonInteraction <- c("varA2", "varA3", "varB2")
-  expected_interaction <- c("varA2:varB2", "varA3:varB2")
-
-  # Compare the output with the expected values
-  expect_equal(result$fixed_term$nonInteraction, expected_nonInteraction)
-  expect_equal(result$fixed_term$interaction, expected_interaction)
-})
-
-
-# averageByGroup: one row per (Group1, Group2) combination
-test_that("averageByGroup returns correct average values", {
-  # Eight observations, each one alone in its group
-  input_df <- data.frame(
-    Group1 = c("A", "A", "B", "B", "C", "C", "D", "D"),
-    Group2 = c("X", "Y", "X", "Y", "X", "Y", "X", "Y"),
-    Value = 1:8
-  )
-
-  averaged <- averageByGroup(input_df, column = "Value", group_by = c("Group1", "Group2"))
-
-  # Shape, column names and values of the output
-  expect_equal(nrow(averaged), 8)
-  expect_equal(colnames(averaged), c("logQij_mean","Group1", "Group2" ))
-  expect_equal(averaged$logQij_mean, c(1, 3,5, 7, 2, 4, 6, 8))
-})
-
-# Tests for convert2Factor
-test_that("convert2Factor converts specified columns to factors", {
-  # Create a sample data frame with character columns
-  data <- data.frame(
-    Category1 = c("A", "B", "A", "B"),
-    Category2 = c("X", "Y", "X", "Z"),
-    Value = 1:4,
-    stringsAsFactors = FALSE
-  )
-  
-  # Convert columns to factors
-  result <- convert2Factor(data, columns = c("Category1", "Category2"))
-  
-  # Check the output; expect_s3_class() replaces the deprecated expect_is()
-  expect_s3_class(result$Category1, "factor")  # Category1 column converted to factor
-  expect_s3_class(result$Category2, "factor")  # Category2 column converted to factor
-})
-
-# findAttribute: name of the element holding a term, NULL when absent
-test_that("findAttribute returns the correct attribute", {
-  lookup <- list(
-    color = c("red", "blue", "green"),
-    size = c("small", "medium", "large"),
-    shape = c("circle", "square", "triangle")
-  )
-
-  # "medium" belongs to the `size` element
-  expect_equal(findAttribute("medium", lookup), "size")
-  # "rectangle" belongs to no element
-  expect_equal(findAttribute("rectangle", lookup), NULL)
-})
-
-# getActualIntercept: rows of the data at the reference level of every variable
-test_that("getActualIntercept returns the correct intercept dataframe", {
-  factor_cols <- c("Category1", "Category2")
-  ground_truth <- data.frame(
-    Category1 = c("A", "B", "A", "B"),
-    Category2 = c("X", "Y", "X", "Z"),
-    logQij_mean = 1:4
-  )
-  # The categorical columns have to be factors
-  ground_truth[factor_cols] <- lapply(ground_truth[factor_cols], as.factor)
-
-  intercept_df <- getActualIntercept(list(categorical_vars = factor_cols, data = ground_truth))
-
-  # Two rows are expected, all labelled as intercept
-  expect_equal(nrow(intercept_df), 2)
-  expect_equal(unique(intercept_df$term), "(Intercept)")
-  expect_equal(intercept_df$actual, c(1, 3))
-})
-
-
-
-
-
-# subsetByTermLabel, one categorical variable: plain filter on the label
-test_that("subsetByTermLabel with single categorical variable", {
-  grid <- expand.grid(color = c("red", "blue", "green", "red"),
-                      size = c("small", "medium", "large", "medium"),
-                      shape = c("circle", "square", "triangle", "circle"))
-  grid[] <- lapply(grid, as.factor)
-
-  observed <- subsetByTermLabel(grid, categorical_vars = "size", term_label = "medium")
-
-  is_medium <- grid$size == "medium"
-  expected <- grid[is_medium, ]
-  expected$term <- "medium"
-
-  expect_equal(observed, expected)
-})
-
-# subsetByTermLabel, several categorical variables: the other variables are
-# expected at their first level ("red" for color)
-test_that("subsetByTermLabel with single term label in multiple categorical variables", {
-  grid <- expand.grid(color = c("red", "blue", "green", "red"),
-                      size = c("small", "medium", "large", "medium"),
-                      shape = c("circle", "square", "triangle", "circle"))
-  grid[] <- lapply(grid, as.factor)
-
-  observed <- subsetByTermLabel(data = grid, categorical_vars = c("color", "shape"), term_label = "circle")
-
-  is_expected_row <- grid$shape == "circle" & grid$color == "red"
-  expected <- grid[is_expected_row, ]
-  expected$term <- "circle"
-
-  expect_equal(observed, expected)
-})
-
-# subsetByTermLabel must raise an error for a label that is not a level
-test_that("subsetByTermLabel with non-existent term label", {
-  grid <- expand.grid(color = c("red", "blue", "green", "red"),
-                      size = c("small", "medium", "large", "medium"),
-                      shape = c("circle", "square", "triangle", "circle"))
-  grid[] <- lapply(grid, as.factor)
-
-  expect_error(subsetByTermLabel(data = grid, categorical_vars = "size", term_label = "extra-large"))
-})
-
-
-
-# Test getActualMainFixEff
-test_that("getActualMainFixEff", {
-  input_var_list <- init_variable() 
-  mock_data <- mock_rnaseq(input_var_list, 2, 2, 2)
-  data2fit <- prepareData2fit(mock_data$counts, mock_data$metadata)
-  inference <- fitModelParallel(kij ~ myVariable , 
-                                  group_by = "geneID", data2fit, n.cores = 1)
-  tidy_inference <- tidy_tmb(inference)
-  tidy_fix <- subsetFixEffectInferred(tidy_inference)
-  fixEff_dataActual <- getData2computeActualFixEffect(mock_data$groundTruth$effects)
-  actual_intercept <- getActualIntercept(fixEff_dataActual)
-  ## -- main = non interaction
-  actual_mainFixEff <- getActualMainFixEff(tidy_fix$fixed_term$nonInteraction,
-                    fixEff_dataActual, actual_intercept)
-  
-  expected_actual <- data.frame(geneID = c("gene1", "gene2"),
-                                term = c("myVariable2", "myVariable2"),
-                                actual = c(1, 1),
-                                description = "myVariable")
-  # Reset the row names on BOTH sides so that only the content is compared
-  # (the second reset used to target actual_mainFixEff a second time).
-  rownames(actual_mainFixEff) <- NULL
-  rownames(expected_actual) <- NULL
-  expect_equal(actual_mainFixEff, expected_actual)
-})
-
-
-
-test_that("getData2computeActualFixEffect return correct output",{
-  # Prepare the test data. Only the simulated ground truth is needed: the
-  # model fit that used to run here was never read by the assertions.
-  input_var_list <- init_variable() 
-  mock_data <- mock_rnaseq(input_var_list, 2, 2, 2)
-
-  # Call the function to test
-  fixEff_dataActual <- getData2computeActualFixEffect(mock_data$groundTruth$effects)
-
-  # Define expected output
-  expected_data <- data.frame(logQij_mean = c(2,2,3,3), geneID = c("gene1", "gene2", "gene1", "gene2"), label_myVariable = factor(c("myVariable1", "myVariable1", "myVariable2", "myVariable2")))
-  expected_categorical_vars <- "label_myVariable"
-  # Compare the output with the expected values
-  expect_equal(fixEff_dataActual$data, expected_data)
-  expect_equal(fixEff_dataActual$categorical_vars, expected_categorical_vars)
-})
-
-
-test_that("generateActualForMainFixEff returns correct values for main fixed effect term", {
-  # Prepare the test data (the unused prepareData2fit() call was removed)
-  input_var_list <- init_variable() 
-  mock_data <- mock_rnaseq(input_var_list, 2, 2, 2)
-  fixEff_dataActual <- getData2computeActualFixEffect(mock_data$groundTruth$effects)
-  actual_intercept <- getActualIntercept(fixEff_dataActual)
-  df_term <- generateActualForMainFixEff("myVariable2", actual_intercept, fixEff_dataActual$data, fixEff_dataActual$categorical_vars)
-
-  # Define expected output
-  expected <- data.frame(
-    geneID = c("gene1", "gene2"),
-    label_myVariable = factor(c("myVariable2", "myVariable2"), levels = c("myVariable1", "myVariable2")),
-    term = c("myVariable2", "myVariable2"),
-    actual = c(1, 1),
-    description = c("myVariable", "myVariable")
-  )
-  # Row names are reset on both sides so that only the content is compared
-  rownames(df_term) <- NULL
-  rownames(expected) <- NULL
-  # Compare the output with the expected values
-  expect_equal(df_term, expected)
-})
-
-
-```
-
-```{r functionActualInteractionFixEff, filename =  "actualInteractionFixEffects" }
-#' Filter DataFrame
-#'
-#' Keep the rows of a data frame that satisfy every filter of a named list.
-#' Each list element is named after a column and holds the values allowed in
-#' that column; the filters are applied one after the other.
-#'
-#' @param df The DataFrame to be filtered
-#' @param filter_list A named list: names are column names, values are the accepted values
-#' @return The filtered DataFrame (\code{df} itself when \code{filter_list} has no names)
-#' @export
-#'
-#' @examples
-#' # Create a DataFrame
-#' df <- data.frame(ID = c(1, 2, 3, 4),
-#'                  Name = c("John", "Jane", "Mike", "Sarah"),
-#'                  Age = c(25, 30, 28, 32),
-#'                  Gender = c("Male", "Female", "Male", "Female"))
-#'
-#' # Create a filter list
-#' filter_list <- list(Name = c("John", "Mike"), Age = c(25, 28))
-#'
-#' # Filter the DataFrame
-#' filter_dataframe(df, filter_list)
-filter_dataframe <- function(df, filter_list ) {
-  # Apply one filter to the rows kept so far
-  keep_matching <- function(kept, column) {
-    kept[kept[[column]] %in% filter_list[[column]], ]
-  }
-  # Fold the filters over the data frame, starting from `df`
-  Reduce(keep_matching, names(filter_list), df)
-}
-
-
-#' Calculate actual interaction values between two terms in a data frame.
-#'
-#' The interaction between \code{lbl_term_1} and \code{lbl_term_2} is computed
-#' from the \code{logQij_mean} column of \code{data} as a difference of
-#' differences: (both labels - label 1 with reference 2) minus
-#' (reference 1 with label 2 - both references).
-#'
-#' @param data A data frame with the two term columns and a \code{logQij_mean} column.
-#' @param l_reference A list indexed by column name; the first element of each entry is used as the reference level.
-#' @param clmn_term_1 The name of the column in \code{data} representing the first term.
-#' @param lbl_term_1 The label of the first term to compute interactions for.
-#' @param clmn_term_2 The name of the column in \code{data} representing the second term.
-#' @param lbl_term_2 The label of the second term to compute interactions for.
-#'
-#' @return A numeric vector containing the actual interaction values between the specified terms.
-#' @export
-#' @examples
-#' average_gt <- data.frame(clmn_term_1 = c("A", "A", "B", "B"), 
-#'                          clmn_term_2 = c("X", "Y", "Y", "X"),
-#'                          logQij_mean = c(1.5, 8.0, 0.5, 4.0))
-#' # Define the function parameters
-#' l_label <- list(clmn_term_1 = c("A", "B"), clmn_term_2 = c("X", "Y"))
-#' clmn_term_1 <- "clmn_term_1"
-#' lbl_term_1 <- "B"
-#' clmn_term_2 <- "clmn_term_2"
-#' lbl_term_2 <- "Y"
-#' # Compute the actual interaction value
-#' actual_interaction <- calculate_actual_interactionX2_values(average_gt, 
-#'                                        l_label, clmn_term_1, lbl_term_1, 
-#'                                        clmn_term_2, lbl_term_2)
-calculate_actual_interactionX2_values <- function(data, l_reference , clmn_term_1, lbl_term_1, clmn_term_2, lbl_term_2) {
-  ref_term_1 <- l_reference[[clmn_term_1]][1]
-  ref_term_2 <- l_reference[[clmn_term_2]][1]
-
-  # logQij_mean of the rows where term 1 is `value_1` and term 2 is `value_2`
-  mean_at <- function(value_1, value_2) {
-    rows <- data[[clmn_term_1]] == value_1 & data[[clmn_term_2]] == value_2
-    data[rows, ]$logQij_mean
-  }
-
-  effect_at_label_1 <- mean_at(lbl_term_1, lbl_term_2) - mean_at(lbl_term_1, ref_term_2)
-  effect_at_ref_1 <- mean_at(ref_term_1, lbl_term_2) - mean_at(ref_term_1, ref_term_2)
-  effect_at_label_1 - effect_at_ref_1
-}
-
-
-#' Prepare data for computing interaction values.
-#'
-#' This function prepares the data for computing interaction values between variables.
-#' It filters the \code{dataActual} data frame by keeping only the rows where the categorical
-#' variables that are NOT involved in the interaction are at their reference (first) level.
-#' Variables involved in the interaction are left unfiltered.
-#'
-#' @param categorical_vars A character vector containing the names of categorical variables.
-#' @param categorical_varsInInteraction A character vector containing the names of categorical variables involved in interactions.
-#' @param dataActual A data frame containing the actual data with categorical variables and associated expression levels.
-#'
-#' @return A data frame containing the filtered data for computing interaction values.
-#' @export
-prepareData2computeInteraction <- function(categorical_vars, categorical_varsInInteraction, dataActual){
-  # `dataActual[categorical_vars]` stays a data frame even for one column;
-  # `dataActual[, categorical_vars]` would drop to a bare vector and lapply()
-  # would then iterate over its elements and lose the variable name.
-  l_RefInCategoricalVars <- lapply(dataActual[categorical_vars], function(vector) levels(vector)[1])
-  l_categoricalVars_NOT_InInteraction <- setdiff(categorical_vars, categorical_varsInInteraction)
-  # Reference level of each variable outside the interaction, keyed by name
-  l_filter <- l_RefInCategoricalVars[l_categoricalVars_NOT_InInteraction]
-  dataActual_2computeInteractionValues <- filter_dataframe(dataActual, l_filter)
-  return(dataActual_2computeInteractionValues)
-}
-
-
-
-#' Generate actual values for the interaction fixed effect.
-#'
-#' Computes the actual values of a two-term interaction with
-#' \code{calculate_actual_interactionX2_values} and returns them in a data
-#' frame next to the gene identifiers found in \code{data2computeInteraction}.
-#'
-#' @param labelsInInteraction A vector containing the labels of the interaction terms.
-#' @param l_categoricalVarsInInteraction A vector containing the names of categorical variables
-#'                                        involved in the interaction.
-#' @param data2computeInteraction The data frame used to compute interaction values.
-#' @param l_RefInCategoricalVars A list containing the reference levels of categorical variables.
-#'
-#' @return A data frame with the columns geneID, term, actual and description.
-#' \code{term} is the labels joined with ":"; \code{description} is built the
-#' same way after removing the trailing digits of each label.
-#'
-#' @export
-generateActualInteractionX2_FixEff <- function(labelsInInteraction, l_categoricalVarsInInteraction, 
-                                               data2computeInteraction, l_RefInCategoricalVars ){
-  first_label <- labelsInInteraction[1]
-  second_label <- labelsInInteraction[2]
-  values <- calculate_actual_interactionX2_values(data2computeInteraction,
-                                                  l_RefInCategoricalVars,
-                                                  l_categoricalVarsInInteraction[1], first_label,
-                                                  l_categoricalVarsInInteraction[2], second_label)
-
-  # Remove the trailing digits of a label (e.g. "varA2" gives "varA")
-  strip_level <- function(label) gsub("\\d+$", "", label)
-
-  result <- data.frame(geneID = unique(data2computeInteraction$geneID))
-  result$term <- paste(labelsInInteraction, collapse = ":")
-  result$actual <- values
-  result$description <- paste(strip_level(first_label), strip_level(second_label), sep = ":")
-  result
-}
-
-
-#' Generate Actual Interaction Values for Three Fixed Effects
-#'
-#' Computes the actual values of a three-term interaction with
-#' \code{calculate_actual_interactionX3_values} and returns them in a data
-#' frame next to the gene identifiers found in \code{data2computeInteraction}.
-#'
-#' @param labelsInInteraction A character vector of labels for the three fixed effects.
-#' @param l_categoricalVarsInInteraction The categorical variable names corresponding to the three fixed effects.
-#' @param data2computeInteraction The dataset on which to compute the interaction values.
-#' @param l_RefInCategoricalVars A list of reference values for the categorical variables.
-#'
-#' @return A data frame with the columns geneID, term, actual and description.
-#'
-#' @export
-generateActualInteractionX3_FixEff <- function(labelsInInteraction, l_categoricalVarsInInteraction,
-                                            data2computeInteraction, l_RefInCategoricalVars) {
-  first_label <- labelsInInteraction[1]
-  second_label <- labelsInInteraction[2]
-  third_label <- labelsInInteraction[3]
-  # Argument order of the callee: label 3 comes BEFORE column 3
-  values <- calculate_actual_interactionX3_values(data2computeInteraction,
-                                                  l_RefInCategoricalVars,
-                                                  l_categoricalVarsInInteraction[1], first_label,
-                                                  l_categoricalVarsInInteraction[2], second_label,
-                                                  third_label, l_categoricalVarsInInteraction[3])
-
-  # Remove the trailing digits of a label (e.g. "varA2" gives "varA")
-  strip_level <- function(label) gsub("\\d+$", "", label)
-
-  result <- data.frame(geneID = unique(data2computeInteraction$geneID))
-  result$term <- paste(labelsInInteraction, collapse = ":")
-  result$actual <- values
-  result$description <- paste(strip_level(first_label),
-                              strip_level(second_label),
-                              strip_level(third_label), sep = ":")
-  result
-}
-
-
-#' Calculate Actual Interaction Values for Three Fixed Effects
-#'
-#' The two-way contrast between terms 1 and 2,
-#' (label1,label2 - ref1,label2) - (label1,ref2 - ref1,ref2), is computed on
-#' the \code{logQij_mean} column twice: once with term 3 at \code{lbl_term_3}
-#' and once with term 3 at its reference. The result is the first contrast
-#' minus the second.
-#'
-#' @param data The dataset on which to calculate interaction values.
-#' @param l_reference A list indexed by column name; the first element of each entry is used as the reference level.
-#' @param clmn_term_1 The name of the first categorical variable.
-#' @param lbl_term_1 The label for the first categorical variable.
-#' @param clmn_term_2 The name of the second categorical variable.
-#' @param lbl_term_2 The label for the second categorical variable.
-#' @param lbl_term_3 The label for the third categorical variable.
-#' @param clmn_term_3 The name of the third categorical variable.
-#'
-#' @return The computed actual interaction values.
-#'
-#' @export
-calculate_actual_interactionX3_values <- function(data, l_reference, clmn_term_1, lbl_term_1, 
-                                                  clmn_term_2, lbl_term_2, lbl_term_3, clmn_term_3) {
-  ref_term_1 <- l_reference[[clmn_term_1]][1]
-  ref_term_2 <- l_reference[[clmn_term_2]][1]
-  ref_term_3 <- l_reference[[clmn_term_3]][1]
-
-  # logQij_mean of the rows matching one value per term
-  mean_at <- function(value_1, value_2, value_3) {
-    rows <- data[[clmn_term_1]] == value_1 &
-      data[[clmn_term_2]] == value_2 &
-      data[[clmn_term_3]] == value_3
-    data[rows, ]$logQij_mean
-  }
-
-  # Two-way contrast between terms 1 and 2 with term 3 held at `value_3`
-  contrast_at <- function(value_3) {
-    (mean_at(lbl_term_1, lbl_term_2, value_3) - mean_at(ref_term_1, lbl_term_2, value_3)) -
-      (mean_at(lbl_term_1, ref_term_2, value_3) - mean_at(ref_term_1, ref_term_2, value_3))
-  }
-
-  contrast_at(lbl_term_3) - contrast_at(ref_term_3)
-}
-
-
-
-#' Get the actual interaction values for a given interaction term in the data.
-#'
-#' This function takes the labels of an interaction term, the dataset, and the names of the
-#' categorical variables as inputs. It finds the variable owning each label, prepares the data
-#' with \code{prepareData2computeInteraction}, and then generates the actual interaction values
-#' with the two-term or the three-term generator.
-#'
-#' @param labelsInInteraction A character vector containing the labels of the categorical levels 
-#'     involved in the interaction (2 or 3 labels).
-#' @param data The dataset containing the gene expression data and categorical variables.
-#' @param categorical_vars A character vector containing the names of the categorical variables in 
-#'     the dataset.
-#' @return A data frame containing the actual interaction values.
-#' @export 
-getActualInteractionFixEff <- function(labelsInInteraction, data, categorical_vars ){
-  n_labels <- length(labelsInInteraction)
-  # Fail early with a clear message: without this guard neither branch ran and
-  # the function died on an undefined `actualInteractionValues`.
-  if (!n_labels %in% c(2L, 3L)) {
-    stop("getActualInteractionFixEff() supports interactions between 2 or 3 terms, got ", n_labels)
-  }
-
-  # `data[categorical_vars]` stays a data frame even for a single column,
-  # so lapply() always iterates over named columns.
-  l_labelsInCategoricalVars <- lapply(data[categorical_vars], levels)
-  l_RefInCategoricalVars <- lapply(l_labelsInCategoricalVars, function(lvls) lvls[1])
-  # Variable owning each label of the interaction, in the order of the labels
-  l_categoricalVarsInInteraction <- unlist(lapply(labelsInInteraction,
-                                                  function(label) findAttribute(label, l_labelsInCategoricalVars)))
-  data2computeInteraction <- prepareData2computeInteraction(categorical_vars, l_categoricalVarsInInteraction,  data )
-
-  generateActual <- if (n_labels == 3L) {
-    generateActualInteractionX3_FixEff
-  } else {
-    generateActualInteractionX2_FixEff
-  }
-  actualInteractionValues <- generateActual(labelsInInteraction,
-                                            l_categoricalVarsInInteraction,
-                                            data2computeInteraction,
-                                            l_RefInCategoricalVars)
-  return(actualInteractionValues)
-}
-
-
-#' Compute actual interaction values for multiple interaction terms.
-#'
-#' Each interaction term is split on ":" into its labels, the actual values
-#' are obtained with \code{getActualInteractionFixEff}, and the per-term data
-#' frames are stacked with \code{rbind}.
-#'
-#' @param l_interactionTerm A character vector of interaction terms in the form of "term1:term2".
-#' @param categorical_vars A character vector containing the names of categorical variables in the data.
-#' @param dataActual The data frame containing the actual gene expression values and metadata.
-#'
-#' @return A data frame containing the actual interaction values for each interaction term.
-#' @export
-#' @examples
-#' N_GENES <- 4
-#' MIN_REPLICATES <- 3
-#' MAX_REPLICATES <- 3
-#' init_var <- init_variable(name = "varA", mu = 8, sd = 0.1, level = 3) %>%
-#'   init_variable(name = "varB", mu = c(5,-5), NA , level = 2) %>%
-#'   init_variable(name = "varC", mu = 1, 3, 3) %>%
-#'   add_interaction(between_var = c("varA", "varC"), mu = 5, 0.1)
-#' mock_data <- mock_rnaseq(init_var, N_GENES, 
-#'                          MIN_REPLICATES, MAX_REPLICATES )
-#' data2fit <- prepareData2fit(countMatrix = mock_data$counts, 
-#'                              metadata =  mock_data$metadata )
-#' results_fit <- fitModelParallel(formula = kij ~ varA + varB + varC + varA:varC,
-#'                              data = data2fit, group_by = "geneID",
-#'                              family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-#' tidy_tmb <- tidy_tmb(results_fit)
-#' fixEff_dataInference  <- subsetFixEffectInferred(tidy_tmb)
-#' fixEff_dataActual <- getData2computeActualFixEffect(mock_data$groundTruth$effects)
-#' interactionTerm <- fixEff_dataInference$fixed_term$interaction[[1]]
-#' categorical_vars <- fixEff_dataActual$categorical_vars
-#' dataActual <- fixEff_dataActual$data
-#' l_labelsInCategoricalVars <- lapply(dataActual[, categorical_vars], levels)
-#' l_interaction <- strsplit(interactionTerm, split = ":")[[1]]
-#' l_categoricalVarsInInteraction <- lapply(l_interaction,
-#'                                          function(label) findAttribute(label, l_labelsInCategoricalVars)) %>% unlist()
-#' data_prepared <- prepareData2computeInteraction(categorical_vars, l_categoricalVarsInInteraction, dataActual)
-#' # Compute actual interaction values for multiple interactions
-#' actualInteraction <- computeActualInteractionFixEff(interactionTerm, categorical_vars, dataActual)
-computeActualInteractionFixEff <- function(l_interactionTerm, categorical_vars, dataActual){
-  # Actual values of one interaction, given its labels
-  actualForLabels <- function(labels) {
-    getActualInteractionFixEff(labels, dataActual, categorical_vars)
-  }
-  labels_per_term <- strsplit(l_interactionTerm, split = ":")
-  do.call('rbind', lapply(labels_per_term, actualForLabels))
-}
-```
-
-```{r test-actualInteractionFixEff }
-
-test_that("filter_dataframe retourne le dataframe filtré correctement", {
-  # Example data frame
-  df <- data.frame(col1 = c(1, 2, 3, 4, 5),
-                   col2 = c("A", "B", "C", "D", "E"),
-                   col3 = c("X", "Y", "Z", "X", "Y"))
-
-  # One filter per column
-  filter_list <- list(col1 = c(2), col2 = "B", col3 = c("Y"))
-
-  # Apply the filters to the data frame
-  filtered_df <- filter_dataframe(df, filter_list)
-
-  # A single row is expected, and it has to satisfy every filter
-  expect_equal(nrow(filtered_df), 1)
-  expect_true(all(filtered_df$col1 %in% c(2)))
-  expect_true(all(filtered_df$col2 == "B"))
-  expect_true(all(filtered_df$col3 %in% c("Y")))
-})
-
-test_that("filter_dataframe retourne le dataframe d'origine si aucun filtre n'est spécifié", {
-  # Build the data frame inside this test. Without it `df` resolved to the
-  # stats::df function and the assertion compared that function with itself.
-  df <- data.frame(
-  col1 = c(1, 2, 3, 4, 5),
-  col2 = c("A", "B", "C", "D", "E"),
-  col3 = c("X", "Y", "Z", "X", "Y")
-  )
-
-  # Empty filter list
-  filter_list <- list()
-
-  # Apply the filters to the data frame
-  filtered_df <- filter_dataframe(df, filter_list)
-
-  # The filtered data frame must be identical to the original one
-  expect_identical(filtered_df, df)
-})
-
-test_that("calculate_actual_interactionX2_values retourne la valeur d'interaction réelle correctement", {
-  # Mean log expression of the four combinations of the two terms
-  ground_truth <- data.frame(clmn_term_1 = c("A", "A", "B", "B"),
-                             clmn_term_2 = c("X", "Y", "X", "Y"),
-                             logQij_mean = c(1.5, 2.0, 85, 1.0))
-
-  # Labels of each term; the first one is the reference
-  labels_by_term <- list(clmn_term_1 = c("A", "B"), clmn_term_2 = c("X", "Y"))
-
-  # Actual interaction between "B" and "Y"
-  observed <- calculate_actual_interactionX2_values(ground_truth, labels_by_term,
-                                                    "clmn_term_1", "B",
-                                                    "clmn_term_2", "Y")
-
-  # (1.0 - 85) - (2.0 - 1.5)
-  expect_equal(observed, -84.5)
-})
-
-
-
-test_that("prepareData2computeInteraction filters data correctly", {
-  ground_truth <- data.frame(
-    geneID = c("gene1", "gene2", "gene3", "gene4"),
-    label_varA = factor(c("A", "A", "B", "B")),
-    label_varB = factor(c("X", "X", "Y", "Y")),
-    label_varC = factor(c("P", "P", "Q", "Q")),
-    logQij_mean = c(1.2, 3.4, 5.6, 7.8)
-  )
-  all_vars <- c("label_varA", "label_varB", "label_varC")
-  vars_in_interaction <- c("label_varA", "label_varC")
-
-  prepared <- prepareData2computeInteraction(all_vars, vars_in_interaction, ground_truth)
-
-  # label_varB is outside the interaction: only its first level "X" is kept
-  expect_equal(nrow(prepared), 2)
-  expect_true(all(prepared$label_varA %in% "A"))
-  expect_true(all(prepared$label_varB %in% "X"))
-  expect_true(all(prepared$label_varC %in% "P"))
-  expect_equal(prepared$logQij_mean, c(1.2, 3.4 ))
-})
-
-
-
-## End-to-end check of generateActualInteractionX2_FixEff on simulated data
-test_that("Generate actual interaction fixed effect correctly", {
-
-  # -- design: 4 genes, exactly 3 replicates
-  n_genes <- 4
-  n_rep_min <- 3
-  n_rep_max <- 3
-
-  design <- init_variable(name = "varA", mu = 8, sd = 0.1, level = 3) %>%
-    init_variable(name = "varB", mu = c(5, -5), NA, level = 2) %>%
-    init_variable(name = "varC", mu = 1, 3, 3) %>%
-    add_interaction(between_var = c("varA", "varC"), mu = 5, 0.1)
-
-  # -- simulation
-  mock_data <- mock_rnaseq(design, n_genes, min_replicates = n_rep_min, max_replicates = n_rep_max)
-
-  # -- fit data
-  data2fit <- prepareData2fit(countMatrix = mock_data$counts, metadata = mock_data$metadata)
-  fit <- fitModelParallel(formula = kij ~ varA + varB + varC + varA:varC,
-                          data = data2fit, group_by = "geneID",
-                          family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-
-  # -- inputs
-  tidy_fit <- tidy_tmb(fit)
-  inferred <- subsetFixEffectInferred(tidy_fit)
-  fixEff_dataActual <- getData2computeActualFixEffect(mock_data$groundTruth$effects)
-
-  first_interaction <- inferred$fixed_term$interaction[[1]]
-  vars_categorical <- fixEff_dataActual$categorical_vars
-  ground_truth <- fixEff_dataActual$data
-  levels_by_var <- lapply(ground_truth[, vars_categorical], levels)
-  labels_in_term <- strsplit(first_interaction, split = ":")[[1]]
-  vars_in_term <- lapply(labels_in_term,
-                         function(label) findAttribute(label, levels_by_var)) %>% unlist()
-
-  data_prepared <- prepareData2computeInteraction(vars_categorical, vars_in_term, ground_truth)
-  actual_intercept <- getActualIntercept(fixEff_dataActual)
-  reference_by_var <- lapply(ground_truth[, vars_categorical], function(vector) levels(vector)[1])
-
-  actualInteraction <- generateActualInteractionX2_FixEff(labels_in_term, vars_in_term,
-                                                          data_prepared, reference_by_var)
-
-  # One row per gene, all of them for the first interaction term
-  expect_true(nrow(actualInteraction) == 4)
-  expect_equal(actualInteraction$geneID,  c("gene1", "gene2", "gene3", "gene4"))
-  expect_true(all(actualInteraction$term %in%  'varA2:varC2'))
-  #expect_true(all(actualInteraction$description %in%  'interaction'))
-  expect_true(is.numeric(actualInteraction$actual))
-})
-
-
-# Test `getActualInteractionFixEff` on a hand-built, purely additive design
-test_that("getActualInteractionFixEff returns a zero interaction on additive data", {
-  # Two genes x 8 design cells. logQij_mean = 1:16 is additive in varA (+1),
-  # varB (+2) and varC (+4), so the A2:C2 interaction must be exactly 0.
-  data <- data.frame(
-    geneID = rep(x = c("gene1", "gene2"), each = 8),
-    logQij_mean = 1:16
-  )
-  metadata <- expand.grid(list(varA = factor(c("A1", "A2")),
-    varB = factor(c("B1", "B2")),
-    varC = factor(c("C1", "C2"))))
-  metadata <- rbind(metadata, metadata)
-  
-  data <- cbind(metadata, data)
-  
-  categorical_vars <- c("varA", "varB", "varC")
-  labelsInInteraction <- c("A2", "C2")
-  
-  # Run the function
-  actualInteractionValues <- getActualInteractionFixEff(labelsInInteraction, data, categorical_vars)
-  
-  # Expected output for the example data
-  expected_output <- data.frame(
-    term = "A2:C2",
-    geneID = c("gene1", "gene2"),
-    actual = c(0, 0),
-    description = c("A:C", "A:C")
-  )
-  
-  # Compare the computed values with the expected ones
-  expect_equal(nrow(actualInteractionValues), nrow(expected_output))
-  expect_equal(actualInteractionValues$geneID, expected_output$geneID)
-  expect_equal(actualInteractionValues$term, expected_output$term)
-  expect_equal(actualInteractionValues$actual, expected_output$actual)
-  #expect_equal(actualInteractionValues$description, expected_output$description)
-
-})
-
-
-
-# Three-way interaction: check the data frame built by generateActualInteractionX3_FixEff
-test_that("generateActualInteractionX3FixEff returns correct data frame", {
-  
-  # Levels of each categorical variable, reference level first
-  l_ref <- list(
-    varA = c("A1", "A2"),
-    varB = c("B1", "B2"),
-    varC = c("C1", "C2")
-  )
-  
-  # Full 2 x 2 x 2 design, repeated once per gene
-  design <- expand.grid(list(varA = factor(c("A1", "A2")),
-    varB = factor(c("B1", "B2")),
-    varC = factor(c("C1", "C2"))))
-  design <- rbind(design, design)
-  
-  # Seeded expression values so the expected interaction is reproducible
-  set.seed(123)
-  expression_df <- data.frame(
-    geneID = rep(x = c("gene1", "gene2"), each = 8),
-    logQij_mean = sample(x = -3:12, 16)
-  )
-  input_df <- cbind(design, expression_df)
-  
-  # Function under test
-  res <- generateActualInteractionX3_FixEff(
-    labelsInInteraction = c("A2", "B2", "C2"),
-    l_categoricalVarsInInteraction = c("varA", "varB", "varC"),
-    data2computeInteraction = input_df,
-    l_RefInCategoricalVars = l_ref
-  )
-  
-  # One row per gene, four columns, known values for the seeded data
-  expect_equal(nrow(res), 2)
-  expect_equal(ncol(res), 4)
-  expect_identical(res$term, rep("A2:B2:C2", 2))
-  expect_equal(res$actual, c(-3, 13))
-  expect_identical(res$description, rep("A:B:C", 2))
-})
-
-# Numeric check of calculate_actual_interactionX3_values on seeded data
-test_that("calculate_actual_interactionX3_values returns correct values", {
-  # Levels of each categorical variable, reference level first
-  l_ref <- list(
-    varA = c("A1", "A2"),
-    varB = c("B1", "B2"),
-    varC = c("C1", "C2")
-  )
-  
-  # Full 2 x 2 x 2 design, repeated once per gene
-  design <- expand.grid(list(varA = factor(c("A1", "A2")),
-    varB = factor(c("B1", "B2")),
-    varC = factor(c("C1", "C2"))))
-  design <- rbind(design, design)
-  
-  # Seeded expression values so the expected result is reproducible
-  set.seed(123)
-  expression_df <- data.frame(
-    geneID = rep(x = c("gene1", "gene2"), each = 8),
-    logQij_mean = sample(x = -8:8, 16)
-  )
-  input_df <- cbind(design, expression_df)
-  
-  # Function under test
-  interaction_values <- calculate_actual_interactionX3_values(
-    data = input_df,
-    l_reference = l_ref,
-    clmn_term_1 = "varA",
-    lbl_term_1 = "A2",
-    clmn_term_2 = "varB",
-    lbl_term_2 = "B2",
-    lbl_term_3 = "C2",
-    clmn_term_3 = "varC"
-  )
-  
-  # One value per gene
-  expect_equal(interaction_values, c(-7, 11))
-})
-
-
-
-## Test interaction X2
-test_that("Test getActualInteractionFixEff", {
-
-  # Example input data
-  N_GENES <- 4
-  MIN_REPLICATES <- 3
-  MAX_REPLICATES <- 3
-  
-  init_var <- init_variable(name = "varA", mu = 8, sd = 0.1, level = 3) %>%
-    init_variable(name = "varB", mu = c(5,-5), NA, level = 2) %>%
-    init_variable(name = "varC", mu = 1, 3, 3) %>%
-    add_interaction(between_var = c("varA", "varC"), mu = 5, 0.1)
-  
-  # Simulation
-  mock_data <- mock_rnaseq(init_var, N_GENES, min_replicates = MIN_REPLICATES, max_replicates = MAX_REPLICATES)
-  
-  # Data to fit
-  data2fit <- prepareData2fit(countMatrix = mock_data$counts, metadata = mock_data$metadata)
-  results_fit <- fitModelParallel(formula = kij ~ varA + varB + varC + varA:varC,
-                                  data = data2fit, group_by = "geneID",
-                                  family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-  
-  # Inputs of the function under test
-  tidy_tmb <- tidy_tmb(results_fit)
-  fixEff_dataInference <- subsetFixEffectInferred(tidy_tmb)
-  fixEff_dataActual <- getData2computeActualFixEffect(mock_data$groundTruth$effects)
-  # first inferred interaction term, split into its labels
-  interactionTerm <- fixEff_dataInference$fixed_term$interaction[[1]]
-  categorical_vars <- fixEff_dataActual$categorical_vars
-  dataActual <- fixEff_dataActual$data
-  l_labelsInCategoricalVars <- lapply(dataActual[, categorical_vars], levels)
-  l_interaction <- strsplit(interactionTerm, split = ":")[[1]]
-  # map every label of the interaction back to the variable that owns it
-  l_categoricalVarsInInteraction <- lapply(l_interaction,
-                                           function(label) findAttribute(label, l_labelsInCategoricalVars)) %>% unlist()
-  
-  data_prepared <- prepareData2computeInteraction(categorical_vars, l_categoricalVarsInInteraction, dataActual)
-  #actual_intercept <- getActualIntercept(fixEff_dataActual)
-  
-  # Call the function under test
-  actualInteraction <- getActualInteractionFixEff(l_interaction, data_prepared, categorical_vars)
-  
-
-  # One row per simulated gene, all for the same term, with a numeric actual effect
-  expect_true(nrow(actualInteraction) == 4)
-  expect_equal(actualInteraction$geneID,  c("gene1", "gene2", "gene3", "gene4"))
-  expect_true(all(actualInteraction$term %in%  'varA2:varC2'))
-  #expect_true(all(actualInteraction$description %in%  'interaction'))
-  expect_true(is.numeric(actualInteraction$actual))
-})
-
-
-## Test interaction X3
-# The description is distinct from the two-way test so that a failure report
-# identifies which of the two scenarios broke.
-test_that("Test getActualInteractionFixEff with a three-way interaction", {
-
-  # Example input data
-  N_GENES <- 4
-  MIN_REPLICATES <- 20
-  MAX_REPLICATES <- 20
-  
-  # Three two-level variables with every two-way and the three-way interaction
-  init_var <- init_variable( name = "varA", mu = 3,sd = 1, level = 2) %>%
-    init_variable( name = "varB", mu = 2, sd = 2, level = 2) %>%
-    init_variable( name = "varC", mu = 2, sd = 1, level = 2) %>%
-    add_interaction(between_var = c("varA", 'varC'), mu = 0.3, sd = 1) %>%
-    add_interaction(between_var = c("varB", 'varC'), mu = 2, sd = 1) %>%
-    add_interaction(between_var = c("varA", 'varB'), mu = -2, sd = 1) %>%
-    add_interaction(between_var = c("varA", 'varB', "varC"), mu = 1, sd = 1)
-  
-  # Simulation
-  mock_data <- mock_rnaseq(init_var, N_GENES, 
-                           min_replicates = MIN_REPLICATES, 
-                           max_replicates = MAX_REPLICATES, dispersion = 100)
-  
-  # Data to fit
-  data2fit <- prepareData2fit(countMatrix = mock_data$counts, metadata = mock_data$metadata)
-  results_fit <- fitModelParallel(formula = kij ~ varA + varB + varC + varA:varB + varB:varC + varA:varC + varA:varB:varC,
-                                  data = data2fit, group_by = "geneID",
-                                  family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-  
-  # Inputs of the function under test
-  tidy_tmb <- tidy_tmb(results_fit)
-  fixEff_dataInference <- subsetFixEffectInferred(tidy_tmb)
-  fixEff_dataActual <- getData2computeActualFixEffect(mock_data$groundTruth$effects)
-  # the fourth inferred interaction term is expected to be the three-way one
-  # (checked by the `term` expectation below)
-  interactionTerm <- fixEff_dataInference$fixed_term$interaction[[4]]
-  categorical_vars <- fixEff_dataActual$categorical_vars
-  dataActual <- fixEff_dataActual$data
-  l_labelsInCategoricalVars <- lapply(dataActual[, categorical_vars], levels)
-  l_interaction <- strsplit(interactionTerm, split = ":")[[1]]
-  # map every label of the interaction back to the variable that owns it
-  l_categoricalVarsInInteraction <- lapply(l_interaction,
-                                           function(label) findAttribute(label, l_labelsInCategoricalVars)) %>% unlist()
-  
-  data_prepared <- prepareData2computeInteraction(categorical_vars, l_categoricalVarsInInteraction, dataActual)
-
-  # Call the function under test
-  actualInteraction <- getActualInteractionFixEff(l_interaction, data_prepared, categorical_vars)
-  
-  # One row per simulated gene, all for the three-way term
-  expect_true(nrow(actualInteraction) == 4)
-  expect_equal(actualInteraction$geneID,  c("gene1", "gene2", "gene3", "gene4"))
-  expect_true(all(actualInteraction$term %in%  'varA2:varB2:varC2'))
-  expect_true(all(actualInteraction$description %in%  'varA:varB:varC'))
-  expect_true(is.numeric(actualInteraction$actual))
-})
-
-
-```
-
-```{r function-inferenceToExpected, filename =  "inferenceToExpected" }
-
-#' Join inferred fixed effects with their ground-truth values
-#'
-#' Computes the actual intercept, main effects and interaction effects from the
-#' ground truth used for simulation, stacks them, and joins them to the inferred
-#' fixed effects so that each inferred term sits next to its expected value.
-#'
-#' @param tidy_tmb A data frame containing the results of inference.
-#' @param df_ground_truth A data frame containing the ground truth data used for simulation.
-#'
-#' @return A data frame: the inferred fixed effects joined (on "ID"/"geneID" and
-#'   "term") with the actual effects.
-#'
-#' @export
-inferenceToExpected_withFixedEff <- function(tidy_tmb , df_ground_truth) {
-
-  ## -- inferred side and ground-truth side
-  inferred <- subsetFixEffectInferred(tidy_tmb)
-  ground_truth <- getData2computeActualFixEffect(df_ground_truth)
-  intercept_actual <- getActualIntercept(ground_truth)
-
-  ## -- actual main (non interaction) effects
-  main_actual <- getActualMainFixEff(inferred$fixed_term$nonInteraction,
-                                     ground_truth, intercept_actual)
-
-  ## -- actual interaction effects
-  interaction_actual <- computeActualInteractionFixEff(inferred$fixed_term$interaction,
-                                                       ground_truth$categorical_vars,
-                                                       ground_truth$data)
-
-  ## -- stack main, interaction and intercept, then join with the inference
-  all_actual <- rbind(main_actual , interaction_actual, intercept_actual )
-  joined <- join_dtf(inferred$data, all_actual  ,  c("ID", "term"), c("geneID", "term"))
-  return(joined)
-}
-
-```
-
-
-```{r function-waldTest, filename =  "waldTest" }
-
-#' Wald test for hypothesis testing
-#'
-#' This function performs a Wald test for hypothesis testing by comparing an estimation
-#' to a reference value using the provided standard error. It allows testing for
-#' one-tailed alternatives: "greater" - β > reference_value, "less" - β < reference_value,
-#' or two-tailed alternative: "greaterAbs" - |β| > reference_value.
-#' If the p-value obtained is greater than 1, it is set to 1 to avoid invalid p-values.
-#'
-#' Upper-tail probabilities are computed with `lower.tail = FALSE` rather than
-#' `1 - pnorm(...)`, so very small p-values are not rounded to exactly 0.
-#'
-#' @param estimation The estimated coefficient value (vectorised).
-#' @param std_error The standard error of the estimation.
-#' @param reference_value The reference value for comparison (default is 0).
-#' @param alternative The type of alternative hypothesis to test (default is "greaterAbs").
-#' @return A list containing the test statistic (`statistic`) and p-value (`p.value`).
-#' @importFrom stats pnorm
-#' @export
-#' @examples
-#' # Perform a Wald test with the default "greaterAbs" alternative
-#' wald_test(estimation = 0.1, std_error = 0.02, reference_value = 0.2)
-wald_test <- function(estimation, std_error, reference_value = 0, alternative = "greaterAbs") {
-  if (alternative == "greater") {
-    test_statistic <- (estimation - reference_value) / std_error
-    p_value <- stats::pnorm(test_statistic, mean = 0, sd = 1, lower.tail = FALSE)
-  } else if (alternative == "less") {
-    test_statistic <- (estimation - reference_value) / std_error
-    p_value <- stats::pnorm(test_statistic, mean = 0, sd = 1, lower.tail = TRUE)
-  } else if (alternative == "greaterAbs") {
-    # the statistic is negative when |estimation| < reference_value, which
-    # makes the doubled tail exceed 1; it is capped below
-    test_statistic <- (abs(estimation) - reference_value) / std_error
-    p_value <- 2 * stats::pnorm(test_statistic, mean = 0, sd = 1, lower.tail = FALSE)
-  } else {
-    stop("Invalid alternative type. Use 'greater', 'less', or 'greaterAbs'.")
-  }
-
-  # Set p-value to 1 if it exceeds 1
-  p_value <- pmin(p_value, 1)
-  return(list(statistic = test_statistic, p.value = p_value))
-}
-
-
-
-
-#' Tidy model coefficients with (optionally re-computed) tests and adjusted p-values
-#'
-#' Tidies a list of fitted glmmTMB models. When a non-zero threshold or a
-#' non-default alternative is requested, the statistic and p-value columns are
-#' replaced by a Wald test against that threshold. Adjusted p-values are always
-#' added in a `p.adj` column.
-#'
-#' @param list_tmb A list of glmmTMB objects representing the fitted models.
-#' @param coeff_threshold The threshold value for coefficient testing (default is 0).
-#' @param alternative_hypothesis The type of alternative hypothesis for the statistical test (default is "greaterAbs").
-#'                               Possible options are "greater" (for greater than threshold), "less" (for less than threshold),
-#'                               and "greaterAbs" (for greater than absolute value of threshold).
-#' @param correction_method a character string indicating the correction method to apply to p-values. Possible values are:
-#'                          "holm", "hochberg", "hommel", "bonferroni", "BH", "BY", "fdr", and "none".
-#'
-#' @return A tidy data frame containing the results of statistical tests for the estimated coefficients.
-#'
-#' @importFrom stats p.adjust
-#' @export
-#'
-#' @examples
-#' data(iris)
-#' model_list <- fitModelParallel(formula = Sepal.Length ~ Sepal.Width + Petal.Length, 
-#'                  data = iris, group_by = "Species", n.cores = 1) 
-#' results_df <- results(model_list, coeff_threshold = 0.1, alternative_hypothesis = "greater")
-results <- function(list_tmb, coeff_threshold = 0, alternative_hypothesis = "greaterAbs", correction_method = "BH") {
-  coef_df <- tidy_tmb(list_tmb)
-
-  # With the default threshold and alternative the tidied test is kept as is
-  is_default_test <- coeff_threshold == 0 && alternative_hypothesis == "greaterAbs"
-  if (!is_default_test) {
-    wald <- wald_test(coef_df$estimate, coef_df$std.error, coeff_threshold, alternative_hypothesis)
-    coef_df$statistic <- wald$statistic
-    coef_df$p.value <- wald$p.value
-  }
-
-  coef_df$p.adj <- stats::p.adjust(coef_df$p.value, method = correction_method)
-  return(coef_df)
-}
-
-
-```
-
-
-
-```{r test-waldTest}
-
-# Unit tests
-test_that("wald_test performs correct tests", {
-  # Shared expected statistic: (estimate - reference) / standard error
-  z_expected <- (0.1 - 0.05) / 0.02
-
-  # "greater" alternative: upper tail
-  res_greater <- wald_test(estimation = 0.1, std_error = 0.02, reference_value = 0.05, alternative = "greater")
-  expect_equal(res_greater$p.value, 1 - pnorm(z_expected, mean = 0, sd = 1, lower.tail = TRUE))
-
-  # "less" alternative: lower tail
-  res_less <- wald_test(estimation = 0.1, std_error = 0.02, reference_value = 0.05, alternative = "less")
-  expect_equal(res_less$p.value, pnorm(z_expected, mean = 0, sd = 1, lower.tail = TRUE))
-
-  # "greaterAbs" alternative: doubled upper tail on |estimate|
-  z_abs_expected <- (abs(0.1) - 0.05) / 0.02
-  res_greater_abs <- wald_test(estimation = 0.1, std_error = 0.02, reference_value = 0.05, alternative = "greaterAbs")
-  expect_equal(res_greater_abs$p.value, (2 * (1 - pnorm(z_abs_expected, mean = 0, sd = 1, lower.tail = TRUE))))
-
-  # Unknown alternative must raise an error
-  expect_error(wald_test(estimation = 0.1, std_error = 0.02, reference_value = 0.05, alternative = "invalid"))
-})
-
-
-
-test_that("results function performs statistical tests correctly", {
-  # Load the iris data used for the test
-  data(iris)
-  # Fit models and perform statistical tests
-  model_list <- fitModelParallel(formula = Sepal.Length ~ Sepal.Width + Petal.Length, 
-                                 data = iris, group_by = "Species", n.cores = 1) 
-  results_df <- results(model_list, coeff_threshold = 0.1, alternative_hypothesis = "greater")
-
-  # The test columns must be present in the returned data frame
-  expect_true("statistic" %in% colnames(results_df))
-  expect_true("p.value" %in% colnames(results_df))
-  expect_true("p.adj" %in% colnames(results_df))
-
-  # Exact fitted values may vary with the machine and package versions, so
-  # only the type of the columns is checked here.
-  expect_true(is.numeric(results_df$statistic))
-  expect_true(is.numeric(results_df$p.value))
-  expect_true(is.numeric(results_df$p.adj))
-
-  # No missing statistic or p-value
-  expect_true(all(!is.na(results_df$statistic)))
-  expect_true(all(!is.na(results_df$p.value)))
-
-  # p-values and adjusted p-values are valid probabilities
-  expect_true(all(results_df$p.value >= 0 & results_df$p.value <= 1))
-  expect_true(all(results_df$p.adj >= 0 & results_df$p.adj <= 1))
-
-  # The threshold and the alternative hypothesis are really applied: the
-  # statistic and p-value must match a direct Wald test on the same estimates.
-  expected <- wald_test(results_df$estimate, results_df$std.error,
-                        reference_value = 0.1, alternative = "greater")
-  expect_equal(results_df$statistic, expected$statistic)
-  expect_equal(results_df$p.value, expected$p.value)
-
-  })
-
-
-
-
-```
-
-
-
-```{r function-rocPlot, filename = "ROCplot"}
-
-
-#' Get Labels for Expected Differential Expression
-#'
-#' This function assigns labels to genes based on whether their actual effect estimates
-#' indicate differential expression according to a given threshold and alternative hypothesis.
-#'
-#' The label is `actual > coeff_threshold` for "greater", `actual < coeff_threshold`
-#' for "less" and `abs(actual) > coeff_threshold` for "greaterAbs". Any other value
-#' of `alt_hypothesis` is an error, as in `wald_test()`, instead of silently
-#' returning the data frame without an `isDE` column.
-#'
-#' @param comparison_df A data frame containing comparison results with actual effect estimates
-#'                      (column `actual`).
-#' @param coeff_threshold The threshold value for determining differential expression.
-#' @param alt_hypothesis The alternative hypothesis for comparison. Possible values are "greater",
-#'                      "less", and "greaterAbs".
-#' @return A modified data frame with an additional logical column `isDE` indicating if the gene
-#'         is differentially expressed.
-#'
-#' @examples
-#' # Generate a sample comparison data frame
-#' comparison_data <- data.frame(
-#'   geneID = c("gene1", "gene2", "gene3"),
-#'   actual = c(0.5, -0.3, 0.8)
-#' )
-#'
-#' # Get labels for expected differential expression
-#' labeled_data <- getLabelExpected(comparison_data, coeff_threshold = 0.2, alt_hypothesis = "greater")
-#'
-#' @export
-getLabelExpected <- function(comparison_df, coeff_threshold, alt_hypothesis) {
-  actual <- comparison_df$actual
-  comparison_df$isDE <- switch(
-    alt_hypothesis,
-    greater = actual > coeff_threshold,
-    less = actual < coeff_threshold,
-    greaterAbs = abs(actual) > coeff_threshold,
-    # unnamed last argument = fallback for any unknown alternative
-    stop("Invalid alternative type. Use 'greater', 'less', or 'greaterAbs'.")
-  )
-  return(comparison_df)
-}
-
-
-#' Draw a ROC curve from labelled, adjusted p-values
-#'
-#' Builds a ROC curve with `plotROC::geom_roc()` from the `isDE` labels and the
-#' `p.adj` scores of the comparison data frame, and annotates the plot with the
-#' AUC computed by `plotROC::calc_auc()`. A message is emitted when the labels
-#' are all `TRUE` or all `FALSE`.
-#'
-#' @param comparison_df A dataframe containing comparison results
-#'   (columns `isDE` and `p.adj`).
-#' @param ... additional aesthetics, given as column names (strings), passed to
-#'   ggplot2::aes
-#' @return A ggplot object representing the ROC curve plot.
-#' @importFrom plotROC geom_roc
-#' @importFrom ggplot2 ggtitle theme_bw aes sym
-#'
-#' @examples
-#' comparison_data <- data.frame(
-#'   geneID = c("gene1", "gene2", "gene3"),
-#'   isDE = c(TRUE, FALSE, TRUE),
-#'   p.adj = c(0.05, 0.2, 0.01)
-#' )
-#' roc_plot(comparison_data)
-#'
-#' @export
-roc_plot <- function(comparison_df, ...) {
-
-  ## -- both classes are needed; only notify the user when one is missing
-  de_labels <- comparison_df$isDE
-  if (all(de_labels == TRUE)) {
-    message("WARNING : No FALSE label in 'isDE' column, ROC curve cannot be computed")
-  }
-  if (all(de_labels == FALSE)) {
-    message("WARNING : No TRUE label in 'isDE' column, ROC curve cannot be computed")
-  }
-
-  ## -- extra aesthetics: column names turned into symbols
-  extra_aes <- lapply(list(...), function(x) if (!is.null(x)) ggplot2::sym(x))
-
-  ## -- ROC curve; the disease status is the negated label
-  p <- ggplot2::ggplot(comparison_df, ggplot2::aes(d = !isDE , m = p.adj, !!!extra_aes ))
-  p <- p + plotROC::geom_roc(n.cuts = 0, labels = FALSE)
-  p <- p + ggplot2::theme_bw()
-  p <- p + ggplot2::ggtitle("ROC curve")
-
-  ## -- AUC annotation, one line per curve
-  auc_df <- subset(plotROC::calc_auc(p) , select = -c(PANEL, group))
-  auc_df$AUC <- round(auc_df$AUC, digits = 3)
-  auc_labels <- if (nrow(auc_df) == 1) {
-    paste("AUC", auc_df$AUC, sep = " : ")
-  } else {
-    do.call(paste, c(auc_df, sep = " - AUC: "))
-  }
-  auc_text <- paste(auc_labels, collapse  = "\n")
-
-  p <- p + ggplot2::annotate("text", x = .75, y = .25, label = auc_text)
-  return(p)
-}
-
-
-
-```
-
-```{r test-rocPlot}
-
-
-# getLabelExpected: one expectation per supported alternative
-test_that("getLabelExpected assigns labels correctly", {
-
-  # Three genes with known actual effects
-  genes_df <- data.frame(
-    geneID = c("gene1", "gene2", "gene3"),
-    actual = c(0.5, -0.3, 0.8)
-  )
-
-  # "greater": actual effect above 0.2
-  res_greater <- getLabelExpected(genes_df, coeff_threshold = 0.2, alt_hypothesis = "greater")
-  expect_identical(res_greater$isDE, c(TRUE, FALSE, TRUE))
-
-  # "less": actual effect below -0.2
-  res_less <- getLabelExpected(genes_df, coeff_threshold = -0.2, alt_hypothesis = "less")
-  expect_identical(res_less$isDE, c(FALSE, TRUE, FALSE))
-
-  # "greaterAbs": absolute actual effect above 0.6
-  res_abs <- getLabelExpected(genes_df, coeff_threshold = 0.6, alt_hypothesis = "greaterAbs")
-  expect_identical(res_abs$isDE, c(FALSE, FALSE, TRUE))
-
-})
-
-
-test_that("ROC plot is generated correctly", {
-  # With a grouping column mapped to the colour aesthetic
-  grouped_df <- data.frame(
-    geneID = c("gene1", "gene2", "gene3"),
-    isDE = c(TRUE, FALSE, TRUE),
-    p.adj = c(0.05, 0.2, 0.01), 
-    from = "example"
-  )
-  grouped_plot <- roc_plot(grouped_df, col = "from")
-  expect_true(inherits(grouped_plot, "gg"))
-
-  # Without any extra aesthetic
-  plain_df <- data.frame(
-    geneID = c("gene1", "gene2", "gene3"),
-    isDE = c(TRUE, FALSE, TRUE),
-    p.adj = c(0.05, 0.2, 0.01)
-  )
-  plain_plot <- roc_plot(plain_df)
-  expect_true(inherits(plain_plot, "gg"))
-})
-
-
-```
-
-
-```{r function-countsPlot, filename = "countsPlot"}
-
-#' Generate a gene expression plot of the simulated counts
-#'
-#' This function flattens the counts of a mock data object into a single vector
-#' and draws them as jittered points overlaid with a violin, plus a point
-#' marking the mean.
-#'
-#' @param mock_obj The mock data object containing gene counts (element `counts`).
-#'
-#' @return A ggplot2 object.
-#'
-#' @importFrom ggplot2 aes geom_density theme_bw ggtitle scale_x_log10 element_blank
-#' @export
-#'
-#' @examples
-#' mock_data <- list(counts = matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3))
-#' counts_plot(mock_data)
-counts_plot <- function(mock_obj){
-
-  # all counts as one unnamed vector; `aes(y = counts)` below refers to this
-  # local variable (NOTE(review): renaming it would presumably change the
-  # default y-axis title - confirm before refactoring)
-  counts <- unname(unlist(mock_obj$counts))
-  # single x category "Genes": jittered points, violin, and the mean as a large point
-  p <- ggplot2::ggplot() +
-      ggplot2::aes(x = "Genes", y = counts) +
-      ggplot2::geom_point(position = "jitter", alpha = 0.6, size = 0.4, col = "#F0B27A") +
-      ggplot2::geom_violin(fill = "#F8F9F9", alpha = 0.4) +
-      ggplot2::stat_summary(fun = "mean", geom = "point", color = "#B63A0F", size = 5) +
-      ggplot2::theme_bw() +
-      ggplot2::ggtitle("Gene expression plot") +
-      ggplot2::theme(axis.title.x =  ggplot2::element_blank())
-  return(p)
-}
-
-
-```
-
-```{r test-countsPlot}
-
-
-
-# counts_plot must return a ggplot object
-test_that("Counts plot is generated correctly", {
-  # Minimal mock object: a 3 x 3 count matrix
-  count_matrix <- matrix(c(1, 2, 3, 4, 5, 6, 7, 8, 9), ncol = 3)
-  mock_input <- list(counts = count_matrix)
-
-  generated_plot <- counts_plot(mock_input)
-
-  expect_true(inherits(generated_plot, "gg"))
-})
-
-
-
-```
-
-
-
-```{r function-identityPlot, filename = "identityPlot"}
-
-#' Plot estimates against actual values around the identity line
-#'
-#' Draws one point per row (x = `actual`, y = `estimate`), a dotted red identity
-#' line, one facet per `description`, and log10 scales on both axes.
-#'
-#' @param comparison_df A data frame containing comparison results with "actual",
-#'   "estimate" and "description" columns.
-#' @param ... additional aesthetics, given as column names (strings), passed to
-#'   ggplot2::aes
-#' @return A ggplot2 identity plot.
-#'
-#' @importFrom ggplot2 sym aes geom_point geom_abline facet_wrap theme_bw ggtitle scale_x_log10 scale_y_log10
-#' @export
-#' @examples
-#'   comparison_data <- data.frame(
-#'    actual = c(1, 2, 3, 4, 5),
-#'    estimate = c(0.9, 2.2, 2.8, 4.1, 5.2),
-#'    description = rep("Category A", 5))
-#' identity_plot(comparison_data)
-
-identity_plot <- function(comparison_df, ...){
-
-  # extra aesthetics: column names turned into symbols
-  extra_aes <- lapply(list(...), function(x) if (!is.null(x)) ggplot2::sym(x))
-
-  # layers, in drawing order
-  points_layer <- ggplot2::geom_point(ggplot2::aes(x = actual, y = estimate, !!!extra_aes), alpha = 0.6)
-  identity_line <- ggplot2::geom_abline(intercept = 0, slope = 1, lty = 3, col = 'red', linewidth = 1)
-  facets <- ggplot2::facet_wrap(~description, scales = "free")
-
-  p <- ggplot2::ggplot(comparison_df) + points_layer + identity_line + facets
-  p <- p + ggplot2::theme_bw() + ggplot2::ggtitle("Identity plot")
-  p + ggplot2::scale_x_log10() + ggplot2::scale_y_log10()
-}
-
-
-```
-
-```{r test-identityPlot}
-
-
-# identity_plot must return a ggplot object
-test_that("Identity plot is generated correctly", {
-  # Five actual/estimate pairs in a single facet
-  pairs_df <- data.frame(
-    actual = c(1, 2, 3, 4, 5),
-    estimate = c(0.9, 2.2, 2.8, 4.1, 5.2),
-    description = rep("Category A", 5)
-  )
-
-  generated_plot <- identity_plot(pairs_df)
-
-  expect_true(inherits(generated_plot, "gg"))
-})
-
-
-
-```
-
-
-```{r function-simulationReport, filename =  "simulationReport" }
-
-#' Assemble the report page and optionally save it to a file
-#'
-#' Lays out the report in three columns: settings table above the ROC curve,
-#' counts plot above the dispersion plot, and the identity plot. The page is
-#' drawn with `gridExtra::grid.arrange()` and written with `ggplot2::ggsave()`
-#' when a file path is given.
-#'
-#' @param report_file Path to the file where the report will be exported.
-#'   If NULL, nothing is written.
-#' @param table_settings A table (grob) containing settings and parameters used in the analysis.
-#' @param roc_curve A plot displaying the Receiver Operating Characteristic (ROC) curve.
-#' @param dispersion_plot A plot displaying the dispersion values.
-#' @param id_plot The identity plot.
-#' @param counts_plot A plot displaying the gene counts.
-#'
-#'
-#' @importFrom gridExtra arrangeGrob grid.arrange
-#' @importFrom ggplot2 ggsave
-#'
-#'
-#' @return The arranged report, as returned by `gridExtra::grid.arrange()`.
-#' @export
-exportReportFile <- function(report_file, table_settings, roc_curve, dispersion_plot, id_plot, counts_plot){
-
-  # two stacked columns, each with the lower panel 1.5x taller
-  left_column <- gridExtra::arrangeGrob(table_settings, roc_curve ,heights = c(1, 1.5))
-  center_column <- gridExtra::arrangeGrob(counts_plot, dispersion_plot, heights = c(1, 1.5))
-
-  # full page: the identity plot takes half of the width
-  report_page <- gridExtra::grid.arrange(left_column, center_column, id_plot ,ncol = 3, widths = c(1,1,2))
-
-  if (!is.null(report_file)) {
-    ggplot2::ggsave(report_file, report_page, height = 10, width = 15)
-  }
-
-  return(report_page)
-}
-
-
-#' Turn a data frame into a styled table grob
-#'
-#' Renders the data frame with `gridExtra::tableGrob()` using a minimal theme:
-#' alternating row backgrounds, a dark header with white bold-italic text, and
-#' fixed column widths.
-#'
-#' @param df The data frame to be converted into a formatted table.
-#'
-#' @return A grid graphics object representing the formatted table.
-#' @export
-#' @importFrom ggplot2 unit
-#' @importFrom gridExtra tableGrob ttheme_minimal
-#' @examples
-#' # Create a sample data frame
-#' sample_data <- data.frame(
-#'   Name = c("Alice", "Bob", "Charlie"),
-#'   Age = c(25, 30, 28)
-#' )
-#'
-#' # Generate the formatted table
-#' table_grob <- getGrobTable(sample_data)
-getGrobTable <- function(df){
-  # body rows: two alternating fills, no border
-  body_style <- list(bg_params = list(fill = c("#F8F9F9", "#E5E8E8"), col=NA))
-  # header: white bold-italic text on a dark fill
-  header_style <- list(fg_params=list(col="white", fontface=4L),
-                       bg_params = list(fill = "#5D6D7E", col=NA))
-  table_theme <- gridExtra::ttheme_minimal(core = body_style,
-                                           colhead = header_style,
-                                           base_size = 15)
-
-  column_widths <- ggplot2::unit(x = c(0.4,0.3), "npc" )
-  table_grob <- gridExtra::tableGrob(df, rows=NULL, theme = table_theme, widths = column_widths)
-  return(table_grob)
-}
-
-
-#' Generate a simulation report
-#'
-#' This function generates a simulation report containing various plots and evaluation metrics.
-#' Inference results from HTRfit (`list_tmb`) and/or DESeq2 (`dds_obj`) are compared
-#' to the ground truth stored in `mock_obj`; at least one of the two must be supplied.
-#'
-#' @param mock_obj A list containing simulation data and ground truth.
-#' @param list_tmb A list of model results.
-#' @param dds_obj a DESeq2 object
-#' @param coeff_threshold A threshold for comparing estimates.
-#' @param alt_hypothesis The alternative hypothesis for comparisons ("greater", "less", "greaterAbs").
-#' @param report_file File name to save the generated report. If NULL, the report will not be exported.
-#' @importFrom ggplot2 aes geom_point geom_abline facet_wrap theme_bw ggtitle
-#' @return A list containing settings, plots, and evaluation results.
-#' @export
-simulationReport <- function(mock_obj, list_tmb = NULL, dds_obj = NULL ,
-                             coeff_threshold = 0, alt_hypothesis = "greaterAbs", report_file = NULL){
-
-  # Without any inference result there is nothing to compare or plot
-  if (is.null(list_tmb) && is.null(dds_obj)) {
-    stop("At least one of 'list_tmb' or 'dds_obj' must be provided.")
-  }
-
-  #-- init 
-  TMB_comparison_df <- data.frame()
-  DESEQ_comparison_df <- data.frame()
-  DESEQ_dispersion_df <- data.frame()
-  TMB_dispersion_df <- data.frame()
-  
-  # -- build data from list_tmb
-  if (!is.null(list_tmb)){
-      tidyRes  <- results(list_tmb, coeff_threshold, alt_hypothesis)
-      formula_used <- list_tmb[[1]]$modelInfo$allForm$formula
-      TMB_comparison_df <- compareInferenceToExpected(tidyRes, mock_obj$groundTruth$effects, formula_used)
-      TMB_comparison_df <- getLabelExpected(TMB_comparison_df, coeff_threshold, alt_hypothesis)
-      TMB_comparison_df$from <- "HTRfit"
-      tmb_disp_inferred <- extractTMBDispersion(list_tmb)
-      # ground truth is read from the `mock_obj` argument, not from a global
-      TMB_dispersion_df <- getDispersionComparison(tmb_disp_inferred, mock_obj$groundTruth$gene_dispersion)
-      TMB_dispersion_df$from <- 'HTRfit'
-  }
-  
-  # -- build data from dds_obj
-  if (!is.null(dds_obj)){
-      # the DESeq2 object is the `dds_obj` argument, not a global `dds`
-      deseq2_wrapped <- wrapper_DESeq2(dds_obj, coeff_threshold, alt_hypothesis)
-      DESEQ_comparison_df <- inferenceToExpected_withFixedEff(deseq2_wrapped$fixEff, mock_obj$groundTruth$effects)
-      DESEQ_comparison_df <- getLabelExpected(DESEQ_comparison_df, coeff_threshold, alt_hypothesis)
-      DESEQ_comparison_df$from <- "DESeq2"
-      # columns present on the HTRfit side only; filled with NA so rbind works
-      DESEQ_comparison_df$component <- NA
-      DESEQ_comparison_df$group <- NA
-      DESEQ_disp_inferred <- extractDESeqDispersion(deseq2_wrapped)
-      DESEQ_dispersion_df <- getDispersionComparison(DESEQ_disp_inferred , mock_obj$groundTruth$gene_dispersion)
-      DESEQ_dispersion_df$from <- 'DESeq2'
-  }
-  
-  comparison_df <- rbind( DESEQ_comparison_df, TMB_comparison_df )
-  
-  
-  # one colour per result set actually supplied
-  color2use <- c("#D2B4DE", "#A2D9CE")
-  color2use <- color2use[c(!is.null(list_tmb), !is.null(dds_obj))]
-
-  # -- plotting
-  roc_curve <- roc_plot(comparison_df, col = "from" ) + ggplot2::scale_color_manual(values = color2use)
-  id_plot <- identity_plot(comparison_df, col = "from") + ggplot2::scale_color_manual(values = color2use)
-  #metrics_plot <- metrics_plot(list_tmb)
-  evalDisp <- evaluateDispersion(TMB_dispersion_df, DESEQ_dispersion_df, color2use )
-  dispersion_plot <- evalDisp$disp_plot
-  # the local `counts_plot` shadows the function name from here on
-  counts_plot <- counts_plot(mock_obj)
-  
-  # -- export report
-  df_settings <- mock_obj$settings
-  grobTableSettings <- getGrobTable(df_settings)
-  exportReportFile(report_file, grobTableSettings, roc_curve, dispersion_plot, id_plot, counts_plot)
-
-  # -- return
-  ret <- list(settings = df_settings, roc_plot = roc_curve,
-              dispersionEvaluation =  evalDisp, identity_plot = id_plot, counts_plot = counts_plot, data = comparison_df)
-  return(ret)
-}
-
-```
-
-
-
-```{r test-simulationReport}
-
-
-# Test case 1: Testing with a sample data frame
-test_that("Generating a formatted table works correctly", {
-  sample_data <- data.frame(
-    Name = c("Alice", "Bob", "Charlie"),
-    Age = c(25, 30, 28)
-  )
-  
-  table_grob <- getGrobTable(sample_data)
-  
-  expect_s3_class(table_grob, "gtable")
-})
-
-# Test case 4: Testing with non-numeric values
-test_that("Handling non-numeric values in the data frame", {
-  non_numeric_data <- data.frame(
-    Name = c("Alice", "Bob", "Charlie"),
-    Age = c(25, "N/A", 28)
-)
-  
-  table_grob <- getGrobTable(non_numeric_data)
-  
-  expect_s3_class(table_grob, "gtable")
-})
-
-```
-
-
-```{r function-deseq2, filename =  "wrapperDESeq2" }
-
-#' Wrapper Function for DESeq2 Analysis
-#'
-#' This function performs differential expression analysis using DESeq2 based on the provided
-#' DESeqDataSet (dds) object. It calculates the dispersion values from the dds object and then
-#' performs inference on the log-fold change (LFC) values using the specified parameters.
-#'
-#' @param dds A DESeqDataSet object containing the count data and experimental design.
-#' @param lfcThreshold The threshold for minimum log-fold change (LFC) to consider differentially expressed.
-#' @param altHypothesis The alternative hypothesis for the analysis, indicating the direction of change.
-#'                      Options are "greater", "less", or "two.sided".
-#' @param correction_method The method for p-value correction. Default is "BH" (Benjamini-Hochberg).
-#'
-#' @return A list containing the dispersion values and the results of the differential expression analysis.
-#'         The dispersion values are calculated from the dds object and named according to sample names.
-#'         The inference results include adjusted p-values and log2 fold changes for each gene.
-#'
-#' @examples
-#' N_GENES = 100
-#' MAX_REPLICATES = 5
-#' MIN_REPLICATES = 5
-#' ## --init variable
-#' input_var_list <- init_variable( name = "genotype", mu = 12, sd = 0.1, level = 3) %>%
-#'                    init_variable(name = "environment", mu = c(0,1), NA , level = 2) 
-#'
-#' mock_data <- mock_rnaseq(input_var_list, N_GENES, MIN_REPLICATES, MAX_REPLICATES)
-#' dds <- DESeq2::DESeqDataSetFromMatrix(mock_data$counts , 
-#'                    mock_data$metadata, ~ genotype + environment)
-#' dds <- DESeq2::DESeq(dds, quiet = TRUE)
-#' result <- wrapper_DESeq2(dds, lfcThreshold = 1, altHypothesis = "greater")
-#' @export
-wrapper_DESeq2 <- function(dds, lfcThreshold , altHypothesis, correction_method = "BH") {
-  dds_full <- S4Vectors::mcols(dds) %>% as.data.frame()
-  
-  ## -- dispersion
-  message("INFO: The dispersion values from DESeq2 were reparametrized to their reciprocals (1/dispersion).")
-  dispersion <- 1/dds_full$dispersion
-  names(dispersion) <- rownames(dds_full)
-
-  ## -- coeff
-  inference_df <- get_inference(dds_full, lfcThreshold, altHypothesis, correction_method)
-  res <- list(dispersion = dispersion, fixEff = inference_df)
-  return(res)
-}
-
-
-
-#' Calculate Inference for Differential Expression Analysis
-#'
-#' This function calculates inference for differential expression analysis based on the results of DESeq2.
-#'
-#' @param dds_full A data frame containing DESeq2 results, including estimate and standard error information.
-#' @param lfcThreshold Log fold change threshold for determining differentially expressed genes.
-#' @param altHypothesis Alternative hypothesis for testing, one of "greater", "less", or "two.sided".
-#' @param correction_method Method for multiple hypothesis correction, e.g., "BH" (Benjamini-Hochberg).
-#'
-#' @return A data frame containing inference results, including statistics, p-values, and adjusted p-values.
-#'
-#' @examples
-#' \dontrun{
-#' # Example usage of the function
-#' inference_result <- get_inference(dds_full, lfcThreshold = 0.5, 
-#'                                    altHypothesis = "greater", 
-#'                                    correction_method = "BH")
-#' }
-#' @importFrom stats p.adjust
-#' @export
-get_inference <- function(dds_full, lfcThreshold, altHypothesis, correction_method){
-
-  ## -- build subdtf
-  stdErr_df <- getSE_df(dds_full)
-  estim_df <- getEstimate_df(dds_full)
-  ## -- join
-  df2ret <- join_dtf(estim_df, stdErr_df, k1 = c("ID", "term") , k2 = c("ID", "term"))
-
-  ## -- convert to log10
-  message("INFO: The log2-fold change estimates and standard errors from DESeq2 were converted to the natural logarithm scale.")
-  df2ret$estimate <- df2ret$estimate*log(2)
-  df2ret$std.error <- df2ret$std.error*log(2)
-
-  ## -- some details reshaped
-  df2ret$term <- gsub("_vs_.*","", df2ret$term)
-  df2ret$term <- gsub(pattern = "_", df2ret$term, replacement = "")
-  df2ret$term <- removeDuplicatedWord(df2ret$term)
-  df2ret$term <- gsub(pattern = "[.]", df2ret$term, replacement = ":")
-  df2ret$effect <- "fixed"
-  idx_intercept <- df2ret$term == "Intercept"
-  df2ret$term[idx_intercept] <- "(Intercept)"
-
-  ## -- statistical part
-  waldRes <- wald_test(df2ret$estimate, df2ret$std.error, lfcThreshold, altHypothesis)
-  df2ret$statistic <- waldRes$statistic
-  df2ret$p.value <- waldRes$p.value
-  df2ret$p.adj <- stats::p.adjust(df2ret$p.value, method = correction_method)
-
-  return(df2ret)
-}
-
-
-#' Extract Standard Error Information from DESeq2 Results
-#'
-#' This function extracts the standard error (SE) information from DESeq2 results.
-#'
-#' @param dds_full A data frame containing DESeq2 results, including standard error columns.
-#'
-#' @return A data frame with melted standard error information, including gene IDs and terms.
-#'
-#' @examples
-#' \dontrun{
-#' # Example usage of the function
-#' se_info <- getSE_df(dds_full)
-#' }
-#' @importFrom reshape2 melt
-#' @export
-getSE_df <- function(dds_full){
-  columnsInDds_full <- colnames(dds_full)
-  SE_columns <- columnsInDds_full [ grepl("SE" , columnsInDds_full) ]
-  SE_df <- dds_full[, SE_columns]
-  SE_df$ID <- rownames(SE_df)
-  SE_df_long <- reshape2::melt(SE_df,
-                                       measure.vars = SE_columns,
-                                       variable.name  = "term", value.name = "std.error", drop = F)
-  SE_df_long$term <- gsub(pattern = "SE_", SE_df_long$term, replacement = "")
-  return(SE_df_long)
-
-}
-
-
-#' Extract Inferred Estimate Information from DESeq2 Results
-#'
-#' This function extracts the inferred estimate values from DESeq2 results.
-#'
-#' @param dds_full A data frame containing DESeq2 results, including estimate columns.
-#'
-#' @return A data frame with melted inferred estimate information, including gene IDs and terms.
-#'
-#' @examples
-#' \dontrun{
-#' # Example usage of the function
-#' estimate_info <- getEstimate_df(dds_full)
-#'  }
-#' @importFrom reshape2 melt
-#' @export
-getEstimate_df <- function(dds_full){
-  columnsInDds_full <- colnames(dds_full)
-  SE_columns <- columnsInDds_full [ grepl("SE" , columnsInDds_full) ]
-  inferedVal_columns <- gsub("SE_", "" , x = SE_columns)
-
-  estimate_df <- dds_full[, inferedVal_columns]
-  estimate_df$ID <- rownames(estimate_df)
-  estimate_df_long <- reshape2::melt(estimate_df,
-                                 measure.vars = inferedVal_columns,
-                                 variable.name  = "term", value.name = "estimate", drop = F)
-  return(estimate_df_long)
-
-}
-
-```
-
-
-```{r test-wrapperDESeq2}
-
-
-test_that("get_inference returns a data frame with correct columns", {
-  # Create a sample dds_full data frame
-  N_GENES = 100
-  MAX_REPLICATES = 5
-  MIN_REPLICATES = 5
-  ## --init variable
-  input_var_list <- init_variable( name = "genotype", mu = 12, sd = 0.1, level = 3) %>%
-                    init_variable(name = "environment", mu = c(0,1), NA , level = 2) 
-
-  mock_data <- mock_rnaseq(input_var_list, N_GENES, MIN_REPLICATES, max_replicates = MAX_REPLICATES)
-  dds <- DESeq2::DESeqDataSetFromMatrix(mock_data$counts , mock_data$metadata, ~ genotype + environment)
-  dds <- DESeq2::DESeq(dds, quiet = TRUE)
-  dds_full <- S4Vectors::mcols(dds) %>% as.data.frame()
-  
-  # Call the function
-  inference_results <- get_inference(dds_full, lfcThreshold = 0.5, altHypothesis = "greater", correction_method = "BH")
-  
-  # Check if the returned object is a data frame
-  expect_true(is.data.frame(inference_results))
-  
-  # Check if the data frame contains the correct columns
-  expect_true("ID" %in% colnames(inference_results))
-  expect_true("estimate" %in% colnames(inference_results))
-  expect_true("std.error" %in% colnames(inference_results))
-  expect_true("term" %in% colnames(inference_results))
-  expect_true("effect" %in% colnames(inference_results))
-  expect_true("statistic" %in% colnames(inference_results))
-  expect_true("p.value" %in% colnames(inference_results))
-  expect_true("p.adj" %in% colnames(inference_results))
-})
-
-
-
-
-
-
-test_that("getEstimate_df function works correctly", {
-  
- # Create a sample dds_full data frame
-  N_GENES = 100
-  MAX_REPLICATES = 5
-  MIN_REPLICATES = 5
-  ## --init variable
-  input_var_list <- init_variable( name = "genotype", mu = 12, sd = 0.1, level = 3) %>%
-                    init_variable(name = "environment", mu = c(0,1), NA , level = 2) 
-
-  mock_data <- mock_rnaseq(input_var_list, N_GENES, MIN_REPLICATES, max_replicates = MAX_REPLICATES)
-  dds <- DESeq2::DESeqDataSetFromMatrix(mock_data$counts , mock_data$metadata, ~ genotype + environment)
-  dds <- DESeq2::DESeq(dds, quiet = TRUE)
-  dds_full <- S4Vectors::mcols(dds) %>% as.data.frame()
-  
-  # Call the function
-  estimate_df_long <- getEstimate_df(dds_full)
-  
-  # Check if the resulting data frame has the expected structure
-  expect_true("ID" %in% colnames(estimate_df_long))
-  expect_true("term" %in% colnames(estimate_df_long))
-  expect_true("estimate" %in% colnames(estimate_df_long))
-})
-
-
-
-# Define a test context
-test_that("getSE_df function works correctly", {
-  
- # Create a sample dds_full data frame
-  N_GENES = 100
-  MAX_REPLICATES = 5
-  MIN_REPLICATES = 5
-  ## --init variable
-  input_var_list <- init_variable( name = "genotype", mu = 12, sd = 0.1, level = 3) %>%
-                    init_variable(name = "environment", mu = c(0,1), NA , level = 2) 
-
-  mock_data <- mock_rnaseq(input_var_list, N_GENES, MIN_REPLICATES, max_replicates = MAX_REPLICATES)
-  dds <- DESeq2::DESeqDataSetFromMatrix(mock_data$counts , mock_data$metadata, ~ genotype + environment)
-  dds <- DESeq2::DESeq(dds, quiet = TRUE)
-  dds_full <- S4Vectors::mcols(dds) %>% as.data.frame()
-  
-  # Call the function
-  SE_df_long <- getSE_df(dds_full)
-  
-  # Check if the resulting data frame has the expected structure
-  expect_true("ID" %in% colnames(SE_df_long))
-  expect_true("term" %in% colnames(SE_df_long))
-  expect_true("std.error" %in% colnames(SE_df_long))
-})
-
-
-# Define a test context
-test_that("wrapperDESeq2 function works correctly", {
-  
- # Create a sample dds_full data frame
-  N_GENES = 100
-  MAX_REPLICATES = 5
-  MIN_REPLICATES = 5
-  ## --init variable
-  input_var_list <- init_variable( name = "genotype", mu = 12, sd = 0.1, level = 3) %>%
-                    init_variable(name = "environment", mu = c(0,1), NA , level = 2) 
-
-  mock_data <- mock_rnaseq(input_var_list, N_GENES, MIN_REPLICATES, max_replicates = MAX_REPLICATES)
-  dds <- DESeq2::DESeqDataSetFromMatrix(mock_data$counts , mock_data$metadata, ~ genotype + environment)
-  dds <- DESeq2::DESeq(dds, quiet = TRUE)
-  deseq2_wrapped <- wrapper_DESeq2(dds, 0.2, "greaterAbs")
-  
-  expect_true(is.list(deseq2_wrapped))
-  
-  # Check if the resulting data frame has the expected structure
-  expect_true("ID" %in% colnames(deseq2_wrapped$fixEff))
-  expect_true("term" %in% colnames(deseq2_wrapped$fixEff))
-  expect_true("std.error" %in% colnames(deseq2_wrapped$fixEff))
-  expect_true("estimate" %in% colnames(deseq2_wrapped$fixEff))
-  expect_true("statistic" %in% colnames(deseq2_wrapped$fixEff))
-  expect_true("p.value" %in% colnames(deseq2_wrapped$fixEff))
-  expect_true("p.adj" %in% colnames(deseq2_wrapped$fixEff))
-
-})
-
-```
-
-
-```{r function-anova, filename =  "anova"}
-
-#' Handle ANOVA Errors
-#'
-#' This function handles ANOVA errors and warnings during the ANOVA calculation process.
-#'
-#' @param l_TMB A list of fitted glmmTMB models.
-#' @param group A character string indicating the group for which ANOVA is calculated.
-#' @param ... Additional arguments to be passed to the \code{car::Anova} function.
-#' 
-#' @return A data frame containing ANOVA results for the specified group.
-#' @export
-#' 
-#' @examples
-#' l_tmb <- fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length,
-#'                           data = iris, group_by = "Species", n.cores = 1)
-#' anova_res <- handleAnovaError(l_tmb, "setosa", type = "III")
-#'
-#' @importFrom car Anova
-#' @export
-handleAnovaError <- function(l_TMB, group, ...) {
-  tryCatch(
-    expr = {
-      withCallingHandlers(
-        car::Anova(l_TMB[[group]], ...),
-        warning = function(w) {
-          message(paste(Sys.time(), "warning for group", group, ":", conditionMessage(w)))
-          invokeRestart("muffleWarning")
-        })
-    },
-    error = function(e) {
-      message(paste(Sys.time(), "error for group", group, ":", conditionMessage(e)))
-      NULL
-    }
-  )
-}
-
-
-#' Perform ANOVA on Multiple glmmTMB Models in Parallel
-#'
-#' This function performs analysis of variance (ANOVA) on a list of \code{glmmTMB}
-#' models in parallel for different groups specified in the list. It returns a list
-#' of ANOVA results for each group.
-#'
-#' @param l_tmb A list of \code{glmmTMB} models, with model names corresponding to the groups.
-#' @param ... Additional arguments passed to \code{\link[stats]{anova}} function.
-#'
-#' @return A list of ANOVA results for each group.
-#' @importFrom stats setNames
-#' @examples
-#' # Perform ANOVA
-#' data(iris)
-#' l_tmb<- fitModelParallel( Sepal.Length ~ Sepal.Width  + Petal.Length, 
-#'                          data = iris, group_by = "Species", n.cores = 1 )
-#' anov_res <- anovaParallel(l_tmb , type = "III")
-#' @importFrom stats anova
-#' @export
-anovaParallel <- function(l_tmb, ...) {
-  l_group <- attributes(l_tmb)$names
-  lapply(stats::setNames(l_group, l_group), function(group) handleAnovaError(l_tmb, group, ...))
-}
-
-
-```
-
-
-```{r  test-anova}
-
-
-test_that("handleAnovaError return correct ouptut", {
-  data(iris)
-  l_tmb <- fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length,
-                            data = iris, group_by = "Species", n.cores = 1)
-  anova_res <- handleAnovaError(l_tmb, "setosa", type = "III")
-  
-  expect_s3_class(anova_res, "data.frame")
-  expect_equal(nrow(anova_res), 3)  # Number of levels
-})
-
-test_that("handleAnovaError return correct ouptut", {
-  data(iris)
-  l_tmb <- fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length,
-                            data = iris, group_by = "Species", n.cores = 1)
-  anova_res <- handleAnovaError(l_tmb, "INALID_GROUP", type = "III")
-  
-  expect_null(anova_res)
-})
-
-
-
-test_that("anovaParallel returns valid ANOVA results", {
-  data(iris)
-  l_tmb <- fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length,
-                            data = iris, group_by = "Species", n.cores = 1)
-  anov_res <- anovaParallel(l_tmb, type = "III")
-  
-  expect_is(anov_res, "list")
-  expect_equal(length(anov_res), length(unique(iris$Species)))
-  
-})
-
-
-
-
-
-```
-
-
-```{r function-subsetGenes, filename =  "subsetGenes"}
-
-#' Subset Genes in Genomic Data
-#'
-#' This function filters and adjusts genomic data within the Roxygeb project, based on a specified list of genes.
-# It is designed to enhance precision and customization in transcriptomics analysis by retaining only the genes of interest.
-# 
-#' @param l_genes A character vector specifying the genes to be retained in the dataset.
-#' @param mockObj An object containing relevant genomic information to be filtered.
-#'
-#' @return A modified version of the 'mockObj' data object, with genes filtered according to 'l_genes'.
-#'
-#' @description The 'subsetGenes' function selects and retains genes from 'mockObj' that match the genes specified in 'l_genes'.
-# It filters the 'groundTruth$effects' data to keep only the rows corresponding to the selected genes. 
-# Additionally, it updates 'gene_dispersion' and the count data, ensuring that only the selected genes are retained.
-# The function also replaces the total number of genes in 'settings$values' with the length of 'l_genes'.
-# The result is a more focused and tailored genomic dataset, facilitating precision in subsequent analyses.
-#'
-#' @examples
-#' \dontrun{
-#' # Example list of genes to be retained
-#' selected_genes <- c("GeneA", "GeneB", "GeneC")
-#'
-#' # Example data object 'mockObj' (simplified structure)
-#' mockObj <- list(
-#'   # ... (mockObj structure)
-#' )
-#'
-#' # Using the subsetGenes function to filter 'mockObj'
-#' filtered_mockObj <- subsetGenes(selected_genes, mockObj)
-#' }
-#' @export
-subsetGenes <- function(l_genes, mockObj) {
-  # Selects the indices of genes in 'groundTruth$effects$geneID' that are present in 'l_genes'.
-  idx_gt_effects <- mockObj$groundTruth$effects$geneID %in% l_genes
-  
-  # Filters 'groundTruth$effects' to keep only the rows corresponding to the selected genes.
-  mockObj$groundTruth$effects <- mockObj$groundTruth$effects[idx_gt_effects, ]
-  
-  # Updates 'gene_dispersion' by retaining values corresponding to the selected genes.
-  mockObj$groundTruth$gene_dispersion <- mockObj$groundTruth$gene_dispersion[l_genes]
-  
-  # Filters the count data to keep only the rows corresponding to the selected genes.
-  mockObj$counts <- as.data.frame(mockObj$counts[l_genes, ])
-  
-  # Replaces the total number of genes in 'settings$values' with the length of 'l_genes'.
-  mockObj$settings$values[1] <- length(l_genes)
-  
-  # Returns the modified 'mockObj'.
-  return(mockObj)
-}
-
-
-```
-
-
-```{r function-evaluationWithMixedEffect, filename =  "evaluationWithMixedEffect"}
-
-#' Check if the formula contains a mixed effect structure.
-#'
-#' This function checks if the formula contains a mixed effect structure indicated by the presence of "|".
-#'
-#' @param formula A formula object.
-#'
-#' @return \code{TRUE} if the formula contains a mixed effect structure, \code{FALSE} otherwise.
-#'
-#' @examples
-#' is_mixedEffect_inFormula(y ~ x + (1|group))
-#'
-#' @export
-is_mixedEffect_inFormula <- function(formula) {
-  return("|" %in% all.names(formula))
-}
-
-#' Check if the formula follows a specific type I mixed effect structure.
-#'
-#' This function checks if the formula follows a specific type I mixed effect structure, which consists of a fixed effect and a random effect indicated by the presence of "|".
-#'
-#' @param formula A formula object.
-# 
-#' @return \code{TRUE} if the formula follows the specified type I mixed effect structure, \code{FALSE} otherwise.
-# 
-#' @examples
-#' is_formula_mixedTypeI(formula = y ~ x + (1|group))
-# 
-#' @export
-is_formula_mixedTypeI <- function(formula) {
-  if (length(all.vars(formula)) != 3) return(FALSE)
-  if (sum(all.names(formula) == "+") > 1) return(FALSE)
-  if (sum(all.names(formula) == "/") > 0) return(FALSE)
-  return(TRUE)
-}
-
-
-#' Get the categorical variable associated with the fixed effect in a type I formula.
-#'
-#' This function extracts the categorical variable associated with the fixed effect in a type I formula from a tidy tibble.
-# The categorical variable is constructed by taking the label of the second main fixed effect term (ignoring any numeric suffix) and prefixing it with "label_".
-#
-#' @param tidy_tmb A tidy tibble containing model terms.
-# 
-#' @return The categorical variable associated with the fixed effect in the type I formula.
-# 
-#' @examples
-#' \dontrun{
-#' getCategoricalVar_inFixedEffect(tidy_tmb)
-#' } 
-#' @export
-getCategoricalVar_inFixedEffect <- function(tidy_tmb) {
-  main_fixEffs <- unique(subset(tidy_tmb, effect == "fixed")$term)
-  categorical_var_inFixEff <- paste("label", gsub("\\d+$", "", main_fixEffs[2]), sep = "_")
-  return(categorical_var_inFixEff)
-}
-
-
-#' Group log_qij values per genes and labels.
-#'
-#' This function groups log_qij values in a ground truth tibble per genes and labels using a specified categorical variable.
-#
-#' @param ground_truth A tibble containing ground truth data.
-#' @param categorical_var The categorical variable to use for grouping.
-# 
-#' @return A list of log_qij values grouped by genes and labels.
-#' @importFrom stats as.formula
-#' @importFrom reshape2 dcast
-#' 
-# 
-#' @examples
-#' ' \dontrun{
-#' group_logQij_per_genes_and_labels(ground_truth, categorical_var)
-#' }
-#' @export
-group_logQij_per_genes_and_labels <- function(ground_truth, categorical_var) {
-  str_formula <- paste(c(categorical_var, "geneID"), collapse = " ~ ")
-  formula <- stats::as.formula(str_formula)
-  list_logqij <- ground_truth %>%
-    reshape2::dcast(
-      formula,
-      value.var = "log_qij_scaled",
-      fun.aggregate = list
-    )
-  list_logqij[categorical_var] <- NULL
-  return(list_logqij)
-}
-
-#' Calculate actual mixed effect values for each gene.
-#'
-#' This function calculates actual mixed effect values for each gene using the provided data, reference labels, and other labels in a categorical variable.
-#
-#' @param list_logqij A list of log_qij values grouped by genes and labels.
-#' @param genes_iter_list A list of genes for which to calculate the actual mixed effect values.
-#' @param categoricalVar_infos Information about the categorical variable, including reference labels and other labels.
-# 
-#' @return A data frame containing the actual mixed effect values for each gene.
-# 
-#' @examples
-#' ' \dontrun{
-#' getActualMixed_typeI(list_logqij, genes_iter_list, categoricalVar_infos)
-#' }
-#' @export
-getActualMixed_typeI <- function(list_logqij, genes_iter_list, categoricalVar_infos) {
-  labelRef_InCategoricalVar <- categoricalVar_infos$ref
-  labels_InCategoricalVar <- categoricalVar_infos$labels
-  labelOther_inCategoricalVar <- categoricalVar_infos$labelsOther
-
-  data_per_gene <- lapply(genes_iter_list, function(g) {
-    data_gene <- data.frame(list_logqij[[g]])
-    colnames(data_gene) <- labels_InCategoricalVar
-    return(data_gene)
-  })
-  
-  l_actual_per_gene <- lapply(genes_iter_list, function(g) {
-    data_gene <- data_per_gene[[g]]
-    res <- calculate_actualMixed(data_gene, labelRef_InCategoricalVar, labelOther_inCategoricalVar)
-    res$geneID <- g
-    return(res)
-  })
-  
-  actual_mixedEff <- do.call("rbind", l_actual_per_gene)
-  rownames(actual_mixedEff) <- NULL
-  return(actual_mixedEff)
-}
-
-
-
-#' Compare the mixed-effects inference to expected values.
-#'
-#' This function compares the mixed-effects inference obtained from a mixed-effects model to expected values derived from a ground truth dataset. The function assumes a specific type I mixed-effect structure in the input model.
-# 
-#' @param tidy_tmb  tidy model results obtained from fitting a mixed-effects model.
-#' @param ground_truth_eff A data frame containing ground truth effects.
-# 
-#' @return A data frame with the comparison of estimated mixed effects to expected values.
-#' @importFrom stats setNames
-#' @examples
-#' \dontrun{
-#' inferenceToExpected_withMixedEff(tidy_tmb(l_tmb), ground_truth_eff)
-#' } 
-#' @export
-inferenceToExpected_withMixedEff <- function(tidy_tmb, ground_truth_eff){
-
-  # -- CategoricalVar involve in fixEff
-  categorical_var <- getCategoricalVar_inFixedEffect(tidy_tmb)
-  labels_InCategoricalVar <- levels(ground_truth_eff[, categorical_var])
-  labelRef_InCategoricalVar <- labels_InCategoricalVar[1]
-  labelOther_inCategoricalVar <- labels_InCategoricalVar[2:length(labels_InCategoricalVar)]
-  categoricalVar_infos <- list(ref = labelRef_InCategoricalVar,
-                               labels = labels_InCategoricalVar,
-                               labelsOther = labelOther_inCategoricalVar )
-
-  ## -- prepare data 2 get actual
-  l_logqij <- group_logQij_per_genes_and_labels(ground_truth_eff, categorical_var)
-  l_genes <- unique(ground_truth_eff$geneID)
-  genes_iter_list <- stats::setNames(l_genes,l_genes)
-  actual_mixedEff <- getActualMixed_typeI(l_logqij, genes_iter_list, categoricalVar_infos)
-
-  res <- join_dtf(actual_mixedEff, tidy_tmb  ,c("geneID", "term"), c("ID", "term"))
-
-  ## -- reorder for convenience
-  actual <- res$actual
-  res <- res[, -1]
-  res$actual <- actual
-  return(res)
-}
-
-
-#' Calculate actual mixed effects.
-#'
-#' This function calculates actual mixed effects based on the given data for a specific type I mixed-effect structure.
-# It calculates the expected values, standard deviations, and correlations between the fixed and random effects.
-# The function is designed to work with specific input data for type I mixed-effect calculations.
-# 
-#' @param data_gene Data for a specific gene.
-#' @param labelRef_InCategoricalVar The reference label for the categorical variable.
-#' @param labelOther_inCategoricalVar Labels for the categorical variable other than the reference label.
-#' @importFrom stats sd cor
-# 
-#' @return A data frame containing the calculated actual mixed effects.
-# 
-#' @examples
-#' \dontrun{
-# calculate_actualMixed(data_gene, labelRef_InCategoricalVar, labelOther_inCategoricalVar)
-#' }
-#' @export
-calculate_actualMixed <- function(data_gene, labelRef_InCategoricalVar, labelOther_inCategoricalVar ){
-   log_qij_scaled_intercept <- data_gene[labelRef_InCategoricalVar]
-  colnames(log_qij_scaled_intercept) <- '(Intercept)'
-
-  if (length(labelOther_inCategoricalVar == 1 )) {
-    log_qij_scaled_other <- data_gene[labelOther_inCategoricalVar]
-  } else log_qij_scaled_other <- data_gene[,labelOther_inCategoricalVar]
-  log_qij_scaled_transf <- log_qij_scaled_other - log_qij_scaled_intercept[,"(Intercept)"]
-
-  log_qij_scaled_transf <- cbind(log_qij_scaled_intercept, log_qij_scaled_transf)
-  ## -- fix eff
-  actual_fixedValues <- colMeans(log_qij_scaled_transf)
-
-  ## -- stdev values
-  std_values <- sapply(log_qij_scaled_transf, function(x) stats::sd(x))
-  names(std_values) <- paste("sd", names(std_values), sep = '_')
-
-  ## -- correlation
-  corr_mat <- stats::cor(log_qij_scaled_transf)
-  indx <- which(upper.tri(corr_mat, diag = FALSE), arr.ind = TRUE)
-  corr2keep = corr_mat[indx]
-  name_corr <- paste(rownames(corr_mat)[indx[, "row"]], colnames(corr_mat)[indx[, "col"]], sep = ".")
-  names(corr2keep) <- paste("cor", name_corr, sep = "__")
-
-  ## -- output 
-  actual <- c(actual_fixedValues, std_values, corr2keep)
-  res <- as.data.frame(actual)
-  res$term <- rownames(res)
-  rownames(res) <- NULL
-  res$description <- sub("_.*", "", gsub("\\d+$", "" , res$term))
-  return(res)
-  
-  
-}
-
-
-#' Compare inference results to expected values for a given model.
-#'
-#' This function compares the inference results from a model to the expected values based on a ground truth dataset with the simulated effects. The function handles models with mixed effects and fixed effects separately, ensuring that the comparison is appropriate for the specific model type.
-#'
-#' If a model includes mixed effects, the function checks for support for the specific mixed effect structure and provides an informative error message if the structure is not supported.
-#'
-#' @param tidy_tmb A fitted model object convert to tidy dataframe.
-#' @param ground_truth_eff A ground truth dataset with the simulated effects.
-#' @param formula_used formula used in model 
-#'
-#' @return A data frame containing the comparison results, including the term names, inference values, and expected values.
-#'
-#' @examples
-#' \dontrun{
-#' evalData <- compareInferenceToExpected(l_tmb, ground_truth_eff)
-#' }
-#' @export
-compareInferenceToExpected <- function(tidy_tmb, ground_truth_eff, formula_used) {
-  ## -- parsing formula & check mixed effect
-  involvMixedEffect <- is_mixedEffect_inFormula(formula_used)
-
-  msg_e_formula_type <- "This simulation evaluation supports certain types of formulas with mixed effects, but not all.
-    Please refer to the package documentation for information on supported formula structures.
-    You are welcome to implement additional functions to handle specific formula types with mixed effects that are not currently supported."
-
-  ## -- if mixed effect
-  if (involvMixedEffect){
-    message("Mixed effect detected in the formula structure.")
-
-    if(!is_formula_mixedTypeI(formula_used)){
-      stop(msg_e_formula_type)
-    }
-    evalData <- inferenceToExpected_withMixedEff(tidy_tmb, ground_truth_eff)
-
-  ## -- only fixed effect
-  } else {
-    
-    message("Only fixed effects are detected in the formula structure.")
-    evalData <- inferenceToExpected_withFixedEff(tidy_tmb, ground_truth_eff)
-  }
-
-  return(evalData)
-}
-
-
-```
-
-```{r  test-evaluationWithMixedEffect}
-
-
-
-test_that("Test is_mixedEffect_inFormula", {
-  formula1 <- y ~ a + (1 | B)
-  formula2 <- ~ a + (1 | B)
-  formula3 <- x ~ c + d
-
-  expect_true(is_mixedEffect_inFormula(formula1))
-  expect_true(is_mixedEffect_inFormula(formula2))
-  expect_false(is_mixedEffect_inFormula(formula3))
-})
-
-test_that("Test is_formula_mixedTypeI", {
-  formula1 <- y ~ x + (1 | group)
-  formula2 <- y ~ z + group1 + (1 | group1)
-  formula3 <- y ~ z + (1 | group1 + group2)
-  formula4 <- y ~ z + (1 | group1/z)
-
-  expect_true(is_formula_mixedTypeI(formula1))
-  expect_false(is_formula_mixedTypeI(formula2))
-  expect_false(is_formula_mixedTypeI(formula3))
-  expect_false(is_formula_mixedTypeI(formula4))
-
-})
-
-
-test_that("getCategoricalVar_inFixedEffect returns the correct result", {
-  
-    ###### PREPARE DATA
-    N_GENES = 2
-    MAX_REPLICATES = 4
-    MIN_REPLICATES = 4
-
-    input_var_list <- init_variable( name = "genotype", mu = 2, sd = 0.5, level = 10) %>%
-      init_variable( name = "environment", mu = c(1, 3), sd = NA, level = 2) %>%
-      add_interaction(between_var = c("genotype", 'environment'), mu = 1, sd = 0.39)
-    
-    mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                             min_replicates = MIN_REPLICATES,
-                             max_replicates = MAX_REPLICATES,
-                             basal_expression = 3, dispersion = 100)
-    
-    data2fit = prepareData2fit(countMatrix = mock_data$counts, metadata =  mock_data$metadata, normalization = F)
-    
-    l_tmb <- fitModelParallel(formula = kij ~  environment  + (environment | genotype ),
-                              data = data2fit, group_by = "geneID",
-                              family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-      
-  
-    tidy_tmb <- tidy_tmb(l_tmb)
-    categorical_var <- getCategoricalVar_inFixedEffect(tidy_tmb)
-    expect_equal(categorical_var, "label_environment")
-})
-
-test_that("group_logQij_per_genes_and_labels returns the correct result", {
-  # Checks the structure of the per-gene / per-label grouping built from the
-  # simulated ground-truth effects. The function under test only receives the
-  # ground truth and a column name, so no model is fitted in this test.
-
-  ## -- PREPARE DATA
-  N_GENES <- 2
-  MAX_REPLICATES <- 4
-  MIN_REPLICATES <- 4
-  input_var_list <- init_variable(name = "genotype", mu = 2, sd = 0.5, level = 10) %>%
-    init_variable(name = "environment", mu = c(1, 3), sd = NA, level = 2) %>%
-    add_interaction(between_var = c("genotype", "environment"), mu = 1, sd = 0.39)
-
-  mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                           min_replicates = MIN_REPLICATES,
-                           max_replicates = MAX_REPLICATES,
-                           basal_expression = 3, dispersion = 100)
-
-  ## -- CALL FUNCTION TO TEST
-  ground_truth_eff <- mock_data$groundTruth$effects
-  categorical_var <- "label_environment"
-  logqij_list <- group_logQij_per_genes_and_labels(ground_truth_eff, categorical_var)
-
-  ## -- VERIF
-  expect_s3_class(logqij_list, "data.frame")
-  expect_equal(names(logqij_list), c("gene1", "gene2"))
-  # Two entries per gene -- presumably one per environment label (level = 2).
-  expect_length(logqij_list$gene1, 2)
-  expect_length(logqij_list$gene2, 2)
-  # Ten values per entry -- presumably one per genotype level (level = 10).
-  expect_length(logqij_list$gene2[[1]], 10)
-})
-
-test_that("getActualMixed_typeI returns the correct result", {
-  # Builds the table of actual mixed-effect values from the simulated ground
-  # truth and checks its class, columns, genes and terms. Only the ground
-  # truth is consumed, so no model is fitted in this test.
-
-  ## -- PREPARE DATA
-  N_GENES <- 2
-  MAX_REPLICATES <- 4
-  MIN_REPLICATES <- 4
-  input_var_list <- init_variable(name = "genotype", mu = 2, sd = 0.5, level = 10) %>%
-    init_variable(name = "environment", mu = c(1, 3), sd = NA, level = 2) %>%
-    add_interaction(between_var = c("genotype", "environment"), mu = 1, sd = 0.39)
-
-  mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                           min_replicates = MIN_REPLICATES,
-                           max_replicates = MAX_REPLICATES,
-                           basal_expression = 3, dispersion = 100)
-
-  ground_truth_eff <- mock_data$groundTruth$effects
-  categorical_var <- "label_environment"
-  logqij_list <- group_logQij_per_genes_and_labels(ground_truth_eff, categorical_var)
-  l_genes <- unique(ground_truth_eff$geneID)
-  # Named vector so that per-gene results keep the gene ID as their name.
-  genes_iter_list <- stats::setNames(l_genes, l_genes)
-  categoricalVar_infos <- list(ref = "environment1",
-                               labels = c("environment1", "environment2"),
-                               labelsOther = "environment2")
-
-  ## -- test
-  actual_mixedEff <- getActualMixed_typeI(logqij_list,
-                                          genes_iter_list,
-                                          categoricalVar_infos)
-
-  ## -- verif
-  expect_s3_class(actual_mixedEff, "data.frame")
-  expect_equal(colnames(actual_mixedEff), c("actual", "term", "description", "geneID"))
-  expect_equal(unique(actual_mixedEff$geneID), c("gene1", "gene2"))
-  expect_equal(unique(actual_mixedEff$term), c("(Intercept)", "environment2",
-                                               "sd_(Intercept)", "sd_environment2",
-                                               "cor__(Intercept).environment2"))
-})
-
-
-# Test for InferenceToExpected_withMixedEff
-test_that("inferenceToExpected_withMixedEff correctly compares inference to expected values", {
-  # Fits a mixed model on simulated counts, joins the tidy inference with the
-  # simulated ground truth and checks the columns, genes and terms of the
-  # resulting comparison table.
-
-  ## -- PREPARE DATA
-  N_GENES <- 2
-  MAX_REPLICATES <- 4
-  MIN_REPLICATES <- 4
-
-  input_var_list <- init_variable(name = "genotype", mu = 2, sd = 0.5, level = 10) %>%
-    init_variable(name = "environment", mu = c(1, 3), sd = NA, level = 2) %>%
-    add_interaction(between_var = c("genotype", "environment"), mu = 1, sd = 0.39)
-
-  mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                           min_replicates = MIN_REPLICATES,
-                           max_replicates = MAX_REPLICATES,
-                           basal_expression = 3, dispersion = 100)
-
-  data2fit <- prepareData2fit(countMatrix = mock_data$counts,
-                              metadata = mock_data$metadata,
-                              normalization = FALSE)
-
-  l_tmb <- fitModelParallel(formula = kij ~ environment + (environment | genotype),
-                            data = data2fit, group_by = "geneID",
-                            family = glmmTMB::nbinom2(link = "log"), n.cores = 1)
-
-  ## -- call function to test
-  compared_df <- inferenceToExpected_withMixedEff(tidy_tmb(l_tmb), mock_data$groundTruth$effects)
-
-  ## -- TEST VERIF
-  # Observed value first, expected value second, so that testthat failure
-  # messages label the two sides correctly.
-  expect_equal(colnames(compared_df),
-               c("term", "description", "geneID", "effect",
-                 "component", "group", "estimate", "std.error",
-                 "statistic", "p.value", "actual"))
-  expect_equal(unique(compared_df$geneID), c("gene1", "gene2"))
-  expect_equal(unique(compared_df$term),
-               c("(Intercept)", "cor__(Intercept).environment2", "environment2",
-                 "sd_(Intercept)", "sd_environment2"))
-})
-
-# Test for calculate_actualMixed
-test_that("calculate_actualMixed calculates actual mixed effects as expected", {
-  # Computes the actual mixed-effect summary for one gene from the simulated
-  # ground truth and checks the column names, terms and descriptions.
-
-  ## -- PREPARE DATA
-  N_GENES <- 2
-  MAX_REPLICATES <- 4
-  MIN_REPLICATES <- 4
-
-  input_var_list <- init_variable(name = "genotype", mu = 2, sd = 0.5, level = 10) %>%
-    init_variable(name = "environment", mu = c(1, 3), sd = NA, level = 2) %>%
-    add_interaction(between_var = c("genotype", "environment"), mu = 1, sd = 0.39)
-
-  mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                           min_replicates = MIN_REPLICATES,
-                           max_replicates = MAX_REPLICATES,
-                           basal_expression = 3, dispersion = 100)
-
-  ground_truth_eff <- mock_data$groundTruth$effects
-  categorical_var <- "label_environment"
-  logqij_list <- group_logQij_per_genes_and_labels(ground_truth_eff, categorical_var)
-  l_genes <- unique(ground_truth_eff$geneID)
-  genes_iter_list <- stats::setNames(l_genes, l_genes)
-  categoricalVar_infos <- list(ref = "environment1",
-                               labels = c("environment1", "environment2"),
-                               labelsOther = "environment2")
-
-  ## -- call function & test
-  # One data frame per gene, with one column per label of the categorical
-  # variable (columns renamed to the labels listed above).
-  data_per_gene <- lapply(genes_iter_list, function(g) {
-    data_gene <- data.frame(logqij_list[[g]])
-    colnames(data_gene) <- categoricalVar_infos$labels
-    data_gene
-  })
-  data_gene <- data_per_gene$gene1
-  actual_mixed <- calculate_actualMixed(data_gene,
-                                        labelRef_InCategoricalVar = categoricalVar_infos$ref,
-                                        labelOther_inCategoricalVar = categoricalVar_infos$labelsOther)
-  expect_equal(colnames(actual_mixed), c("actual", "term", "description"))
-  expect_equal(actual_mixed$term, c("(Intercept)", "environment2",
-                                    "sd_(Intercept)", "sd_environment2",
-                                    "cor__(Intercept).environment2"))
-  expect_equal(actual_mixed$description, c("(Intercept)", "environment",
-                                           "sd", "sd",
-                                           "cor"))
-})
-
-
-
-```
-
-
-# Initialize variable to simulate
-
-The `init_variable()` function is a key tool for defining the variables in your experimental design. You can specify the variables' names and the size of the effects involved. By manually setting the effect of a variable, you make it a fixed effect, while random effect definitions can make it either fixed or mixed.
-
-```{r example-init_variable, warning=FALSE, message=FALSE}
-## -- Manual initialisation
-# One explicit effect size per level: 'varA' has 3 levels, so `mu` holds
-# 3 hand-picked values.
-input_var_list <- init_variable(name = "varA", mu = c(0.2, 4, -3), level = 3)
-
-## -- Random initialisation
-# Give a single mean (mu) and a standard deviation (sd) instead of one value
-# per level. This introduces variability into 'varA', which can then act as
-# either a fixed or a mixed effect in the design.
-input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.2, level = 5)
-
-## -- Random initialisation of several variables
-# Calls can be chained with the pipe to declare 'varA' and 'varB' in one go,
-# which makes it easy to build diverse experimental designs.
-input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.2, level = 5) %>%
-  init_variable(name = "varB", mu = -3, sd = 0.34, level = 2)
-
-```
-
-Similarly to `init_variable()`, `add_interaction()` lets you initialize an interaction between variables.
-
-```{r example-add_interaction, warning=FALSE, message=FALSE}
-## -- Declare three variables, then an interaction between two of them
-# 'varA', 'varB' and 'varC' are initialised first; 'add_interaction' then
-# adds the 'varA' x 'varC' interaction to the same design. Interactions are a
-# way to represent more complex relationships between variables.
-input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.2, level = 5) %>%
-  init_variable(name = "varB", mu = 1, sd = 0.78, level = 2) %>%
-  init_variable(name = "varC", mu = -3, sd = 6, level = 3) %>%
-  add_interaction(between_var = c("varA", "varC"), mu = 9, sd = 0.2)
-                        
-```
-
-Using `set_correlation()`, you can constrain the correlation between variables or interactions that have been randomly declared. A manually defined variable cannot be given a correlation.
-
-```{r example-setCorrelation, warning=FALSE, message=FALSE}
-## -- Correlation between two main variables
-# 'set_correlation' is chained after the declarations; here 'varA' and 'varC'
-# are given a correlation of 0.32.
-input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.2, level = 5) %>%
-  init_variable(name = "varB", mu = 1, sd = 0.78, level = 2) %>%
-  init_variable(name = "varC", mu = -3, sd = 6, level = 3) %>%
-  set_correlation(between_var = c("varA", "varC"), corr = 0.32)
-
-## -- Correlation between two interactions
-# Interactions are referred to by their "x:y" name once they have been added.
-input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.2, level = 5) %>%
-  init_variable(name = "varB", mu = 1, sd = 0.78, level = 2) %>%
-  init_variable(name = "varC", mu = -3, sd = 6, level = 3) %>%
-  add_interaction(between_var = c("varA", "varC"), mu = 9, sd = 0.2) %>%
-  add_interaction(between_var = c("varB", "varA"), mu = 9, sd = 0.2) %>%
-  set_correlation(between_var = c("varA:varC", "varB:varA"), corr = -0.5)
-
-## -- Correlation between an interaction and a variable
-# An interaction and a main variable can be correlated as well.
-input_var_list <- init_variable(name = "varA", mu = 1, sd = 0.2, level = 5) %>%
-  init_variable(name = "varB", mu = 1, sd = 0.78, level = 2) %>%
-  add_interaction(between_var = c("varA", "varB"), mu = 9, sd = 0.2) %>%
-  set_correlation(between_var = c("varA:varB", "varA"), corr = 0.8)
-
-## -- output
-# This last design (varA, varB and their interaction) is the one used by the
-# simulation examples that follow.
-input_var_list
-```
-
-
-# Simulate RNAseq data
-
-In this section, you will learn how to simulate RNAseq data with `mock_rnaseq()`, generating counts based on the input variables defined earlier.
-
-```{r example-mock_rnaseq, warning=FALSE, message=FALSE}
-## -- Required parameters
-N_GENES <- 4
-MIN_REPLICATES <- 2
-MAX_REPLICATES <- 4
-########################
-
-
-
-## -- simulate RNAseq data based on input_var_list
-mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                         min_replicates = MIN_REPLICATES,
-                         max_replicates = MAX_REPLICATES)
-
-## -- Scaling genes counts with sequencing depth
-SEQ_DEPTH <- c(100000, 5000000, 10000000) # possible number of reads/sample
-mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                         min_replicates = MIN_REPLICATES,
-                         max_replicates = MAX_REPLICATES,
-                         sequencing_depth = SEQ_DEPTH)
-
-## -- Set gene dispersion : k ~ Nbinomial(mu, disp)
-# The three assignments below are alternatives shown for illustration; each
-# one overwrites the previous, so only the last value reaches mock_rnaseq().
-DISP <- 0.1 # Same dispersion for each genes
-DISP <- 1000 # Same dispersion for each genes
-DISP <- runif(N_GENES, 0, 1000) ## Dispersion can vary between genes
-mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                         min_replicates = MIN_REPLICATES,
-                         max_replicates = MAX_REPLICATES,
-                         dispersion = DISP)
-
-## -- Set basal gene expression
-# Alternatives again: only the last assignment is used by the call below.
-BASAL_EXPR <- -3 # Value can be negative to simulate low expressed gene
-BASAL_EXPR <- 2 # Same basal gene expression for the N_GENES
-# NOTE(review): 7 values are supplied while N_GENES is 4 -- confirm how
-# mock_rnaseq() handles a basal_expression vector longer than N_GENES.
-BASAL_EXPR <- c(-3, -2, -1, 0, 1, 2, 3) ## Basal expression can vary between genes
-mock_data <- mock_rnaseq(input_var_list, N_GENES,
-                         min_replicates = MIN_REPLICATES,
-                         max_replicates = MAX_REPLICATES,
-                         basal_expression = BASAL_EXPR)
-
-## -- output list attributes
-names(mock_data)
-```
-
-
-# Fitting models
-
-## Prepare data for fitting
-
-This section explains how to prepare the data for modeling and why data normalization matters.
-
-
-```{r example-prepareData, warning=FALSE, message=FALSE}
-## -- get data from simulation or real data
-count_matrix <- mock_data$counts
-metaData <- mock_data$metadata
-##############################
-
-## -- convert counts matrix and samples metadata in a data frame for fitting
-# TRUE/FALSE are spelled out: T and F are ordinary variables that can be
-# reassigned, whereas TRUE and FALSE are reserved words.
-data2fit <- prepareData2fit(countMatrix = count_matrix,
-                            metadata = metaData,
-                            normalization = FALSE)
-
-
-## -- data normalization
-# This call overwrites the previous data2fit; the normalized table is the one
-# used by the fitting examples that follow.
-data2fit <- prepareData2fit(countMatrix = count_matrix,
-                            metadata = metaData,
-                            normalization = TRUE,
-                            response_name = "kij")
-
-## -- output
-head(data2fit)
-```
-
-## Generalized linear model fit
-
-We discuss the use of generalized linear models (GLMs) and how to incorporate mixed effects into your models. You'll also find examples of fitting models beyond RNAseq data.
-
-```{r example-fitModelParallel, warning=FALSE, message=FALSE}
-
-## -- fixed-effect model: one fit per value of `group_by` (here, per gene)
-l_tmb <- fitModelParallel(
-  formula = kij ~ varA,
-  data = data2fit,
-  group_by = "geneID",
-  family = glmmTMB::nbinom2(link = "log"),
-  log_file = "log.txt",
-  n.cores = 1
-)
-
-
-## -- mixed model: random intercept for varB
-l_tmb <- fitModelParallel(
-  formula = kij ~ varA + ( 1 | varB ),
-  data = data2fit,
-  group_by = "geneID",
-  family = glmmTMB::nbinom2(link = "log"),
-  log_file = "log.txt",
-  n.cores = 1
-)
-
-## -- any data frame can be used, not only RNAseq data
-data("iris")
-l_tmb <- fitModelParallel(
-  formula = Sepal.Length ~ Sepal.Width + Petal.Length + Petal.Width,
-  data = iris,
-  group_by = "Species",
-  family = gaussian(),
-  log_file = "log.txt",
-  n.cores = 1
-)
-
-
-## -- extra settings: optimizer limits forwarded through glmmTMBControl()
-l_tmb <- fitModelParallel(
-  formula = kij ~ varA,
-  data = data2fit,
-  group_by = "geneID",
-  family = glmmTMB::nbinom2(link = "log"),
-  n.cores = 1,
-  log_file = "log.txt",
-  control = glmmTMB::glmmTMBControl(
-    optCtrl = list(iter.max = 1e5, eval.max = 1e5)
-  )
-)
-
-## -- output: fit stored under the name "gene1"
-l_tmb$gene1
-
-```
-
-## Update fit
-
-This section is all about updating your models. Learn how to modify the model family, control settings, and even update your model formula. Model updates are a crucial part of the modeling process, allowing you to refine and adapt your analyses.
-
-```{r example-update, warning=FALSE,  message=FALSE}
-
-## -- switch the model family (negative binomial -> gaussian)
-l_tmb <- updateParallel(
-  formula = kij ~ varA,
-  l_tmb = l_tmb,
-  family = gaussian(),
-  log_file = "log.txt",
-  n.cores = 1
-)
-
-## -- same update, with explicit optimizer control settings
-l_tmb <- updateParallel(
-  formula = kij ~ varA,
-  l_tmb = l_tmb,
-  family = gaussian(),
-  log_file = "log.txt",
-  n.cores = 1,
-  control = glmmTMB::glmmTMBControl(
-    optCtrl = list(iter.max = 1e3, eval.max = 1e3)
-  )
-)
-
-
-## -- change the formula: add varB and the varA:varB interaction
-l_tmb <- updateParallel(
-  formula = kij ~ varA + varB + varA:varB,
-  l_tmb = l_tmb,
-  family = glmmTMB::nbinom2(link = "log"),
-  log_file = "log.txt",
-  n.cores = 1
-)
-
-## -- output: updated fit stored under the name "gene1"
-l_tmb$gene1
-```
-
-## Plot fit metrics
-
-Visualizing fit metrics is essential for evaluating your models. Here, we show you how to generate various plots to assess the quality of your models. You can explore all metrics or focus on specific aspects like dispersion and log-likelihood.
-
-```{r example-plotMetrics, warning=FALSE, message=FALSE}
-
-## -- every available metric
-metrics_plot(l_tmb = l_tmb)
-
-## -- restrict the plot to the metrics named in `focus`
-metrics_plot(
-  l_tmb = l_tmb,
-  focus = c("dispersion", "logLik")
-)
-```
-
-## Anova to select the best model
-
-Selecting the best model is a fundamental step in data analysis. We explore how to use analysis of variance (ANOVA) to compare different models and identify the most suitable one for your data. Model selection is critical for drawing meaningful conclusions from your analyses.
-
-```{r example-anova, warning=FALSE,  message=FALSE}
-
-## -- run the ANOVA on the fitted models with default settings
-l_anova <- anovaParallel(l_tmb = l_tmb)
-
-## -- additional settings: request a type "III" ANOVA
-l_anova <- anovaParallel(
-  l_tmb = l_tmb,
-  type = "III"
-)
-
-## -- output: ANOVA result stored under the name "gene1"
-l_anova$gene1
-```
-
-
-# Simulation evaluation report
-
-In this section, we delve into the evaluation of your simulation results. You'll find details on generating receiver operating characteristic (ROC) curves, identity plots, and dispersion evaluations. These evaluations provide valuable insights into the performance of your simulated data and models.
-
-```{r example-simulationReport, warning=FALSE, message=FALSE}
-
-## -- evaluate the fits against the simulated ground truth
-resSimu <- simulationReport(
-  mock_data,
-  list_tmb = l_tmb,
-  coeff_threshold = 0.4,
-  alt_hypothesis = "greaterAbs"
-)
-
-## -- ROC curve
-resSimu$roc_plot
-
-## -- identity plot
-resSimu$identity_plot
-
-## -- dispersion evaluation plot
-resSimu$dispersionEvaluation$disp_plot
-
-```
-
-## Compare HTRfit with DESeq2
-
-Comparing different analysis approaches is a common practice in data analysis. Here, we compare the results obtained with HTRfit to those of DESeq2. This comparison helps you understand the advantages and unique features of HTRfit in your analyses.
-
-```{r example-ddsComparison, warning=FALSE, message=FALSE}
-## -- DESeq2 analysis on the same counts, metadata and design
-library(DESeq2)
-dds <- DESeq2::DESeqDataSetFromMatrix(
-  countData = count_matrix,
-  colData = metaData,
-  design = ~ varA + varB + varA:varB
-)
-dds <- DESeq2::DESeq(dds, quiet = TRUE)
-
-
-
-## -- evaluation report including the DESeq2 object (`dds_obj`)
-resSimu <- simulationReport(
-  mock_data,
-  list_tmb = l_tmb,
-  dds_obj = dds,
-  coeff_threshold = 0.4,
-  alt_hypothesis = "greaterAbs"
-)
-
-## -- ROC curve
-resSimu$roc_plot
-
-## -- identity plot
-resSimu$identity_plot
-
-## -- dispersion evaluation plot
-resSimu$dispersionEvaluation$disp_plot
-
-```
-
-## Focus evaluation on a subset of genes 
-
-This section demonstrates how to evaluate low-expressed genes by filtering and analyzing a subset of your data. This focused evaluation can reveal insights that may be obscured when considering the entire dataset.
-
-```{r example-subsetGenes, warning=FALSE, message=FALSE}
-
-## Focus on low expressed genes 
-#low_expressed <- mock_data$groundTruth$effects[ mock_data$groundTruth$effects$basalExpr < 0, ]
-#l_genes <- unique(low_expressed$geneID)
-#mock_lowExpressed <- subsetGenes(l_genes, mock_data)
-
-
-## -- get simulation/fit evaluation
-#resSimu <- simulationReport(mock_lowExpressed, 
-#                            list_tmb = l_tmb,
-#                            coeff_threshold = 0.4, 
-#                            alt_hypothesis = "greaterAbs")
-
-## -- roc curve
-#resSimu$roc_plot
-```
-
-
-
-```{r development-inflate, eval=FALSE}
-# Package scaffolding: fill DESCRIPTION, set the licence, export the pipe and
-# regenerate the documentation before inflating the flat file.
-# TRUE is spelled out because `T` is an ordinary, reassignable variable.
-fusen::fill_description(fields = list(Title = "HTRSIM"), overwrite = TRUE)
-usethis::use_mit_license("Arnaud DUVERMY")
-usethis::use_pipe(export = TRUE)
-devtools::document()
-# Keep eval=FALSE to avoid infinite loop in case you hit the knit button
-# Execute in the console directly
-fusen::inflate(flat_file = "dev/flat_full_bis.Rmd", vignette_name = "Get started")
-```