From eefea2283877bee3e53f816f930de771b91c8735 Mon Sep 17 00:00:00 2001
From: aduvermy <arnaud.duvermy@ens-lyon.fr>
Date: Sat, 14 Jan 2023 22:05:23 +0100
Subject: [PATCH] Add res v3

---
 results/v3/DESEQ2/2022_02_03_dispersion.Rmd   |  75 ++++++
 .../2023_02_01_postInferenceSelection.Rmd     | 120 ++++++++++
 results/v3/DESEQ2/2023_02_03_FDR.Rmd          | 117 +++++++++
 .../v3/DESEQ2/2023_02_05_sequencingDepth.Rmd  | 128 ++++++++++
 results/v3/DESEQ2/2023_02_06_replicates.Rmd   | 126 ++++++++++
 results/v3/GLM/2023_02_04_dispersion.Rmd      |  78 ++++++
 results/v3/GLM/2023_02_05_sequencingDepth.Rmd | 129 ++++++++++
 results/v3/GLM/2023_02_06_replicates.Rmd      | 128 ++++++++++
 .../Tuto/2023_02_03_rangeObservedValues.Rmd   |  55 +++++
 results/v3/css/splendor.css                   | 225 ++++++++++++++++++
 10 files changed, 1181 insertions(+)
 create mode 100644 results/v3/DESEQ2/2022_02_03_dispersion.Rmd
 create mode 100644 results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd
 create mode 100644 results/v3/DESEQ2/2023_02_03_FDR.Rmd
 create mode 100644 results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd
 create mode 100644 results/v3/DESEQ2/2023_02_06_replicates.Rmd
 create mode 100644 results/v3/GLM/2023_02_04_dispersion.Rmd
 create mode 100644 results/v3/GLM/2023_02_05_sequencingDepth.Rmd
 create mode 100644 results/v3/GLM/2023_02_06_replicates.Rmd
 create mode 100644 results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd
 create mode 100644 results/v3/css/splendor.css

diff --git a/results/v3/DESEQ2/2022_02_03_dispersion.Rmd b/results/v3/DESEQ2/2022_02_03_dispersion.Rmd
new file mode 100644
index 0000000..346798b
--- /dev/null
+++ b/results/v3/DESEQ2/2022_02_03_dispersion.Rmd
@@ -0,0 +1,75 @@
+---
+title: "Dispersion"
+date: "2023-02-01"
+output: 
+  html_document:
+    toc: true 
+css: ../css/air.css
+---
+
+## Required
+
+
+```{r setup, message=FALSE, warning=FALSE, results="hide"}
+library(HTRsim)
+```
+
+
+## Simulation
+
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- SETUP
+n_G = 1000
+n_genoT = 3
+n_env = 2
+max_n_rep = 15
+sequencing_fact = 1
+thr = 2
+###########
+
+# -- Simulate counts
+mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
+    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
+
+# -- count table & experimental design
+count_table = mock$countTable %>% as.data.frame()
+bioDesign = mock$design
+
+# -- dispersion value per gene & per condition
+mock$actualParameters$dispersion %>% head()
+
+```
+
+## Estimation
+
+### Launch DESEQ2 on simulated data
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- DESEQ2
+dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
+dispersion_estimated = getDispersionFromDDS(dds_simu)
+```
+
+### Join Actual & Inference
+
+```{r}
+# -- one dispersion per gene
+dispersion_actual = mock$actualParameters$dispersion %>% 
+          rowMeans() %>% 
+          as.data.frame() %>% 
+          dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id")
+
+dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id")
+dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id")
+dispersion_comparison = dispersion_actual[dispersion_estimated]
+
+```
+
+### Identity plot 
+
+```{r}
+ggplot(dispersion_comparison) + 
+  geom_point(aes(x = dispersion.actual, y = dispersion.estimate), alpha = 0.5) +
+  geom_abline(intercept = 0, slope = 1) + scale_y_log10() + scale_x_log10()
+```
diff --git a/results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd b/results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd
new file mode 100644
index 0000000..58b4549
--- /dev/null
+++ b/results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd
@@ -0,0 +1,120 @@
+---
+title: "Post Inference Selection"
+date: "2023-02-01"
+output: 
+  html_document:
+    toc: true 
+css: ../css/air.css
+---
+
+
+## Required
+
+
+```{r setup_2, message=FALSE, warning=FALSE, results="hide"}
+library(HTRfit)
+library(HTRsim)
+library(plotROC)
+library(gridExtra)
+```
+
+
+## Simulation
+
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- SETUP
+n_G = 100
+n_genoT = 10 
+n_env = 2
+max_n_rep = 3
+sequencing_fact = 1
+thr = 2
+###########
+
+# -- Simulate counts
+mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
+    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
+count_table = mock$countTable %>% as.data.frame()
+bioDesign = mock$design
+```
+
+## Prediction
+
+### Launch DESEQ2 on simulated data
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- DESEQ2
+dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
+deseqFitdtf = getCoefficientsFromDds(dds_simu)
+
+```
+
+### p(|beta| > 0) > 0.95 & |beta| > T => DE
+
+```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- prediction with post inference selection method
+prediction = getPrediction(deseqFitdtf, threshold = thr, 
+                            alphaRisk = 0.05, postInferenceSelection = T)
+expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
+dtf.comp.annot.PIS = getComparison(actual.dtf = expectation, 
+                                    inference.dtf = prediction )
+```
+
+### p(|beta| > T) > 0.95 => DE
+
+```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- prediction
+prediction = getPrediction(deseqFitdtf, threshold = thr, 
+                            alphaRisk = 0.05, postInferenceSelection = F)
+expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
+dtf.comp.annot= getComparison(actual.dtf = expectation, 
+                              inference.dtf = prediction )
+```
+
+## Evaluation
+
+
+### Build graph for evaluation
+
+```{r message=FALSE, warning=FALSE, echo = T, results = 'hide' ,include=TRUE}
+# -- Venn Diag Post inference selection
+p1 = getVennDiagramm(dtf.comp.annot.PIS, 
+                        title = "p(|beta| > 0) > 0.95 & |beta| > T")
+# -- Venn Diag 
+p2 = getVennDiagramm(comparisonDTF  = dtf.comp.annot,
+                     title = "p(|beta| > T) > 0.95")
+
+# -- ROC curves post inference selection
+dtf1 = dtf.comp.annot.PIS %>% 
+                  mutate(method = "p(|beta| > 0) > 0.95 & |beta| > T")
+# -- ROC curves
+dtf2 = dtf.comp.annot %>% 
+                  mutate(method = "p(|beta| > T) > 0.95")
+dtf = rbind(dtf1, dtf2)
+dtf$annotation <- factor(dtf$annotation, levels = c("TRUE", "FALSE"))
+
+# -- Identity plot
+p3 = ggplot(dtf %>% filter(beta != "(Intercept)")) + 
+  geom_point(aes(x = actual.value, y = estimate, col = padj), alpha = 0.5) +  
+  geom_abline(intercept = 0, slope = 1) + 
+  geom_vline(xintercept = c(-thr, thr), linetype = "dotted") +
+  facet_grid(method~beta, scales = "free") #+ 
+  #scale_color_brewer(palette = "Blues")
+
+p4 = ggplot(dtf %>% filter(beta != "(Intercept)"), 
+           aes(d = actual.label , m = padj, color = method)) + 
+  geom_roc(n.cuts = 0, labels = F)
+```
+
+### Conclusion
+
+```{r message=FALSE, warning=FALSE, include=TRUE}
+# -- Venn diagram
+grid.arrange(p1, p2, nrow = 1, ncol = 2)
+# -- Identity plot
+p3
+# -- ROC curves
+p4
+```
+
diff --git a/results/v3/DESEQ2/2023_02_03_FDR.Rmd b/results/v3/DESEQ2/2023_02_03_FDR.Rmd
new file mode 100644
index 0000000..fa564ca
--- /dev/null
+++ b/results/v3/DESEQ2/2023_02_03_FDR.Rmd
@@ -0,0 +1,117 @@
+---
+title: "False discovery rate"
+date: "2023-02-01"
+output: 
+  html_document:
+    toc: true 
+css: ../css/air.css
+---
+
+## Required
+
+
+```{r setup, message=FALSE, warning=FALSE, results="hide"}
+library(HTRfit)
+library(HTRsim)
+library(plotROC)
+library(gridExtra)
+```
+
+
+## Simulation
+
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- SETUP
+n_G = 100
+n_genoT = 5
+n_env = 2
+max_n_rep = 3
+sequencing_fact = 1
+thr = 2
+###########
+
+# -- Simulate counts
+mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
+    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
+count_table = mock$countTable %>% as.data.frame()
+bioDesign = mock$design
+mock$actualParameters$dispersion
+
+```
+
+## Prediction
+
+### Launch DESEQ2 on simulated data
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- DESEQ2
+dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
+deseqFitdtf = getCoefficientsFromDds(dds_simu)
+```
+
+### pvalue adjusted
+
+```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- prediction with post inference selection method
+prediction = getPrediction(deseqFitdtf, threshold = thr, 
+                            alphaRisk = 0.05, pvalCorrection = T)
+expectation = getExpectation(mock$actualParameters$beta, threshold = thr)
+dtf.comp.annot.adjusted = getComparison(actual.dtf = expectation, 
+                                    inference.dtf = prediction )
+```
+### pvalue adjusted
+
+```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- prediction 
+prediction = getPrediction(deseqFitdtf, threshold = thr, 
+                            alphaRisk = 0.05, pvalCorrection = F)
+dtf.comp.annot.unadjusted = getComparison(actual.dtf = expectation, 
+                                    inference.dtf = prediction )
+```
+
+
+
+## Evaluation
+
+### Build graph for evaluation
+
+```{r message=FALSE, warning=FALSE, echo = T, results = 'hide' ,include=TRUE}
+# -- Venn Diag adjusted pvalue
+p1 = getVennDiagramm(dtf.comp.annot.adjusted, 
+                        title = "padjusted")
+# -- Venn Diag unadjusted pvalue
+p2 = getVennDiagramm(comparisonDTF  = dtf.comp.annot.unadjusted,
+                     title = "pvalue")
+
+# -- ROC curves post inference selection
+dtf1 = dtf.comp.annot.adjusted %>% 
+                  mutate(method = "padjusted")
+# -- ROC curves
+dtf2 = dtf.comp.annot.unadjusted %>% 
+                  mutate(method = "pvalue")
+dtf = rbind(dtf1, dtf2)
+dtf$annotation <- factor(dtf$annotation, levels = c("TRUE", "FALSE"))
+
+# -- Identity plot
+p3 = ggplot(dtf %>% filter(beta != "(Intercept)")) + 
+  geom_point(aes(x = actual.value, y = estimate, col = padj), alpha = 0.5) +  
+  geom_abline(intercept = 0, slope = 1) + 
+  geom_vline(xintercept = c(-thr, thr), linetype = "dotted") +
+  facet_grid(method~beta, scales = "free") #+ 
+  #scale_color_brewer(palette = "Set2")
+p4 = ggplot(dtf %>% filter(beta != "(Intercept)"), 
+           aes(d = actual.label , m = padj, color = method)) + 
+  geom_roc(n.cuts = 0, labels = F)
+```
+
+### Visualization
+
+```{r message=FALSE, warning=FALSE, include=TRUE}
+# -- Venn diagram
+grid.arrange(p1, p2, nrow = 1, ncol = 2)
+# -- Identity plot
+p3
+# -- ROC curves
+p4
+```
diff --git a/results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd b/results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd
new file mode 100644
index 0000000..c6f9f75
--- /dev/null
+++ b/results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd
@@ -0,0 +1,128 @@
+---
+title: "Sequencing depth effect"
+date: "2023-02-01"
+output: 
+  html_document:
+    toc: true 
+css: ../css/air.css
+---
+
+## Required
+
+
+```{r setup, message=FALSE, warning=FALSE, results="hide"}
+library(HTRsim)
+library(plotROC)
+```
+
+
+## Simulation
+
+### Parameters
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- SETUP
+n_G = 1000
+n_genoT = 3
+n_env = 2
+max_n_rep = 3
+thr = 2
+```
+
+### Ground truth
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- Fit mvnorm
+dds.extraction = loadEmbedded_ObservedValues()
+fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
+# -- Ground truth
+beta.actual <- getBetaforSimulation(
+                      n_G,
+                      n_genoT,
+                      fit.mvnorm)
+# -- build input for simulation 
+model.matx <- getModelMatrix()
+log_qij <- getLog_qij(beta.actual, model.matx)
+```
+
+### Sequencing depth increased
+
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+
+# -- loop on
+sequencing_factor_list = c(0.01, 0.1, 1, 2)
+
+# -- sequencing factor effect
+for (sequencing_factor in sequencing_factor_list){
+    mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
+    sample_ids <- colnames(mu_ij)
+    gene_dispersion.vec <- dds.extraction$gene_dispersion
+    dispersion.matrix <- getGenesDispersions(n_G,
+                              sample_ids,
+                              dispersion.vec = gene_dispersion.vec)
+  # -- Design replicates 
+  designReplication.matx <- getReplicationDesign(
+                                max_n_rep,
+                                n_genoT,
+                                n_env)
+  
+  # -- build counts table
+  count_table <- getCountTable(mu_ij, dispersion.matrix,
+                    n_G, n_genoT,
+                    sample_id_list = sample_ids,
+                    replication.matx = designReplication.matx)
+  bioDesign <- summariseDesign(count_table)
+  
+  # -- number of reads simulate
+  reads_counts = count_table %>% 
+                      colSums() %>% 
+                      sum() %>% 
+                      format(., scientific = TRUE, digits=1)
+
+  # -- DESEQ2 on simulated data
+  dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
+  deseqFitdtf = getCoefficientsFromDds(dds_simu)
+  prediction = getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05)
+  expectation = getExpectation(beta.actual, threshold = thr)
+  # -- join actual & inference
+  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
+  # -- annotation
+  comparison = comparison  %>% 
+              mutate(reads_sequenced = reads_counts)
+  # save results
+  if (exists('df2evaluation')){
+    df2evaluation = rbind(df2evaluation, comparison)
+  }
+  else{
+    df2evaluation = comparison
+  }
+}
+```
+
+## Evaluation
+
+### Build graph for evaluation 
+
+```{r, message=FALSE, warning=FALSE,}
+# -- ROC curve
+df2evaluation$reads_sequenced <- factor(df2evaluation$reads_sequenced, levels = unique(df2evaluation$reads_sequenced))
+p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), 
+           aes(d = actual.label , m = padj, col = reads_sequenced)) + 
+  geom_roc(n.cuts = 0, labels = F)   + 
+  scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D"))
+#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
+
+# Identity plot
+p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + 
+  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
+  geom_abline(intercept = 0, slope = 1) + 
+  facet_grid(beta~reads_sequenced, scales = "free")
+```
+
+### Visualization
+
+```{r, message=FALSE, warning=FALSE,}
+p1
+p2
+```
diff --git a/results/v3/DESEQ2/2023_02_06_replicates.Rmd b/results/v3/DESEQ2/2023_02_06_replicates.Rmd
new file mode 100644
index 0000000..81c7966
--- /dev/null
+++ b/results/v3/DESEQ2/2023_02_06_replicates.Rmd
@@ -0,0 +1,126 @@
+---
+title: "Sequencing depth effect"
+date: "2023-02-01"
+output: 
+  html_document:
+    toc: true 
+css: ../css/air.css
+---
+
+## Required
+
+
+```{r setup, message=FALSE, warning=FALSE, results="hide"}
+library(HTRsim)
+library(plotROC)
+```
+
+
+## Simulation
+
+### Parameters
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- SETUP
+n_G = 300
+n_genoT = 3
+n_env = 2
+sequencing_factor = 1
+thr = 2
+```
+
+### Ground truth
+
+```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- Fit mvnorm
+dds.extraction = loadEmbedded_ObservedValues()
+fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
+# -- Ground truth
+beta.actual <- getBetaforSimulation(
+                      n_G,
+                      n_genoT,
+                      fit.mvnorm)
+# -- build input for simulation 
+model.matx <- getModelMatrix()
+log_qij <- getLog_qij(beta.actual, model.matx)
+mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
+sample_ids <- colnames(mu_ij)
+gene_dispersion.vec <- dds.extraction$gene_dispersion
+dispersion.matrix <- getGenesDispersions(n_G,
+                          sample_ids,
+                          dispersion.vec = gene_dispersion.vec)
+```
+
+### Number of replicates increased
+
+```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- loop on
+n_rep_list = c(2, 5, 10, 25)
+
+# -- sequencing factor effect
+for (n_rep in n_rep_list){
+  # -- Design replicates 
+  designReplication.matx <- getReplicationDesign(
+                                n_rep,
+                                n_genoT,
+                                n_env)
+  
+  # -- build counts table
+  count_table <- getCountTable(mu_ij, dispersion.matrix,
+                    n_G, n_genoT,
+                    sample_id_list = sample_ids,
+                    replication.matx = designReplication.matx)
+  bioDesign <- summariseDesign(count_table)
+  
+  # -- number of reads simulate
+  reads_counts = count_table %>% 
+                      colSums() %>% 
+                      sum() %>% 
+                      format(., scientific = TRUE, digits=1)
+
+  # -- DESEQ2 on simulated data
+  dds_simu = HTRsim::fit_deseq(count_table, bioDesign)
+  deseqFitdtf = getCoefficientsFromDds(dds_simu)
+  prediction = getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05)
+  expectation = getExpectation(beta.actual, threshold = thr)
+  # -- join actual & inference
+  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
+  # -- annotation
+  comparison = comparison  %>% 
+              mutate(n_replicates = n_rep)
+  # save results
+  if (exists('df2evaluation')){
+    df2evaluation = rbind(df2evaluation, comparison)
+  }
+  else{
+    df2evaluation = comparison
+  }
+}
+```
+
+## Evaluation
+
+### Build graph for evaluation 
+
+```{r, message=FALSE, warning=FALSE,}
+# -- ROC curve
+df2evaluation$n_replicates <- factor(df2evaluation$n_replicates, levels = unique(df2evaluation$n_replicates))
+p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), 
+           aes(d = actual.label , m = padj, col = n_replicates)) + 
+  geom_roc(n.cuts = 0, labels = F)   + 
+  scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D"))
+#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
+
+# Identity plot
+p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + 
+  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
+  geom_abline(intercept = 0, slope = 1) + 
+  facet_grid(beta~n_replicates, scales = "free")
+```
+
+### Visualization
+
+```{r, message=FALSE, warning=FALSE,}
+p1
+p2
+```
diff --git a/results/v3/GLM/2023_02_04_dispersion.Rmd b/results/v3/GLM/2023_02_04_dispersion.Rmd
new file mode 100644
index 0000000..e4a355c
--- /dev/null
+++ b/results/v3/GLM/2023_02_04_dispersion.Rmd
@@ -0,0 +1,78 @@
+---
+title: "Dispersion"
+date: "2023-02-01"
+output: 
+  html_document:
+    toc: true 
+css: ../css/air.css
+---
+
+## Required
+
+
+```{r setup, message=FALSE, warning=FALSE, results="hide"}
+library(HTRsim)
+library(HTRfit)
+```
+
+
+## Simulation
+
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- SETUP
+n_G = 1000
+n_genoT = 3
+n_env = 2
+max_n_rep = 15
+sequencing_fact = 1
+thr = 2
+###########
+
+# -- Simulate counts
+mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, 
+    max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
+
+# -- count table & experimental design
+count_table = mock$countTable %>% as.data.frame()
+bioDesign = mock$design
+
+# -- dispersion value per gene & per condition
+mock$actualParameters$dispersion %>% head()
+
+```
+
+## Estimation
+
+### Launch MASS::glm.nb on simulated data
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- MASS:glm.nb
+dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign)
+l = HTRfit::launch.glm(dtf2fit)
+fitDtf =listFit2dtf(l)
+dispersion_estimated = fitDtf$dispersion
+```
+
+### Join Actual & Inference
+
+```{r}
+# -- one dispersion per gene
+dispersion_actual = mock$actualParameters$dispersion %>% 
+          rowMeans() %>% 
+          as.data.frame() %>% 
+          dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id")
+
+dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id")
+dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id")
+dispersion_comparison = dispersion_actual[dispersion_estimated]
+
+```
+
+### Identity plot 
+
+```{r}
+ggplot(dispersion_comparison) + 
+  geom_point(aes(x = dispersion.actual, y = 1/dispersion.estimate), alpha = 0.5) +
+  geom_abline(intercept = 0, slope = 1) + scale_y_log10() + scale_x_log10()
+```
diff --git a/results/v3/GLM/2023_02_05_sequencingDepth.Rmd b/results/v3/GLM/2023_02_05_sequencingDepth.Rmd
new file mode 100644
index 0000000..081a0d7
--- /dev/null
+++ b/results/v3/GLM/2023_02_05_sequencingDepth.Rmd
@@ -0,0 +1,129 @@
+---
+title: "Sequencing depth effect"
+date: "2023-02-01"
+output: 
+  html_document:
+    toc: true 
+css: ../css/air.css
+---
+
+## Required
+
+
+```{r setup, message=FALSE, warning=FALSE, results="hide"}
+library(HTRsim)
+library(HTRfit)
+library(plotROC)
+```
+
+
+## Simulation
+
+### Parameters
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- SETUP
+n_G = 1000
+n_genoT = 3
+n_env = 2
+max_n_rep = 3
+thr = 2
+```
+
+### Ground truth
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- Fit mvnorm
+dds.extraction = loadEmbedded_ObservedValues()
+fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
+# -- Ground truth
+beta.actual <- getBetaforSimulation(
+                      n_G,
+                      n_genoT,
+                      fit.mvnorm)
+# -- build input for simulation 
+model.matx <- getModelMatrix()
+log_qij <- getLog_qij(beta.actual, model.matx)
+```
+
+### Sequencing depth increased
+
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+
+# -- loop on
+sequencing_factor_list = c(0.01, 0.1, 1, 2)
+
+# -- sequencing factor effect
+for (sequencing_factor in sequencing_factor_list){
+    mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
+    sample_ids <- colnames(mu_ij)
+    gene_dispersion.vec <- dds.extraction$gene_dispersion
+    dispersion.matrix <- getGenesDispersions(n_G,
+                              sample_ids,
+                              dispersion.vec = gene_dispersion.vec)
+  # -- Design replicates 
+  designReplication.matx <- getReplicationDesign(
+                                max_n_rep,
+                                n_genoT,
+                                n_env)
+  
+  # -- build counts table
+  count_table <- getCountTable(mu_ij, dispersion.matrix,
+                    n_G, n_genoT,
+                    sample_id_list = sample_ids,
+                    replication.matx = designReplication.matx)
+  bioDesign <- summariseDesign(count_table)
+  
+  # -- number of reads simulate
+  reads_counts = count_table %>% 
+                      colSums() %>% 
+                      sum() %>% 
+                      format(., scientific = TRUE, digits=1)
+  # -- MASS:glm.nb
+  dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign)
+  l = HTRfit::launch.glm(dtf2fit)
+  fitDtf =listFit2dtf(l)
+  prediction = getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05)
+  expectation = getExpectation(beta.actual, threshold = thr)
+  # -- join actual & inference
+  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
+  # -- annotation
+  comparison = comparison  %>% 
+              mutate(reads_sequenced = reads_counts)
+  # save results
+  if (exists('df2evaluation')){
+    df2evaluation = rbind(df2evaluation, comparison)
+  }
+  else{
+    df2evaluation = comparison
+  }
+}
+```
+
+## Evaluation
+
+### Build graph for evaluation 
+
+```{r, message=FALSE, warning=FALSE,}
+# -- ROC curve
+df2evaluation$reads_sequenced <- factor(df2evaluation$reads_sequenced, levels = unique(df2evaluation$reads_sequenced))
+p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), 
+           aes(d = actual.label , m = padj, col = reads_sequenced)) + 
+  geom_roc(n.cuts = 0, labels = F)   + 
+  scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D"))
+#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
+
+# Identity plot
+p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + 
+  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
+  geom_abline(intercept = 0, slope = 1) + 
+  facet_grid(beta~reads_sequenced, scales = "free")
+```
+
+### Visualization
+
+```{r, message=FALSE, warning=FALSE,}
+p1
+p2
+```
diff --git a/results/v3/GLM/2023_02_06_replicates.Rmd b/results/v3/GLM/2023_02_06_replicates.Rmd
new file mode 100644
index 0000000..a95d5f5
--- /dev/null
+++ b/results/v3/GLM/2023_02_06_replicates.Rmd
@@ -0,0 +1,128 @@
+---
+title: "Sequencing depth effect"
+date: "2023-02-01"
+output: 
+  html_document:
+    toc: true 
+css: ../css/air.css
+---
+
+## Required
+
+
+```{r setup, message=FALSE, warning=FALSE, results="hide"}
+library(HTRsim)
+library(HTRfit)
+library(plotROC)
+```
+
+
+## Simulation
+
+### Parameters
+
+```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- SETUP
+n_G = 300
+n_genoT = 3
+n_env = 2
+sequencing_factor = 1
+thr = 2
+```
+
+### Ground truth
+
+```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- Fit mvnorm
+dds.extraction = loadEmbedded_ObservedValues()
+fit.mvnorm <- getListMvnormFit(dds.extraction$beta)
+# -- Ground truth
+beta.actual <- getBetaforSimulation(
+                      n_G,
+                      n_genoT,
+                      fit.mvnorm)
+# -- build input for simulation 
+model.matx <- getModelMatrix()
+log_qij <- getLog_qij(beta.actual, model.matx)
+mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor)
+sample_ids <- colnames(mu_ij)
+gene_dispersion.vec <- dds.extraction$gene_dispersion
+dispersion.matrix <- getGenesDispersions(n_G,
+                          sample_ids,
+                          dispersion.vec = gene_dispersion.vec)
+```
+
+### Number of replicates increased
+
+```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+# -- loop on
+n_rep_list = c(2, 5, 10, 25)
+
+# -- sequencing factor effect
+for (n_rep in n_rep_list){
+  # -- Design replicates 
+  designReplication.matx <- getReplicationDesign(
+                                n_rep,
+                                n_genoT,
+                                n_env)
+  
+  # -- build counts table
+  count_table <- getCountTable(mu_ij, dispersion.matrix,
+                    n_G, n_genoT,
+                    sample_id_list = sample_ids,
+                    replication.matx = designReplication.matx)
+  bioDesign <- summariseDesign(count_table)
+  
+  # -- number of reads simulate
+  reads_counts = count_table %>% 
+                      colSums() %>% 
+                      sum() %>% 
+                      format(., scientific = TRUE, digits=1)
+
+  # -- MASS:glm.nb
+  dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign)
+  l = HTRfit::launch.glm(dtf2fit)
+  fitDtf =listFit2dtf(l)
+  prediction = getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05)
+  expectation = getExpectation(beta.actual, threshold = thr)
+  # -- join actual & inference
+  comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction)
+  # -- annotation
+  comparison = comparison  %>% 
+              mutate(n_replicates = n_rep)
+  # save results
+  if (exists('df2evaluation')){
+    df2evaluation = rbind(df2evaluation, comparison)
+  }
+  else{
+    df2evaluation = comparison
+  }
+}
+```
+
+## Evaluation
+
+### Build graph for evaluation 
+
+```{r, message=FALSE, warning=FALSE,}
+# -- ROC curve
+df2evaluation$n_replicates <- factor(df2evaluation$n_replicates, levels = unique(df2evaluation$n_replicates))
+p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), 
+           aes(d = actual.label , m = padj, col = n_replicates)) + 
+  geom_roc(n.cuts = 0, labels = F)   + 
+  scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D"))
+#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
+
+# Identity plot
+p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + 
+  geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
+  geom_abline(intercept = 0, slope = 1) + 
+  facet_grid(beta~n_replicates, scales = "free")
+```
+
+### Visualization
+
+```{r, message=FALSE, warning=FALSE,}
+p1
+p2
+```
diff --git a/results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd b/results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd
new file mode 100644
index 0000000..a18ef59
--- /dev/null
+++ b/results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd
@@ -0,0 +1,55 @@
+---
+title: "Range of observed values"
+date: "2023-02-01"
+output: 
+  html_document:
+    toc: true 
+css: css/air.css
+---
+
+
+## Required
+
+
+```{r setup, message=FALSE, warning=FALSE, results="hide"}
+library(HTRsim)
+```
+
+## Step by step
+
+### Load counts table
+
+```{r}
+counTable = loadEmbedded_CounTable()
+counTable %>% head()
+```
+
+### Load design
+
+```{r}
+design = loadEmbedded_design()
+design %>% head
+```
+
+### DESEQ2
+
+```{r}
+dds <- fit_deseq(counTable, bioDesign = design)
+```
+
+### Beta & dispersion extraction
+
+```{r}
+dds.extraction <- extraction_embeddedDds(dds_obj = dds)
+dds.extraction$beta %>% head()
+dds.extraction$gene_dispersion %>% head()
+```
+
+
+## all-in
+
+```{r}
+observedValues = embedded_CounTable2observedValues()
+observedValues$beta %>% head()
+observedValues$gene_dispersion %>% head()
+```
diff --git a/results/v3/css/splendor.css b/results/v3/css/splendor.css
new file mode 100644
index 0000000..4121b51
--- /dev/null
+++ b/results/v3/css/splendor.css
@@ -0,0 +1,225 @@
+@media print {
+    *,
+    *:before,
+    *:after {
+      background: transparent !important;
+      color: #000 !important;
+      box-shadow: none !important;
+      text-shadow: none !important;
+    }
+  
+    a,
+    a:visited {
+      text-decoration: underline;
+    }
+  
+    a[href]:after {
+      content: " (" attr(href) ")";
+    }
+  
+    abbr[title]:after {
+      content: " (" attr(title) ")";
+    }
+  
+    a[href^="#"]:after,
+    a[href^="javascript:"]:after {
+      content: "";
+    }
+  
+    pre,
+    blockquote {
+      border: 1px solid #999;
+      page-break-inside: avoid;
+    }
+  
+    thead {
+      display: table-header-group;
+    }
+  
+    tr,
+    img {
+      page-break-inside: avoid;
+    }
+  
+    img {
+      max-width: 100% !important;
+    }
+  
+    p,
+    h2,
+    h3 {
+      orphans: 3;
+      widows: 3;
+    }
+  
+    h2,
+    h3 {
+      page-break-after: avoid;
+    }
+  }
+  
+  html {
+    font-size: 12px;
+  }
+  
+  @media screen and (min-width: 32rem) and (max-width: 48rem) {
+    html {
+      font-size: 15px;
+    }
+  }
+  
+  @media screen and (min-width: 48rem) {
+    html {
+      font-size: 16px;
+    }
+  }
+  
+  body {
+    line-height: 1.85;
+  }
+  
+  p,
+  .splendor-p {
+    font-size: 1rem;
+    margin-bottom: 1.3rem;
+  }
+  
+  h1,
+  .splendor-h1,
+  h2,
+  .splendor-h2,
+  h3,
+  .splendor-h3,
+  h4,
+  .splendor-h4 {
+    margin: 1.414rem 0 .5rem;
+    font-weight: inherit;
+    line-height: 1.42;
+  }
+  
+  h1,
+  .splendor-h1 {
+    margin-top: 0;
+    font-size: 3.998rem;
+  }
+  
+  h2,
+  .splendor-h2 {
+    font-size: 2.827rem;
+  }
+  
+  h3,
+  .splendor-h3 {
+    font-size: 1.999rem;
+  }
+  
+  h4,
+  .splendor-h4 {
+    font-size: 1.414rem;
+  }
+  
+  h5,
+  .splendor-h5 {
+    font-size: 1.121rem;
+  }
+  
+  h6,
+  .splendor-h6 {
+    font-size: .88rem;
+  }
+  
+  small,
+  .splendor-small {
+    font-size: .707em;
+  }
+  
+  /* https://github.com/mrmrs/fluidity */
+  
+  img,
+  canvas,
+  iframe,
+  video,
+  svg,
+  select,
+  textarea {
+    max-width: 100%;
+  }
+  
+  @import url(http://fonts.googleapis.com/css?family=Merriweather:300italic,300);
+  
+  html {
+    font-size: 18px;
+    max-width: 100%;
+  }
+  
+  body {
+    color: #444;
+    font-family: 'Merriweather', Georgia, serif;
+    margin: 0;
+    max-width: 100%;
+  }
+  
+  /* === A bit of a gross hack so we can have bleeding divs/blockquotes. */
+  
+  p,
+  *:not(div):not(img):not(body):not(html):not(li):not(blockquote):not(p) {
+    margin: 1rem auto 1rem;
+    max-width: 36rem;
+    padding: .25rem;
+  }
+  
+  div {
+    width: 100%;
+  }
+  
+  div img {
+    width: 100%;
+  }
+  
+  blockquote p {
+    font-size: 1.5rem;
+    font-style: italic;
+    margin: 1rem auto 1rem;
+    max-width: 48rem;
+  }
+  
+  li {
+    margin-left: 2rem;
+  }
+  
+  /* Counteract the specificity of the gross *:not() chain. */
+  
+  h1 {
+    padding: 4rem 0 !important;
+  }
+  
+  /*  === End gross hack */
+  
+  p {
+    color: #555;
+    height: auto;
+    line-height: 1.45;
+  }
+  
+  pre,
+  code {
+    font-family: Menlo, Monaco, "Courier New", monospace;
+  }
+  
+  pre {
+    background-color: #fafafa;
+    font-size: .8rem;
+    overflow-x: scroll;
+    padding: 1.125em;
+  }
+  
+  a,
+  a:visited {
+    color: #3498db;
+  }
+  
+  a:hover,
+  a:focus,
+  a:active {
+    color: #2980b9;
+  }
\ No newline at end of file
-- 
GitLab