From eefea2283877bee3e53f816f930de771b91c8735 Mon Sep 17 00:00:00 2001 From: aduvermy <arnaud.duvermy@ens-lyon.fr> Date: Sat, 14 Jan 2023 22:05:23 +0100 Subject: [PATCH] Add res v3 --- results/v3/DESEQ2/2022_02_03_dispersion.Rmd | 75 ++++++ .../2023_02_01_postInferenceSelection.Rmd | 120 ++++++++++ results/v3/DESEQ2/2023_02_03_FDR.Rmd | 117 +++++++++ .../v3/DESEQ2/2023_02_05_sequencingDepth.Rmd | 128 ++++++++++ results/v3/DESEQ2/2023_02_06_replicates.Rmd | 126 ++++++++++ results/v3/GLM/2023_02_04_dispersion.Rmd | 78 ++++++ results/v3/GLM/2023_02_05_sequencingDepth.Rmd | 129 ++++++++++ results/v3/GLM/2023_02_06_replicates.Rmd | 128 ++++++++++ .../Tuto/2023_02_03_rangeObservedValues.Rmd | 55 +++++ results/v3/css/splendor.css | 225 ++++++++++++++++++ 10 files changed, 1181 insertions(+) create mode 100644 results/v3/DESEQ2/2022_02_03_dispersion.Rmd create mode 100644 results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd create mode 100644 results/v3/DESEQ2/2023_02_03_FDR.Rmd create mode 100644 results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd create mode 100644 results/v3/DESEQ2/2023_02_06_replicates.Rmd create mode 100644 results/v3/GLM/2023_02_04_dispersion.Rmd create mode 100644 results/v3/GLM/2023_02_05_sequencingDepth.Rmd create mode 100644 results/v3/GLM/2023_02_06_replicates.Rmd create mode 100644 results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd create mode 100644 results/v3/css/splendor.css diff --git a/results/v3/DESEQ2/2022_02_03_dispersion.Rmd b/results/v3/DESEQ2/2022_02_03_dispersion.Rmd new file mode 100644 index 0000000..346798b --- /dev/null +++ b/results/v3/DESEQ2/2022_02_03_dispersion.Rmd @@ -0,0 +1,75 @@ +--- +title: "Dispersion" +date: "2023-02-01" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +``` + + +## Simulation + + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- SETUP +n_G = 1000 +n_genoT = 3 +n_env = 2 +max_n_rep = 15 +sequencing_fact = 1 +thr = 2 +########### + +# -- Simulate counts +mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, + max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact) + +# -- count table & experimental design +count_table = mock$countTable %>% as.data.frame() +bioDesign = mock$design + +# -- dispersion value per gene & per condition +mock$actualParameters$dispersion %>% head() + +``` + +## Estimation + +### Launch DESEQ2 on simulated data + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- DESEQ2 +dds_simu = HTRsim::fit_deseq(count_table, bioDesign) +dispersion_estimated = getDispersionFromDDS(dds_simu) +``` + +### Join Actual & Inference + +```{r} +# -- one dispersion per gene +dispersion_actual = mock$actualParameters$dispersion %>% + rowMeans() %>% + as.data.frame() %>% + dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id") + +dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id") +dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id") +dispersion_comparison = dispersion_actual[dispersion_estimated] + +``` + +### Identity plot + +```{r} +ggplot(dispersion_comparison) + + geom_point(aes(x = dispersion.actual, y = dispersion.estimate), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + scale_y_log10() + scale_x_log10() +``` diff --git a/results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd b/results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd new file mode 100644 index 0000000..58b4549 --- /dev/null +++ b/results/v3/DESEQ2/2023_02_01_postInferenceSelection.Rmd @@ -0,0 +1,120 @@ +--- +title: "Post Inference Selection" +date: "2023-02-01" +output: + html_document: + toc: true +css: ../css/air.css +--- + + +## Required + + +```{r setup_2, message=FALSE, warning=FALSE, results="hide"} +library(HTRfit) +library(HTRsim) +library(plotROC) +library(gridExtra) +``` + + +## Simulation + + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- SETUP +n_G = 100 +n_genoT = 10 +n_env = 2 +max_n_rep = 3 +sequencing_fact = 1 +thr = 2 +########### + +# -- Simulate counts +mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, + max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact) +count_table = mock$countTable %>% as.data.frame() +bioDesign = mock$design +``` + +## Prediction + +### Launch DESEQ2 on simulated data + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- DESEQ2 +dds_simu = HTRsim::fit_deseq(count_table, bioDesign) +deseqFitdtf = getCoefficientsFromDds(dds_simu) + +``` + +### p(|beta| > 0) > 0.95 & |beta| > T => DE + +```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- prediction with post inference selection method +prediction = getPrediction(deseqFitdtf, threshold = thr, + alphaRisk = 0.05, postInferenceSelection = T) +expectation = getExpectation(mock$actualParameters$beta, threshold = thr) +dtf.comp.annot.PIS = getComparison(actual.dtf = expectation, + inference.dtf = prediction ) +``` + +### p(|beta| > T) > 0.95 => DE + +```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- prediction +prediction = getPrediction(deseqFitdtf, threshold = thr, + alphaRisk = 0.05, postInferenceSelection = F) +expectation = getExpectation(mock$actualParameters$beta, threshold = thr) +dtf.comp.annot= getComparison(actual.dtf = expectation, + inference.dtf = prediction ) +``` + +## Evaluation + + +### Build graph for evaluation + +```{r message=FALSE, warning=FALSE, echo = T, results = 'hide' ,include=TRUE} +# -- Venn Diag Post inference selection +p1 = getVennDiagramm(dtf.comp.annot.PIS, + title = "p(|beta| > 0) > 0.95 & |beta| > T") +# -- Venn Diag +p2 = getVennDiagramm(comparisonDTF = dtf.comp.annot, + title = "p(|beta| > T) > 0.95") + +# -- ROC curves post inference selection +dtf1 = dtf.comp.annot.PIS %>% + mutate(method = "p(|beta| > 0) > 0.95 & |beta| > T") +# -- ROC curves +dtf2 = dtf.comp.annot %>% + mutate(method = "p(|beta| > T) > 0.95") +dtf = rbind(dtf1, dtf2) +dtf$annotation <- factor(dtf$annotation, levels = c("TRUE", "FALSE")) + +# -- Identity plot +p3 = ggplot(dtf %>% filter(beta != "(Intercept)")) + + geom_point(aes(x = actual.value, y = estimate, col = padj), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + + geom_vline(xintercept = c(-thr, thr), linetype = "dotted") + + facet_grid(method~beta, scales = "free") #+ + #scale_color_brewer(palette = "Blues") + +p4 = ggplot(dtf %>% filter(beta != "(Intercept)"), + aes(d = actual.label , m = padj, color = method)) + + geom_roc(n.cuts = 0, labels = F) +``` + +### Conclusion + +```{r message=FALSE, warning=FALSE, include=TRUE} +# -- Venn diagram +grid.arrange(p1, p2, nrow = 1, ncol = 2) +# -- Identity plot +p3 +# -- ROC curves +p4 +``` + diff --git a/results/v3/DESEQ2/2023_02_03_FDR.Rmd b/results/v3/DESEQ2/2023_02_03_FDR.Rmd new file mode 100644 index 0000000..fa564ca --- /dev/null +++ b/results/v3/DESEQ2/2023_02_03_FDR.Rmd @@ -0,0 +1,117 @@ +--- +title: "False discovery rate" +date: "2023-02-01" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRfit) +library(HTRsim) +library(plotROC) +library(gridExtra) +``` + + +## Simulation + + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- SETUP +n_G = 100 +n_genoT = 5 +n_env = 2 +max_n_rep = 3 +sequencing_fact = 1 +thr = 2 +########### + +# -- Simulate counts +mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, + max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact) +count_table = mock$countTable %>% as.data.frame() +bioDesign = mock$design +mock$actualParameters$dispersion + +``` + +## Prediction + +### Launch DESEQ2 on simulated data + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- DESEQ2 +dds_simu = HTRsim::fit_deseq(count_table, bioDesign) +deseqFitdtf = getCoefficientsFromDds(dds_simu) +``` + +### pvalue adjusted + +```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- prediction with post inference selection method +prediction = getPrediction(deseqFitdtf, threshold = thr, + alphaRisk = 0.05, pvalCorrection = T) +expectation = getExpectation(mock$actualParameters$beta, threshold = thr) +dtf.comp.annot.adjusted = getComparison(actual.dtf = expectation, + inference.dtf = prediction ) +``` +### pvalue adjusted + +```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- prediction +prediction = getPrediction(deseqFitdtf, threshold = thr, + alphaRisk = 0.05, pvalCorrection = F) +dtf.comp.annot.unadjusted = getComparison(actual.dtf = expectation, + inference.dtf = prediction ) +``` + + + +## Evaluation + +### Build graph for evaluation + +```{r message=FALSE, warning=FALSE, echo = T, results = 'hide' ,include=TRUE} +# -- Venn Diag adjusted pvalue +p1 = getVennDiagramm(dtf.comp.annot.adjusted, + title = "padjusted") +# -- Venn Diag unadjusted pvalue +p2 = getVennDiagramm(comparisonDTF = dtf.comp.annot.unadjusted, + title = "pvalue") + +# -- ROC curves post inference selection +dtf1 = dtf.comp.annot.adjusted %>% + mutate(method = "padjusted") +# -- ROC curves +dtf2 = dtf.comp.annot.unadjusted %>% + mutate(method = "pvalue") +dtf = rbind(dtf1, dtf2) +dtf$annotation <- factor(dtf$annotation, levels = c("TRUE", "FALSE")) + +# -- Identity plot +p3 = ggplot(dtf %>% filter(beta != "(Intercept)")) + + geom_point(aes(x = actual.value, y = estimate, col = padj), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + + geom_vline(xintercept = c(-thr, thr), linetype = "dotted") + + facet_grid(method~beta, scales = "free") #+ + #scale_color_brewer(palette = "Set2") +p4 = ggplot(dtf %>% filter(beta != "(Intercept)"), + aes(d = actual.label , m = padj, color = method)) + + geom_roc(n.cuts = 0, labels = F) +``` + +### Visualization + +```{r message=FALSE, warning=FALSE, include=TRUE} +# -- Venn diagram +grid.arrange(p1, p2, nrow = 1, ncol = 2) +# -- Identity plot +p3 +# -- ROC curves +p4 +``` diff --git a/results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd b/results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd new file mode 100644 index 0000000..c6f9f75 --- /dev/null +++ b/results/v3/DESEQ2/2023_02_05_sequencingDepth.Rmd @@ -0,0 +1,128 @@ +--- +title: "Sequencing depth effect" +date: "2023-02-01" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +library(plotROC) +``` + + +## Simulation + +### Parameters + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- SETUP +n_G = 1000 +n_genoT = 3 +n_env = 2 +max_n_rep = 3 +thr = 2 +``` + +### Ground truth + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- Fit mvnorm +dds.extraction = loadEmbedded_ObservedValues() +fit.mvnorm <- getListMvnormFit(dds.extraction$beta) +# -- Ground truth +beta.actual <- getBetaforSimulation( + n_G, + n_genoT, + fit.mvnorm) +# -- build input for simulation +model.matx <- getModelMatrix() +log_qij <- getLog_qij(beta.actual, model.matx) +``` + +### Sequencing depth increased + + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} + +# -- loop on +sequencing_factor_list = c(0.01, 0.1, 1, 2) + +# -- sequencing factor effect +for (sequencing_factor in sequencing_factor_list){ + mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor) + sample_ids <- colnames(mu_ij) + gene_dispersion.vec <- dds.extraction$gene_dispersion + dispersion.matrix <- getGenesDispersions(n_G, + sample_ids, + dispersion.vec = gene_dispersion.vec) + # -- Design replicates + designReplication.matx <- getReplicationDesign( + max_n_rep, + n_genoT, + n_env) + + # -- build counts table + count_table <- getCountTable(mu_ij, dispersion.matrix, + n_G, n_genoT, + sample_id_list = sample_ids, + replication.matx = designReplication.matx) + bioDesign <- summariseDesign(count_table) + + # -- number of reads simulate + reads_counts = count_table %>% + colSums() %>% + sum() %>% + format(., scientific = TRUE, digits=1) + + # -- DESEQ2 on simulated data + dds_simu = HTRsim::fit_deseq(count_table, bioDesign) + deseqFitdtf = getCoefficientsFromDds(dds_simu) + prediction = getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05) + expectation = getExpectation(beta.actual, threshold = thr) + # -- join actual & inference + comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction) + # -- annotation + comparison = comparison %>% + mutate(reads_sequenced = reads_counts) + # save results + if (exists('df2evaluation')){ + df2evaluation = rbind(df2evaluation, comparison) + } + else{ + df2evaluation = comparison + } +} +``` + +## Evaluation + +### Build graph for evaluation + +```{r, message=FALSE, warning=FALSE,} +# -- ROC curve +df2evaluation$reads_sequenced <- factor(df2evaluation$reads_sequenced, levels = unique(df2evaluation$reads_sequenced)) +p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), + aes(d = actual.label , m = padj, col = reads_sequenced)) + + geom_roc(n.cuts = 0, labels = F) + + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D")) +#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6) + +# Identity plot +p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + + geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + + facet_grid(beta~reads_sequenced, scales = "free") +``` + +### Visualization + +```{r, message=FALSE, warning=FALSE,} +p1 +p2 +``` diff --git a/results/v3/DESEQ2/2023_02_06_replicates.Rmd b/results/v3/DESEQ2/2023_02_06_replicates.Rmd new file mode 100644 index 0000000..81c7966 --- /dev/null +++ b/results/v3/DESEQ2/2023_02_06_replicates.Rmd @@ -0,0 +1,126 @@ +--- +title: "Sequencing depth effect" +date: "2023-02-01" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +library(plotROC) +``` + + +## Simulation + +### Parameters + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- SETUP +n_G = 300 +n_genoT = 3 +n_env = 2 +sequencing_factor = 1 +thr = 2 +``` + +### Ground truth + +```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- Fit mvnorm +dds.extraction = loadEmbedded_ObservedValues() +fit.mvnorm <- getListMvnormFit(dds.extraction$beta) +# -- Ground truth +beta.actual <- getBetaforSimulation( + n_G, + n_genoT, + fit.mvnorm) +# -- build input for simulation +model.matx <- getModelMatrix() +log_qij <- getLog_qij(beta.actual, model.matx) +mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor) +sample_ids <- colnames(mu_ij) +gene_dispersion.vec <- dds.extraction$gene_dispersion +dispersion.matrix <- getGenesDispersions(n_G, + sample_ids, + dispersion.vec = gene_dispersion.vec) +``` + +### Number of replicates increased + +```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- loop on +n_rep_list = c(2, 5, 10, 25) + +# -- sequencing factor effect +for (n_rep in n_rep_list){ + # -- Design replicates + designReplication.matx <- getReplicationDesign( + n_rep, + n_genoT, + n_env) + + # -- build counts table + count_table <- getCountTable(mu_ij, dispersion.matrix, + n_G, n_genoT, + sample_id_list = sample_ids, + replication.matx = designReplication.matx) + bioDesign <- summariseDesign(count_table) + + # -- number of reads simulate + reads_counts = count_table %>% + colSums() %>% + sum() %>% + format(., scientific = TRUE, digits=1) + + # -- DESEQ2 on simulated data + dds_simu = HTRsim::fit_deseq(count_table, bioDesign) + deseqFitdtf = getCoefficientsFromDds(dds_simu) + prediction = getPrediction(deseqFitdtf, threshold = thr, alphaRisk = 0.05) + expectation = getExpectation(beta.actual, threshold = thr) + # -- join actual & inference + comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction) + # -- annotation + comparison = comparison %>% + mutate(n_replicates = n_rep) + # save results + if (exists('df2evaluation')){ + df2evaluation = rbind(df2evaluation, comparison) + } + else{ + df2evaluation = comparison + } +} +``` + +## Evaluation + +### Build graph for evaluation + +```{r, message=FALSE, warning=FALSE,} +# -- ROC curve +df2evaluation$n_replicates <- factor(df2evaluation$n_replicates, levels = unique(df2evaluation$n_replicates)) +p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), + aes(d = actual.label , m = padj, col = n_replicates)) + + geom_roc(n.cuts = 0, labels = F) + + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D")) +#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6) + +# Identity plot +p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + + geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + + facet_grid(beta~n_replicates, scales = "free") +``` + +### Visualization + +```{r, message=FALSE, warning=FALSE,} +p1 +p2 +``` diff --git a/results/v3/GLM/2023_02_04_dispersion.Rmd b/results/v3/GLM/2023_02_04_dispersion.Rmd new file mode 100644 index 0000000..e4a355c --- /dev/null +++ b/results/v3/GLM/2023_02_04_dispersion.Rmd @@ -0,0 +1,78 @@ +--- +title: "Dispersion" +date: "2023-02-01" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +library(HTRfit) +``` + + +## Simulation + + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- SETUP +n_G = 1000 +n_genoT = 3 +n_env = 2 +max_n_rep = 15 +sequencing_fact = 1 +thr = 2 +########### + +# -- Simulate counts +mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, + max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact) + +# -- count table & experimental design +count_table = mock$countTable %>% as.data.frame() +bioDesign = mock$design + +# -- dispersion value per gene & per condition +mock$actualParameters$dispersion %>% head() + +``` + +## Estimation + +### Launch MASS::glm.nb on simulated data + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- MASS:glm.nb +dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign) +l = HTRfit::launch.glm(dtf2fit) +fitDtf =listFit2dtf(l) +dispersion_estimated = fitDtf$dispersion +``` + +### Join Actual & Inference + +```{r} +# -- one dispersion per gene +dispersion_actual = mock$actualParameters$dispersion %>% + rowMeans() %>% + as.data.frame() %>% + dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id") + +dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id") +dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id") +dispersion_comparison = dispersion_actual[dispersion_estimated] + +``` + +### Identity plot + +```{r} +ggplot(dispersion_comparison) + + geom_point(aes(x = dispersion.actual, y = 1/dispersion.estimate), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + scale_y_log10() + scale_x_log10() +``` diff --git a/results/v3/GLM/2023_02_05_sequencingDepth.Rmd b/results/v3/GLM/2023_02_05_sequencingDepth.Rmd new file mode 100644 index 0000000..081a0d7 --- /dev/null +++ b/results/v3/GLM/2023_02_05_sequencingDepth.Rmd @@ -0,0 +1,129 @@ +--- +title: "Sequencing depth effect" +date: "2023-02-01" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +library(HTRfit) +library(plotROC) +``` + + +## Simulation + +### Parameters + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- SETUP +n_G = 1000 +n_genoT = 3 +n_env = 2 +max_n_rep = 3 +thr = 2 +``` + +### Ground truth + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- Fit mvnorm +dds.extraction = loadEmbedded_ObservedValues() +fit.mvnorm <- getListMvnormFit(dds.extraction$beta) +# -- Ground truth +beta.actual <- getBetaforSimulation( + n_G, + n_genoT, + fit.mvnorm) +# -- build input for simulation +model.matx <- getModelMatrix() +log_qij <- getLog_qij(beta.actual, model.matx) +``` + +### Sequencing depth increased + + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} + +# -- loop on +sequencing_factor_list = c(0.01, 0.1, 1, 2) + +# -- sequencing factor effect +for (sequencing_factor in sequencing_factor_list){ + mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor) + sample_ids <- colnames(mu_ij) + gene_dispersion.vec <- dds.extraction$gene_dispersion + dispersion.matrix <- getGenesDispersions(n_G, + sample_ids, + dispersion.vec = gene_dispersion.vec) + # -- Design replicates + designReplication.matx <- getReplicationDesign( + max_n_rep, + n_genoT, + n_env) + + # -- build counts table + count_table <- getCountTable(mu_ij, dispersion.matrix, + n_G, n_genoT, + sample_id_list = sample_ids, + replication.matx = designReplication.matx) + bioDesign <- summariseDesign(count_table) + + # -- number of reads simulate + reads_counts = count_table %>% + colSums() %>% + sum() %>% + format(., scientific = TRUE, digits=1) + # -- MASS:glm.nb + dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign) + l = HTRfit::launch.glm(dtf2fit) + fitDtf =listFit2dtf(l) + prediction = getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05) + expectation = getExpectation(beta.actual, threshold = thr) + # -- join actual & inference + comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction) + # -- annotation + comparison = comparison %>% + mutate(reads_sequenced = reads_counts) + # save results + if (exists('df2evaluation')){ + df2evaluation = rbind(df2evaluation, comparison) + } + else{ + df2evaluation = comparison + } +} +``` + +## Evaluation + +### Build graph for evaluation + +```{r, message=FALSE, warning=FALSE,} +# -- ROC curve +df2evaluation$reads_sequenced <- factor(df2evaluation$reads_sequenced, levels = unique(df2evaluation$reads_sequenced)) +p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), + aes(d = actual.label , m = padj, col = reads_sequenced)) + + geom_roc(n.cuts = 0, labels = F) + + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D")) +#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6) + +# Identity plot +p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + + geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + + facet_grid(beta~reads_sequenced, scales = "free") +``` + +### Visualization + +```{r, message=FALSE, warning=FALSE,} +p1 +p2 +``` diff --git a/results/v3/GLM/2023_02_06_replicates.Rmd b/results/v3/GLM/2023_02_06_replicates.Rmd new file mode 100644 index 0000000..a95d5f5 --- /dev/null +++ b/results/v3/GLM/2023_02_06_replicates.Rmd @@ -0,0 +1,128 @@ +--- +title: "Sequencing depth effect" +date: "2023-02-01" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +library(HTRfit) +library(plotROC) +``` + + +## Simulation + +### Parameters + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- SETUP +n_G = 300 +n_genoT = 3 +n_env = 2 +sequencing_factor = 1 +thr = 2 +``` + +### Ground truth + +```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- Fit mvnorm +dds.extraction = loadEmbedded_ObservedValues() +fit.mvnorm <- getListMvnormFit(dds.extraction$beta) +# -- Ground truth +beta.actual <- getBetaforSimulation( + n_G, + n_genoT, + fit.mvnorm) +# -- build input for simulation +model.matx <- getModelMatrix() +log_qij <- getLog_qij(beta.actual, model.matx) +mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor) +sample_ids <- colnames(mu_ij) +gene_dispersion.vec <- dds.extraction$gene_dispersion +dispersion.matrix <- getGenesDispersions(n_G, + sample_ids, + dispersion.vec = gene_dispersion.vec) +``` + +### Number of replicates increased + +```{r , message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- loop on +n_rep_list = c(2, 5, 10, 25) + +# -- sequencing factor effect +for (n_rep in n_rep_list){ + # -- Design replicates + designReplication.matx <- getReplicationDesign( + n_rep, + n_genoT, + n_env) + + # -- build counts table + count_table <- getCountTable(mu_ij, dispersion.matrix, + n_G, n_genoT, + sample_id_list = sample_ids, + replication.matx = designReplication.matx) + bioDesign <- summariseDesign(count_table) + + # -- number of reads simulate + reads_counts = count_table %>% + colSums() %>% + sum() %>% + format(., scientific = TRUE, digits=1) + + # -- MASS:glm.nb + dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign) + l = HTRfit::launch.glm(dtf2fit) + fitDtf =listFit2dtf(l) + prediction = getPrediction(fitDtf$inference, threshold = thr, alphaRisk = 0.05) + expectation = getExpectation(beta.actual, threshold = thr) + # -- join actual & inference + comparison = getComparison(actual.dtf = expectation, inference.dtf = prediction) + # -- annotation + comparison = comparison %>% + mutate(n_replicates = n_rep) + # save results + if (exists('df2evaluation')){ + df2evaluation = rbind(df2evaluation, comparison) + } + else{ + df2evaluation = comparison + } +} +``` + +## Evaluation + +### Build graph for evaluation + +```{r, message=FALSE, warning=FALSE,} +# -- ROC curve +df2evaluation$n_replicates <- factor(df2evaluation$n_replicates, levels = unique(df2evaluation$n_replicates)) +p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), + aes(d = actual.label , m = padj, col = n_replicates)) + + geom_roc(n.cuts = 0, labels = F) + + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D")) +#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6) + +# Identity plot +p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + + geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + + facet_grid(beta~n_replicates, scales = "free") +``` + +### Visualization + +```{r, message=FALSE, warning=FALSE,} +p1 +p2 +``` diff --git a/results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd b/results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd new file mode 100644 index 0000000..a18ef59 --- /dev/null +++ b/results/v3/Tuto/2023_02_03_rangeObservedValues.Rmd @@ -0,0 +1,55 @@ +--- +title: "Range of observed values" +date: "2023-02-01" +output: + html_document: + toc: true +css: css/air.css +--- + + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +``` + +## Step by step + +### Load counts table + +```{r} +counTable = loadEmbedded_CounTable() +counTable %>% head() +``` + +### Load design + +```{r} +design = loadEmbedded_design() +design %>% head +``` + +### DESEQ2 + +```{r} +dds <- fit_deseq(counTable, bioDesign = design) +``` + +### Beta & dispersion extraction + +```{r} +dds.extraction <- extraction_embeddedDds(dds_obj = dds) +dds.extraction$beta %>% head() +dds.extraction$gene_dispersion %>% head() +``` + + +## all-in + +```{r} +observedValues = embedded_CounTable2observedValues() +observedValues$beta %>% head() +observedValues$gene_dispersion %>% head() +``` diff --git a/results/v3/css/splendor.css b/results/v3/css/splendor.css new file mode 100644 index 0000000..4121b51 --- /dev/null +++ b/results/v3/css/splendor.css @@ -0,0 +1,225 @@ +@media print { + *, + *:before, + *:after { + background: transparent !important; + color: #000 !important; + box-shadow: none !important; + text-shadow: none !important; + } + + a, + a:visited { + text-decoration: underline; + } + + a[href]:after { + content: " (" attr(href) ")"; + } + + abbr[title]:after { + content: " (" attr(title) ")"; + } + + a[href^="#"]:after, + a[href^="javascript:"]:after { + content: ""; + } + + pre, + blockquote { + border: 1px solid #999; + page-break-inside: avoid; + } + + thead { + display: table-header-group; + } + + tr, + img { + page-break-inside: avoid; + } + + img { + max-width: 100% !important; + } + + p, + h2, + h3 { + orphans: 3; + widows: 3; + } + + h2, + h3 { + page-break-after: avoid; + } + } + + html { + font-size: 12px; + } + + @media screen and (min-width: 32rem) and (max-width: 48rem) { + html { + font-size: 15px; + } + } + + @media screen and (min-width: 48rem) { + html { + font-size: 16px; + } + } + + body { + line-height: 1.85; + } + + p, + .splendor-p { + font-size: 1rem; + margin-bottom: 1.3rem; + } + + h1, + .splendor-h1, + h2, + .splendor-h2, + h3, + .splendor-h3, + h4, + .splendor-h4 { + margin: 1.414rem 0 .5rem; + font-weight: inherit; + line-height: 1.42; + } + + h1, + .splendor-h1 { + margin-top: 0; + font-size: 3.998rem; + } + + h2, + .splendor-h2 { + font-size: 2.827rem; + } + + h3, + .splendor-h3 { + font-size: 1.999rem; + } + + h4, + .splendor-h4 { + font-size: 1.414rem; + } + + h5, + .splendor-h5 { + font-size: 1.121rem; + } + + h6, + .splendor-h6 { + font-size: .88rem; + } + + small, + .splendor-small { + font-size: .707em; + } + + /* https://github.com/mrmrs/fluidity */ + + img, + canvas, + iframe, + video, + svg, + select, + textarea { + max-width: 100%; + } + + @import url(http://fonts.googleapis.com/css?family=Merriweather:300italic,300); + + html { + font-size: 18px; + max-width: 100%; + } + + body { + color: #444; + font-family: 'Merriweather', Georgia, serif; + margin: 0; + max-width: 100%; + } + + /* === A bit of a gross hack so we can have bleeding divs/blockquotes. */ + + p, + *:not(div):not(img):not(body):not(html):not(li):not(blockquote):not(p) { + margin: 1rem auto 1rem; + max-width: 36rem; + padding: .25rem; + } + + div { + width: 100%; + } + + div img { + width: 100%; + } + + blockquote p { + font-size: 1.5rem; + font-style: italic; + margin: 1rem auto 1rem; + max-width: 48rem; + } + + li { + margin-left: 2rem; + } + + /* Counteract the specificity of the gross *:not() chain. */ + + h1 { + padding: 4rem 0 !important; + } + + /* === End gross hack */ + + p { + color: #555; + height: auto; + line-height: 1.45; + } + + pre, + code { + font-family: Menlo, Monaco, "Courier New", monospace; + } + + pre { + background-color: #fafafa; + font-size: .8rem; + overflow-x: scroll; + padding: 1.125em; + } + + a, + a:visited { + color: #3498db; + } + + a:hover, + a:focus, + a:active { + color: #2980b9; + } \ No newline at end of file -- GitLab