From 871ffe49a79b7645000e21c5313c85468ad33f6c Mon Sep 17 00:00:00 2001 From: aduvermy <arnaud.duvermy@ens-lyon.fr> Date: Tue, 17 Jan 2023 18:42:13 +0100 Subject: [PATCH] update results --- results/v3/GLM/2023_02_04_dispersion.Rmd | 2 +- results/v3/GLM/2023_02_08_randomIntercept.Rmd | 141 ++++++++++++++++ .../v3/GLM_mixte/2023_01_17-tgc_backup.tsv | 11 ++ .../v3/GLM_mixte/2023_02_06_dispersion.Rmd | 78 +++++++++ .../2023_02_08_benchmarkingPackages.Rmd | 159 ++++++++++++++++++ .../GLM_mixte/2023_02_08_randomIntercept.Rmd | 132 +++++++++++++++ 6 files changed, 522 insertions(+), 1 deletion(-) create mode 100644 results/v3/GLM/2023_02_08_randomIntercept.Rmd create mode 100644 results/v3/GLM_mixte/2023_01_17-tgc_backup.tsv create mode 100644 results/v3/GLM_mixte/2023_02_06_dispersion.Rmd create mode 100644 results/v3/GLM_mixte/2023_02_08_benchmarkingPackages.Rmd create mode 100644 results/v3/GLM_mixte/2023_02_08_randomIntercept.Rmd diff --git a/results/v3/GLM/2023_02_04_dispersion.Rmd b/results/v3/GLM/2023_02_04_dispersion.Rmd index e4a355c..cecb263 100644 --- a/results/v3/GLM/2023_02_04_dispersion.Rmd +++ b/results/v3/GLM/2023_02_04_dispersion.Rmd @@ -21,7 +21,7 @@ library(HTRfit) ```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} # -- SETUP -n_G = 1000 +n_G = 300 n_genoT = 3 n_env = 2 max_n_rep = 15 diff --git a/results/v3/GLM/2023_02_08_randomIntercept.Rmd b/results/v3/GLM/2023_02_08_randomIntercept.Rmd new file mode 100644 index 0000000..9ef903c --- /dev/null +++ b/results/v3/GLM/2023_02_08_randomIntercept.Rmd @@ -0,0 +1,141 @@ +--- +title: "Random intercept" +date: "2023-02-08" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +library(HTRfit) +library(plotROC) +``` + + +## Simulation + +### Parameters + +```{r} +# -- SETUP +n_G = 2000 +n_genoT = 26 +n_env = 2 +max_n_rep = 3 +sequencing_fact = 2 +thr = 2 +########### +``` + +### Intercept fixed + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- Simulate counts +mock_fixed = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, + max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = T) + +# -- count table & experimental design +count_table_fixed = mock_fixed$countTable %>% as.data.frame() +bioDesign_fixed = mock_fixed$design + +# -- ground truth +beta.actual_fixed = mock_fixed$actualParam$beta +``` + +### Random intercept + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- Simulate counts +mock_random = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, + max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = F) + +# -- count table & experimental design +count_table_random = mock_random$countTable %>% as.data.frame() +bioDesign_random = mock_random$design + +# -- ground truth +beta.actual_random = mock_random$actualParam$beta + +``` + +## Estimation + +### Launch MASS::glm.nb on simulated data (fixed intercept) + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- MASS:glm.nb +dtf2fit_fixed = HTRfit::reshapeCounTable(count_table_fixed, bioDesign_fixed) +l_fixed = HTRfit::launch.glm(dtf2fit_fixed) +fitDtf_fixed =listFit2dtf(l_fixed) +``` + +### Launch MASS::glm.nb on simulated data (random intercept) + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- MASS:glm.nb +dtf2fit_random = HTRfit::reshapeCounTable(count_table_random, bioDesign_random) +l_random = HTRfit::launch.glm(dtf2fit_random) +fitDtf_random =listFit2dtf(l_random) +``` + +### Join Actual & Inference + +```{r} + +###### -- Fixed Intercept -- ######## +expectation_fixed = getExpectation(beta.actual_fixed, threshold = thr) +prediction_fixed = getPrediction(fitDtf_fixed$inference, threshold = thr, alphaRisk = 0.05) +# -- join actual & inference +comparison_fixed = getComparison(actual.dtf = expectation_fixed, inference.dtf = prediction_fixed) +##################################### + +###### -- Random Intercept -- ####### +expectation_random = getExpectation(beta.actual_random, threshold = thr) +prediction_random = getPrediction(fitDtf_random$inference, threshold = thr, alphaRisk = 0.05) +# -- join actual & inference +comparison_random = getComparison(actual.dtf = expectation_random, inference.dtf = prediction_random) +##################################### + +``` + +### Join random & fixed dataframe + +```{r} +# -- Annotations +comparison_random = comparison_random %>% dplyr::mutate(from = "Random intercept") +comparison_fixed = comparison_fixed %>% dplyr::mutate(from = "Fixed intercept") +# -- join +df2evaluation = rbind(comparison_fixed, comparison_random) +``` + + +## Evaluation + +### Build graph for evaluation + +```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- ROC curve +p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"), + aes(d = actual.label , m = padj, col = from)) + + geom_roc(n.cuts = 0, labels = F) #+ + scale_color_manual(values = c("#yellow","#BDBDBD")) +#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6) + +# Identity plot +p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) + + geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + + facet_grid(beta~from, scales = "free") +``` + +### Visualization + +```{r, message=FALSE, warning=FALSE} +p1 +p2 +``` diff --git a/results/v3/GLM_mixte/2023_01_17-tgc_backup.tsv b/results/v3/GLM_mixte/2023_01_17-tgc_backup.tsv new file mode 100644 index 0000000..3866054 --- /dev/null +++ b/results/v3/GLM_mixte/2023_01_17-tgc_backup.tsv @@ -0,0 +1,11 @@ +n_genotypes from N timeProcess sd se ci +20 glmmTMB 200 15.130752444267273 2.6080884089635843 0.1844196999912184 0.3636676342866279 +20 lme4 192 20.31276837239663 2.8278579904444543 0.20408307150164248 0.40254610129193336 +50 glmmTMB 200 20.799276530742645 5.514627455256488 0.38994304693293763 0.7689507432848744 +50 lme4 200 40.54833024740219 19.021427312622208 1.345018024060217 2.6523170946821053 +100 glmmTMB 200 25.420783281326294 4.2043931998868596 0.29729549424146057 0.5862537954460072 +100 lme4 200 37.98638904094696 4.275002040382448 0.3022882932340756 0.5960993780936279 +200 glmmTMB 200 39.15629369020462 5.9148858659293335 0.4182455905743096 0.8247621294374486 +200 lme4 200 68.16497600078583 12.042067627674905 0.8515027679035926 1.67912645561597 +400 glmmTMB 200 66.25662302970886 7.7012016308806155 0.5445571896480582 1.0738431138458309 +400 lme4 200 136.99138724803925 35.908038766273386 2.539081771074134 5.0069589148599665 diff --git a/results/v3/GLM_mixte/2023_02_06_dispersion.Rmd b/results/v3/GLM_mixte/2023_02_06_dispersion.Rmd new file mode 100644 index 0000000..909b205 --- /dev/null +++ b/results/v3/GLM_mixte/2023_02_06_dispersion.Rmd @@ -0,0 +1,78 @@ +--- +title: "Dispersion" +date: "2023-02-01" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +library(HTRfit) +``` + + +## Simulation + + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- SETUP +n_G = 10 +n_genoT = 200 +n_env = 2 +max_n_rep = 5 +sequencing_fact = 1 +thr = 2 +########### + +# -- Simulate counts +mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, + max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact) + +# -- count table & experimental design +count_table = mock$countTable %>% as.data.frame() +bioDesign = mock$design + +# -- dispersion value per gene & per condition +mock$actualParameters$dispersion[1:3, 1:3] + +``` + +## Estimation + +### Launch MASS::glm.nb on simulated data + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- MASS:glm.nb +dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign) +l = HTRfit::launch.glm_mixte(dtf2fit) +fitDtf =listFit2dtf(l) +dispersion_estimated = fitDtf$dispersion +``` + +### Join Actual & Inference + +```{r} +# -- one dispersion per gene +dispersion_actual = mock$actualParameters$dispersion %>% + rowMeans() %>% + as.data.frame() %>% + dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id") + +dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id") +dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id") +dispersion_comparison = dispersion_actual[dispersion_estimated] + +``` + +### Identity plot + +```{r} +ggplot(dispersion_comparison) + + geom_point(aes(x = dispersion.actual, y = 1/dispersion.estimate), alpha = 0.5) + + geom_abline(intercept = 0, slope = 1) + scale_y_log10() + scale_x_log10() +``` diff --git a/results/v3/GLM_mixte/2023_02_08_benchmarkingPackages.Rmd b/results/v3/GLM_mixte/2023_02_08_benchmarkingPackages.Rmd new file mode 100644 index 0000000..75a7a66 --- /dev/null +++ b/results/v3/GLM_mixte/2023_02_08_benchmarkingPackages.Rmd @@ -0,0 +1,159 @@ +--- +title: "Random intercept" +date: "2023-02-08" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +library(HTRfit) +#library(plotROC) +library(Rmisc) +``` + + +## Simulation + +### Parameters + +```{r} +# -- SETUP +n_G = 10 +n_genoT_list = c(20, 50, 100, 200, 400) +n_env = 2 +FixIntercept = F +max_n_rep = 5 +sequencing_fact = 1 +n_clus = 3 +thr = 2 +number_ofRepetition = 4 +########### +``` + +### get time to fit + +```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", eval=FALSE} +# ---------------- # +# /!\ Very very long +# Not exectuted in the notebook +# ------------------------- # + +#rm(dtf2evaluation) +for (n_genoT in n_genoT_list){ + for (i in 1:number_ofRepetition){ + # -- traceback + print(paste("Number of genotype:", n_genoT, sep = " ")) + print(paste("Iteration:", i, sep = " ")) + + # -- Simulate counts -- # + mock_random = rnaMock(n_genes = n_G, + n_genotypes = n_genoT, + n_environments = n_env, + max_n_replicates = max_n_rep, + sequencing_factor = sequencing_fact, + fixIntercept = FixIntercept, + n_clusters = n_clus) + + # -- count table & experimental design + count_table_random = mock_random$countTable %>% as.data.frame() + bioDesign_random = mock_random$design + # -- ground truth -- # + beta.actual_random = mock_random$actualParam$beta + + # --- Model fitting --- # + # -- lme4::glm.nb + start_time <- Sys.time() + dtf2fit_random = HTRfit::reshapeCounTable(count_table_random, + bioDesign_random) + l_random.lme4 = HTRfit::launch.glm_mixte(dtf2fit_random, + package = "lme4") + fitDtf_random.lme4 =listFit2dtf(l_random.lme4) + end_time <- Sys.time() + fitDtf_random.lme4$inference$timeProcess = difftime(end_time, + start_time, units = "secs") %>% as.numeric() + fitDtf_random.lme4$inference$from = 'lme4' + fitDtf_random.lme4$inference$n_genotypes = n_genoT + # -- glmmTMB::glmmTMB + start_time <- Sys.time() + l_random.tmb = HTRfit::launch.glm_mixte(dtf2fit_random, package = "glmmTMB") + fitDtf_random.tmb =listFit2dtf(l_random.tmb) + end_time <- Sys.time() + fitDtf_random.tmb$inference$timeProcess = difftime(end_time, + start_time, units = "secs") %>% as.numeric() + fitDtf_random.tmb$inference$from = 'glmmTMB' + fitDtf_random.tmb$inference$n_genotypes = n_genoT + # ------------------------- # + + # -- join LME4 & glmmTMB res -- # + fitDtf.inference = rbind(fitDtf_random.tmb$inference, + fitDtf_random.lme4$inference) + + expectation = getExpectation(beta.actual_random, + toEval = "glm_mixte" , threshold = thr) + prediction = getPrediction(fitDtf.inference, threshold = thr, alphaRisk = 0.05) + + # -- join actual & inference -- # + actual2join.dtf <- data.table::data.table(expectation, + key = c("gene_id", "term")) + inference2join.dtf <- data.table::data.table(prediction, + key = c("gene_id", "term")) + tmp <- actual2join.dtf[inference2join.dtf] + + # ---- Saving results ----- # + if (exists('dtf2evaluation')){ + dtf2evaluation = rbind(dtf2evaluation, tmp) + } + else{ + dtf2evaluation = tmp + } + } +} + + +``` + +## Evaluation + +### Preparing dataframe + +```{r} + +#tgc <- Rmisc::summarySE(dtf2evaluation, +# measurevar="timeProcess", groupvars=c("n_genotypes","from")) +# -- backup +#write_tsv(tgc, file = "2023_01_17-tgc_backup.tsv") + +tgc <- read_tsv('2023_01_17-tgc_backup.tsv', show_col_types = FALSE) +``` + +### Build graph for evaluation + +```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"} +#tgc$n_genotypes <- factor(tgc$n_genotypes) +#tgc$n_genotypes = as.numeric(as.character(tgc$n_genotypes)) +p= ggplot(tgc, aes(x = n_genotypes, y = timeProcess, colour = from)) + + geom_line(aes(x = n_genotypes, y= timeProcess+sd), + linetype = "dashed") + + geom_line(aes(x = n_genotypes, y= timeProcess-sd), + linetype = "dashed") + + geom_ribbon(aes(ymin = timeProcess-sd , + ymax= timeProcess+sd, fill = from), alpha=0.2) + + geom_line(aes(x = n_genotypes, y= timeProcess), + linetype = "solid") + + geom_point() + + scale_y_log10() + + scale_color_manual(values = c('#581845', '#FFC30F')) + + scale_fill_manual(values = c('#581845', '#FFC30F')) +``` + +### Visualization + +```{r, message=FALSE, warning=FALSE} +p +``` diff --git a/results/v3/GLM_mixte/2023_02_08_randomIntercept.Rmd b/results/v3/GLM_mixte/2023_02_08_randomIntercept.Rmd new file mode 100644 index 0000000..9241c4d --- /dev/null +++ b/results/v3/GLM_mixte/2023_02_08_randomIntercept.Rmd @@ -0,0 +1,132 @@ +--- +title: "Random intercept" +date: "2023-02-08" +output: + html_document: + toc: true +css: ../css/air.css +--- + +## Required + + +```{r setup, message=FALSE, warning=FALSE, results="hide"} +library(HTRsim) +library(HTRfit) +library(plotROC) +``` + + +## Simulation + +### Parameters + +```{r} +# -- SETUP +n_G = 10 +n_genoT = 200 +n_env = 2 +FixIntercept = F +max_n_rep = 5 +sequencing_fact = 1 +n_clus = 3 +thr = 2 +########### +``` + +### Random intercept + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- Simulate counts +mock_random = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env, + max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = FixIntercept, n_clusters = n_clus) + +# -- count table & experimental design +count_table_random = mock_random$countTable %>% as.data.frame() +bioDesign_random = mock_random$design + +# -- ground truth +beta.actual_random = mock_random$actualParam$beta + +``` + +## Estimation + +### Launch glmmTMB::glmmTMB on simulated data (random intercept) + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- glmmTMB::glmmTMB +dtf2fit_random = HTRfit::reshapeCounTable(count_table_random, bioDesign_random) +l_random.tmb = HTRfit::launch.glm_mixte(dtf2fit_random, package = "glmmTMB") +fitDtf_random.tmb =listFit2dtf(l_random.tmb) +``` + +### Launch lme4::glmer.nb on simulated data (random intercept) + +```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} +# -- lme4::glm.nb +l_random.lme4 = HTRfit::launch.glm_mixte(dtf2fit_random, package = "lme4") +fitDtf_random.lme4 =listFit2dtf(l_random.lme4) +``` + +### Join Actual & Inference + +```{r} +###### -- LME4 -- ####### +expectation_random = getExpectation(beta.actual_random,toEval = "glm_mixte" , threshold = thr) +prediction_random = getPrediction(fitDtf_random.lme4$inference, threshold = thr, alphaRisk = 0.05) +# -- join actual & inference +actual2join.dtf <- data.table::data.table(expectation_random, key = c("gene_id", "term")) +inference2join.dtf <- data.table::data.table(prediction_random, key = c("gene_id", "term")) +comparison.lme4 <- actual2join.dtf[inference2join.dtf] + + +###### -- GLMTMB -- ####### +expectation_random = getExpectation(beta.actual_random,toEval = "glm_mixte" , threshold = thr) +prediction_random = getPrediction(fitDtf_random.tmb$inference, threshold = thr, alphaRisk = 0.05) +# -- join actual & inference +actual2join.dtf <- data.table::data.table(expectation_random, key = c("gene_id", "term")) +inference2join.dtf <- data.table::data.table(prediction_random, key = c("gene_id", "term")) +comparison.tmb <- actual2join.dtf[inference2join.dtf] + +``` + +### Join lme4 & glmmTMB + +```{r} +###### -- Annotations +comparison.tmb = comparison.tmb %>% dplyr::mutate(from = "glmmTMB") +comparison.lme4 = comparison.lme4 %>% dplyr::mutate(from = "lme4") +# -- join +comparison.dtf = rbind(comparison.lme4, comparison.tmb) +``` +## Evaluation + +### Build graph for evaluation + +```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"} + +# -- preparing dtf +comparison.dtf$term = factor(comparison.dtf$term, levels = c("(Intercept)", "environmentE1","cor__(Intercept).environmentE1", "sd__(Intercept)", "sd__environmentE1")) +comparison.dtf = comparison.dtf %>% + mutate(actual.value = + if_else(str_detect(term, "cor_"), + actual.value, actual.value*log(2) )) + +# -- Identity plot +p = ggplot(comparison.dtf) + + geom_point(aes(x = actual.value, + y = estimate, col = from), alpha = 0.6, size = 3) + + geom_abline(intercept = 0, slope = 1) + + facet_wrap(~term, scales = "free") + + theme(strip.text.x = element_text(size = 7)) + + scale_color_manual(values = c('#581845', '#FFC30F')) + +#ggsave("../img/graph/poc_glmm2_1000.png", p,) +``` + +### Visualization + +```{r, message=FALSE, warning=FALSE} +p +``` -- GitLab