Skip to content
Snippets Groups Projects
Commit 871ffe49 authored by Arnaud Duvermy's avatar Arnaud Duvermy
Browse files

update results

parent c3879a01
No related branches found
No related tags found
No related merge requests found
...@@ -21,7 +21,7 @@ library(HTRfit) ...@@ -21,7 +21,7 @@ library(HTRfit)
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"} ```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- SETUP # -- SETUP
n_G = 1000 n_G = 300
n_genoT = 3 n_genoT = 3
n_env = 2 n_env = 2
max_n_rep = 15 max_n_rep = 15
......
---
title: "Random intercept"
date: "2023-02-08"
output:
html_document:
toc: true
css: ../css/air.css
---
## Required
```{r setup, message=FALSE, warning=FALSE, results="hide"}
library(HTRsim)
library(HTRfit)
library(plotROC)
```
## Simulation
### Parameters
```{r}
# -- SETUP
n_G = 2000
n_genoT = 26
n_env = 2
max_n_rep = 3
sequencing_fact = 2
thr = 2
###########
```
### Intercept fixed
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- Simulate counts
mock_fixed = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env,
max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = T)
# -- count table & experimental design
count_table_fixed = mock_fixed$countTable %>% as.data.frame()
bioDesign_fixed = mock_fixed$design
# -- ground truth
beta.actual_fixed = mock_fixed$actualParam$beta
```
### Random intercept
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- Simulate counts
mock_random = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env,
max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = F)
# -- count table & experimental design
count_table_random = mock_random$countTable %>% as.data.frame()
bioDesign_random = mock_random$design
# -- ground truth
beta.actual_random = mock_random$actualParam$beta
```
## Estimation
### Launch MASS::glm.nb on simulated data (fixed intercept)
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- MASS:glm.nb
dtf2fit_fixed = HTRfit::reshapeCounTable(count_table_fixed, bioDesign_fixed)
l_fixed = HTRfit::launch.glm(dtf2fit_fixed)
fitDtf_fixed =listFit2dtf(l_fixed)
```
### Launch MASS::glm.nb on simulated data (random intercept)
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- MASS:glm.nb
dtf2fit_random = HTRfit::reshapeCounTable(count_table_random, bioDesign_random)
l_random = HTRfit::launch.glm(dtf2fit_random)
fitDtf_random =listFit2dtf(l_random)
```
### Join Actual & Inference
```{r}
###### -- Fixed Intercept -- ########
expectation_fixed = getExpectation(beta.actual_fixed, threshold = thr)
prediction_fixed = getPrediction(fitDtf_fixed$inference, threshold = thr, alphaRisk = 0.05)
# -- join actual & inference
comparison_fixed = getComparison(actual.dtf = expectation_fixed, inference.dtf = prediction_fixed)
#####################################
###### -- Random Intercept -- #######
expectation_random = getExpectation(beta.actual_random, threshold = thr)
prediction_random = getPrediction(fitDtf_random$inference, threshold = thr, alphaRisk = 0.05)
# -- join actual & inference
comparison_random = getComparison(actual.dtf = expectation_random, inference.dtf = prediction_random)
#####################################
```
### Join random & fixed dataframe
```{r}
# -- Annotations
comparison_random = comparison_random %>% dplyr::mutate(from = "Random intercept")
comparison_fixed = comparison_fixed %>% dplyr::mutate(from = "Fixed intercept")
# -- join
df2evaluation = rbind(comparison_fixed, comparison_random)
```
## Evaluation
### Build graph for evaluation
```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- ROC curve
p1 = ggplot(df2evaluation %>% filter(beta != "(Intercept)"),
aes(d = actual.label , m = padj, col = from)) +
geom_roc(n.cuts = 0, labels = F) #+
scale_color_manual(values = c("#yellow","#BDBDBD"))
#ggsave("../img/graph/replicates_eff.png", p, height = 4, width = 6)
# Identity plot
p2 = ggplot(df2evaluation %>% filter(beta != "(Intercept)")) +
geom_point(aes(x = actual.value, y = estimate, color = padj), alpha = 0.5) +
geom_abline(intercept = 0, slope = 1) +
facet_grid(beta~from, scales = "free")
```
### Visualization
```{r, message=FALSE, warning=FALSE}
p1
p2
```
n_genotypes from N timeProcess sd se ci
20 glmmTMB 200 15.130752444267273 2.6080884089635843 0.1844196999912184 0.3636676342866279
20 lme4 192 20.31276837239663 2.8278579904444543 0.20408307150164248 0.40254610129193336
50 glmmTMB 200 20.799276530742645 5.514627455256488 0.38994304693293763 0.7689507432848744
50 lme4 200 40.54833024740219 19.021427312622208 1.345018024060217 2.6523170946821053
100 glmmTMB 200 25.420783281326294 4.2043931998868596 0.29729549424146057 0.5862537954460072
100 lme4 200 37.98638904094696 4.275002040382448 0.3022882932340756 0.5960993780936279
200 glmmTMB 200 39.15629369020462 5.9148858659293335 0.4182455905743096 0.8247621294374486
200 lme4 200 68.16497600078583 12.042067627674905 0.8515027679035926 1.67912645561597
400 glmmTMB 200 66.25662302970886 7.7012016308806155 0.5445571896480582 1.0738431138458309
400 lme4 200 136.99138724803925 35.908038766273386 2.539081771074134 5.0069589148599665
---
title: "Dispersion"
date: "2023-02-01"
output:
html_document:
toc: true
css: ../css/air.css
---
## Required
```{r setup, message=FALSE, warning=FALSE, results="hide"}
library(HTRsim)
library(HTRfit)
```
## Simulation
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- SETUP
n_G = 10
n_genoT = 200
n_env = 2
max_n_rep = 5
sequencing_fact = 1
thr = 2
###########
# -- Simulate counts
mock = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env,
max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact)
# -- count table & experimental design
count_table = mock$countTable %>% as.data.frame()
bioDesign = mock$design
# -- dispersion value per gene & per condition
mock$actualParameters$dispersion[1:3, 1:3]
```
## Estimation
### Launch MASS::glm.nb on simulated data
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- MASS:glm.nb
dtf2fit = HTRfit::reshapeCounTable(count_table, bioDesign)
l = HTRfit::launch.glm_mixte(dtf2fit)
fitDtf =listFit2dtf(l)
dispersion_estimated = fitDtf$dispersion
```
### Join Actual & Inference
```{r}
# -- one dispersion per gene
dispersion_actual = mock$actualParameters$dispersion %>%
rowMeans() %>%
as.data.frame() %>%
dplyr::rename(., dispersion.actual = ".") %>% tibble::rownames_to_column("gene_id")
dispersion_actual = data.table::data.table(dispersion_actual, key = "gene_id")
dispersion_estimated = data.table::data.table(dispersion_estimated, key = "gene_id")
dispersion_comparison = dispersion_actual[dispersion_estimated]
```
### Identity plot
```{r}
ggplot(dispersion_comparison) +
geom_point(aes(x = dispersion.actual, y = 1/dispersion.estimate), alpha = 0.5) +
geom_abline(intercept = 0, slope = 1) + scale_y_log10() + scale_x_log10()
```
---
title: "Random intercept"
date: "2023-02-08"
output:
html_document:
toc: true
css: ../css/air.css
---
## Required
```{r setup, message=FALSE, warning=FALSE, results="hide"}
library(HTRsim)
library(HTRfit)
#library(plotROC)
library(Rmisc)
```
## Simulation
### Parameters
```{r}
# -- SETUP
n_G = 10
n_genoT_list = c(20, 50, 100, 200, 400)
n_env = 2
FixIntercept = F
max_n_rep = 5
sequencing_fact = 1
n_clus = 3
thr = 2
number_ofRepetition = 4
###########
```
### get time to fit
```{r message=FALSE, warning=FALSE, include=TRUE, results="hide", eval=FALSE}
# ---------------- #
# /!\ Very very long
# Not exectuted in the notebook
# ------------------------- #
#rm(dtf2evaluation)
for (n_genoT in n_genoT_list){
for (i in 1:number_ofRepetition){
# -- traceback
print(paste("Number of genotype:", n_genoT, sep = " "))
print(paste("Iteration:", i, sep = " "))
# -- Simulate counts -- #
mock_random = rnaMock(n_genes = n_G,
n_genotypes = n_genoT,
n_environments = n_env,
max_n_replicates = max_n_rep,
sequencing_factor = sequencing_fact,
fixIntercept = FixIntercept,
n_clusters = n_clus)
# -- count table & experimental design
count_table_random = mock_random$countTable %>% as.data.frame()
bioDesign_random = mock_random$design
# -- ground truth -- #
beta.actual_random = mock_random$actualParam$beta
# --- Model fitting --- #
# -- lme4::glm.nb
start_time <- Sys.time()
dtf2fit_random = HTRfit::reshapeCounTable(count_table_random,
bioDesign_random)
l_random.lme4 = HTRfit::launch.glm_mixte(dtf2fit_random,
package = "lme4")
fitDtf_random.lme4 =listFit2dtf(l_random.lme4)
end_time <- Sys.time()
fitDtf_random.lme4$inference$timeProcess = difftime(end_time,
start_time, units = "secs") %>% as.numeric()
fitDtf_random.lme4$inference$from = 'lme4'
fitDtf_random.lme4$inference$n_genotypes = n_genoT
# -- glmmTMB::glmmTMB
start_time <- Sys.time()
l_random.tmb = HTRfit::launch.glm_mixte(dtf2fit_random, package = "glmmTMB")
fitDtf_random.tmb =listFit2dtf(l_random.tmb)
end_time <- Sys.time()
fitDtf_random.tmb$inference$timeProcess = difftime(end_time,
start_time, units = "secs") %>% as.numeric()
fitDtf_random.tmb$inference$from = 'glmmTMB'
fitDtf_random.tmb$inference$n_genotypes = n_genoT
# ------------------------- #
# -- join LME4 & glmmTMB res -- #
fitDtf.inference = rbind(fitDtf_random.tmb$inference,
fitDtf_random.lme4$inference)
expectation = getExpectation(beta.actual_random,
toEval = "glm_mixte" , threshold = thr)
prediction = getPrediction(fitDtf.inference, threshold = thr, alphaRisk = 0.05)
# -- join actual & inference -- #
actual2join.dtf <- data.table::data.table(expectation,
key = c("gene_id", "term"))
inference2join.dtf <- data.table::data.table(prediction,
key = c("gene_id", "term"))
tmp <- actual2join.dtf[inference2join.dtf]
# ---- Saving results ----- #
if (exists('dtf2evaluation')){
dtf2evaluation = rbind(dtf2evaluation, tmp)
}
else{
dtf2evaluation = tmp
}
}
}
```
## Evaluation
### Preparing dataframe
```{r}
#tgc <- Rmisc::summarySE(dtf2evaluation,
# measurevar="timeProcess", groupvars=c("n_genotypes","from"))
# -- backup
#write_tsv(tgc, file = "2023_01_17-tgc_backup.tsv")
tgc <- read_tsv('2023_01_17-tgc_backup.tsv', show_col_types = FALSE)
```
### Build graph for evaluation
```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
#tgc$n_genotypes <- factor(tgc$n_genotypes)
#tgc$n_genotypes = as.numeric(as.character(tgc$n_genotypes))
p= ggplot(tgc, aes(x = n_genotypes, y = timeProcess, colour = from)) +
geom_line(aes(x = n_genotypes, y= timeProcess+sd),
linetype = "dashed") +
geom_line(aes(x = n_genotypes, y= timeProcess-sd),
linetype = "dashed") +
geom_ribbon(aes(ymin = timeProcess-sd ,
ymax= timeProcess+sd, fill = from), alpha=0.2) +
geom_line(aes(x = n_genotypes, y= timeProcess),
linetype = "solid") +
geom_point() +
scale_y_log10() +
scale_color_manual(values = c('#581845', '#FFC30F')) +
scale_fill_manual(values = c('#581845', '#FFC30F'))
```
### Visualization
```{r, message=FALSE, warning=FALSE}
p
```
---
title: "Random intercept"
date: "2023-02-08"
output:
html_document:
toc: true
css: ../css/air.css
---
## Required
```{r setup, message=FALSE, warning=FALSE, results="hide"}
library(HTRsim)
library(HTRfit)
library(plotROC)
```
## Simulation
### Parameters
```{r}
# -- SETUP
n_G = 10
n_genoT = 200
n_env = 2
FixIntercept = F
max_n_rep = 5
sequencing_fact = 1
n_clus = 3
thr = 2
###########
```
### Random intercept
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- Simulate counts
mock_random = rnaMock(n_genes = n_G, n_genotypes = n_genoT, n_environments = n_env,
max_n_replicates = max_n_rep, sequencing_factor = sequencing_fact, fixIntercept = FixIntercept, n_clusters = n_clus)
# -- count table & experimental design
count_table_random = mock_random$countTable %>% as.data.frame()
bioDesign_random = mock_random$design
# -- ground truth
beta.actual_random = mock_random$actualParam$beta
```
## Estimation
### Launch glmmTMB::glmmTMB on simulated data (random intercept)
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- glmmTMB::glmmTMB
dtf2fit_random = HTRfit::reshapeCounTable(count_table_random, bioDesign_random)
l_random.tmb = HTRfit::launch.glm_mixte(dtf2fit_random, package = "glmmTMB")
fitDtf_random.tmb =listFit2dtf(l_random.tmb)
```
### Launch lme4::glmer.nb on simulated data (random intercept)
```{r, message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- lme4::glm.nb
l_random.lme4 = HTRfit::launch.glm_mixte(dtf2fit_random, package = "lme4")
fitDtf_random.lme4 =listFit2dtf(l_random.lme4)
```
### Join Actual & Inference
```{r}
###### -- LME4 -- #######
expectation_random = getExpectation(beta.actual_random,toEval = "glm_mixte" , threshold = thr)
prediction_random = getPrediction(fitDtf_random.lme4$inference, threshold = thr, alphaRisk = 0.05)
# -- join actual & inference
actual2join.dtf <- data.table::data.table(expectation_random, key = c("gene_id", "term"))
inference2join.dtf <- data.table::data.table(prediction_random, key = c("gene_id", "term"))
comparison.lme4 <- actual2join.dtf[inference2join.dtf]
###### -- GLMTMB -- #######
expectation_random = getExpectation(beta.actual_random,toEval = "glm_mixte" , threshold = thr)
prediction_random = getPrediction(fitDtf_random.tmb$inference, threshold = thr, alphaRisk = 0.05)
# -- join actual & inference
actual2join.dtf <- data.table::data.table(expectation_random, key = c("gene_id", "term"))
inference2join.dtf <- data.table::data.table(prediction_random, key = c("gene_id", "term"))
comparison.tmb <- actual2join.dtf[inference2join.dtf]
```
### Join lme4 & glmmTMB
```{r}
###### -- Annotations
comparison.tmb = comparison.tmb %>% dplyr::mutate(from = "glmmTMB")
comparison.lme4 = comparison.lme4 %>% dplyr::mutate(from = "lme4")
# -- join
comparison.dtf = rbind(comparison.lme4, comparison.tmb)
```
## Evaluation
### Build graph for evaluation
```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
# -- preparing dtf
comparison.dtf$term = factor(comparison.dtf$term, levels = c("(Intercept)", "environmentE1","cor__(Intercept).environmentE1", "sd__(Intercept)", "sd__environmentE1"))
comparison.dtf = comparison.dtf %>%
mutate(actual.value =
if_else(str_detect(term, "cor_"),
actual.value, actual.value*log(2) ))
# -- Identity plot
p = ggplot(comparison.dtf) +
geom_point(aes(x = actual.value,
y = estimate, col = from), alpha = 0.6, size = 3) +
geom_abline(intercept = 0, slope = 1) +
facet_wrap(~term, scales = "free") +
theme(strip.text.x = element_text(size = 7)) +
scale_color_manual(values = c('#581845', '#FFC30F'))
#ggsave("../img/graph/poc_glmm2_1000.png", p,)
```
### Visualization
```{r, message=FALSE, warning=FALSE}
p
```
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment