diff --git a/results/v2/2022-07-25_dev.Rmd b/results/v2/2022-07-25_dev.Rmd
index c83afc108e815117927c036ebc1e0576fb38fdbb..36cc31ea1425557ca456e1bc7168e3fdb6e45b6c 100644
--- a/results/v2/2022-07-25_dev.Rmd
+++ b/results/v2/2022-07-25_dev.Rmd
@@ -107,7 +107,6 @@ kij.simulated = getK_ij(mu_ij, gene_dispersion)
 
 #kij.simulated = kij.simulated %>% data.frame() %>% filter_all(all_vars(. < 1000000)) 
 
-
 ```
 
 
@@ -148,15 +147,15 @@ ggsave(filename = "figures/DESEQ_dispersion.png", plot = p, width = 15, height =
 
 ```{r}
 #### params #####
-n_G = 100
+n_G = 10
 n_E = 2
-n_genes = 100
+n_genes = 2
 ##################
 
 ## Get beta for simulation
 beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
 
-design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 15)
+design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 3)
 design2simulate$model_matrix %>% dim()
 log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
 #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
@@ -425,9 +424,502 @@ environment = design2simulate$design2simulate$environment
 df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame()
 rownames(df_gene_i) <- NULL
 #print(i)
-    fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = df_gene_i, link = log)
+fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = df_gene_i, link = log)
+library(aod)
   
+wald.test.res = wald.test(Sigma = vcov(fit), b = coef(fit), Terms = 1, H0 = 0.40)# %>% .[3]
+wald.test.pvalue = wald.test.res$result$chi2 %>% data.frame() %>% .[3,]
+
+
+                                            
+
+
+                                                
+test_wald(fit, 0.3)
+coef(fit) %>% abs %>% which.min() 
+coef(fit) %>% abs %>% min()
+coef(fit) %>% length()
+
 pnorm(summary(m)$coefficients[2,3], lower.tail = F)
 
+res = coef(summary(fit))[,c(1,4)] %>%
+              data.frame() %>%
+              dplyr::rename(., pval = "Pr...z.." , Inference = "Estimate") %>%
+              dplyr::mutate(beta = stringr::str_remove_all(rownames(.), "[//(//)]")) %>%
+              dplyr::mutate(beta = stringr::str_replace(beta, ":", "."))
+
+
+test_wald <- function(model.res, term, threshold){
+  wald.test.res = wald.test(Sigma = vcov(model.res), b = coef(model.res), Terms = term, H0 = threshold)
+  wald.test.pvalue = wald.test.res$result$chi2 %>% data.frame() %>% .[3,]
+  return(wald.test.pvalue)
+}
+
+
+glmglrt::p_value.glm(fit)
+
+list_pval = 1:length(coef(fit)) %>% map(.x = .,  ~ test_wald(fit,.x , 0)) %>% unlist()
+list_pval.adj = p.adjust(list_pval, method = 'fdr')  
+
+glmglrt::p_value_contrast(fit, contrast = x ,  alternative ="two.sided", method = "Wald", H0 = 0)
+
+coef(fit %>% summary)
+res %>% mutate(p.adj = list_pval.adj )
+
+
+
+## news
+coef(fit %>% summary)[2,]
+
+x = c(rep(0, length(coef(fit))-1),1)
+
+
+test_wald2 <- function(model.res, term, threshold, initvec){
+  constrast_vec = replace(initvec, term , 1)
+  wald.test.pvalue = glmglrt::p_value_contrast(fit, contrast = constrast_vec ,  alternative ="two.sided", method = "Wald", H0 = threshold)
+  return(wald.test.pvalue %>% as.numeric())
+}
+
+
+kij.simulated %>% dim()
+design2simulate$model_matrix %>% dim()
+
+vecofzero = rep(0, length(coef(fit)))
+test_wald
+
+list_pval = 1:length(coef(fit)) %>% furrr::future_map(.x = .,  ~ test_wald2(model.res = fit, term= .x ,   threshold = 0.5, initvec = vecofzero) , print("hd") ) %>% unlist()
+
+
+which.min(coef(fit))
+library(aod)
+
+
+coef(fit)
+
+
+
+WaldTest = function(L,thetahat,Vn,h=0) # H0: L theta = h
+ # Note Vn is the asymptotic covariance matrix, so it's the
+# Consistent estimator divided by n. For true Wald tests 
+# based on numerical MLEs, just use the inverse of the Hessian. 
+{
+WaldTest = numeric(3)
+names(WaldTest) = c("W","df","p-value")
+r = dim(L)[1]
+W = t(L%*%thetahat-h) %*% solve(L%*%Vn%*%t(L)) %*% (L%*%thetahat-h)
+W = as.numeric(W)
+pval = 1-pchisq(W,r)
+WaldTest[1] = W; WaldTest[2] = r; WaldTest[3] = pval
+WaldTest
+} # End function WaldTest
+
+WaldTest(coef(fit),thetahat,vcov(fit))
+p <- 1 - pchisq(0.3, df = 1)
+
+
+which.min(coef(fit))
+coef(fit)[41]
+
+
+wald.test.res = wald.test(b = abs(coef(fit)), Sigma = vcov(fit), Terms = 41, H0 = 1) 
+stat.chi2  = wald.test.res$result$chi2[1] %>% as.numeric()
+pchisq(stat.chi2, df = 1, lower.tail = T)
+
+
+
+initvec = rep(0, length(coef(fit)))
+x = replace(initvec, 41 , 1)
+
+
+
+
+```
+
+
+
+
+## mvronrom
+
+```{r}
+getBetaforSimulation2 <- function(n_genes = 100, n_genotypes = 20, n_environments = 2, beta.dtf, theta = 10 ){
+
+  x = beta.dtf %>% as.matrix()
+  fit.mvrnorm <- Rfast::mvnorm.mle(x)
+  print(fit.mvrnorm$mu)
+  x <- NULL
+  print('base')
+  print(diag(fit.mvrnorm$sigma))
+  print(MASS::mvrnorm(n = n_genes,
+                                 mu = fit.mvrnorm$mu,
+                                 Sigma = fit.mvrnorm$sigma ) %>% data.frame() %>% summarise_if(is.numeric, sd))
+  print("modif")
+  diag(fit.mvrnorm$sigma) <- diag(fit.mvrnorm$sigma) + 10
+  beta.matrix.tmp <- MASS::mvrnorm(n = n_genes,
+                                 mu = fit.mvrnorm$mu,
+                                 Sigma = fit.mvrnorm$sigma )
+
+  print(fit.mvrnorm$sigma) 
+  print(diag(fit.mvrnorm$sigma))
+  print(beta.matrix.tmp %>% cov())
+  replicate_beta <- function(beta_vec, n, theta){
+    beta_vec.rep = rep(beta_vec, n)
+    beta_vec.rep + rnorm(length(beta_vec.rep), mean = 0, sd = abs(beta_vec/theta))
+  }
+
+  beta0 = beta.matrix.tmp[,1]
+  beta.matrix.tmp = purrr::map2(.x = c(2,3,4), .y =  c(n_genotypes-1,
+                                              n_environments-1,
+                                              (n_genotypes-1)*(n_environments-1)),
+                            ~ replicate_beta(beta.matrix.tmp[,.x], .y, theta) %>% matrix(ncol = .y)) %>%
+                    do.call(cbind, .)
+
+
+
+  beta.matrix = cbind(beta0, beta.matrix.tmp)
+  betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "")
+  betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "")
+  betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
+  matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
+
+  colnames(beta.matrix) = matrix.colnames
+  rownames(beta.matrix) = base::paste("gene", 1:(n_genes), sep = "")
+
+  return(beta.matrix)
+}
+
+beta.input2 = getBetaforSimulation2(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
+```
+
+
+```{r}
+
+# set up the custom data simulation function
+n_subj = 100 # number of subjects
+n_ingroup = 25 # number of ingroup stimuli
+n_outgroup = 25 # number of outgroup stimuli
+beta_0 = 800 # grand mean
+beta_1 = 50 # effect of category
+omega_0 = 80 # by-item random intercept sd
+tau_0 = 100 # by-subject random intercept sd
+tau_1 = 40 # by-subject random slope sd
+rho = 0.2 # correlation between intercept and slope
+sigma = 200 
+
+  items <- data.frame(
+      item_id = seq_len(n_ingroup + n_outgroup),
+      category = rep(c("ingroup", "outgroup"), c(n_ingroup, n_outgroup)),
+      X_i = rep(c(-0.5, 0.5), c(n_ingroup, n_outgroup)),
+      O_0i = rnorm(n = n_ingroup + n_outgroup, mean = 0, sd = omega_0))
+  items    
+  # variance-covariance matrix
+  cov_mx <- matrix(
+      c(tau_0^2, rho * tau_0 * tau_1,
+        rho * tau_0 * tau_1, tau_1^2 ),
+        nrow = 2, byrow = TRUE)
+  
+      subjects <- data.frame(subj_id = seq_len(n_subj),
+      
+                              MASS::mvrnorm(n = n_subj,
+                                mu = c(T_0s = 0, T_1s = 0),
+                                  Sigma = cov_mx))
+      
+      crossing(subjects, items) %>%
+            mutate(e_si = rnorm(nrow(.), mean = 0, sd = sigma),
+                RT = beta_0 + T_0s + O_0i + (beta_1 + T_1s) * X_i + e_si) %>%
+                  select(subj_id, item_id, category, X_i, RT)
+
+
+
+
+my_sim_data()
+```
+
+
+```{r}
+get_beta_gene_i <- function(fit.mvnorm, n_G, n_E){
+  beta.matrix.tmp <- MASS::mvrnorm(n = (n_E-1)*(n_G-1),
+                                 mu = fit.mvnorm$mu,
+                                 Sigma = fit.mvnorm$sigma )
+  
+  beta0 = beta.matrix.tmp[1,1] %>% unname()
+  betaG = beta.matrix.tmp[1:(n_G-1),2]
+  betaE = beta.matrix.tmp[1:(n_E-1),3]
+  betaGE = beta.matrix.tmp[,4]
+  
+  ### name
+  betaG.colnames = base::paste("genotype", "G", 1:(n_G-1), sep = "")
+  betaE.colnames = base::paste("environment", "E", 1:(n_E-1), sep = "")
+  betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
+  matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
+  
+  beta_gene_i <- c( beta0, betaG, betaE, betaGE)
+  names(beta_gene_i) = matrix.colnames
+
+  return(beta_gene_i)
+}
+
+
+n_g = 1
+n_group_gene = 3
+n_E = 2
+n_G = 400
+
+
+dds.extraction$beta 
+kmean.res = kmeans(dds.extraction$beta[,c(2,3)], n_group_gene)
+
+
+beta.cluster1= dds.extraction$beta[kmean.res$cluster == 1,]
+beta.cluster2= dds.extraction$beta[kmean.res$cluster == 2,]
+beta.cluster3= dds.extraction$beta[kmean.res$cluster == 3,]
+
+
+x = beta.cluster2 %>% as.matrix()
+fit.mvrnorm <- Rfast::mvnorm.mle(x)
+fit.mvrnorm$sigma[c(2,4),c(2,4)] = fit.mvrnorm$sigma[c(2,4),c(2,4)]*3
+#diag(fit.mvrnorm$sigma) <- diag(fit.mvrnorm$sigma) 
+
+#### BETAG.E #####
+fit.mvrnorm$mu[4] = 0
+fit.mvrnorm$sigma[,4] = 0
+fit.mvrnorm$sigma[4,] = 0
+#################
+#fit.mvrnorm$mu[0] = 30
+
+#### BETA0 #####
+fit.mvrnorm$mu[1] = 0
+fit.mvrnorm$sigma[,1] = 0
+fit.mvrnorm$sigma[1,] = 0
+#################
+
+#### BETAE #####
+fit.mvrnorm$mu[3] = 0
+fit.mvrnorm$sigma[,3] = 0
+fit.mvrnorm$sigma[3,] = 0
+
+#fit.mvrnorm$mu[2] = 8
+#fit.mvrnorm$sigma[2,2]= 6
+x <- NULL
+
+
+n_gene = n_g
+a = purrr::map(.x = 1:n_gene, ~ get_beta_gene_i(fit.mvrnorm, n_G, n_E)) 
+beta.matrix = do.call(rbind, a) 
+rownames(beta.matrix) = base::paste("gene", 1:(n_g), sep = "")
+
+
+
+x = beta.matrix %>% data.frame() %>% #%>% select(!starts_with("environment")) %>%
+  rownames_to_column('gene_id') %>% reshape2::melt(., id = "gene_id") %>% dplyr::mutate(type = dplyr::case_when(
+      str_detect(variable, "genotypeG\\d+\\.environment") ~ "GxE",
+      str_detect(variable, "genotypeG\\d+$") ~ "G",
+      str_detect(variable, "environmentE\\d+$") ~ "E",
+      str_detect(variable, "Intercept$") ~ "Intercept")) %>%  reshape2::dcast(., gene_id  ~ type, value.var = "value", fun.aggregate = list)
+
+
+g = x$G
+names(g) <- x$gene_id
+df_tmp2 = data.frame(g) %>% mutate(type = 'betaG') %>% reshape2::melt( id = "type", value.name = "betaG")
+g = x$GxE
+
+names(g) <- x$gene_id
+df_tmp3 = data.frame(g) %>% mutate(type = 'GxE') %>% reshape2::melt( id = "type", value.name = "betaGxE")
+
+
+df = cbind(df_tmp2, df_tmp3) %>% dplyr::select(c(betaGxE, betaG)) %>% mutate(from = 'Simulated') %>% mutate(cluster = 3)
+df_simu = df
+df_simu = rbind(df_simu, df)
+
+
+df_simu$cluster <- factor(df_simu$cluster)
+ggplot(df_simu) + geom_point(aes(x = betaG, y = betaGxE, col = cluster), alpha = 0.2)
+
+```
+
+```{r}
+
+### Visualisation
+dtf = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster ) %>%
+      reshape2::melt(. ,id=c("cluster"), value.name = "value", variable.name= "type") 
+dtf$cluster <- factor(dtf$cluster)
+
+
+p = ggplot(dtf) + geom_density(aes(x = value, fill = cluster ), alpha = 0.4) + facet_grid(~ type, scales = "free")
+
+p
+ggsave("figures/densityBeta_kmeans.png", p)
+
+
+betas = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster) %>% mutate(from = "Actual")
+betas$cluster = factor(betas$cluster)
+df2 <- rbind(betas %>% dplyr::select(betaG, betaGE, from, cluster), df_simu %>% rename(betaGE = "betaGxE"))
+p = ggplot(df2) + geom_point(aes(x= betaG, betaGE, col = cluster), alpha = 0.1) + facet_grid(~from)
+
+p
+ggsave("figures/scatterplot_clustering_covIncreased.png", p, width = 10)
+
+```
+
+```{r}
+
+beta.input = beta.matrix
+n_genes = n_g
+design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 15)
+#rowSums(beta.input) %>% which.max()
+#rowSums(beta.input) %>% max()
+log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
+log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
+
+
+gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
+
+
+#2* 2^log_qij
+mu_ij = getMu_ij(log_qij, 1)
+#max(mu_ij)
+kij.simulated = getK_ij(mu_ij, gene_dispersion)
+
 ```
 
+
+## Mixte model
+
+
+```{r}
+library(lme4)
+kij.simulated %>% dim()
+df_2glmmm <- function(y , design2simulate, i){
+  genotype = design2simulate$design2simulate$genotype
+  environment = design2simulate$design2simulate$environment
+  #message("Fitting model ...")
+
+  df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame() %>% mutate(inter = paste(environment, genotype, sep = '_'))
+  df_gene_i$inter <- factor( df_gene_i$inter )
+  rownames(df_gene_i) <- NULL
+  df_gene_i = df_gene_i %>% mutate(gene_id = paste("gene", i, sep = ""))
+  return(df_gene_i)
+}
+
+
+library(broom.mixed)
+overdisp_fun <- function(model) {
+    rdf <- df.residual(model)
+    rp <- residuals(model,type="pearson")
+    Pearson.chisq <- sum(rp^2)
+    prat <- Pearson.chisq/rdf
+    pval <- pchisq(Pearson.chisq, df=rdf, lower.tail=FALSE)
+    c(chisq=Pearson.chisq,ratio=prat,rdf=rdf,p=pval)
+}
+
+tidy_quasi <- function(model, phi=overdisp_fun(model)["ratio"], conf.level=0.95) {
+    tt <- (tidy(model, effects="ran_vals") %>% 
+              mutate(std.error=std.error*sqrt(phi),
+                   statistic=estimate/std.error,
+                   p.value=2*pnorm(abs(statistic), lower.tail=FALSE))
+    )
+    return(tt)
+}
+
+```
+
+```{r}
+
+beta.matrix.long = beta.matrix %>% data.frame() %>% rownames_to_column('gene_id') %>% reshape2::melt(.,id= "gene_id",  value.name = "value", variable.name= "beta")
+beta.matrix.long = beta.matrix.long %>% filter(gene_id == "gene1")  %>% dplyr::mutate(type = dplyr::case_when(
+      str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
+      str_detect(beta, "genotypeG\\d+$") ~ "G",
+      str_detect(beta, "environmentE\\d+$") ~ "E",
+      str_detect(beta, "Intercept$") ~ "Intercept"))
+
+beta.long.actual = beta.matrix.long %>% filter(type == "G") %>% mutate(from = "actual") %>% dplyr::select(-gene_id)
+
+```
+
+
+
+```{r}
+library(lme4)
+f = map(1:1, ~df_2glmmm(kij.simulated[1,], design2simulate, .x) )
+n = do.call(rbind, f)
+n2 = n %>% mutate(group = paste(gene_id, genotype, sep = "_"))
+data_glmm = n
+m.nb <- glmer.nb(y ~ environment +  ( 1 | genotype ) , data=data_glmm, verbose=TRUE)
+
+summary(m.nb)
+tidy(m.nb)
+fit.mvrnorm$sigma
+fit.mvrnorm$mu
+beta.input[1,]
+tidy_quasi(m.nb)
+mean(res.glmm$value)
+sd(res.glmm$value)
+sd((ranef(m.nb)$genotype[,1]))
+
+plot(density(coef(m.nb)$genotype[,1]))
+
+res.glmm = tidy_quasi(m.nb) %>% dplyr::select(estimate, level)  %>% 
+      dplyr::mutate(beta = str_replace(level, "G", "genotypeG")) %>% rename(c("type" = level, 'value' =  estimate))
+
+B0 = tidy(m.nb, effects = 'fixed') %>% .$estimate %>% .[1]
+
+res.glmm$value = res.glmm$value + B0
+coef(m.nb)
+
+## TIPS
+tidy_quasi(m.nb) %>% rbind(tidy(m.nb, effects = 'fixed') %>% mutate(group ="NA") %>% mutate(level = 'NA'))
+tidy(m.nb)
+
+```
+
+
+```{r}
+res.glmm = res.glmm %>% mutate(from = 'infered')
+beta.long.actual$value = beta.long.actual$value + beta.matrix %>% data.frame() %>% .$Intercept
+#beta.long.actual$value = beta.long.actual$value*log(2)
+df_distrib = rbind(beta.long.actual, res.glmm)
+
+
+B0 = tidy(m.nb, effects = 'fixed') %>% .$estimate %>% .[1]
+stadd = tidy(m.nb)  %>% .$estimate %>% .[2]
+rnorm_df = list(beta = df_distrib$beta %>% unique(), value = rnorm(n = 100000, mean = B0, sd = stadd  ), type = "G", from = paste('rnorm(', B0 %>% signif(2),",", stadd %>% signif(2), ')', sep = '')) %>% data.frame()
+
+df_distrib = rbind(df_distrib, rnorm_df)
+ggplot(df_distrib) + geom_density(aes(x = value, col = from ), alpha = 0.5)
+
+
+df_distrib.short = df_distrib %>% filter(from %in% c("actual", 'infered')) %>% reshape2::dcast(., beta  ~ from, value.var = "value")
+ggplot(df_distrib.short) + geom_point(aes(x = actual, y = infered )) + geom_abline(slope=1, intercept=0)#
+
+
+fit.mvrnorm
+
+```
+
+```{r}
+
+beta0.input = beta.input[,1]
+######## beta G
+mean_betaG = beta.input[,2:n_G] %>% mean()
+mean.input = beta0.input + mean_betaG
+sd_betaG = beta.input[,2:n_G] %>% sd()
+######## beta E
+mean_betaE = beta.input[,(n_G+1):(n_G+n_E-1)] %>% mean()
+
+
+rnorm_df_input = list(beta = df_distrib$beta %>% unique(), value = rnorm(n = 100000, mean = mean.input, sd = sd_betaG  ), type = "G", from = "Actual") %>% data.frame()
+
+df_distrib = rbind(rnorm_df_input, rnorm_df)
+ggplot(df_distrib) + geom_density(aes(x = value, col = from ), alpha = 0.5)
+
+```
+
+
+### GLMTMBB
+
+```{r}
+library(glmmTMB)
+
+glmmTMB::glmmTMB(y ~ environment +  ( genotype | genotype ) , data=data_glmm,
+  family=nbinom2, verbose = T)
+
+```
diff --git a/results/v2/2022-09-01_simulationBetaDemo.Rmd b/results/v2/2022-09-01_simulationBetaDemo.Rmd
index 3cc3cd9433e7b3f5bb2a7f745975ca36af9f5453..bd812d29076dac146f9dd3338b982f61e93f8a59 100644
--- a/results/v2/2022-09-01_simulationBetaDemo.Rmd
+++ b/results/v2/2022-09-01_simulationBetaDemo.Rmd
@@ -169,7 +169,7 @@ a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,],
                                               .x, 
                                               design2simulate$design2simulate) )
 
-c = do.call(rbind, a)
+c = do.call(rbind, a) %>% mutate(p.val.adj = p.adjust(p.val, method= "fdr"))
 beta.input.long = beta.matrix %>% data.frame() %>%
       tibble::rownames_to_column(., var = "gene_id") %>%
       dplyr::mutate(origin = "Actual") %>%
@@ -178,11 +178,13 @@ beta.input.long = beta.matrix %>% data.frame() %>%
       dplyr::select(-origin)
 
 
-df_comparison = merge(c, beta.input.long) %>% mutate(Ngenotype = n_genotypes)
+df_comparison = merge(c, beta.input.long) %>% mutate(Ngenotype = n_genotypes) %>% mutate(pred = ifelse(p.val.adj < 0.05, "DE", "nonDE"))
+
+df_comparison %>% dplyr::filter(type %in% c("G", "GxE")) %>% group_by(type, pred) %>% tally()
 
 ggplot(df_comparison %>% dplyr::filter(type %in% c("G", "GxE")) )  +
-    geom_histogram(aes(x=Inference, y = ..density..), fill = 'white' ) +
-   geom_density(aes(x = Actual)) +
+    geom_histogram(aes(x=Inference, y = ..density.., col = pred, fill = pred),  ) +
+   geom_density(aes(x = Actual*log(2))) +
     facet_grid(~type, scales = "free") 
 df_comparison$type
 ggplot(df_comparison %>% dplyr::filter(type %in% c("Intercept")) )  +
@@ -209,92 +211,123 @@ replicate_beta <- function(beta_vec, n, theta){
   beta_vec.rep + rnorm(length(beta_vec.rep), mean =  beta_vec/theta , sd = 1)
 }
 
+
 ## Params
 n_genes = 1
 n_environments = 2
 theta = 10
-gen_vector = c(100, 300 ,500, 700, 1000)
-for (n_genotypes in gen_vector ){
-
-  beta.matrix.tmp = purrr::map2(.x = c(2,3,4), .y =  c(n_genotypes-1,
-                                              n_environments-1,
-                                              (n_genotypes-1)*(n_environments-1)),
-                            ~ replicate_beta(beta.matrix.template[.x], .y, theta) %>% matrix(ncol = .y)) %>%
-                    do.call(cbind, .)
-  
-  beta.matrix = cbind(beta.matrix.template[1], beta.matrix.tmp)
-  betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "")
-  betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "")
-  betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
-  matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
-  
-  colnames(beta.matrix) = matrix.colnames
-  rownames(beta.matrix) = base::paste("gene", 1:(n_genes), sep = "")
-  beta.input = beta.matrix
-  #beta.matrix %>% dim()
-  
-  ########## kij ################
-  n_G = n_genotypes
-  n_E = 2
-
-  design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 25)
-  design2simulate$model_matrix %>% dim()
-  log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.matrix)
-  #log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
+#gen_vector = c(100, 300 ,500, 700, 1000)
+gen_vector = 150
+n_genotypes = 150
+thr =  c(0, 0.07, 0.48, 1)
+#thr = c(0.04, 1)
+
+beta.matrix.tmp = purrr::map2(.x = c(2,3,4), .y =  c(n_genotypes-1,
+                                            n_environments-1,
+                                            (n_genotypes-1)*(n_environments-1)),
+                          ~ replicate_beta(beta.matrix.template[.x], .y, theta) %>% matrix(ncol = .y)) %>%
+                  do.call(cbind, .)
+
+beta.matrix = cbind(beta.matrix.template[1], beta.matrix.tmp)
+betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "")
+betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "")
+betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
+matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
+
+colnames(beta.matrix) = matrix.colnames
+beta.matrix %>% dim()
+rownames(beta.matrix) = base::paste("gene", 1:(n_genes), sep = "")
+beta.input = beta.matrix
+#beta.matrix %>% dim()
+
+########## kij ################
+n_G = n_genotypes
+n_E = 2
+
+design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 10)
+design2simulate$model_matrix %>% dim()
+log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.matrix)
+#log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
+
+
+gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
+                                            n_genotypes = n_G, 
+                                            n_environments = n_E, 
+                                            dispersion.vec = dds.extraction$gene_dispersion, 
+                                            model_matrix = design2simulate$model_matrix, 
+                                                      dispUniform_btweenCondition = T)
+    
+    
+mu_ij = getMu_ij(log_qij, 1)
+
+kij.simulated = getK_ij(mu_ij, gene_dispersion)
+
+for (t in thr){
+  for (n_genotypes in gen_vector ){
   
+   
+    
+    ############# GLM fitting ###################
+    plan(multisession, workers = 1)
+    a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
+                                                .x, 
+                                                design2simulate$design2simulate, threshold = t) )
   
-  gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
-                                              n_genotypes = n_G, 
-                                              n_environments = n_E, 
-                                              dispersion.vec = dds.extraction$gene_dispersion, 
-                                              model_matrix = design2simulate$model_matrix, 
-                                                        dispUniform_btweenCondition = T)
+    #kij.simulated %>% dim()
+    ############ save res #####
+    c = do.call(rbind, a)
+    beta.input.long = beta.input %>% data.frame() %>%
+            tibble::rownames_to_column(., var = "gene_id") %>%
+            dplyr::mutate(origin = "Actual") %>%
+            reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
+            dplyr::rename(Actual = "value") %>% 
+            dplyr::select(-origin)
       
+      df_tmp = merge(c, beta.input.long) %>% mutate(Ngenotype = n_genotypes)  %>% mutate(p.val.adj = p.adjust(p.val, method= "fdr")) %>% mutate(threshold = t)
       
-  mu_ij = getMu_ij(log_qij, 1)
-  
-  kij.simulated = getK_ij(mu_ij, gene_dispersion)
-  
-  ############# GLM fitting ###################
-  plan(multisession, workers = 1)
-  a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
-                                              .x, 
-                                              design2simulate$design2simulate) )
-
-  
-  ############ save res #####
-  c = do.call(rbind, a)
-    beta.input.long = beta.input %>% data.frame() %>%
-          tibble::rownames_to_column(., var = "gene_id") %>%
-          dplyr::mutate(origin = "Actual") %>%
-          reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
-          dplyr::rename(Actual = "value") %>% 
-          dplyr::select(-origin)
-    
-    df_tmp = merge(c, beta.input.long) %>% mutate(Ngenotype = n_genotypes)
-    
-    if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
-        df_comparison = rbind(df_comparison, df_tmp)
- 
-    }
-    else{
-        df_comparison = df_tmp
-    }
-          
+      if (exists('df_comparison') && is.data.frame(get('df_comparison'))){
+          df_comparison = rbind(df_comparison, df_tmp) 
+   
+      }
+      else{
+            df_comparison = df_tmp
+      }
+            
+  }
 }
 
+df_comparison$threshold %>% table()
 
 df_comparison$type <- factor(df_comparison$type, levels = c("Intercept", "G", "E", "GxE"))
 
-df_comparison %>% 
+
+df_comparison = df_comparison %>% mutate(pred = ifelse(p.val.adj < 0.05, "DE", "nonDE"))
+
+
+
+ggplot(df_comparison) + geom_bar(aes(x= p.val)) + xlim(c(0,0.1))
+
+
+df_comparison %>% filter(type %in% c("G", "GxE")) %>% group_by(threshold, type, pred, Ngenotype  ) %>% tally()
+
+df_comparison %>%  filter(type %in% c("G", "GxE")) %>% group_by(type, threshold, pred) %>% tally()
+
+
+df_comparison$Ngenotype %>% table()
 
 
-p = ggplot(df_comparison %>% filter(type %in% c("G", "GxE")) )  +
-    geom_histogram(aes(x=Inference, y = ..density..), fill = 'white' ) +
-   geom_density(aes(x = Actual * log(2))) +
-    facet_grid(type~Ngenotype, scales = "free") 
+#df_comparison = df_comparison %>% filter(gene_id == "gene1")
+p = ggplot(df_comparison %>% dplyr::filter(type %in% c("G", "GxE")))  +
+    geom_histogram(aes(x = Inference, y=..density..) , fill = 'black' ) + 
+    geom_histogram(data = df_comparison %>% dplyr::filter(type %in% c("G", "GxE")) %>% dplyr::filter(pred == "DE"),  aes(x=Inference, y = ..density.. , fill = pred )) +
+    facet_grid(type~threshold , scales = "free") +
+    geom_vline(aes(xintercept= -threshold), linetype="dotted", 
+                color = "blue") +
+  geom_vline(aes(xintercept= threshold), linetype="dotted", 
+                color = "blue") +
+   geom_density(aes(x = Actual * log(2) )) 
 p
-ggsave(filename = "figures/GLM_genotype_distrib.png", plot = p, width = 20, height = 12)
+ggsave(filename = "../../results/v2/figures/GLM_distrib.png", plot = p, width = 20, height = 12)
 
 
 ```
diff --git a/results/v2/2022-09-07_Postinference_filtering.Rmd b/results/v2/2022-09-07_Postinference_filtering.Rmd
index eff9b29bba0090d682c763c5d1b2bc3256dc3db3..ffbeb9299cb3d21339a32280a75860e2865ee234 100644
--- a/results/v2/2022-09-07_Postinference_filtering.Rmd
+++ b/results/v2/2022-09-07_Postinference_filtering.Rmd
@@ -132,6 +132,10 @@ dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$des
 
 
 ```{r}
+n_G = 40
+n_E = 2
+n_genes = 10
+
 
 rm(df2return)
 threshold_list = c(0, 0.04, 0.07, 0.2, 0.48, 1)
diff --git a/results/v2/2022-09-22_NgenotypesEffectBIS.Rmd b/results/v2/2022-09-22_NgenotypesEffectBIS.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..120c215aa3575a4b948148365693f05a51b62a1f
--- /dev/null
+++ b/results/v2/2022-09-22_NgenotypesEffectBIS.Rmd
@@ -0,0 +1,187 @@
+---
+title: "HTRSIM"
+date: '2022-07-26'
+output:   
+  html_document:
+
+css: 
+ - css/template.css
+
+---
+
+
+```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+library(HTRSIM)
+library(furrr)
+#library(tidyverse)
+#library(reshape2)
+#library(kableExtra)
+#library(gridExtra)
+#library(MatrixGenerics)
+```
+
+
+
+## Simu
+
+```{r}
+n_genes = 80
+n_genotypes_list = c( 100, 200, 400 )
+threshold = 0.5
+n_E = 2
+
+
+rm(df_roc)
+
+
+for (n_G in n_genotypes_list){
+  
+  beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
+  design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 3)
+  log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
+  gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, 
+                                                        n_genotypes = n_G, n_environments = n_E, 
+                                                        dispersion.vec = dds.extraction$gene_dispersion, 
+                                                        model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
+  mu_ij = getMu_ij(log_qij, 1)
+  kij.simulated = getK_ij(mu_ij, gene_dispersion)
+  
+  
+  
+  ######################### DESEQ
+      beta.actual.matrix = beta.input
+      ## 2 modifY
+      #dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
+      
+      #############
+      # Param
+      threshold = 0.5
+      ############
+      
+      #listBeta = DESeq2::resultsNames(dds_simu)
+      #plan(multisession, workers = 4)
+      #res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x), lfcThreshold = threshold) %>% data.frame() %>% .$padj)
+      #padj.matrix = do.call("cbind", res)
+        
+        
+        
+      #dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
+        
+      #dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
+      #                                      stringr::str_replace(., "_vs_G0", "") %>%
+      #                                      stringr::str_replace(., "_vs_E0", "") %>%
+      #                                      stringr::str_replace_all(., "_", "") %>%
+      #                                      stringr::str_replace(., "\\.", ":")
+        
+      #columnOfInterest =  design2simulate$model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "")
+      #dds_simu.mcols[,columnOfInterest]
+      
+      ## Get only column of interest
+      #idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped)
+      #beta.infered = dds_simu.mcols[,idx_cols]
+      
+      ## homogeneize column names & rownames
+      #idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames())
+      #beta.actual.matrix = beta.actual.matrix[,idx_cols]
+      #colnames(beta.infered) = base::colnames(beta.actual.matrix)
+      #colnames(padj.matrix) = base::colnames(beta.actual.matrix)
+      #rownames(padj.matrix) = base::rownames(beta.actual.matrix)
+      #beta.actual.matrix %>% dim()
+      #padj.matrix %>% dim()
+      #beta.infer.long = beta.infered %>% data.frame() %>%
+      #                          tibble::rownames_to_column(., var = "gene_id") %>%
+      #                          dplyr::mutate(origin = "Inference") %>%
+      #                          reshape2::melt(., value.name = "value", variable.name= "beta")
+      #beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>%
+      #                          tibble::rownames_to_column(., var = "gene_id") %>%
+      #                          dplyr::mutate(origin = "Actual") %>%
+      #                          reshape2::melt(., value.name = "value", variable.name= "beta")
+      #padj.matrix.long = padj.matrix  %>% data.frame() %>%
+      #                          tibble::rownames_to_column(., var = "gene_id") %>%
+      #                          dplyr::mutate(origin = "padj") %>%
+      #                          reshape2::melt(., value.name = "value", variable.name= "beta")
+      
+      #beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long)
+      #beta.merged.long$beta %>% unique()
+      
+      #beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when(
+      #  str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
+      #  str_detect(beta, "genotypeG\\d+$") ~ "G",
+      #  str_detect(beta, "environmentE\\d+$") ~ "E",
+      #  str_detect(beta, "Intercept$") ~ "Intercept")
+      #)
+      
+      
+      #beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(.,  gene_id + beta + type ~ origin)
+      #beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE"))
+      #beta.merged.long.reshape2$threshold = threshold
+      
+      
+      
+      ## 2 modifY
+      #df_roc_deseq = beta.merged.long.reshape2  %>% mutate(from = "DESEQ2") %>% mutate(n_genotype = n_G)
+  
+    ############## GLM
+      a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], ## 2 modify !!!
+                                              .x, 
+                                              design2simulate$design2simulate, threshold = threshold) )
+
+      ############ save res #####
+      c = do.call(rbind, a)
+      beta.input.long = beta.input %>% data.frame() %>%
+              tibble::rownames_to_column(., var = "gene_id") %>%
+              dplyr::mutate(origin = "Actual") %>%
+              reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
+              dplyr::rename(Actual = "value") %>% 
+              dplyr::select(-origin)
+        
+      df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(pval, method= "fdr")) %>% mutate(threshold = threshold)
+      
+      
+      
+      
+      
+      ## 2 modify
+      df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval)) %>% mutate(from = 'MASS::glm.nb') %>% mutate(n_genotype = n_G)
+      
+      #### merge 
+      
+      #df_roc_deseq = df_roc_deseq %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+      #              mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+      df_roc_glm = df_roc_glm %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+                          mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+      
+      #df_tmp2 = rbind(df_roc_deseq, df_roc_glm)
+      df_tmp2 = df_roc_glm
+      if (exists('df_roc') && is.data.frame(get('df_roc'))){
+      df_roc = rbind(df_roc, df_tmp2)
+      }
+      else{
+      df_roc = df_tmp2
+      }
+
+      
+     
+}
+
+df_roc %>% group_by(from, n_genotype) %>% tally()
+```
+
+```{r}
+df_roc %>% filter(n_genotype == 100)
+
+
+df_roc$n_genotype %>% table()
+library(plotROC)
+p = ggplot(df_roc %>% filter(type %in% c("G","GxE"))  , aes(d = label , m = padj, color = from)) + 
+  geom_roc(n.cuts = 0, labels = F)  + facet_grid(~n_genotype) 
+ 
+p  + style_roc()
+ggsave(filename = "figures/ROCcurve18.png", plot = p + style_roc(), width = 20) 
+
+p = ggplot(calc_auc(p), aes(x = threshold, y = AUC, group=1)) + geom_point() + geom_line() + facet_grid(~type) 
+ggsave(filename = "figures/AUC.png", plot = p, width = 15, height = 10) 
+### boxplot FP
+
+```
+
diff --git a/results/v2/2022-09-22_kmean.Rmd b/results/v2/2022-09-22_kmean.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..ab2e4c3343fd723530910501d5deb472e00f1667
--- /dev/null
+++ b/results/v2/2022-09-22_kmean.Rmd
@@ -0,0 +1,327 @@
+---
+title: "HTRSIM"
+date: '2022-09-22'
+output:   
+  html_document:
+
+css: 
+ - css/template.css
+
+---
+
+
+```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+library(HTRSIM)
+library(furrr)
+#library(tidyverse)
+#library(reshape2)
+#library(kableExtra)
+#library(gridExtra)
+#library(MatrixGenerics)
+```
+
+
+## Public data
+
+```{r}
+## Import & reshape table counts
+fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
+tabl_cnts <- read.table(file = fn, header = TRUE)
+rownames(tabl_cnts) <- tabl_cnts$gene_id
+tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
+tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
+## DESIGN
+fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
+bioDesign <- read.table(file = fn, header = T, sep = ';')
+
+## defining reference
+bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
+bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
+
+
+bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
+tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
+#tabl_cntsA = tabl_cnts
+
+```
+
+```{r }
+## Launch DESEQ2
+dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
+
+## Extract
+
+dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
+#dds.extraction$gene_dispersion
+
+```
+
+## params visualization
+
+
+```{r}
+beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter")
+
+alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
+                          dplyr::rename(., value = ".") %>%
+                          dplyr::mutate(parameter = "dispersion")
+
+dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long)
+
+
+
+p = ggplot(dtf.params_obs, aes(x= value)) +
+      geom_histogram(aes(y=..density..), colour="black", fill="white") + facet_grid(~parameter)+
+        theme(strip.text.x = element_text(size = 13),
+        axis.title = element_text(size = 5),
+        axis.text  = element_text(size = 5))
+
+p
+```
+
+
+
+## gene segemntation
+
+
+```{r}
+dds.extraction$beta 
+kmean.res = kmeans(dds.extraction$beta[,c(2,3)], 3)
+
+pca.obj= prcomp(dds.extraction$beta[,c(2,3)])
+
+
+res = summary(pca.obj)
+library(kableExtra)
+res$importance[,1:2] %>% 
+  kbl(., caption = "Table: Variance explained per Principal Component", position = "bottom", align = 'c') %>% 
+  kable_styling(full_width = F)
+
+
+dtp <- data.frame( 'cluster' = kmean.res$cluster ,
+                  pca.obj$x[,1:2]) # the first two components are selected (NB: you can also select 3 for 3D plotting or 3+)
+
+
+dtp$cluster <- factor(dtp$cluster)
+## Plot
+P1 <- ggplot(data = dtp) + 
+       geom_point(aes(x = PC1, y = PC2, 
+                      col = cluster),
+                      size =3) + 
+       theme_minimal() 
+P1 
+
+ggsave("../results/figures/ACP_kmeans.png", P1)
+
+dtf = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster ) %>%
+      reshape2::melt(. ,id=c("cluster"), value.name = "value", variable.name= "type") 
+dtf$cluster <- factor(dtf$cluster)
+
+
+p = ggplot(dtf) + geom_density(aes(x = value, fill = cluster ), alpha = 0.4) + facet_grid(~ type, scales = "free")
+p
+ggsave("figures/densityBeta_kmeans.png", p)
+
+
+betas = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster)
+betas$cluster = factor(betas$cluster)
+p = ggplot(betas) + geom_point(aes(x= betaG, betaGE, col = cluster))
+p
+ggsave("figures/scattrplot_clustering.png", p)
+
+```
+
+
+```{r}
+dds.extraction.bis = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster)
+
+
+beta.cluster1= dds.extraction$beta[kmean.res$cluster == 1,]
+beta.cluster2= dds.extraction$beta[kmean.res$cluster == 2,]
+beta.cluster3= dds.extraction$beta[kmean.res$cluster == 3,]
+
+
+```
+
+
+
+
+
+
+## all-in
+
+```{r}
+
+beta.input1 = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = beta.cluster1)
+beta.input2  = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = beta.cluster2)
+rownames(beta.input2) =  base::paste("gene", 101:(200), sep = "")
+beta.input3  = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = beta.cluster3)
+rownames(beta.input3) =  base::paste("gene", 201:(300), sep = "")
+
+beta.input = rbind(beta.input1, beta.input2, beta.input3)
+
+design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 15)
+log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
+gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
+mu_ij = getMu_ij(log_qij, 1)
+kij.simulated = getK_ij(mu_ij, gene_dispersion)
+
+kij.simulated %>% dim()
+```
+
+## Deseq 
+
+```{r}
+
+beta.actual.matrix = beta.input
+## 2 modifY
+dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
+
+#############
+# Param
+threshold = 0.5
+############
+listBeta = DESeq2::resultsNames(dds_simu)
+plan(multisession, workers = 4)
+res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x), lfcThreshold = threshold) %>% data.frame() %>% .$padj)
+padj.matrix = do.call("cbind", res)
+  
+  
+  
+dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
+  
+dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
+                                      stringr::str_replace(., "_vs_G0", "") %>%
+                                      stringr::str_replace(., "_vs_E0", "") %>%
+                                      stringr::str_replace_all(., "_", "") %>%
+                                      stringr::str_replace(., "\\.", ":")
+  
+columnOfInterest =  design2simulate$model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "")
+#dds_simu.mcols[,columnOfInterest]
+
+## Get only column of interest
+idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped)
+beta.infered = dds_simu.mcols[,idx_cols]
+
+## homogeneize column names & rownames
+idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames())
+beta.actual.matrix = beta.actual.matrix[,idx_cols]
+colnames(beta.infered) = base::colnames(beta.actual.matrix)
+colnames(padj.matrix) = base::colnames(beta.actual.matrix)
+rownames(padj.matrix) = base::rownames(beta.actual.matrix)
+beta.actual.matrix %>% dim()
+padj.matrix %>% dim()
+beta.infer.long = beta.infered %>% data.frame() %>%
+                          tibble::rownames_to_column(., var = "gene_id") %>%
+                          dplyr::mutate(origin = "Inference") %>%
+                          reshape2::melt(., value.name = "value", variable.name= "beta")
+beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>%
+                          tibble::rownames_to_column(., var = "gene_id") %>%
+                          dplyr::mutate(origin = "Actual") %>%
+                          reshape2::melt(., value.name = "value", variable.name= "beta")
+padj.matrix.long = padj.matrix  %>% data.frame() %>%
+                          tibble::rownames_to_column(., var = "gene_id") %>%
+                          dplyr::mutate(origin = "padj") %>%
+                          reshape2::melt(., value.name = "value", variable.name= "beta")
+
+beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long)
+#beta.merged.long$beta %>% unique()
+
+beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when(
+  str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
+  str_detect(beta, "genotypeG\\d+$") ~ "G",
+  str_detect(beta, "environmentE\\d+$") ~ "E",
+  str_detect(beta, "Intercept$") ~ "Intercept")
+)
+
+
+beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(.,  gene_id + beta + type ~ origin)
+beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE"))
+beta.merged.long.reshape2$threshold = threshold
+
+
+
+## 2 modifY
+df_roc_deseq = beta.merged.long.reshape2  %>% mutate(from = "DESEQ2")
+
+
+df_roc_deseq %>% group_by(gene_id, type) %>% tally()
+df_roc_deseq$gene_id %>% unique() %>% length
+```
+
+## GLM
+
+
+```{r}
+kij.simulated %>% dim()
+############# GLM fitting ###################
+plan(multisession, workers = 4)
+a = 1:300 %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], ## 2 modify !!!
+                                              .x, 
+                                              design2simulate$design2simulate, threshold = threshold) )
+
+#kij.simulated %>% dim()
+############ save res #####
+
+c = do.call(rbind, a)
+beta.input.long = beta.input %>% data.frame() %>%
+        tibble::rownames_to_column(., var = "gene_id") %>%
+        dplyr::mutate(origin = "Actual") %>%
+        reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
+        dplyr::rename(Actual = "value") %>% 
+        dplyr::select(-origin)
+  
+df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(pval, method= "fdr")) %>% mutate(threshold = threshold)
+
+
+
+
+
+## 2 modify
+df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval)) %>% mutate(from = 'MASS::glm.nb')
+
+
+df_roc_glm %>% group_by(gene_id, type) %>% tally()
+df_roc_glm$gene_id %>% unique() %>% length()
+```
+
+
+## merge df
+
+```{r}
+
+df_roc_deseq = df_roc_deseq %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+                    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+df_roc_glm = df_roc_glm %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+                    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+
+df_roc = rbind(df_roc_deseq, df_roc_glm)
+
+
+cluster1 =  base::paste("gene", 1:(100), sep = "")
+cluster2 =  base::paste("gene", 101:(200), sep = "")
+cluster3 =  base::paste("gene", 201:(300), sep = "")
+
+df_roc = df_roc %>% mutate(cluster = ifelse(gene_id %in% cluster1, 1, NA)) %>% mutate(cluster = ifelse(gene_id %in% cluster2, 2, cluster)) %>% mutate(cluster = ifelse(gene_id %in% cluster3, 3, cluster))
+df_roc$cluster = factor(df_roc$cluster)
+```
+
+
+```{r}
+
+df_roc %>% dplyr::filter(cluster == 2 ) %>%  group_by(type, label, from ) %>% tally()
+#%>% filter(cluster == 2 )
+
+library(plotROC)
+p = ggplot(df_roc %>% filter(type != "Intercept") , aes(d = label , m = padj, color = from)) + 
+  geom_roc(n.cuts = 0, labels = F)  + facet_grid(type~cluster) 
+ 
+p  + style_roc()
+ggsave(filename = "figures/ROCcurve3.png", plot = p + style_roc(), width = 15) 
+
+p = ggplot(calc_auc(p), aes(x = threshold, y = AUC, group=1)) + geom_point() + geom_line() + facet_grid(~type) 
+ggsave(filename = "figures/AUC.png", plot = p, width = 15, height = 10) 
+### boxplot FP
+
+```
+
diff --git a/results/v2/2022-11-23_dev2.Rmd b/results/v2/2022-11-23_dev2.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..eafef0c4056680f2ede0a67298ce49af7435a399
--- /dev/null
+++ b/results/v2/2022-11-23_dev2.Rmd
@@ -0,0 +1,418 @@
+---
+title: "HTRSIM"
+date: '2022-11-23'
+output:   
+  html_document:
+
+css: 
+ - css/template.css
+
+---
+
+
+```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+library(HTRSIM)
+library(furrr)
+#library(tidyverse)
+#library(reshape2)
+#library(kableExtra)
+#library(gridExtra)
+#library(MatrixGenerics)
+```
+
+
+## Simulation without ru_rm_5 - all genes
+
+
+```{r}
+## Import & reshape table counts
+fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
+tabl_cnts <- read.table(file = fn, header = TRUE)
+rownames(tabl_cnts) <- tabl_cnts$gene_id
+tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
+tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
+## DESIGN
+fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
+bioDesign <- read.table(file = fn, header = T, sep = ';')
+
+## defining reference
+bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
+bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
+
+
+bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
+tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
+#tabl_cntsA = tabl_cnts
+
+```
+
+
+
+
+```{r }
+## Launch DESEQ2
+dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
+
+## Extract
+
+dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
+#dds.extraction$gene_dispersion
+
+```
+
+
+```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+## Data viz
+p<- ddsExtraction.viz(dds.extraction = dds.extraction)
+p
+ggsave(filename = "figures/inputParams_distrib.png", plot = p, width = 20, height = 12) 
+
+```
+
+## Gene segmentation
+
+```{r}
+
+n_group_gene = 3
+dds.extraction$beta 
+kmean.res = kmeans(dds.extraction$beta[,c(2,3)], n_group_gene)
+
+
+beta.cluster1= dds.extraction$beta[kmean.res$cluster == 1,]
+beta.cluster2= dds.extraction$beta[kmean.res$cluster == 2,]
+beta.cluster3= dds.extraction$beta[kmean.res$cluster == 3,]
+
+
+x = beta.cluster1 %>% as.matrix()
+fit.mvrnorm <- Rfast::mvnorm.mle(x)
+#### BETAG.E #####
+fit.mvrnorm$sigma[,2] = fit.mvrnorm$sigma[,2]
+fit.mvrnorm$sigma[4,] = fit.mvrnorm$sigma[4,]*2
+```
+
+
+
+## BETA
+
+
+```{r}
+
+getGenesDispersions <- function( n_genes, sample_ids, dispersion.vec ,dispUniform_btweenCondition = T ){
+
+   if (dispUniform_btweenCondition == T ) {
+          gene_dispersion.dtf = base::sample(  dispersion.vec , replace = T, size = n_genes) %>% base::data.frame()
+          n_rep =  length(sample_ids)
+          gene_dispersion.dtf = gene_dispersion.dtf[,base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)]
+          rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
+          colnames(gene_dispersion.dtf) = sample_ids
+
+  }
+
+  else {
+
+          replication_table = sample_ids %>% stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table()
+          gene_dispersion.dtf = replication_table %>% purrr::map(., ~sample(  dispersion.vec, replace = T, size = n_genes) ) %>% data.frame()
+          gene_dispersion.dtf = gene_dispersion.dtf[,rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())]
+          colnames(gene_dispersion.dtf) = sample_ids
+          rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
+
+  }
+
+  return(gene_dispersion.dtf %>% as.matrix)
+
+}
+
+
+get_kij <- function(mu_ij.matx, dispersion.matx, n_genes, sample_id_list, idx_replicat){
+  n_sples = length(sample_id_list)
+  alpha_gene = 1/dispersion.matx
+  k_ij = stats::rnbinom(length(mu_ij.matx), size = alpha_gene  , mu = mu_ij.matx) %>% matrix(. , nrow = n_genes, ncol = n_sples )
+  k_ij[is.na(k_ij)] = 0
+  colnames(k_ij) = base::paste(sample_id_list, idx_replicat, sep = '_')
+  rownames(k_ij) = rownames(mu_ij.matrix) 
+  return(k_ij)
+}
+
+######### Utils ###########################
+uniform_replication <- function(maxN, n_samples) return(rep(T, time = maxN) %>% rep(., each = n_samples ) %>% matrix(ncol = n_samples))
+
+random_replication <- function(maxN, n_samples){
+  replicating <- function(maxN) return(sample(x = c(T,F), size = maxN, replace = T))
+  res = purrr::map(1:n_samples, ~replicating(maxN-1))
+  rep_table = do.call(cbind, res)
+  rep_table = rbind(rep(T, times = n_samples), rep_table)
+  return(rep_table)
+}
+#################################
+
+```
+
+```{r}
+######### Settings #############
+n_genes = 4
+n_genotypes = 100
+n_E = 2
+max_n_replicates = 15
+#################################
+
+
+##### Sampling from mvnorm ########
+beta.matrix <- MASS::mvrnorm(n = n_genes*(n_genotypes),
+                                 mu = fit.mvrnorm$mu,
+                                 Sigma = fit.mvrnorm$sigma )
+###################################
+
+####  Some reshaping ##############
+genes_vec = base::paste("gene", 1:n_genes, sep = "") 
+genotype_vec = base::paste("G", 0:(n_genotypes-1), sep = "")
+environment_vec = base::paste("E", 0:(n_E-1), sep = "")
+########################################
+m = c(1,1,0,0,1,1,1,1)
+design.matrix = matrix(data =  m , ncol = 2, byrow = F)
+colnames(design.matrix) = environment_vec
+rownames(design.matrix) = beta.matrix %>% colnames()
+##################################
+
+#### Computing log_qij & mu_ij ###########
+log_qij = beta.matrix %*% design.matrix
+mu_ij = getMu_ij(log_qij, 2)
+
+
+label_genotype = genotype_vec %>% rep(time = n_genes)
+label_gene = rep(genes_vec, each = n_genotypes)
+
+
+##### Preparing data for simulation ########"
+mu_ij.matrix = mu_ij %>%
+      data.frame() %>% 
+      dplyr::mutate(genotype = label_genotype) %>%
+      dplyr::mutate(gene_id = label_gene ) %>%
+      #dplyr::mutate(replicate_idx = 1 ) %>% 
+      reshape2::melt(., id.vars = c("gene_id", 'genotype'),  
+          value.name = "mu_ij", variable.name= "environment") %>% 
+      reshape2::dcast(., gene_id  ~  genotype + environment , value.var = "mu_ij") %>% 
+      column_to_rownames("gene_id") %>% as.matrix()
+
+
+#########################################
+
+#replication.matrix = random_replication(maxN = max_n_replicates, n_samples = n_genotypes*n_E)
+replication.matrix  = uniform_replication(maxN = max_n_replicates, n_samples = n_genotypes*n_E)
+
+########### SIMU 
+sample_ids = colnames(mu_ij.matrix)
+dispersion.matrix = getGenesDispersions(n_genes, sample_ids, dispersion.vec = dds.extraction$gene_dispersion, dispUniform_btweenCondition = T )
+kij.list = purrr::map(.x = 1:max_n_replicates, .f = ~get_kij(mu_ij.matrix[ ,replication.matrix[.x, ]], 
+                                                    dispersion.matrix[ ,replication.matrix[.x, ]],
+                                                    n_genes = n_genes,
+                                                    sample_ids[replication.matrix[.x, ]],
+                                                    .x
+                                                    )  )
+kij.simulated = do.call(cbind, kij.list)
+
+kij.simulated %>% dim() 
+  
+
+```
+## FIT
+
+
+
+```{r}
+## benchmarking lme4 / glmmTMB
+library(lme4)
+library(glmmTMB)
+library(broom.mixed)
+library(mice)
+
+df_2glmmm <- function(y , design_simulation, gene_name){
+  df_gene_i = cbind(design_simulation, y)
+  df_gene_i = df_gene_i %>% mutate(gene_id = gene_name)
+  return(df_gene_i)
+}
+
+
+fit_extraction <- function(fit){
+    fit.res = broom.mixed::tidy(fit)  %>% arrange(term)%>% .$estimate %>% as.numeric()
+    B0 = fit.res[3] 
+    BE = fit.res[4]
+    sd_BGE_E0 = fit.res[5]
+    sd_BGE_E1 = fit.res[7]
+    sd_BG_E0 = fit.res[6] 
+    sd_BG_E1 = fit.res[8] 
+    correlation_genotype =  fit.res[2]
+    correlation_interaction =  fit.res[1]
+    ####################################################################
+    res = list(mean_E0 = B0, mean_E1 = BE,  sd_BGE_E0 = sd_BGE_E0, sd_BGE_E1 = sd_BGE_E1, sd_BG_E0 = sd_BG_E0, sd_BG_E1 = sd_BG_E1, correlation_genotype = correlation_genotype, correlation_interaction = correlation_interaction) %>% data.frame() 
+    return(res)
+}
+
+fit_glmm <- function(data_glmm, i){
+    gene_i = paste('gene', i, sep = "")
+    data_glmm.filter = data_glmm %>% filter(gene_id == gene_i)
+    ###########################  FIT LME4   #################################
+    print("LME4")
+    m.nb <- glmer.nb(y ~ 0 + environment  + ( 1 + environment | genotype/environment )  , data= data_glmm.filter , verbose=F)
+    res_lme4 = fit_extraction(m.nb)
+    res_lme4 = res_lme4 %>% mutate(gene_id = gene_i) %>% mutate(from = "lme4")
+    
+    #####################  FIT GMTMB 1  #########################
+    print("glmTMB 1")
+    m.nb <- glmmTMB::glmmTMB(y ~  0 + environment  + ( 1 + environment | genotype/environment )   , data=data_glmm.filter, family=nbinom1, verbose = F)
+    res_glmTMB1 = fit_extraction(m.nb)
+    res_glmTMB1 = res_glmTMB1 %>% mutate(gene_id = gene_i) %>% mutate(from = "glmTMB nbinom1")
+    
+    #####################  FIT GMTMB 2  #########################
+    print("glmTMB 2")
+    
+    m.nb <- glmmTMB::glmmTMB(y ~ 0 + environment  + ( 1 + environment | genotype/environment )  , data=data_glmm.filter, family=nbinom2, verbose = F)
+    res_glmTMB2 = fit_extraction(m.nb)
+    res_glmTMB2 = res_glmTMB2 %>% mutate(gene_id = gene_i) %>% mutate(from = "glmTMB nbinom2")
+    
+    res = rbind(res_lme4, res_glmTMB1, res_glmTMB2)
+    return(res)
+}
+
+```
+
+```{r}
+####### ESTIMATION ############*
+experimental_design = colnames(kij.simulated) %>% 
+                            str_split(., pattern = '_', simplify = T)  %>%
+                              .[,c(1,2)] %>% data.frame() 
+colnames(experimental_design) = c("genotype", "environment")
+gene_id_list = rownames(kij.simulated)
+f = map(.x = gene_id_list, ~df_2glmmm(kij.simulated[.x,], experimental_design, .x) )
+data_glmm = do.call(rbind, f)
+plan(multisession, workers = 4)
+results = furrr::future_map(.x = 1:n_genes, ~fit_glmm(data_glmm, .x) )
+res = do.call(rbind, results)
+#################################################################
+
+ground_truth = beta.matrix %>% data.frame() %>%
+                  mutate(gene_id = label_gene) %>%
+                  mutate(genotype = label_genotype) %>%
+                  mutate(E0 = beta0 + betaG   ) %>%
+                  mutate(E1 = beta0 + betaE + betaG + betaGE  )
+
+gt_glmm = ground_truth %>% group_by(gene_id) %>% 
+              summarize( mean_E0 = mean(E0),
+                         mean_E1 = mean(E1),
+                         sd_BG_E0 = sd(betaG), ## ok
+                         sd_BG_E1 = sd(betaGE),
+                         sd_BGE_E0 = sd(betaE),                       
+                         sd_BGE_E1 = sd(betaE)) %>%
+              mutate(correlation = NA)
+
+
+######### JOIN #####
+res.long = res %>% reshape2::melt( id = c("gene_id", "from"), value.name = "Inference") #%>% mutate(type = 'Inference')
+input.long = gt_glmm %>% reshape2::melt( id = "gene_id", value.name = "Actual")
+df_merged = merge(res.long, input.long, by = c("gene_id", "variable")) #%>% mutate(n_genotype = n_G)
+df_benchmark = df_merged
+```
+
+```{r}
+
+p = ggplot(df_benchmark) + geom_point(aes(x = Actual , y = Inference, color = from ), alpha = 0.5, size = 4) + 
+                          facet_wrap(~variable, scales = "free", ncol = 2 ) + geom_abline(slope=1, intercept=0) +
+                          scale_color_manual(values = c("#D55E00", "#E69F00", "#0072B2")) 
+p
+ggsave(filename = 'figures/benchmark_identity8.png',p, width = 12)
+
+
+
+df_benchmark %>% filter(variable == 'correlation_interaction') %>% .$Inference %>% mean()
+df_benchmark %>% filter(variable == 'mean_E1') %>% .$Inference %>% mean()
+
+6.5-22+2.34
+
+
+fit.mvrnorm$sigma[2,3] = 0.1
+fit.mvrnorm$sigma[3,2] = 0.1
+
+
+mean(res$correlation)
+mean(res$sd_E0*res$sd_E1*res$correlation)
+```
+
+## GLM classique
+
+```{r}
+
+run.glm2 <- function(data_glm, i, threshold = 0) {
+  rownames(data_glm) <- NULL
+  tryCatch({
+    fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = data_glm, link = log)
+    return(reshapeGlmRes(fit, i, threshold))
+  },
+
+  error = function(cnd){
+    print('noop')
+    return(reshapeGlmRes(NULL, i, threshold  , error_bool = T))
+}
+)
+}
+
+run.glm3 <- function(data_glm, i, threshold = 0) {
+  rownames(data_glmm) <- NULL
+  #data_glm = data_glm %>% select(-gene_id)
+  #print(data_glm)
+  fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = data_glm, link = log)
+  reshapeGlmRes(fit, i, threshold)
+}
+
+############# GLM fitting ###################
+data_glm  = data_glmm
+rownames(data_glm) <- NULL
+plan(multisession, workers = 4)
+a = gene_id_list %>% furrr::future_map(.x = ., ~run.glm3(data_glm %>% filter(gene_id == .x), 
+                                              .x, 
+                                              threshold = 0.5) )
+
+
+
+.############ save res #####
+c = do.call(rbind, a)
+
+c %>% filter(beta == 'genotypeG5.environmentE1')
+```
+
+```{r}
+ground_truth = beta.matrix %>% data.frame() %>%
+                  mutate(gene_id = label_gene) %>%
+                  mutate(genotype = label_genotype) %>%
+                  reshape2::melt(., id = c("gene_id", 'genotype'), value.name = "Actual", variable.name = "beta") 
+
+
+part1  = ground_truth %>% filter(beta %in% c("beta0", 'betaE')) %>%
+                  group_by(gene_id, beta) %>%
+                  summarize( Actual = mean(Actual)) %>%
+                  mutate(beta = ifelse(beta == "beta0", "Intercept", 'betaE')) 
+                  
+part2 = ground_truth %>% filter(beta == "betaG") %>%
+                          mutate(beta = base::paste("genotype", genotype, sep = "")) %>% select(-genotype)
+
+part3 = ground_truth %>% filter(beta == "betaGE") %>%
+                          mutate(beta = base::paste("genotype", genotype, ".environmentE1", sep = "")) %>% select(-genotype)
+
+
+input = rbind(part1,part2, part3) %>% ungroup()
+input %>% dim()
+df_merged = merge(c, input, by = c("gene_id", "beta")) #%>% mutate(n_genotype = n_G)
+
+```
+
+```{r}
+
+
+
+p = ggplot(df_merged) + geom_point(aes(x = Actual * log(2) , y = Inference ), alpha = 0.5, size = 4) + 
+                          facet_grid(~type, scales = "free" ) + geom_abline(slope=1, intercept=0) +
+                          scale_color_manual(values = c("#D55E00", "#E69F00", "#0072B2")) 
+p
+
+
+```
+
+
diff --git a/results/v2/glmm_benchmark.Rmd b/results/v2/glmm_benchmark.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..11978f9a920216806de8d1a2247ef014cd6d302d
--- /dev/null
+++ b/results/v2/glmm_benchmark.Rmd
@@ -0,0 +1,421 @@
+---
+title: "HTRSIM"
+date: '2022-07-26'
+output:   
+  html_document:
+
+css: 
+ - css/template.css
+
+---
+
+
+```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+library(HTRSIM)
+library(furrr)
+#library(tidyverse)
+#library(reshape2)
+#library(kableExtra)
+#library(gridExtra)
+#library(MatrixGenerics)
+```
+
+
+## Simulation without ru_rm_5 - all genes
+
+
+```{r}
+## Import & reshape table counts
+fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
+tabl_cnts <- read.table(file = fn, header = TRUE)
+rownames(tabl_cnts) <- tabl_cnts$gene_id
+tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
+tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
+## DESIGN
+fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
+bioDesign <- read.table(file = fn, header = T, sep = ';')
+
+## defining reference
+bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
+bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
+
+
+bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
+tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
+#tabl_cntsA = tabl_cnts
+
+```
+
+
+
+
+```{r }
+## Launch DESEQ2
+dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
+
+## Extract
+
+dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
+#dds.extraction$gene_dispersion
+
+```
+
+
+```{r message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+## Data viz
+p<- ddsExtraction.viz(dds.extraction = dds.extraction)
+p
+ggsave(filename = "figures/inputParams_distrib.png", plot = p, width = 20, height = 12) 
+
+```
+
+## Gene segmentation
+
+```{r}
+get_beta_gene_i <- function(fit.mvnorm, n_G, n_E){
+  beta.matrix.tmp <- MASS::mvrnorm(n = (n_E-1)*(n_G-1),
+                                 mu = fit.mvnorm$mu,
+                                 Sigma = fit.mvnorm$sigma )
+  
+  beta0 = beta.matrix.tmp[1,1] %>% unname()
+  betaG = beta.matrix.tmp[1:(n_G-1),2]
+  betaE = beta.matrix.tmp[1:(n_E-1),3]
+  betaGE = beta.matrix.tmp[,4]
+  
+  ### name
+  betaG.colnames = base::paste("genotype", "G", 1:(n_G-1), sep = "")
+  betaE.colnames = base::paste("environment", "E", 1:(n_E-1), sep = "")
+  betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":"))
+  matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames)
+  
+  beta_gene_i <- c( beta0, betaG, betaE, betaGE)
+  names(beta_gene_i) = matrix.colnames
+
+  return(beta_gene_i)
+}
+
+
+n_g = 1
+n_group_gene = 3
+n_E = 2
+n_G = 400
+
+
+dds.extraction$beta 
+kmean.res = kmeans(dds.extraction$beta[,c(2,3)], n_group_gene)
+
+
+beta.cluster1= dds.extraction$beta[kmean.res$cluster == 1,]
+beta.cluster2= dds.extraction$beta[kmean.res$cluster == 2,]
+beta.cluster3= dds.extraction$beta[kmean.res$cluster == 3,]
+
+
+x = beta.cluster3 %>% as.matrix()
+fit.mvrnorm <- Rfast::mvnorm.mle(x)
+fit.mvrnorm$sigma[c(2,4),c(2,4)] = fit.mvrnorm$sigma[c(2,4),c(2,4)]*3
+#diag(fit.mvrnorm$sigma) <- diag(fit.mvrnorm$sigma) 
+
+#### BETAG.E #####
+fit.mvrnorm$mu[4] = 0
+fit.mvrnorm$sigma[,4] = 0
+fit.mvrnorm$sigma[4,] = 0
+#################
+#fit.mvrnorm$mu[0] = 30
+
+#### BETA0 #####
+fit.mvrnorm$mu[1] = 0
+fit.mvrnorm$sigma[,1] = 0
+fit.mvrnorm$sigma[1,] = 0
+#################
+
+#### BETAE #####
+fit.mvrnorm$mu[3] = 0
+fit.mvrnorm$sigma[,3] = 0
+fit.mvrnorm$sigma[3,] = 0
+
+#fit.mvrnorm$mu[2] = 8
+#fit.mvrnorm$sigma[2,2]= 6
+x <- NULL
+
+
+n_gene = n_g
+a = purrr::map(.x = 1:n_gene, ~ get_beta_gene_i(fit.mvrnorm, n_G, n_E)) 
+beta.matrix = do.call(rbind, a) 
+rownames(beta.matrix) = base::paste("gene", 1:(n_g), sep = "")
+
+
+
+x = beta.matrix %>% data.frame() %>% #%>% select(!starts_with("environment")) %>%
+  rownames_to_column('gene_id') %>% reshape2::melt(., id = "gene_id") %>% dplyr::mutate(type = dplyr::case_when(
+      str_detect(variable, "genotypeG\\d+\\.environment") ~ "GxE",
+      str_detect(variable, "genotypeG\\d+$") ~ "G",
+      str_detect(variable, "environmentE\\d+$") ~ "E",
+      str_detect(variable, "Intercept$") ~ "Intercept")) %>%  reshape2::dcast(., gene_id  ~ type, value.var = "value", fun.aggregate = list)
+
+
+g = x$G
+names(g) <- x$gene_id
+df_tmp2 = data.frame(g) %>% mutate(type = 'betaG') %>% reshape2::melt( id = "type", value.name = "betaG")
+g = x$GxE
+
+names(g) <- x$gene_id
+df_tmp3 = data.frame(g) %>% mutate(type = 'GxE') %>% reshape2::melt( id = "type", value.name = "betaGxE")
+
+
+df = cbind(df_tmp2, df_tmp3) %>% dplyr::select(c(betaGxE, betaG)) %>% mutate(from = 'Simulated') %>% mutate(cluster = 3)
+#df_simu = df
+df_simu = rbind(df_simu, df)
+
+
+df_simu$cluster <- factor(df_simu$cluster)
+ggplot(df_simu) + geom_point(aes(x = betaG, y = betaGxE, col = cluster), alpha = 0.2)
+
+```
+
+```{r}
+
+### Visualisation
+dtf = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster ) %>%
+      reshape2::melt(. ,id=c("cluster"), value.name = "value", variable.name= "type") 
+dtf$cluster <- factor(dtf$cluster)
+
+
+p = ggplot(dtf) + geom_density(aes(x = value, fill = cluster ), alpha = 0.4) + facet_grid(~ type, scales = "free")
+
+p
+ggsave("figures/densityBeta_kmeans.png", p)
+
+
+betas = dds.extraction$beta %>% mutate(cluster = kmean.res$cluster) %>% mutate(from = "Actual")
+betas$cluster = factor(betas$cluster)
+df2 <- rbind(betas %>% dplyr::select(betaG, betaGE, from, cluster), df_simu %>% rename(betaGE = "betaGxE"))
+p = ggplot(df2) + geom_point(aes(x= betaG, betaGE, col = cluster), alpha = 0.1) + facet_grid(~from)
+
+p
+ggsave("figures/scatterplot.png", p, width = 10)
+
+```
+
+## Build simulated counts 
+
+```{r}
+df_2glmmm <- function(y , design2simulate, i){
+  genotype = design2simulate$design2simulate$genotype
+  environment = design2simulate$design2simulate$environment
+  #message("Fitting model ...")
+
+  df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame() %>% mutate(inter = paste(environment, genotype, sep = '_'))
+  df_gene_i$inter <- factor( df_gene_i$inter )
+  rownames(df_gene_i) <- NULL
+  df_gene_i = df_gene_i %>% mutate(gene_id = paste("gene", i, sep = ""))
+  return(df_gene_i)
+}
+
+## benchmarking lme4 / glmmTMB
+library(lme4)
+library(glmmTMB)
+library(broom.mixed)
+library(mice)
+
+
+list_nb_G = c( 10, 100, 400, 700, 1000)
+n_attempt = 1:6
+remove(res_benchmark)
+
+n_G = 10
+for (n_G in list_nb_G){
+      ################### Simulation counts ###############
+      n_gene = 4
+      a = purrr::map(.x = 1:n_gene, ~ get_beta_gene_i(fit.mvrnorm, n_G, n_E)) 
+      beta.matrix = do.call(rbind, a) 
+      beta.input = beta.matrix
+      n_genes = n_g
+      design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 10)
+      
+      log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
+      gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
+      mu_ij = getMu_ij(log_qij, 1)
+      kij.simulated = getK_ij(mu_ij, gene_dispersion)
+      #######################################################
+      f = map(.x = 1:n_genes, ~df_2glmmm(kij.simulated[.x,], design2simulate, .x) )
+      data_glmm = do.call(rbind, f)
+      
+      
+      ## benchmark lme4
+      print('lme4')
+      start_time <- Sys.time()
+      m.nb <- glmer.nb(y ~ environment +  ( 1 | genotype ) , data=data_glmm, verbose=F)
+      end_time <- Sys.time()
+      time_process = difftime(end_time, start_time, units = "secs") %>% as.numeric()
+      res_lme4 = list(package = "lme4", n_genotype = n_G, duration = time_process, attempt = i) %>% data.frame()
+      
+      
+      print('glmm')
+      start_time <- Sys.time()
+      m.nb <- glmmTMB::glmmTMB(y ~ environment +  ( 1 | genotype ) , data=data_glmm, family=nbinom1, verbose = F)
+      end_time <- Sys.time()
+      time_process = difftime(end_time, start_time, units = "secs") %>% as.numeric()
+      res_glmmTMB = list(package = "glmmTMB nbinom1", n_genotype = n_G, duration = time_process, attempt = i) %>% data.frame()
+      
+      ## bencbmark glmTMB
+      print('glmm')
+      start_time <- Sys.time()
+      m.nb <- glmmTMB::glmmTMB(y ~ environment +  ( 1 | genotype ) , data=data_glmm, family=nbinom2, verbose = F)
+      end_time <- Sys.time()
+      time_process = difftime(end_time, start_time, units = "secs") %>% as.numeric()
+      res_glmmTMB2 = list(package = "glmmTMB nbinom2", n_genotype = n_G, duration = time_process, attempt = i) %>% data.frame()
+      
+      tmp = rbind( res_lme4, res_glmmTMB, res_glmmTMB2 )
+      
+      if (exists("res_benchmark")) res_benchmark = rbind( res_benchmark, tmp )
+      else res_benchmark = tmp
+  
+}
+
+
+```
+
+```{r}
+
+p= ggplot(res_benchmark) + geom_violin(aes(x = duration, y = package, fill = package )) + scale_fill_manual(values = c("#D55E00", "#E69F00", "#0072B2"))
+
+ggsave(filename = 'figures/benchmark_violin_glmmm.png',p, height = 6)
+
+
+res_benchmark2 = res_benchmark
+res_benchmark2$n_genotype = factor(res_benchmark2$n_genotype)
+p = ggplot(res_benchmark2, aes(x = n_genotype, y = duration, color = package )) + geom_boxplot() + geom_point(position = position_jitterdodge()) + scale_y_log10() + ylab('duration (sec)') + scale_color_manual(values = c("#D55E00", "#E69F00", "#0072B2"))
+
+p
+
+ggsave(filename = 'figures/benchmark_glmm.png',p, height = 6)
+
+```
+
+
+```{r}
+
+fit_extraction <- function(fit){
+    fit.res = broom.mixed::tidy(fit)  %>% arrange(term)%>% .$estimate %>% as.numeric()
+    B0 = fit.res[2] 
+    BE = fit.res[3]
+    sd_BGE = fit.res[5]
+    correlation =  fit.res[1]
+    sd_BG = fit.res[4] 
+    ####################################################################
+    res = list(B0 = B0, BE = BE,  sd_BG = sd_BG, sd_BGE = sd_BGE, correlation = correlation) %>% data.frame() 
+    return(res)
+}
+
+fit_glmm <- function(data_glmm, i){
+    gene_i = paste('gene', i, sep = "")
+    data_glmm.filter = data_glmm %>% filter(gene_id == gene_i)
+    ###########################  FIT LME4   #################################
+    print("LME4")
+    m.nb <- glmer.nb(y ~ 0 + environment  + ( 1 + environment | genotype )  , data= data_glmm.filter , verbose=F)
+    res_lme4 = fit_extraction(m.nb)
+    res_lme4 = res_lme4 %>% mutate(gene_id = gene_i) %>% mutate(from = "lme4")
+    
+    #####################  FIT GMTMB 1  #########################
+    print("glmTMB 1")
+    m.nb <- glmmTMB::glmmTMB(y ~  0 + environment  + ( 1 + environment | genotype )   , data=data_glmm.filter, family=nbinom1, verbose = F)
+    res_glmTMB1 = fit_extraction(m.nb)
+    res_glmTMB1 = res_glmTMB1 %>% mutate(gene_id = gene_i) %>% mutate(from = "glmTMB nbinom1")
+    
+    #####################  FIT GMTMB 2  #########################
+    print("glmTMB 2")
+    
+    m.nb <- glmmTMB::glmmTMB(y ~ 0 + environment  + ( 1 + environment | genotype )  , data=data_glmm.filter, family=nbinom2, verbose = F)
+    res_glmTMB2 = fit_extraction(m.nb)
+    res_glmTMB2 = res_glmTMB2 %>% mutate(gene_id = gene_i) %>% mutate(from = "glmTMB nbinom2")
+    
+    res = rbind(res_lme4, res_glmTMB1, res_glmTMB2)
+    return(res)
+}
+
+
+######################## 
+##        SETUP
+########################
+n_gene = 4
+list_nb_G = c(200)
+remove(df_benchmark)
+#########################
+for (n_G in list_nb_G){
+      ################### Simulation counts ###############
+      a = purrr::map(.x = 1:n_gene, ~ get_beta_gene_i(fit.mvrnorm, n_G, n_E)) 
+      beta.matrix = do.call(rbind, a) 
+      beta.input = beta.matrix
+      design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 8)
+      log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
+      gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_gene, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
+      mu_ij = getMu_ij(log_qij, 1)
+      kij.simulated = getK_ij(mu_ij, gene_dispersion)
+      
+      
+      ######## INPUT #######
+      beta0.input = beta.input[,1] 
+      mean_betaG = beta.input[,2:n_G] %>% rowMeans() 
+      mean_betaE = beta.input[,(n_G+1):(n_G+n_E-1)] 
+      mean_betaGE = beta.input[,(n_G+n_E):dim(beta.input)[2]] %>% rowMeans() 
+      beta0 = beta0.input + mean_betaG
+      #sd(beta0)
+      betaE = beta0.input + mean_betaE + mean_betaGE + mean_betaG
+      #sd_betaG = sd(beta0)
+      #sd_betaGE = sd(betaE)
+      sd_betaG = apply(beta.input[,2:n_G], 1, sd) 
+      sd_betaGE = apply(beta.input[,(n_G+n_E):dim(beta.input)[2]], 1, sd) 
+      
+      sd_betaG = apply(beta.input, 1, sd) 
+      sd_betaGE = apply(beta.input[,2:dim(beta.input)[2]], 1, sd) 
+      input = list(B0 = beta0, BE = betaE, sd_BG = sd_betaG, sd_BGE = sd_betaGE, gene_id = paste('gene', 1:n_gene, sep = ''), correlation = NA) %>% data.frame()
+      #######################################################
+     
+      
+      
+      ####### ESTIMATION ############*
+      f = map(.x = 1:n_gene, ~df_2glmmm(kij.simulated[.x,], design2simulate, .x) )
+      data_glmm = do.call(rbind, f)
+      plan(multisession, workers = 4)
+      results = furrr::future_map(.x = 1:n_gene, ~fit_glmm(data_glmm, .x) )
+      res = do.call(rbind, results)
+      #################################################################
+      
+      ########## JOIN #####
+      res.long = res %>% reshape2::melt( id = c("gene_id", "from"), value.name = "Inference") #%>% mutate(type = 'Inference')
+      input.long = input %>% reshape2::melt( id = "gene_id", value.name = "Actual")
+      df_merged = merge(res.long, input.long, by = c("gene_id", "variable")) %>% mutate(n_genotype = n_G)
+
+      if (exists("df_benchmark")) df_benchmark = rbind( df_benchmark, df_merged )
+      else df_benchmark = df_merged
+  
+}
+
+```
+
+```{r}
+
+p = ggplot(df_benchmark %>% filter(variable != 'correlation')) + geom_point(aes(x = Actual * log(2), y = Inference, color = from ), alpha = 0.5, size = 4) + 
+                          facet_wrap(n_genotype~variable, ncol = 4, scales = "free_x") + geom_abline(slope=1, intercept=0) +
+                          scale_color_manual(values = c("#D55E00", "#E69F00", "#0072B2")) 
+p
+ggsave(filename = 'figures/benchmark_identity8.png',p, width = 12)
+
+
+df_benchmark %>% filter(variable == 'sd_BGE') %>% .$Inference %>% mean()
+df_benchmark %>% filter(variable == 'sd_BG') %>% .$Inference %>% mean()
+
+df_benchmark %>% filter(variable == 'correlation') %>% .$Inference %>% mean()
+
+df_benchmark %>% filter(variable == 'correlation') %>% .$Inference %>% mean()*(df_benchmark %>% filter(variable == 'sd_BG') %>% .$Inference %>% mean() * df_benchmark %>% filter(variable == 'sd_BGE') %>% .$Inference %>% mean())
+
+fit.mvrnorm$sigma[4,2] = 0.0001
+fit.mvrnorm$sigma[2,4] = 0.0001
+
+fit.mvrnorm$mu[4] = -2
+```
+
+
+
+
+
diff --git a/results/v2/rocCuves.Rmd b/results/v2/rocCuves.Rmd
new file mode 100644
index 0000000000000000000000000000000000000000..af79e1ee367e0269343ef34fd52716e03cd1c565
--- /dev/null
+++ b/results/v2/rocCuves.Rmd
@@ -0,0 +1,294 @@
+---
+title: "HTRSIM"
+date: '2022-09-22'
+output:   
+  html_document:
+
+css: 
+ - css/template.css
+
+---
+
+
+```{r setup,  message=FALSE, warning=FALSE, include=TRUE, results="hide"}
+library(HTRSIM)
+library(furrr)
+#install.packages("tidyverse")
+#library(reshape2)
+#library(kableExtra)
+#library(gridExtra)
+#library(MatrixGenerics)
+```
+
+
+## Public data
+
+```{r}
+## Import & reshape table counts
+fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRSIM")
+tabl_cnts <- read.table(file = fn, header = TRUE)
+rownames(tabl_cnts) <- tabl_cnts$gene_id
+tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id)##suppr colonne GeneID
+tabl_cnts = tabl_cnts[order(tabl_cnts %>% rownames()),]
+## DESIGN
+fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRSIM")
+bioDesign <- read.table(file = fn, header = T, sep = ';')
+
+## defining reference
+bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11"))
+bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated"))
+
+
+bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) 
+tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) 
+#tabl_cntsA = tabl_cnts
+
+```
+
+```{r }
+## Launch DESEQ2
+dds = run.deseq(tabl_cnts, bioDesign = bioDesign)
+
+## Extract
+
+dds.extraction = HTRSIM::extractDistributionFromDDS(dds_obj = dds)
+#dds.extraction$gene_dispersion
+
+```
+
+## params visualization
+
+
+```{r}
+beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter")
+
+alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>%
+                          dplyr::rename(., value = ".") %>%
+                          dplyr::mutate(parameter = "dispersion")
+
+dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long)
+
+
+
+p = ggplot(dtf.params_obs, aes(x= value)) +
+      geom_histogram(aes(y=..density..), colour="black", fill="white") + facet_grid(~parameter)+
+        theme(strip.text.x = element_text(size = 13),
+        axis.title = element_text(size = 5),
+        axis.text  = element_text(size = 5))
+
+p
+```
+
+
+## Simu
+
+```{r}
+#### params #####
+n_G = 30
+n_E = 2
+n_genes = 100
+
+
+##################
+beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
+
+
+design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 15)
+#rowSums(beta.input) %>% which.max()
+#rowSums(beta.input) %>% max()
+log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
+log_qij %>% as.numeric() %>% .[log_qij %>% which.max()]
+
+
+gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, dispersion.vec = dds.extraction$gene_dispersion, model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
+
+
+#2* 2^log_qij
+mu_ij = getMu_ij(log_qij, 1)
+#max(mu_ij)
+kij.simulated = getK_ij(mu_ij, gene_dispersion)
+#max(kij.simulated)
+
+#kij.simulated = kij.simulated %>% data.frame() %>% filter_all(all_vars(. < 1000000)) 
+
+
+```
+
+
+## DESEQ2
+
+```{r}
+
+beta.actual.matrix = beta.input
+dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
+
+#############
+# Param
+threshold = 0.07
+############
+listBeta = DESeq2::resultsNames(dds_simu)
+plan(multisession, workers = 4)
+res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x), lfcThreshold = threshold) %>% data.frame() %>% .$padj)
+padj.matrix = do.call("cbind", res)
+  
+  
+  
+  dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
+  
+  dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
+                                      stringr::str_replace(., "_vs_G0", "") %>%
+                                      stringr::str_replace(., "_vs_E0", "") %>%
+                                      stringr::str_replace_all(., "_", "") %>%
+                                      stringr::str_replace(., "\\.", ":")
+  
+  columnOfInterest =  design2simulate$model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "")
+  #dds_simu.mcols[,columnOfInterest]
+  
+  ## Get only column of interest
+  idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped)
+  beta.infered = dds_simu.mcols[,idx_cols]
+  
+  ## homogeneize column names & rownames
+  idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames())
+  beta.actual.matrix = beta.actual.matrix[,idx_cols]
+  colnames(beta.infered) = base::colnames(beta.actual.matrix)
+  colnames(padj.matrix) = base::colnames(beta.actual.matrix)
+  rownames(padj.matrix) = base::rownames(beta.actual.matrix)
+  beta.actual.matrix %>% dim()
+  padj.matrix %>% dim()
+  beta.infer.long = beta.infered %>% data.frame() %>%
+                            tibble::rownames_to_column(., var = "gene_id") %>%
+                            dplyr::mutate(origin = "Inference") %>%
+                            reshape2::melt(., value.name = "value", variable.name= "beta")
+  beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>%
+                            tibble::rownames_to_column(., var = "gene_id") %>%
+                            dplyr::mutate(origin = "Actual") %>%
+                            reshape2::melt(., value.name = "value", variable.name= "beta")
+  padj.matrix.long = padj.matrix  %>% data.frame() %>%
+                            tibble::rownames_to_column(., var = "gene_id") %>%
+                            dplyr::mutate(origin = "padj") %>%
+                            reshape2::melt(., value.name = "value", variable.name= "beta")
+  
+  beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long)
+  #beta.merged.long$beta %>% unique()
+  
+  beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when(
+    str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
+    str_detect(beta, "genotypeG\\d+$") ~ "G",
+    str_detect(beta, "environmentE\\d+$") ~ "E",
+    str_detect(beta, "Intercept$") ~ "Intercept")
+  )
+  
+  
+  beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(.,  gene_id + beta + type ~ origin)
+  beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE"))
+  beta.merged.long.reshape2$threshold = threshold
+```
+
+```{r}
+  
+df_comparison = beta.merged.long.reshape2  
+
+
+  
+df_comparison = df_comparison  %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+                    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+
+df_comparison$label <- factor(df_comparison$label)
+df_comparison$threshold <- factor(df_comparison$threshold)
+
+
+  
+```
+
+
+```{r}
+## ROC curve dtf
+df_comparison$threshold = factor(df_comparison$threshold)
+df_roc_deseq = df_comparison
+
+```
+
+## GLM
+
+
+```{r}
+design2simulate$design2simulate %>% dim()
+kij.simulated %>% dim()
+
+############# GLM fitting ###################
+plan(multisession, workers = 4)
+a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], 
+                                              .x, 
+                                              design2simulate$design2simulate, threshold = threshold) )
+
+#kij.simulated %>% dim()
+############ save res #####
+c = do.call(rbind, a)
+beta.input.long = beta.input %>% data.frame() %>%
+        tibble::rownames_to_column(., var = "gene_id") %>%
+        dplyr::mutate(origin = "Actual") %>%
+        reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
+        dplyr::rename(Actual = "value") %>% 
+        dplyr::select(-origin)
+  
+df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(p.val, method= "fdr")) %>% mutate(threshold = threshold)
+
+
+df_tmp = df_tmp  %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+                    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+
+df_tmp$label <- factor(df_tmp$label)
+df_tmp$threshold <- factor(df_tmp$threshold)
+
+```
+
+
+## merge DESEQ GLM
+
+```{r}
+df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval, -p.val))
+df_roc_glm = df_roc_glm %>% mutate(from = 'MASS::glm.nb')
+
+df_roc_deseq = df_roc_deseq %>% mutate(from = "DESEQ2") 
+
+
+df_roc = rbind(df_roc_deseq, df_roc_glm)
+
+```
+
+
+
+
+
+```{r}
+
+library(plotROC)
+p = ggplot(df_roc %>% filter(type != "Intercept") , aes(d = label , m = padj, color = from)) + 
+  geom_roc(n.cuts = 0, labels = F)  + facet_grid(~type) 
+ 
+p  + style_roc()
+ggsave(filename = "figures/ROCcurve2.png", plot = p + style_roc(), width = 15, height = 8) 
+
+p = ggplot(calc_auc(p), aes(x = threshold, y = AUC, group=1)) + geom_point() + geom_line() + facet_grid(~type) 
+ggsave(filename = "figures/AUC.png", plot = p, width = 15, height = 10) 
+### boxplot FP
+
+df_comparison2 = df_comparison %>% 
+      filter(type != "Intercept") %>% 
+      group_by(label, prediction, type, threshold) %>% 
+      tally() %>% 
+      ungroup() %>% 
+      group_by(label, type, threshold) %>% 
+      mutate(tt = sum(n)) %>% 
+      mutate(proportion = n/tt) %>%
+      mutate(predict = ifelse(label == prediction, "true", "false"))
+
+
+df_comparison2$threshold = factor(df_comparison2$threshold)
+p = ggplot(df_comparison2) + geom_bar(aes(x = threshold , y = proportion, fill = predict),stat="identity") + facet_grid(type~label)
+p
+p = ggsave(filename = "figures/boxplotFP.png", plot = p, width = 15, height = 8) 
+
+```
+
diff --git a/results/v2/roccurvesBISBIS.R b/results/v2/roccurvesBISBIS.R
new file mode 100644
index 0000000000000000000000000000000000000000..3d7053e5a205ec5ef2688a05625d352919141eda
--- /dev/null
+++ b/results/v2/roccurvesBISBIS.R
@@ -0,0 +1,186 @@
+## Simu
+
+
+n_genes = 80
+n_rep_list = c( 2, 3, 5, 15, 25 )
+threshold = 0.5
+n_E = 2
+n_G = 100
+
+
+rm(df_roc)
+
+
+
+
+beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
+
+for (n_rep in n_rep_list){
+design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = n_rep)
+log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
+gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes,
+                                                   n_genotypes = n_G, n_environments = n_E,
+                                                   dispersion.vec = dds.extraction$gene_dispersion,
+                                                   model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
+mu_ij = getMu_ij(log_qij, 1)
+kij.simulated = getK_ij(mu_ij, gene_dispersion)
+
+
+
+######################### DESEQ
+beta.actual.matrix = beta.input
+## 2 modifY
+#dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
+
+#############
+# Param
+threshold = 0.5
+
+############## GLM
+a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], ## 2 modify !!!
+                                                     .x,
+                                                     design2simulate$design2simulate, threshold = threshold) )
+
+############ save res #####
+c = do.call(rbind, a)
+beta.input.long = beta.input %>% data.frame() %>%
+  tibble::rownames_to_column(., var = "gene_id") %>%
+  dplyr::mutate(origin = "Actual") %>%
+  reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
+  dplyr::rename(Actual = "value") %>%
+  dplyr::select(-origin)
+
+df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(pval, method= "fdr")) %>% mutate(threshold = threshold)
+
+
+
+
+
+## 2 modify
+df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval)) %>% mutate(from = 'MASS::glm.nb') %>% mutate(n_rep = n_rep)
+
+#### merge
+
+#df_roc_deseq = df_roc_deseq %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+#              mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+df_roc_glm = df_roc_glm %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+  mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+
+#df_tmp2 = rbind(df_roc_deseq, df_roc_glm)
+df_tmp2 = df_roc_glm
+if (exists('df_roc') && is.data.frame(get('df_roc'))){
+  df_roc = rbind(df_roc, df_tmp2)
+}
+else{
+  df_roc = df_tmp2
+}
+
+
+
+}
+
+df_roc %>% filter(type %in% c("G","GxE"))%>% group_by(from, n_rep) %>% tally()
+
+df_roc$n_rep <- factor(df_roc$n_rep)
+df_roc_REP = df_roc
+library(plotROC)
+p = ggplot(df_roc %>% filter(type %in% c("G","GxE"))  , aes(d = label , m = padj, color = n_rep)) +
+  geom_roc(n.cuts = 0, labels = F)
+
+
+p2  = p + style_roc() + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D", "#696969" ))
+ggsave(filename = "../../../results/v2/figures/ROCcurve9.png", plot = p2, height = 6, width = 8)
+
+######################
+
+n_genes = 80
+sizeFac_vector = c(0.1, 1, 5, 10, 20 )
+threshold = 0.5
+n_E = 2
+n_G = 20
+
+
+rm(df_roc)
+
+
+
+
+beta.input = getBetaforSimulation(n_genes = n_genes, n_genotypes = n_G, n_environments = n_E, beta.dtf = dds.extraction$beta)
+
+for (sj in sizeFac_vector ){
+  print(sj)
+
+  design2simulate = buildDesign2simulate(n_genotype = n_G, n_environment = n_E, n_replicate = 5)
+  log_qij = getLog_qij(model_matrix = design2simulate$model_matrix, beta.matrix.input = beta.input)
+  gene_dispersion = getGenesDispersionsForSimulation(n_genes = n_genes,
+                                                     n_genotypes = n_G, n_environments = n_E,
+                                                     dispersion.vec = dds.extraction$gene_dispersion,
+                                                     model_matrix = design2simulate$model_matrix, dispUniform_btweenCondition = T)
+  mu_ij = getMu_ij(log_qij, sj)
+  kij.simulated = getK_ij(mu_ij, gene_dispersion)
+
+
+  ######################### DESEQ
+  beta.actual.matrix = beta.input
+  ## 2 modifY
+  #dds_simu = run.deseq(tabl_cnts = kij.simulated , bioDesign = design2simulate$design2simulate )
+
+  #############
+  # Param
+  threshold = 0.5
+
+  ############## GLM
+  a = 1:n_genes %>% furrr::future_map(.x = ., ~run.glm(kij.simulated[.x,], ## 2 modify !!!
+                                                       .x,
+                                                       design2simulate$design2simulate, threshold = threshold) )
+
+  ############ save res #####
+  c = do.call(rbind, a)
+  beta.input.long = beta.input %>% data.frame() %>%
+    tibble::rownames_to_column(., var = "gene_id") %>%
+    dplyr::mutate(origin = "Actual") %>%
+    reshape2::melt(.,  value.name = "value", variable.name= "beta") %>%
+    dplyr::rename(Actual = "value") %>%
+    dplyr::select(-origin)
+
+  df_tmp = merge(c, beta.input.long)  %>% mutate(padj = p.adjust(pval, method= "fdr")) %>% mutate(threshold = threshold)
+
+
+
+
+
+  ## 2 modify
+  mean_readsPerSample = kij.simulated %>% colSums() %>% mean() %>% round() %>% format(., scientific = FALSE, big.mark = ',' )
+  df_roc_glm = df_tmp  %>% dplyr::select(c(-dispersion, -pval)) %>% mutate(from = 'MASS::glm.nb') %>% mutate(Depth_seq = mean_readsPerSample)
+
+  #### merge
+
+  #df_roc_deseq = df_roc_deseq %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+  #              mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+  df_roc_glm = df_roc_glm %>% mutate(label = ifelse(abs(Actual) > threshold, "DE", "nonDE" )) %>%
+    mutate( prediction = ifelse(padj < 0.05, "DE", "nonDE"  ))
+
+  #df_tmp2 = rbind(df_roc_deseq, df_roc_glm)
+  df_tmp2 = df_roc_glm
+  if (exists('df_roc') && is.data.frame(get('df_roc'))){
+    df_roc = rbind(df_roc, df_tmp2)
+  }
+  else{
+    df_roc = df_tmp2
+  }
+
+
+
+}
+
+df_roc$Depth_seq %>% unique()
+df_roc$Depth_seq <- factor(df_roc$Depth_seq, levels = c("24,546", "250,278" , "1,223,084", "2,470,148","4,995,472"))
+#df_roc_REP = df_roc
+library(plotROC)
+p = ggplot(df_roc %>% filter(type %in% c("G","GxE"))  , aes(d = label , m = padj, color = Depth_seq)) +
+  geom_roc(n.cuts = 0, labels = F)
+p
+p + style_roc() + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D", "#696969" ))
+p
+p2  = p + style_roc() + scale_color_manual(values = c("#D3D3D3","#BDBDBD", "#9E9E9E", "#7D7D7D", "#696969" ))
+ggsave(filename = "../../../results/v2/figures/ROCcurve10.png", plot = p2, height = 6, width = 8)
diff --git a/src/v2/HTRSIM/DESCRIPTION b/src/v2/HTRSIM/DESCRIPTION
index 3191056902d98e3a426ac306c70517fbf9a629c5..5c93a758b217026790e10370a079ad22b0dae2f9 100644
--- a/src/v2/HTRSIM/DESCRIPTION
+++ b/src/v2/HTRSIM/DESCRIPTION
@@ -18,6 +18,7 @@ Depends:
 Imports: 
     DESeq2,
     furrr,
+    glmglrt,
     MASS,
     S4Vectors,
     stats,
diff --git a/src/v2/HTRSIM/R/countsGenerator.R b/src/v2/HTRSIM/R/countsGenerator.R
index fb17cfc189b64ae15b684fe89d7154c2750cfd40..3af9bf322fa2b1ed25bc362bb51e1482909eba32 100644
--- a/src/v2/HTRSIM/R/countsGenerator.R
+++ b/src/v2/HTRSIM/R/countsGenerator.R
@@ -1,7 +1,3 @@
-
-
-
-
 #' Get beta_ij
 #'
 #' @param n_genes  an integer
diff --git a/src/v2/HTRSIM/R/evaluation.R b/src/v2/HTRSIM/R/evaluation.R
index 8e9609433abf4577fd8581463f7bc4dc046cd616..6f246c2c003a967ccffb1e6f297b0b174abb305a 100644
--- a/src/v2/HTRSIM/R/evaluation.R
+++ b/src/v2/HTRSIM/R/evaluation.R
@@ -24,9 +24,7 @@ getDfComparison <- function(dds_simu , model_matrix, beta.actual.matrix, threads
     padj.matrix = do.call("cbind", res)
 
 
-
     dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE)
-
     dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>%
                                         stringr::str_replace(., "_vs_G0", "") %>%
                                         stringr::str_replace(., "_vs_E0", "") %>%
diff --git a/src/v2/HTRSIM/R/modelFitting.R b/src/v2/HTRSIM/R/modelFitting.R
index a374328d48df144fdcd256350482639e00272f09..6918c11c074e9d339e4be3ab5602878c20de3fef 100644
--- a/src/v2/HTRSIM/R/modelFitting.R
+++ b/src/v2/HTRSIM/R/modelFitting.R
@@ -21,7 +21,7 @@ test_wald <- function(model.res, term, threshold, initvec){
 #' Launch MASS:GLM.NB
 #'
 #' @param gene_count a row of kij simulated table
-#' @param i index of the gene
+#' @param gene_name name of the gene
 #' @import dplyr
 #' @import stringr
 #' @import stats
@@ -31,10 +31,9 @@ test_wald <- function(model.res, term, threshold, initvec){
 #' @export
 #'
 #' @examples
-reshapeGlmRes <- function(fit, i, threshold  ,  error_bool = F){
+reshapeGlmRes <- function(fit, gene_name, threshold  ,  error_bool = F){
     if (error_bool == T){ ## error while fiting model
-      res = list(Inference = NA, pval = NA,
-                 beta = NA, gene_id = paste("gene", i, sep = ""), type = NA, deviance = NA, p.val = NA) %>%
+      res = list(Inference = NA, pval = NA, beta = NA, gene_id = paste("gene", i, sep = ""), type = NA, dispersion = NA) %>%
             data.frame()
     }
     else{ ## success to fit model
@@ -57,7 +56,7 @@ reshapeGlmRes <- function(fit, i, threshold  ,  error_bool = F){
 
       rownames(res) <- NULL
       res = res  %>%
-              dplyr::mutate(gene_id = paste("gene", i, sep = "") ) %>%
+              dplyr::mutate(gene_id = gene_name ) %>%
               dplyr::mutate(type = dplyr::case_when(
                 stringr::str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE",
                 stringr::str_detect(beta, "genotypeG\\d+$") ~ "G",
diff --git a/src/v2/HTRSIM/devtools_history.R b/src/v2/HTRSIM/devtools_history.R
index 4b905d399a8bd38d8957e4af0e11492f568b6c9a..9ad37d1dbbceb46a1b30e98d8129245ca7c390d4 100644
--- a/src/v2/HTRSIM/devtools_history.R
+++ b/src/v2/HTRSIM/devtools_history.R
@@ -5,4 +5,5 @@ usethis::use_package("stringr")
 usethis::use_package("MASS")
 usethis::use_package("DESeq2")
 usethis::use_package("furrr")
+usethis::use_package("glmglrt")