diff --git a/src/counts_matrix_generator.R b/src/counts_matrix_generator.R index c86b73cee8d29e2d6fef8ada123cc2883beeb557..d6f7fa4050f1d3d8b2c58f6139514467a699d157 100644 --- a/src/counts_matrix_generator.R +++ b/src/counts_matrix_generator.R @@ -2,105 +2,65 @@ set.seed(123) -# Params to specify design -N_rep = 2 -N_cond = 2 -N_gene = 6000 - - ## simulation functions -count_generator <- function(n_value, mu_theo, size_theo){ - rnbinom(n=n_value, mu = mu_theo, size = size_theo) -} - -matrix_generator <- function(mu_theo, size_theo){ - n_value = N_gene*N_cond*N_rep #number of counts expected - mtx <- matrix(count_generator(n= n_value , mu = mu_theo, size = size_theo), ncol= N_cond*N_rep) - return(mtx) -} - -mu_effect <- function(vec_of_mu){ - statistical_power <- c() ## Init results of Differential expression analysis - res_DEA <- c() - for (mu in vec_of_mu){ - - # Print advancement message - cat(sprintf("Simulation for mu = %d\n", mu)) - - cnts <- matrix_generator(mu, 1) - cond <- factor(rep(1:2, each=N_rep)) - dds <- DESeqDataSetFromMatrix(cnts, DataFrame(cond), ~ cond) - - # standard analysis - dds <- DESeq(dds) - res <- results(dds) - - # reproducibility of signal - repr_cond1 <- ifelse(cnts[,2]==0, NA, cnts[,1]/cnts[,2]) - repr_cond1 <- mean(repr_cond1, na.rm=TRUE) - repr_cond2 <- ifelse(cnts[,4]==0, NA, cnts[,1]/cnts[,4]) - repr_cond2 <- mean(repr_cond2,na.rm=TRUE) - - # results of DEA - cat(sprintf("Length table = %d\n", length(table(res$padj < 0.05)))) - if (length(table(res$padj < 0.05)) == 1){ - res_DEA <- c(res_DEA, 0) ## case 1 : no DEG found by DESEQ2 - statistical_power <- c(statistical_power, NA) - } - else { - res_DEA <- c(res_DEA, table(res$padj < 0.05)[["TRUE"]]) ## case 2 : Nb DEG found by deseq2 - statistical_power <- c(statistical_power, min(abs(res$log2FoldChange[res$padj < 0.05]))) - } - } - return (data.frame(vec_of_mu, res_DEA, statistical_power, repr_cond1, repr_cond2)) -} +source("mydatalocal/counts_simulation/src/simulators.R") # visualization functions -library(ggpubr) mu_effect_visualization <- function(mu_effect_res){ - p1 <- mu_effect_res %>% ggplot(., aes(x=vec_of_mu, y=statistical_power)) + - geom_point() + xlab("mu") + ylab("min(|LFC|)") - p2 <- mu_effect_res %>% ggplot(., aes(x=vec_of_mu, y=res_DEA)) + - geom_point() + xlab("mu") + ylab("Number of DEG") - p3 <- mu_effect_res %>% ggplot(., aes(x=vec_of_mu, y=repr_cond1)) + - geom_point() + xlab("mu") + ylab("reproducibility signal c1") - p4 <- mu_effect_res %>% ggplot(., aes(x=vec_of_mu, y=repr_cond2)) + - geom_point() + xlab("mu") + ylab("reproducibility signal c2") - figure <- ggarrange(p1, p2, p3, p4, labels = c("A", "B","C","D"), - ncol = 2, nrow = 2) + figure = mu_effect_res %>% ggplot(., aes(x=vec_of_mu, y = value, col=factor(N_rep))) + + geom_point() + facet_wrap(~variable, scales = "free_y") return(figure) } -mean(c(1,2,NA)) -mean(cnts[,1]/cnts[,2]) -mean(cnts) -## main -mu_simul = seq(100, 15000, by = 200) -mu_simul -mu_simul <- rep.int(1500, 4) -mu_simul -res_simul <- mu_effect(mu_simul) -res_simul -mu_effect_visualization(res_simul) - +size_effect_visualization <- function(alpha_effect_res){ + figure = alpha_effect_res %>% ggplot(., aes(x=vec_of_alpha, y = value, na.rm = TRUE)) + + geom_point() + facet_wrap(~variable, scales = "free_y") + return(figure) +} -df <- data.frame( - Monday=c(2,3), - Tuesday=c(3,6), - Wednesday=c(4,7), - Friday=c(5,5), - Saturday=c(6,1), - Total=c(20,22)) -df +## main +# Params to specify design +min_rep = 2 #/!\ = 1 forbidden +max_rep = 10 +N_cond = 2 +N_gene = 6000 -df %>% - mutate( - across(c(1:5), - .fns = ~./Total)) +mu_simul = seq(100, 15000, by = 200) +mu_simul +mu_simul <- rep.int(1500, 8) +res_simul <- mu_effect(alpha = 2.9, mu_simul) +reshape_res_simul <- res_simul %>% reshape2::melt(.,id = c("vec_of_mu")) +mu_effect_visualization(reshape_res_simul) + + +alpha_simul = seq(0.01, 10, by = 0.1) +alpha_simul +res_simul2 <- size_effect(mu = 10000, alpha_simul) +res_simul2 +reshape_res_simul2 <- res_simul2 %>% reshape2::melt(.,id = c("vec_of_alpha")) +size_effect_visualization(reshape_res_simul2) + + +## replicate effect +n_rep_sim = seq(2, 5, by = 1) + +mu_simul_dtf_res <- data.frame() +for (N_rep in n_rep_sim){ + mu_simul = seq(2500, 12000, by = 200) + #mu_simul + #mu_simul <- rep.int(1500, 8) + res_simul <- mu_effect(alpha = 2, mu_simul) + res_simul$N_rep <- N_rep + tmp_reshape_res_simul <- res_simul %>% reshape2::melt(.,id = c("vec_of_mu", "N_rep")) + mu_simul_dtf_res <- rbind(mu_simul_dtf_res, tmp_reshape_res_simul) +} +mu_simul_dtf_res +mu_effect_visualization(mu_simul_dtf_res) -mean(cnts[,1]/cnts[,2]) +for (N_rep in n_rep_sim){ + print(N_rep) +} -#cnts[,3]