diff --git a/src/v3/HTRsim/R/countsGenerator.R b/src/v3/HTRsim/R/countsGenerator.R new file mode 100644 index 0000000000000000000000000000000000000000..ac8ae6ceada3e3250c4b3b73386f42a51f596664 --- /dev/null +++ b/src/v3/HTRsim/R/countsGenerator.R @@ -0,0 +1,237 @@ +#' Get beta_ij +#' +#' @param n_genes an integer +#' @param n_genotype A int. +#' @param mvrnorm.fit an object fit mvronorm +#' @import MASS +#' @return a dataframe +#' @export +#' +#' @examples +getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, mvrnorm.fit){ + ##### Sampling from mvnorm ######## + beta.matrix <- MASS::mvrnorm(n = n_genes*(n_genotypes), + mu = mvrnorm.fit$mu, + Sigma = mvrnorm.fit$sigma ) + + genes_vec = base::paste("gene", 1:n_genes, sep = "") + genotype_vec = base::paste("G", 0:(n_genotypes-1), sep = "") + genotype = genotype_vec %>% rep(time = n_genes) + gene_id = rep(genes_vec, each = n_genotypes) + beta.dtf = beta.matrix %>% data.frame() + return( cbind(gene_id, genotype, beta.dtf) ) +} + +#' Get model matrix +#' +#' @param n_environments an integer +#' @return a matrix +#' @export +#' +#' @examples +getModelMatrix <- function(n_environments = 2) { + environment_vec = base::paste("E", 0:(n_environments-1), sep = "") + ######################################## + m = c(1,1,0,0,1,1,1,1) + model.matrix = matrix(data = m , ncol = 2, byrow = F) + colnames(model.matrix) = environment_vec + rownames(model.matrix) = c("beta0", "betaG", "betaE", "betaGE") + return(model.matrix) +} + + +#' Get log(q_ij) +#' +#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE. +#' @param model.matx an output of stat::model.matrix() +#' @return a dataframe +#' @export +#' +#' @examples +getLog_qij <- function( beta.dtf , model.matx){ + beta.matx = beta.dtf[ ,c("beta0", 'betaG', 'betaE', "betaGE") ] %>% as.matrix() + log_qij.matx = beta.matx %*% model.matx ## j samples, i genes + ### Some reshaping ### + log_qij.dtf = log_qij.matx %>% data.frame() + annotations = beta.dtf[ , c("gene_id", 'genotype')] + log_qij.dtf = cbind(annotations, log_qij.dtf) + return( log_qij.dtf ) + +} + +#' Get mu_ij +#' +#' @param log_qij.dtf a dtf +#' @param size_factor a scalar +#' @return a matrix +#' @export +#' +#' @examples +getMu_ij <- function( log_qij.dtf, size_factor ){ + + log_qij.matx = log_qij.dtf[ , c("E0", 'E1') ] %>% as.matrix() + + mu_ij.matx = size_factor * 2^log_qij.matx ## size factor * log(qij) + + mu_ij.dtf = mu_ij.matx %>% data.frame() + annotations = log_qij.dtf[ , c("gene_id", 'genotype')] + mu_ij.dtf = cbind(annotations, mu_ij.dtf) + mu_ij.matx = mu_ij.dtf %>% reshape2::melt(., id.vars = c("gene_id", 'genotype'), + value.name = "mu_ij", variable.name= "environment") %>% + reshape2::dcast(., gene_id ~ genotype + environment , value.var = "mu_ij") %>% + column_to_rownames("gene_id") %>% as.matrix() + + return( mu_ij.matx ) + +} + +#' Get genes dispersion +#' +#' @param n_genes an integer +#' @param n_genotype A int. +#' @param n_environment A int. +#' @param dispersion.vec A vector of observed dispersion. +#' @param dispUniform_btweenCondition logical +#' @param model_matrix an output of stat::model.matrix() +#' @import stringr +#' @import purrr +#' @return a dataframe with the gene dispersion for each samples +#' @export +#' +#' @examples +getGenesDispersions <- function( n_genes, sample_id_list, + dispersion.vec , dispUniform_btweenCondition = T ) { + + if (dispUniform_btweenCondition == T ) { + gene_dispersion.dtf = base::sample( dispersion.vec , + replace = T, + size = n_genes) %>% base::data.frame() + n_rep = length(sample_id_list) + gene_dispersion.dtf = gene_dispersion.dtf[ ,base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)] + rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "") + colnames(gene_dispersion.dtf) = sample_id_list + + } + else { + replication_table = sample_ids %>% + stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table() + gene_dispersion.dtf = replication_table %>% + purrr::map(., ~sample( dispersion.vec, replace = T, size = n_genes) ) %>% data.frame() + gene_dispersion.dtf = gene_dispersion.dtf[ ,rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())] + colnames(gene_dispersion.dtf) = sample_ids + rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "") + + } + return(gene_dispersion.dtf %>% as.matrix) +} + + +#' Get K_ij : gene counts +#' +#' @param mu_ij.matx a matrix of mu_ij +#' @param dispersion.matx a matrix of gene dispersion +#' @param n_genes a matrix of gene dispersion +#' @param sample_id_list list of sample_ids +#' @param idx_replicat +#' @import stats +#' @return a matrix with counts per genes and samples +#' @export +#' +#' @examples +get_kij <- function(mu_ij.matx, dispersion.matx, n_genes, + sample_id_list, idx_replicat) { + n_sples = length(sample_id_list) + alpha_gene = 1/dispersion.matx + k_ij = stats::rnbinom( length(mu_ij.matx), + size = alpha_gene , + mu = mu_ij.matx) %>% + matrix(. , nrow = n_genes, ncol = n_sples ) + k_ij[is.na(k_ij)] = 0 + colnames(k_ij) = base::paste(sample_id_list, idx_replicat, sep = '_') + rownames(k_ij) = rownames(mu_ij.matx) + return(k_ij) +} + + +#' Get count table +#' +#' @param mu_ij.matx a matrix +#' @param dispersion.matx a matrix +#' @param n_genes number of genes +#' @param n_genotypes number of genotypes +#' @param n_environments number of environments +#' @param sample_id_list vector of sample ids +#' @param maxN max number of replicate +#' @param uniformNumberOfReplicates bool +#' @import purrr +#' @return a matrix +#' @export +#' +#' @examples +getCountTable <- function( mu_ij.matx, dispersion.matx, + n_genes, n_genotypes, + n_environments = 2, sample_id_list, + replication.matx ) { + + ### Iterate on each replicates + kij.simu.list = purrr::map(.x = 1:max_n_replicates, + .f = ~get_kij(mu_ij.matx[ ,replication.matx[.x, ]], + dispersion.matx[ ,replication.matx[.x, ]], + n_genes = n_genes, + sample_id_list[replication.matx[.x, ]], + .x) ) + tableCounts.simulated = do.call(cbind, kij.simu.list) + + return(tableCounts.simulated) + +} + + +#' get matrix of replication +#' +#' @param maxN a integer : number of replicates +#' @param n_samples an integer : number of samples +#' @return a matrix of 0 and 1 +#' @export +#' +#' @examples +uniform_replication <- function(maxN, n_samples) { + return(rep(T, time = maxN) %>% + rep(., each = n_samples ) %>% + matrix(ncol = n_samples) ) +} + + +#' get matrix of replication +#' +#' @param maxN a integer : number of replicates +#' @param n_samples an integer : number of samples +#' @return a matrix of 0 and 1 +#' @export +#' +#' @examples +random_replication <- function(maxN, n_samples){ + replicating <- function(maxN) return(sample(x = c(T,F), size = maxN, replace = T)) + res = purrr::map(1:n_samples, ~replicating(maxN-1)) + rep_table = do.call(cbind, res) + rep_table = rbind(rep(T, times = n_samples), rep_table) + return(rep_table) +} + +#' get matrix of replication +#' +#' @param maxN a integer : number of replicates +#' @param n_genotypes an integer : number of genotypes +#' @param n_environments +#' @param uniformNumberOfReplicates bool +#' @return a matrix of 0 and 1 +#' @export +#' +#' @examples +getReplicationDesign <- function(maxN, n_genotypes, n_environments = 2, + uniformNumberOfReplicates = T ) { + nb_sample = n_genotypes*n_environments + if ( uniformNumberOfReplicates == T ) rep.matrix = uniform_replication( maxN, nb_sample ) + if ( uniformNumberOfReplicates == F) rep.matrix = random_replication( maxN, nb_sample ) + return(rep.matrix) +} diff --git a/src/v3/HTRsim/R/extractionFromDDS.R b/src/v3/HTRsim/R/manipulationsDDS_obj.R similarity index 74% rename from src/v3/HTRsim/R/extractionFromDDS.R rename to src/v3/HTRsim/R/manipulationsDDS_obj.R index 8079ccf295b5414707e1d0977914bbd2d06cf396..0a4cdbc7525bc5fd0cdd59293616842bb5272bd0 100644 --- a/src/v3/HTRsim/R/extractionFromDDS.R +++ b/src/v3/HTRsim/R/manipulationsDDS_obj.R @@ -1,3 +1,22 @@ +#' Launch Deseq +#' +#' @param tabl_cnts table containing counts per genes & samples +#' @param bioDesign table describing bioDesgin of input +#' @import DESeq2 +#' @return DESEQ2 object +#' @export +#' +#' @examples +run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment){ + + + dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model ) + dds <- DESeq2::DESeq(dds) + return(dds) + +} + + #' Extract beta distribution from DESEQ2 object #' #' @param dds_obj a DESEQ2 object