Skip to content
Snippets Groups Projects
Commit 199dc333 authored by Arnaud Duvermy's avatar Arnaud Duvermy
Browse files

simulation ok !

parent 816e046c
No related branches found
No related tags found
No related merge requests found
#' Get beta_ij
#'
#' @param n_genes an integer
#' @param n_genotype A int.
#' @param mvrnorm.fit an object fit mvronorm
#' @import MASS
#' @return a dataframe
#' @export
#'
#' @examples
getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, mvrnorm.fit){
##### Sampling from mvnorm ########
beta.matrix <- MASS::mvrnorm(n = n_genes*(n_genotypes),
mu = mvrnorm.fit$mu,
Sigma = mvrnorm.fit$sigma )
genes_vec = base::paste("gene", 1:n_genes, sep = "")
genotype_vec = base::paste("G", 0:(n_genotypes-1), sep = "")
genotype = genotype_vec %>% rep(time = n_genes)
gene_id = rep(genes_vec, each = n_genotypes)
beta.dtf = beta.matrix %>% data.frame()
return( cbind(gene_id, genotype, beta.dtf) )
}
#' Get model matrix
#'
#' @param n_environments an integer
#' @return a matrix
#' @export
#'
#' @examples
getModelMatrix <- function(n_environments = 2) {
environment_vec = base::paste("E", 0:(n_environments-1), sep = "")
########################################
m = c(1,1,0,0,1,1,1,1)
model.matrix = matrix(data = m , ncol = 2, byrow = F)
colnames(model.matrix) = environment_vec
rownames(model.matrix) = c("beta0", "betaG", "betaE", "betaGE")
return(model.matrix)
}
#' Get log(q_ij)
#'
#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE.
#' @param model.matx an output of stat::model.matrix()
#' @return a dataframe
#' @export
#'
#' @examples
getLog_qij <- function( beta.dtf , model.matx){
beta.matx = beta.dtf[ ,c("beta0", 'betaG', 'betaE', "betaGE") ] %>% as.matrix()
log_qij.matx = beta.matx %*% model.matx ## j samples, i genes
### Some reshaping ###
log_qij.dtf = log_qij.matx %>% data.frame()
annotations = beta.dtf[ , c("gene_id", 'genotype')]
log_qij.dtf = cbind(annotations, log_qij.dtf)
return( log_qij.dtf )
}
#' Get mu_ij
#'
#' @param log_qij.dtf a dtf
#' @param size_factor a scalar
#' @return a matrix
#' @export
#'
#' @examples
getMu_ij <- function( log_qij.dtf, size_factor ){
log_qij.matx = log_qij.dtf[ , c("E0", 'E1') ] %>% as.matrix()
mu_ij.matx = size_factor * 2^log_qij.matx ## size factor * log(qij)
mu_ij.dtf = mu_ij.matx %>% data.frame()
annotations = log_qij.dtf[ , c("gene_id", 'genotype')]
mu_ij.dtf = cbind(annotations, mu_ij.dtf)
mu_ij.matx = mu_ij.dtf %>% reshape2::melt(., id.vars = c("gene_id", 'genotype'),
value.name = "mu_ij", variable.name= "environment") %>%
reshape2::dcast(., gene_id ~ genotype + environment , value.var = "mu_ij") %>%
column_to_rownames("gene_id") %>% as.matrix()
return( mu_ij.matx )
}
#' Get genes dispersion
#'
#' @param n_genes an integer
#' @param n_genotype A int.
#' @param n_environment A int.
#' @param dispersion.vec A vector of observed dispersion.
#' @param dispUniform_btweenCondition logical
#' @param model_matrix an output of stat::model.matrix()
#' @import stringr
#' @import purrr
#' @return a dataframe with the gene dispersion for each samples
#' @export
#'
#' @examples
getGenesDispersions <- function( n_genes, sample_id_list,
dispersion.vec , dispUniform_btweenCondition = T ) {
if (dispUniform_btweenCondition == T ) {
gene_dispersion.dtf = base::sample( dispersion.vec ,
replace = T,
size = n_genes) %>% base::data.frame()
n_rep = length(sample_id_list)
gene_dispersion.dtf = gene_dispersion.dtf[ ,base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)]
rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
colnames(gene_dispersion.dtf) = sample_id_list
}
else {
replication_table = sample_ids %>%
stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table()
gene_dispersion.dtf = replication_table %>%
purrr::map(., ~sample( dispersion.vec, replace = T, size = n_genes) ) %>% data.frame()
gene_dispersion.dtf = gene_dispersion.dtf[ ,rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())]
colnames(gene_dispersion.dtf) = sample_ids
rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "")
}
return(gene_dispersion.dtf %>% as.matrix)
}
#' Get K_ij : gene counts
#'
#' @param mu_ij.matx a matrix of mu_ij
#' @param dispersion.matx a matrix of gene dispersion
#' @param n_genes a matrix of gene dispersion
#' @param sample_id_list list of sample_ids
#' @param idx_replicat
#' @import stats
#' @return a matrix with counts per genes and samples
#' @export
#'
#' @examples
get_kij <- function(mu_ij.matx, dispersion.matx, n_genes,
sample_id_list, idx_replicat) {
n_sples = length(sample_id_list)
alpha_gene = 1/dispersion.matx
k_ij = stats::rnbinom( length(mu_ij.matx),
size = alpha_gene ,
mu = mu_ij.matx) %>%
matrix(. , nrow = n_genes, ncol = n_sples )
k_ij[is.na(k_ij)] = 0
colnames(k_ij) = base::paste(sample_id_list, idx_replicat, sep = '_')
rownames(k_ij) = rownames(mu_ij.matx)
return(k_ij)
}
#' Get count table
#'
#' @param mu_ij.matx a matrix
#' @param dispersion.matx a matrix
#' @param n_genes number of genes
#' @param n_genotypes number of genotypes
#' @param n_environments number of environments
#' @param sample_id_list vector of sample ids
#' @param maxN max number of replicate
#' @param uniformNumberOfReplicates bool
#' @import purrr
#' @return a matrix
#' @export
#'
#' @examples
getCountTable <- function( mu_ij.matx, dispersion.matx,
n_genes, n_genotypes,
n_environments = 2, sample_id_list,
replication.matx ) {
### Iterate on each replicates
kij.simu.list = purrr::map(.x = 1:max_n_replicates,
.f = ~get_kij(mu_ij.matx[ ,replication.matx[.x, ]],
dispersion.matx[ ,replication.matx[.x, ]],
n_genes = n_genes,
sample_id_list[replication.matx[.x, ]],
.x) )
tableCounts.simulated = do.call(cbind, kij.simu.list)
return(tableCounts.simulated)
}
#' get matrix of replication
#'
#' @param maxN a integer : number of replicates
#' @param n_samples an integer : number of samples
#' @return a matrix of 0 and 1
#' @export
#'
#' @examples
uniform_replication <- function(maxN, n_samples) {
return(rep(T, time = maxN) %>%
rep(., each = n_samples ) %>%
matrix(ncol = n_samples) )
}
#' get matrix of replication
#'
#' @param maxN a integer : number of replicates
#' @param n_samples an integer : number of samples
#' @return a matrix of 0 and 1
#' @export
#'
#' @examples
random_replication <- function(maxN, n_samples){
replicating <- function(maxN) return(sample(x = c(T,F), size = maxN, replace = T))
res = purrr::map(1:n_samples, ~replicating(maxN-1))
rep_table = do.call(cbind, res)
rep_table = rbind(rep(T, times = n_samples), rep_table)
return(rep_table)
}
#' get matrix of replication
#'
#' @param maxN a integer : number of replicates
#' @param n_genotypes an integer : number of genotypes
#' @param n_environments
#' @param uniformNumberOfReplicates bool
#' @return a matrix of 0 and 1
#' @export
#'
#' @examples
getReplicationDesign <- function(maxN, n_genotypes, n_environments = 2,
uniformNumberOfReplicates = T ) {
nb_sample = n_genotypes*n_environments
if ( uniformNumberOfReplicates == T ) rep.matrix = uniform_replication( maxN, nb_sample )
if ( uniformNumberOfReplicates == F) rep.matrix = random_replication( maxN, nb_sample )
return(rep.matrix)
}
#' Launch Deseq
#'
#' @param tabl_cnts table containing counts per genes & samples
#' @param bioDesign table describing bioDesgin of input
#' @import DESeq2
#' @return DESEQ2 object
#' @export
#'
#' @examples
run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment){
dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model )
dds <- DESeq2::DESeq(dds)
return(dds)
}
#' Extract beta distribution from DESEQ2 object
#'
#' @param dds_obj a DESEQ2 object
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment