From 3c9de622ab7ac6084e3c6ba8a62a11b7832741bb Mon Sep 17 00:00:00 2001 From: aduvermy <arnaud.duvermy@ens-lyon.fr> Date: Wed, 21 Sep 2022 09:45:42 +0200 Subject: [PATCH] add v2 --- src/v2/HTRSIM/.Rbuildignore | 3 + src/v2/HTRSIM/DESCRIPTION | 24 +++ src/v2/HTRSIM/HTRSIM.Rproj | 22 +++ src/v2/HTRSIM/NAMESPACE | 24 +++ src/v2/HTRSIM/R/countsGenerator.R | 172 ++++++++++++++++++ src/v2/HTRSIM/R/designSimulationBuilder.R | 51 ++++++ src/v2/HTRSIM/R/evaluation.R | 78 ++++++++ src/v2/HTRSIM/R/modelFitting.R | 88 +++++++++ src/v2/HTRSIM/R/ublicsTableCountsExtractor.R | 30 +++ src/v2/HTRSIM/R/visualization.R | 65 +++++++ src/v2/HTRSIM/devtools_history.R | 8 + src/v2/HTRSIM/man/buildDesign2simulate.Rd | 31 ++++ src/v2/HTRSIM/man/ddsExtraction.viz.Rd | 17 ++ .../HTRSIM/man/extractDistributionFromDDS.Rd | 17 ++ src/v2/HTRSIM/man/getBetaforSimulation.Rd | 33 ++++ src/v2/HTRSIM/man/getDfComparison.Rd | 21 +++ .../man/getGenesDispersionsForSimulation.Rd | 34 ++++ src/v2/HTRSIM/man/getK_ij.Rd | 19 ++ src/v2/HTRSIM/man/getLog_qij.Rd | 19 ++ src/v2/HTRSIM/man/getMu_ij.Rd | 19 ++ src/v2/HTRSIM/man/rnorm.distrib.beta.Rd | 23 +++ src/v2/HTRSIM/man/run.deseq.Rd | 23 +++ src/v2/HTRSIM/tests/testthat.R | 4 + 23 files changed, 825 insertions(+) create mode 100644 src/v2/HTRSIM/.Rbuildignore create mode 100644 src/v2/HTRSIM/DESCRIPTION create mode 100644 src/v2/HTRSIM/HTRSIM.Rproj create mode 100644 src/v2/HTRSIM/NAMESPACE create mode 100644 src/v2/HTRSIM/R/countsGenerator.R create mode 100644 src/v2/HTRSIM/R/designSimulationBuilder.R create mode 100644 src/v2/HTRSIM/R/evaluation.R create mode 100644 src/v2/HTRSIM/R/modelFitting.R create mode 100644 src/v2/HTRSIM/R/ublicsTableCountsExtractor.R create mode 100644 src/v2/HTRSIM/R/visualization.R create mode 100644 src/v2/HTRSIM/devtools_history.R create mode 100644 src/v2/HTRSIM/man/buildDesign2simulate.Rd create mode 100644 src/v2/HTRSIM/man/ddsExtraction.viz.Rd create mode 100644 src/v2/HTRSIM/man/extractDistributionFromDDS.Rd create mode 100644 src/v2/HTRSIM/man/getBetaforSimulation.Rd create mode 100644 src/v2/HTRSIM/man/getDfComparison.Rd create mode 100644 src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd create mode 100644 src/v2/HTRSIM/man/getK_ij.Rd create mode 100644 src/v2/HTRSIM/man/getLog_qij.Rd create mode 100644 src/v2/HTRSIM/man/getMu_ij.Rd create mode 100644 src/v2/HTRSIM/man/rnorm.distrib.beta.Rd create mode 100644 src/v2/HTRSIM/man/run.deseq.Rd create mode 100644 src/v2/HTRSIM/tests/testthat.R diff --git a/src/v2/HTRSIM/.Rbuildignore b/src/v2/HTRSIM/.Rbuildignore new file mode 100644 index 0000000..86257dd --- /dev/null +++ b/src/v2/HTRSIM/.Rbuildignore @@ -0,0 +1,3 @@ +^HTRSIM\.Rproj$ +^\.Rproj\.user$ +^devtools_history\.R$ diff --git a/src/v2/HTRSIM/DESCRIPTION b/src/v2/HTRSIM/DESCRIPTION new file mode 100644 index 0000000..3191056 --- /dev/null +++ b/src/v2/HTRSIM/DESCRIPTION @@ -0,0 +1,24 @@ +Package: HTRSIM +Title: RNAseq counts simulation +Version: 0.0.0.9000 +Authors@R: + person("Arnaud", "Duvermy", , "first.last@example.com", role = c("aut", "cre"), + comment = c(ORCID = "YOUR-ORCID-ID")) +Description: RNAseq counts simulation. +License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a + license +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.1.2 +Suggests: + testthat (>= 3.0.0) +Config/testthat/edition: 3 +Depends: + tidyverse +Imports: + DESeq2, + furrr, + MASS, + S4Vectors, + stats, + stringr diff --git a/src/v2/HTRSIM/HTRSIM.Rproj b/src/v2/HTRSIM/HTRSIM.Rproj new file mode 100644 index 0000000..69fafd4 --- /dev/null +++ b/src/v2/HTRSIM/HTRSIM.Rproj @@ -0,0 +1,22 @@ +Version: 1.0 + +RestoreWorkspace: No +SaveWorkspace: No +AlwaysSaveHistory: Default + +EnableCodeIndexing: Yes +UseSpacesForTab: Yes +NumSpacesForTab: 2 +Encoding: UTF-8 + +RnwWeave: Sweave +LaTeX: pdfLaTeX + +AutoAppendNewline: Yes +StripTrailingWhitespace: Yes +LineEndingConversion: Posix + +BuildType: Package +PackageUseDevtools: Yes +PackageInstallArgs: --no-multiarch --with-keep.source +PackageRoxygenize: rd,collate,namespace diff --git a/src/v2/HTRSIM/NAMESPACE b/src/v2/HTRSIM/NAMESPACE new file mode 100644 index 0000000..2bf9a20 --- /dev/null +++ b/src/v2/HTRSIM/NAMESPACE @@ -0,0 +1,24 @@ +# Generated by roxygen2: do not edit by hand + +export(ddsExtraction.viz) +export(extractDistributionFromDDS) +export(getBetaforSimulation) +export(getDfComparison) +export(getGenesDispersionsForSimulation) +export(buildDesign2simulate) +export(getK_ij) +export(getLog_qij) +export(getMu_ij) +export(rnorm.distrib.beta) +export(run.deseq) +export(run.glm) +export(reshapeGlmRes) +import(DESeq2) +import(dplyr) +import(ggplot2) +import(reshape2) +import(stats) +import(stringr) +import(tidyverse) +import(MASS) + diff --git a/src/v2/HTRSIM/R/countsGenerator.R b/src/v2/HTRSIM/R/countsGenerator.R new file mode 100644 index 0000000..fb17cfc --- /dev/null +++ b/src/v2/HTRSIM/R/countsGenerator.R @@ -0,0 +1,172 @@ + + + + +#' Get beta_ij +#' +#' @param n_genes an integer +#' @param n_genotype A int. +#' @param n_environment A int. +#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE. +#' @param model_matrix an output of stat::model.matrix() +#' @param theta a float to control the noise introduce around betaG, betaE, betaGxE +#' @import stringr +#' @import base +#' @import dplyr +#' @import Rfast +#' @import MASS +#' @import purrr +#' @return a dataframe with the gene dispersion for each samples +#' @export +#' +#' @examples +getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, n_environments = 2, beta.dtf, theta = 10 ){ + + x = beta.dtf %>% as.matrix() + fit.mvrnorm <- Rfast::mvnorm.mle(x) + x <- NULL + beta.matrix.tmp <- MASS::mvrnorm(n = n_genes, + mu = fit.mvrnorm$mu, + Sigma = fit.mvrnorm$sigma ) + + replicate_beta <- function(beta_vec, n, theta){ + beta_vec.rep = rep(beta_vec, n) + beta_vec.rep + rnorm(length(beta_vec.rep), mean = 0, sd = abs(beta_vec/theta)) + } + + beta0 = beta.matrix.tmp[,1] + beta.matrix.tmp = purrr::map2(.x = c(2,3,4), .y = c(n_genotypes-1, + n_environments-1, + (n_genotypes-1)*(n_environments-1)), + ~ replicate_beta(beta.matrix.tmp[,.x], .y, theta) %>% matrix(ncol = .y)) %>% + do.call(cbind, .) + + + + beta.matrix = cbind(beta0, beta.matrix.tmp) + betaG.colnames = base::paste("genotype", "G", 1:(n_genotypes-1), sep = "") + betaE.colnames = base::paste("environment", "E", 1:(n_environments-1), sep = "") + betaGE.colnames = as.vector(outer(betaG.colnames, betaE.colnames, paste, sep=":")) + matrix.colnames = c('Intercept', betaG.colnames, betaE.colnames, betaGE.colnames) + + colnames(beta.matrix) = matrix.colnames + rownames(beta.matrix) = base::paste("gene", 1:(n_genes), sep = "") + + return(beta.matrix) +} + + + +#' Get log(q_ij) +#' +#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE. +#' @param model_matrix an output of stat::model.matrix() +#' @import stringr +#' @import base +#' @import dplyr +#' @import +#' @return a dataframe with the gene dispersion for each samples +#' @export +#' +#' @examples +getLog_qij <- function( beta.matrix.input , model_matrix){ + + log_qij = beta.matrix.input %*% t(model_matrix) ## j samples (n_genotypes * n_environments), i genes + + return(log_qij ) + +} + + + +#' Get mu_ij +#' +#' @param beta.dtf a dtf of beta0,betaG, betaE, betaGE. +#' @param model_matrix an output of stat::model.matrix() +#' @import stringr +#' @import base +#' @import dplyr +#' @import +#' @return a dataframe with the gene dispersion for each samples +#' @export +#' +#' @examples +getMu_ij <- function( log_qij.matrix, size_factor ){ + + mu_ij = size_factor * 2^log_qij.matrix ## size factor * log(qij) + + return(mu_ij ) + +} + +#' Get genes dispersion +#' +#' @param n_genes an integer +#' @param n_genotype A int. +#' @param n_environment A int. +#' @param dispersion.vec A vector of observed dispersion. +#' @param dispUniform_btweenCondition logical +#' @param model_matrix an output of stat::model.matrix() +#' @import stringr +#' @import base +#' @import dplyr +#' @import +#' @return a dataframe with the gene dispersion for each samples +#' @export +#' +#' @examples +getGenesDispersionsForSimulation <- function( n_genes = 100, n_genotypes, n_environments, dispersion.vec ,dispUniform_btweenCondition = T, model_matrix ){ + + + if (dispUniform_btweenCondition == T ) { + gene_dispersion.dtf = base::sample( dds.extraction$gene_dispersion, replace = T, size = n_genes) %>% base::data.frame() + n_rep = length(rownames(model_matrix)) + gene_dispersion.dtf = gene_dispersion.dtf[,base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)] + rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "") + colnames(gene_dispersion.dtf) = rownames(model_matrix) + + } + + else { + + replication_table = rownames(model_matrix) %>% stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table() + gene_dispersion.dtf = replication_table %>% purrr::map(., ~sample( dispersion.vec, replace = T, size = n_genes) ) %>% data.frame() + gene_dispersion.dtf = gene_dispersion.dtf[,rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())] + colnames(gene_dispersion.dtf) = rownames(model_matrix) + rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "") + + } + + return(gene_dispersion.dtf %>% as.matrix) + +} + + + + + +#' Get K_ij : gene counts +#' +#' @param log_qij.matrix a matrix of log_qij +#' @param gene_disp.matrix a matrix of gene dispersion +#' @import stats +#' @return a dataframe with the gene dispersion for each samples +#' @export +#' +#' @examples +getK_ij <- function( mu_ij.matrix , gene_disp.matrix ){ + + n_genes = nrow(mu_ij.matrix) + n_sples = ncol(mu_ij.matrix) + alpha_gene = 1/gene_disp.matrix + k_ij = stats::rnbinom(length(mu_ij.matrix), size = alpha_gene , mu = mu_ij.matrix) %>% matrix(. , nrow = n_genes, ncol = n_sples ) + k_ij[is.na(k_ij)] = 0 + + colnames(k_ij) = colnames(mu_ij.matrix) + rownames(k_ij) = rownames(mu_ij.matrix) + + return(k_ij) + +} + + diff --git a/src/v2/HTRSIM/R/designSimulationBuilder.R b/src/v2/HTRSIM/R/designSimulationBuilder.R new file mode 100644 index 0000000..f360907 --- /dev/null +++ b/src/v2/HTRSIM/R/designSimulationBuilder.R @@ -0,0 +1,51 @@ + + +#' Build a design dataframe +#' +#' @param n_genotype A int. +#' @param n_environment A int. +#' @param n_replicate A int. +#' @param uniform_nb_rep logical +#' @examples +#' buildDesign2simulate(1000, 2, 30) +# +#' @return dataframe with n_genotype rows and 3 columns (sample_id, genotype, environment) +#' @import tidyverse +#' @import stats +#' +buildDesign2simulate <- function(n_genotype , n_environment, n_replicate , uniform_nb_rep = T ){ + + + genotypes = base::paste("G", 0:(n_genotype-1), sep = "") + environments = base::paste("E", 0:(n_environment-1), sep = "") + sample_ids = as.vector(outer(genotypes, environments, paste, sep="_")) %>% sort() + + + sample_id.split = sample_ids %>% str_split(., "_", simplify = T) + design_without_rep = base::list(sample_id = sample_ids , + environment = sample_id.split[,2], + genotype = sample_id.split[,1] ) %>% data.frame() + + + rows = c(1:nrow(design_without_rep)) + if (uniform_nb_rep == T) times = rep(n_replicate, base::nrow(design_without_rep)) + else { times = sample(1:n_replicate, base::nrow(design_without_rep), replace=T) ; message("uniform_nb_rep = FALSE \nn_replicate consider as a maximum number of replicates possible") } + + design = design_without_rep %>% + dplyr::mutate(rep = times) %>% + dplyr::group_by(sample_id, genotype, environment) %>% + tidyr::expand(rep = seq(1:rep)) %>% + tidyr::unite(sample_id, sample_id, rep) %>% + dplyr::ungroup() + + if (n_genotype > 1) design$genotype <- factor(x = design$genotype,levels = c("G0", unique(design$genotype)[-1])) + if (n_environment > 1) design$environment <- factor(x = design$environment, levels = c( "E0", unique(design$environment)[-1])) + + model_matrix = stats::model.matrix(~ genotype + environment + genotype:environment, design) + rownames(model_matrix) = design$sample_id + + return( base::list(model_matrix = model_matrix, + design2simulate = design) ) +} + + diff --git a/src/v2/HTRSIM/R/evaluation.R b/src/v2/HTRSIM/R/evaluation.R new file mode 100644 index 0000000..8e96094 --- /dev/null +++ b/src/v2/HTRSIM/R/evaluation.R @@ -0,0 +1,78 @@ + +#' Get getDfComparison +#' +#' @param dds_simu.mcols dds object obtain on simulation +#' @param model_matrix a stats::model.matrix output +#' @param beta.actual.matrix.matrix a matrix of beta used as input for simulation +#' @param threads +#' @import stringr +#' @import base +#' @import dplyr +#' @import reshape2 +#' @import DESeq2 +#' @import furrr +#' @return a dataframe for beta comparison +#' @export +#' +#' @examples +getDfComparison <- function(dds_simu , model_matrix, beta.actual.matrix, threads = 4 ){ + + + listBeta = DESeq2::resultsNames(dds_simu) + plan(multisession, workers = 4) + res = listBeta %>% furrr::future_map(.x = ., ~DESeq2::results(dds_simu, contrast=list(.x)) %>% data.frame() %>% .$padj) + padj.matrix = do.call("cbind", res) + + + + dds_simu.mcols = S4Vectors::mcols(dds_simu,use.names=TRUE) + + dds.simu.mcols.colnamesReshaped = colnames(dds_simu.mcols) %>% + stringr::str_replace(., "_vs_G0", "") %>% + stringr::str_replace(., "_vs_E0", "") %>% + stringr::str_replace_all(., "_", "") %>% + stringr::str_replace(., "\\.", ":") + + columnOfInterest = model_matrix %>% base::colnames() %>% stringr::str_replace_all(., "[//(//)]", "") + #dds_simu.mcols[,columnOfInterest] + + ## Get only column of interest + idx_cols = base::match(columnOfInterest, dds.simu.mcols.colnamesReshaped) + beta.infered = dds_simu.mcols[,idx_cols] + + ## homogeneize column names & rownames + idx_cols = base::match(columnOfInterest, beta.actual.matrix %>% colnames()) + beta.actual.matrix = beta.actual.matrix[,idx_cols] + colnames(beta.infered) = base::colnames(beta.actual.matrix) + colnames(padj.matrix) = base::colnames(beta.actual.matrix) + rownames(padj.matrix) = base::rownames(beta.actual.matrix) + + beta.infer.long = beta.infered %>% data.frame() %>% + tibble::rownames_to_column(., var = "gene_id") %>% + dplyr::mutate(origin = "Inference") %>% + reshape2::melt(., value.name = "value", variable.name= "beta") + beta.actual.matrix.long = beta.actual.matrix %>% data.frame() %>% + tibble::rownames_to_column(., var = "gene_id") %>% + dplyr::mutate(origin = "Actual") %>% + reshape2::melt(., value.name = "value", variable.name= "beta") + padj.matrix.long = padj.matrix %>% data.frame() %>% + tibble::rownames_to_column(., var = "gene_id") %>% + dplyr::mutate(origin = "padj") %>% + reshape2::melt(., value.name = "value", variable.name= "beta") + + beta.merged.long = rbind(beta.infer.long, beta.actual.matrix.long, padj.matrix.long) + #beta.merged.long$beta %>% unique() + + beta.merged.long.reshape = beta.merged.long %>% dplyr::mutate(type = dplyr::case_when( + str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE", + str_detect(beta, "genotypeG\\d+$") ~ "G", + str_detect(beta, "environmentE\\d+$") ~ "E", + str_detect(beta, "Intercept$") ~ "Intercept") + ) + + + beta.merged.long.reshape2 = beta.merged.long.reshape %>% reshape2::dcast(., gene_id + beta + type ~ origin) + beta.merged.long.reshape2$type = factor(beta.merged.long.reshape2$type, levels = c("Intercept", "G", "E", "GxE")) + + return(beta.merged.long.reshape2) +} diff --git a/src/v2/HTRSIM/R/modelFitting.R b/src/v2/HTRSIM/R/modelFitting.R new file mode 100644 index 0000000..7cbf226 --- /dev/null +++ b/src/v2/HTRSIM/R/modelFitting.R @@ -0,0 +1,88 @@ +#' Launch MASS:GLM.NB +#' +#' @param gene_count a row of kij simulated table +#' @param i index of the gene +#' @import dplyr +#' @import stringr +#' @import stats +#' @return a dtf +#' @export +#' +#' @examples +reshapeGlmRes <- function(fit, i, error_bool = F){ + if (error_bool == T){ ## error while fiting model + res = list(Inference = NA, pval = NA, + beta = NA, gene_id = paste("gene", i, sep = ""), type = NA, deviance = NA) %>% + data.frame() + } + else{ ## success to fit model + res = coef(summary(fit))[,c(1,4)] %>% + data.frame() %>% + dplyr::rename(., pval = "Pr...z.." , Inference = "Estimate") %>% + dplyr::mutate(beta = stringr::str_remove_all(rownames(.), "[//(//)]")) %>% + dplyr::mutate(beta = stringr::str_replace(beta, ":", ".")) + rownames(res) <- NULL + res = res %>% + dplyr::mutate(gene_id = paste("gene", i, sep = "") ) %>% + dplyr::mutate(type = dplyr::case_when( + stringr::str_detect(beta, "genotypeG\\d+\\.environment") ~ "GxE", + stringr::str_detect(beta, "genotypeG\\d+$") ~ "G", + stringr::str_detect(beta, "environmentE\\d+$") ~ "E", + stringr::str_detect(beta, "Intercept$") ~ "Intercept")) %>% + dplyr::mutate(dispersion = fit$theta) + #stats::summary.glm(fit)$dispersion + } + return(res) +} + + + + +#' Launch MASS:GLM.NB +#' +#' @param gene_count a row of kij simulated table +#' @param i index of the gene +#' @param design.dtf a dtf of the design simulated +#' @import MASS +#' @return a dtf +#' @export +#' +#' @examples +run.glm <- function(gene_count, i, design.dtf) { + y = gene_count + genotype = design2simulate$design2simulate$genotype + environment = design2simulate$design2simulate$environment + df_gene_i = list(y = y , genotype = genotype,environment = environment) %>% data.frame() + rownames(df_gene_i) <- NULL + print(i) + tryCatch({ + + fit = MASS::glm.nb(y ~ genotype + environment + genotype:environment, data = df_gene_i, link = log) + return(reshapeGlmRes(fit, i)) + }, + + error = function(cnd){ + return(reshapeGlmRes(NULL, i, error_bool = T)) +} +) +} + + + +#' Launch Deseq +#' +#' @param tabl_cnts table containing counts per genes & samples +#' @param bioDesign table describing bioDesgin of input +#' @import DESeq2 +#' @return DESEQ2 object +#' @export +#' +#' @examples +run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment){ + + + dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model ) + dds <- DESeq2::DESeq(dds) + return(dds) + +} diff --git a/src/v2/HTRSIM/R/ublicsTableCountsExtractor.R b/src/v2/HTRSIM/R/ublicsTableCountsExtractor.R new file mode 100644 index 0000000..cc68262 --- /dev/null +++ b/src/v2/HTRSIM/R/ublicsTableCountsExtractor.R @@ -0,0 +1,30 @@ +#' Extract beta distribution from DESEQ2 object +#' +#' @param dds_obj a DESEQ2 object +#' @import S4vectors +#' @return a list containing 1- mean and sd of BetaG 2- mean and sd of BetaE 3- mean and sd of BetaGE 5- mean and sd of gene dispersion +#' @export +#' +#' @examples +extractDistributionFromDDS <- function(dds_obj){ + ## Beta + dds.mcols = S4Vectors::mcols(dds, use.names=TRUE) + beta0 <- dds.mcols$Intercept + betaG <- dds.mcols$genotype_RM11_vs_GSY147 + betaE <- dds.mcols$environment_treated_vs_untreated + betaGE <- dds.mcols$genotypeRM11.environmenttreated + beta.dtf = cbind(beta0,betaG,betaE,betaGE) %>% as.data.frame() %>% drop_na() + + ## Dispersion + gene_disp = dds.mcols$dispersion %>% na.omit() + + + return(list(beta = beta.dtf, + gene_dispersion = gene_disp, + beta0.mean = mean(beta.dtf$beta0, na.rm = T), beta0.sd = sd(beta.dtf$beta0, na.rm = T) , + betaG.mean = mean(beta.dtf$betaG, na.rm = T), betaG.sd = sd(beta.dtf$betaG, na.rm = T) , + betaE.mean = mean(beta.dtf$betaE, na.rm = T) , betaE.sd = sd(beta.dtf$betaE, na.rm = T) , + betaGE.mean = mean(beta.dtf$betaGE, na.rm = T) , betaGE.sd = sd(beta.dtf$betaGE, na.rm = T), + gene_disp.mean = mean(gene_disp, na.rm = T) , gene_disp.sd = sd(gene_disp, na.rm = T))) + +} diff --git a/src/v2/HTRSIM/R/visualization.R b/src/v2/HTRSIM/R/visualization.R new file mode 100644 index 0000000..47c1e44 --- /dev/null +++ b/src/v2/HTRSIM/R/visualization.R @@ -0,0 +1,65 @@ + + + +#' Sampling value from a known Gaussian distribution +#' +#' @param beta_type a string (beta0, betaG, betaE, betaGE) +#' @param mean.distrib the mean of the distribution (dbl) +#' @param sd.distrib the standard deviation of the distribution (dbl) +#' @param n an integer defining the number of value sampling +#' @import dplyr +#' @import base +#' @return a dataframe +#' @export +#' +#' @examples +rnorm.distrib.beta <- function(parameter, mean.distrib , sd.distrib , n = 1000 ){ + + rnorm.distrib.dtf = rnorm(1000, mean = mean.distrib, sd = sd.distrib) %>% + base::data.frame() %>% + dplyr::mutate( parameter = parameter) %>% + dplyr::rename(., value = ".") %>% + dplyr::mutate(sampling_from = paste("rnorm(mean = ", signif(mean.distrib, 3), ", sd=" , signif(sd.distrib, 3), ")", sep = "")) + + + return(rnorm.distrib.dtf) +} + + +#' Vizualize the output of extractDistributionFromDDS +#' +#' @param dds.extraction output of extractDistributionFromDDS +#' @import ggplot2 +#' @import base +#' @import dplyr +#' @return a plot +#' @export +#' +#' @examples +ddsExtraction.viz <- function(dds.extraction){ + beta_obs.dtf.long = dds.extraction$beta %>% reshape2::melt(. , na.rm = T, variable.name = "parameter") + + alpha_obs.dtf.long = dds.extraction$gene_dispersion %>% base::data.frame() %>% + dplyr::rename(., value = ".") %>% + dplyr::mutate(parameter = "dispersion") + + dtf.params_obs = rbind(beta_obs.dtf.long, alpha_obs.dtf.long) + + rnorm.distrib.beta0 = rnorm.distrib.beta(parameter = "beta0", mean.distrib = dds.extraction$beta0.mean, sd = dds.extraction$beta0.sd ) + rnorm.distrib.betaG = rnorm.distrib.beta(parameter = "betaG", mean.distrib = dds.extraction$betaG.mean, sd = dds.extraction$betaG.sd ) + rnorm.distrib.betaE = rnorm.distrib.beta(parameter = "betaE", mean.distrib = dds.extraction$betaE.mean, sd = dds.extraction$betaE.sd ) + rnorm.distrib.betaGE = rnorm.distrib.beta(parameter = "betaGE", mean.distrib = dds.extraction$betaGE.mean, sd = dds.extraction$betaGE.sd ) + rnorm.distrib.alpha = rnorm.distrib.beta(parameter = "dispersion", mean.distrib = dds.extraction$gene_disp.mean, sd = dds.extraction$gene_disp.sd ) + + dtf.rnorm.param = base::rbind(rnorm.distrib.beta0, rnorm.distrib.betaG, rnorm.distrib.betaE, rnorm.distrib.betaGE, rnorm.distrib.alpha) + + p = ggplot(dtf.params_obs, aes(x= value)) + + geom_histogram(aes(y=..density..), colour="black", fill="white")+ + geom_density(data = dtf.rnorm.param, aes(x = value, fill= sampling_from), alpha=.4) + facet_grid(~parameter) + + + + + return(p) + +} diff --git a/src/v2/HTRSIM/devtools_history.R b/src/v2/HTRSIM/devtools_history.R new file mode 100644 index 0000000..4b905d3 --- /dev/null +++ b/src/v2/HTRSIM/devtools_history.R @@ -0,0 +1,8 @@ +usethis::use_build_ignore("devtools_history.R") +usethis::use_package('tidyverse', type = "depends") +usethis::use_package('stats') +usethis::use_package("stringr") +usethis::use_package("MASS") +usethis::use_package("DESeq2") +usethis::use_package("furrr") + diff --git a/src/v2/HTRSIM/man/buildDesign2simulate.Rd b/src/v2/HTRSIM/man/buildDesign2simulate.Rd new file mode 100644 index 0000000..e3c699c --- /dev/null +++ b/src/v2/HTRSIM/man/buildDesign2simulate.Rd @@ -0,0 +1,31 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/designSimulationBuilder.R +\name{buildDesign2simulate} +\alias{buildDesign2simulate} +\title{Build a design dataframe} +\usage{ +buildDesign2simulate( + n_genotype, + n_environment, + n_replicate, + uniform_nb_rep = T +) +} +\arguments{ +\item{n_genotype}{A int.} + +\item{n_environment}{A int.} + +\item{n_replicate}{A int.} + +\item{uniform_nb_rep}{logical} +} +\value{ +dataframe with n_genotype rows and 3 columns (sample_id, genotype, environment) +} +\description{ +Build a design dataframe +} +\examples{ +buildDesign2simulate(1000, 2, 30) +} diff --git a/src/v2/HTRSIM/man/ddsExtraction.viz.Rd b/src/v2/HTRSIM/man/ddsExtraction.viz.Rd new file mode 100644 index 0000000..02be5ed --- /dev/null +++ b/src/v2/HTRSIM/man/ddsExtraction.viz.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization.R +\name{ddsExtraction.viz} +\alias{ddsExtraction.viz} +\title{Vizualize the output of extractDistributionFromDDS} +\usage{ +ddsExtraction.viz(dds.extraction) +} +\arguments{ +\item{dds.extraction}{output of extractDistributionFromDDS} +} +\value{ +a plot +} +\description{ +Vizualize the output of extractDistributionFromDDS +} diff --git a/src/v2/HTRSIM/man/extractDistributionFromDDS.Rd b/src/v2/HTRSIM/man/extractDistributionFromDDS.Rd new file mode 100644 index 0000000..fcfc012 --- /dev/null +++ b/src/v2/HTRSIM/man/extractDistributionFromDDS.Rd @@ -0,0 +1,17 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ublicsTableCountsExtractor.R +\name{extractDistributionFromDDS} +\alias{extractDistributionFromDDS} +\title{Extract beta distribution from DESEQ2 object} +\usage{ +extractDistributionFromDDS(dds_obj) +} +\arguments{ +\item{dds_obj}{a DESEQ2 object} +} +\value{ +a list containing 1- mean and sd of BetaG 2- mean and sd of BetaE 3- mean and sd of BetaGE 5- mean and sd of gene dispersion +} +\description{ +Extract beta distribution from DESEQ2 object +} diff --git a/src/v2/HTRSIM/man/getBetaforSimulation.Rd b/src/v2/HTRSIM/man/getBetaforSimulation.Rd new file mode 100644 index 0000000..1fdf6fc --- /dev/null +++ b/src/v2/HTRSIM/man/getBetaforSimulation.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/countsGenerator.R +\name{getBetaforSimulation} +\alias{getBetaforSimulation} +\title{Get beta_ij} +\usage{ +getBetaforSimulation( + n_genes = 100, + n_genotypes = 20, + n_environments = 2, + beta.dtf, + threshold = 15 +) +} +\arguments{ +\item{n_genes}{an integer} + +\item{beta.dtf}{a dtf of beta0,betaG, betaE, betaGE.} + +\item{threshold}{an integer)} + +\item{n_genotype}{A int.} + +\item{n_environment}{A int.} + +\item{model_matrix}{an output of stat::model.matrix()} +} +\value{ +a dataframe with the gene dispersion for each samples +} +\description{ +Get beta_ij +} diff --git a/src/v2/HTRSIM/man/getDfComparison.Rd b/src/v2/HTRSIM/man/getDfComparison.Rd new file mode 100644 index 0000000..7f80baa --- /dev/null +++ b/src/v2/HTRSIM/man/getDfComparison.Rd @@ -0,0 +1,21 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/evaluation.R +\name{getDfComparison} +\alias{getDfComparison} +\title{Get getDfComparison} +\usage{ +getDfComparison(dds_simu, model_matrix, beta.actual.matrix) +} +\arguments{ +\item{model_matrix}{a stats::model.matrix output} + +\item{dds_simu.mcols}{dds object obtain on simulation} + +\item{beta.actual.matrix.matrix}{a matrix of beta used as input for simulation} +} +\value{ +a dataframe for beta comparison +} +\description{ +Get getDfComparison +} diff --git a/src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd b/src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd new file mode 100644 index 0000000..e2b8cff --- /dev/null +++ b/src/v2/HTRSIM/man/getGenesDispersionsForSimulation.Rd @@ -0,0 +1,34 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/countsGenerator.R +\name{getGenesDispersionsForSimulation} +\alias{getGenesDispersionsForSimulation} +\title{Get genes dispersion} +\usage{ +getGenesDispersionsForSimulation( + n_genes = 100, + n_genotypes, + n_environments, + dispersion.vec, + dispUniform_btweenCondition = T, + model_matrix +) +} +\arguments{ +\item{n_genes}{an integer} + +\item{dispersion.vec}{A vector of observed dispersion.} + +\item{dispUniform_btweenCondition}{logical} + +\item{model_matrix}{an output of stat::model.matrix()} + +\item{n_genotype}{A int.} + +\item{n_environment}{A int.} +} +\value{ +a dataframe with the gene dispersion for each samples +} +\description{ +Get genes dispersion +} diff --git a/src/v2/HTRSIM/man/getK_ij.Rd b/src/v2/HTRSIM/man/getK_ij.Rd new file mode 100644 index 0000000..08c5b70 --- /dev/null +++ b/src/v2/HTRSIM/man/getK_ij.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/countsGenerator.R +\name{getK_ij} +\alias{getK_ij} +\title{Get K_ij : gene counts} +\usage{ +getK_ij(mu_ij.matrix, gene_disp.matrix) +} +\arguments{ +\item{gene_disp.matrix}{a matrix of gene dispersion} + +\item{log_qij.matrix}{a matrix of log_qij} +} +\value{ +a dataframe with the gene dispersion for each samples +} +\description{ +Get K_ij : gene counts +} diff --git a/src/v2/HTRSIM/man/getLog_qij.Rd b/src/v2/HTRSIM/man/getLog_qij.Rd new file mode 100644 index 0000000..9bbc605 --- /dev/null +++ b/src/v2/HTRSIM/man/getLog_qij.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/countsGenerator.R +\name{getLog_qij} +\alias{getLog_qij} +\title{Get log(q_ij)} +\usage{ +getLog_qij(beta.matrix.input, model_matrix) +} +\arguments{ +\item{model_matrix}{an output of stat::model.matrix()} + +\item{beta.dtf}{a dtf of beta0,betaG, betaE, betaGE.} +} +\value{ +a dataframe with the gene dispersion for each samples +} +\description{ +Get log(q_ij) +} diff --git a/src/v2/HTRSIM/man/getMu_ij.Rd b/src/v2/HTRSIM/man/getMu_ij.Rd new file mode 100644 index 0000000..e4b7949 --- /dev/null +++ b/src/v2/HTRSIM/man/getMu_ij.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/countsGenerator.R +\name{getMu_ij} +\alias{getMu_ij} +\title{Get mu_ij} +\usage{ +getMu_ij(log_qij.matrix, size_factor) +} +\arguments{ +\item{beta.dtf}{a dtf of beta0,betaG, betaE, betaGE.} + +\item{model_matrix}{an output of stat::model.matrix()} +} +\value{ +a dataframe with the gene dispersion for each samples +} +\description{ +Get mu_ij +} diff --git a/src/v2/HTRSIM/man/rnorm.distrib.beta.Rd b/src/v2/HTRSIM/man/rnorm.distrib.beta.Rd new file mode 100644 index 0000000..63a174c --- /dev/null +++ b/src/v2/HTRSIM/man/rnorm.distrib.beta.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/visualization.R +\name{rnorm.distrib.beta} +\alias{rnorm.distrib.beta} +\title{Sampling value from a known Gaussian distribution} +\usage{ +rnorm.distrib.beta(parameter, mean.distrib, sd.distrib, n = 1000) +} +\arguments{ +\item{mean.distrib}{the mean of the distribution (dbl)} + +\item{sd.distrib}{the standard deviation of the distribution (dbl)} + +\item{n}{an integer defining the number of value sampling} + +\item{beta_type}{a string (beta0, betaG, betaE, betaGE)} +} +\value{ +a dataframe +} +\description{ +Sampling value from a known Gaussian distribution +} diff --git a/src/v2/HTRSIM/man/run.deseq.Rd b/src/v2/HTRSIM/man/run.deseq.Rd new file mode 100644 index 0000000..02d00fc --- /dev/null +++ b/src/v2/HTRSIM/man/run.deseq.Rd @@ -0,0 +1,23 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/ublicsTableCountsExtractor.R +\name{run.deseq} +\alias{run.deseq} +\title{Launch Deseq} +\usage{ +run.deseq( + tabl_cnts, + bioDesign, + model = ~genotype + environment + genotype:environment +) +} +\arguments{ +\item{tabl_cnts}{table containing counts per genes & samples} + +\item{bioDesign}{table describing bioDesgin of input} +} +\value{ +DESEQ2 object +} +\description{ +Launch Deseq +} diff --git a/src/v2/HTRSIM/tests/testthat.R b/src/v2/HTRSIM/tests/testthat.R new file mode 100644 index 0000000..16b641e --- /dev/null +++ b/src/v2/HTRSIM/tests/testthat.R @@ -0,0 +1,4 @@ +library(testthat) +library(HTRSIM) + +test_check("HTRSIM") -- GitLab