diff --git a/src/model.fit/HTRfit/DESCRIPTION b/src/model.fit/HTRfit/DESCRIPTION new file mode 100644 index 0000000000000000000000000000000000000000..4682d71e0ef5d4d89c3e39dff3bc3ea0eee307b5 --- /dev/null +++ b/src/model.fit/HTRfit/DESCRIPTION @@ -0,0 +1,22 @@ +Package: HTRfit +Title: What the Package Does (One Line, Title Case) +Version: 0.0.0.9000 +Authors@R: + person("First", "Last", , "first.last@example.com", role = c("aut", "cre"), + comment = c(ORCID = "YOUR-ORCID-ID")) +Description: What the package does (one paragraph). +License: `use_mit_license()`, `use_gpl3_license()` or friends to pick a + license +Encoding: UTF-8 +Roxygen: list(markdown = TRUE) +RoxygenNote: 7.2.2 +Depends: + tidyverse +Imports: + broom, + dplyr, + furrr, + future, + MASS, + purrr, + stats diff --git a/src/model.fit/HTRfit/NAMESPACE b/src/model.fit/HTRfit/NAMESPACE new file mode 100644 index 0000000000000000000000000000000000000000..c3cb52f58d274f3b9711f056d7e60af5b2879721 --- /dev/null +++ b/src/model.fit/HTRfit/NAMESPACE @@ -0,0 +1,17 @@ +# Generated by roxygen2: do not edit by hand + +export(fit.glm) +export(getStatisticWaldTest) +export(getdf2fit) +export(launch.glm) +export(listFit2dtf) +export(reshapeCounTable) +export(tidySummary) +export(wald_test) +import(MASS) +import(broom) +import(dplyr) +import(furrr) +import(future) +import(purrr) +import(stats) diff --git a/src/model.fit/HTRfit/R/model_fitting.R b/src/model.fit/HTRfit/R/model_fitting.R index f4d9b472888bc74c805adcec688ccce4be71f1f4..9c23dc098281791552a00cefb135974dbc7d79c1 100644 --- a/src/model.fit/HTRfit/R/model_fitting.R +++ b/src/model.fit/HTRfit/R/model_fitting.R @@ -6,63 +6,71 @@ #' @param threads #' @import future #' @import furrr -#' @return fit list +#' @return fit list #' @export #' #' @examples -launch.glm <- function( data2fit, model2fit = k_ij ~ genotype + environment + genotype:environment, - fit_by = "gene_id", threads = 4 ) { - - iteration_list = data2fit[fit_by] %>% unique() %>% as.vector() %>% unname() %>% .[[1]] +launch.glm <- function(data2fit, + model2fit = k_ij ~ genotype + environment + genotype:environment, + fit_by = "gene_id", + threads = 4) { + iteration_list <- data2fit[fit_by] %>% + unique() %>% + as.vector() %>% + unname() %>% + .[[1]] future::plan(multisession, workers = threads) - fit_list = iteration_list %>% furrr::future_map(.x = ., - ~fit.glm( data2fit[data2fit[fit_by] == .x,], - .x, - model2fit) ) + fit_list <- iteration_list %>% furrr::future_map( + .x = ., + ~ fit.glm( + data2fit[data2fit[fit_by] == .x, ], + .x, + model2fit + ) + ) return(fit_list) - } #' fit model on data -#' @param data +#' @param data #' @param id #' @import MASS #' @import dplyr -#' @return fit +#' @return fit #' @export #' #' @examples fit.glm <- function(data, id, model2fit) { - fit = MASS::glm.nb(model2fit, data = data, link = log) - fit.dtf = tidySummary(fit) - fit.dtf$inference = fit.dtf$inference %>% dplyr::mutate(gene_id = id) - fit.dtf$fitQuality = fit.dtf$fitQuality %>% dplyr::mutate(gene_id = id) - return(fit.dtf) + fit <- MASS::glm.nb(model2fit, data = data, link = log) + fit.dtf <- tidySummary(fit) + fit.dtf$inference <- fit.dtf$inference %>% dplyr::mutate(gene_id = id) + fit.dtf$fitQuality <- fit.dtf$fitQuality %>% dplyr::mutate(gene_id = id) + return(fit.dtf) } #' fit model on data -#' @param fit +#' @param fit #' @import broom -#' @return list of element +#' @return list of element #' @export #' #' @examples -tidySummary <- function(fit){ - dtf = broom::tidy(fit) - gl = broom::glance(fit) - return( list(inference = dtf, fitQuality = gl) ) +tidySummary <- function(fit) { + dtf <- broom::tidy(fit) + gl <- broom::glance(fit) + return(list(inference = dtf, fitQuality = gl)) } #' convert list to dataframe -#' @param fit +#' @param fit #' @return list of dtf #' @export #' #' @examples -listFit2dtf <- function(list_fit){ - tmp = do.call(cbind, list_fit) - inference.dtf = do.call(rbind, tmp[1,]) - fitQuality.dtf = do.call(rbind, tmp[2,]) - return( list(inference = inference.dtf, fitQuality = fitQuality.dtf) ) -} \ No newline at end of file +listFit2dtf <- function(list_fit) { + tmp <- do.call(cbind, list_fit) + inference.dtf <- do.call(rbind, tmp[1, ]) + fitQuality.dtf <- do.call(rbind, tmp[2, ]) + return(list(inference = inference.dtf, fitQuality = fitQuality.dtf)) +} diff --git a/src/v3/HTRsim/R/countsGenerator.R b/src/v3/HTRsim/R/countsGenerator.R index 6f723f6fe73756ffe798511228f5d2cbc7c44641..e51f78cdce1536fdb73b23dfdedd75cc9a5bf954 100644 --- a/src/v3/HTRsim/R/countsGenerator.R +++ b/src/v3/HTRsim/R/countsGenerator.R @@ -3,25 +3,45 @@ #' @param n_genes an integer #' @param n_genotype A int. #' @param mvrnorm.fit an object fit mvronorm +#' @fixIntercept +#' @fixBetaE #' @import MASS +#' @import dplyr #' @return a dataframe #' @export #' #' @examples -getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, mvrnorm.fit){ - message( "Get actuals beta for each genes ...") - ##### Sampling from mvnorm ######## - n_samplings = n_genes*n_genotypes - beta.matrix <- MASS::mvrnorm(n = n_samplings, - mu = mvrnorm.fit$mu, - Sigma = mvrnorm.fit$sigma ) - - genes_vec = base::paste("gene", 1:n_genes, sep = "") - genotype_vec = base::paste("G", 0:(n_genotypes-1), sep = "") - genotype = genotype_vec %>% rep(time = n_genes) - gene_id = rep(genes_vec, each = n_genotypes) - beta.dtf = beta.matrix %>% data.frame() - return( cbind(gene_id, genotype, beta.dtf) ) +getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, mvrnorm.fit, fixIntercept = TRUE, fixBetaE = TRUE) { + message("Get actuals beta for each genes ...") + ##### Sampling from mvnorm ######## + n_samplings <- n_genes * n_genotypes + beta.matrix <- MASS::mvrnorm( + n = n_samplings, + mu = mvrnorm.fit$mu, + Sigma = mvrnorm.fit$sigma + ) + + genes_vec <- base::paste("gene", 1:n_genes, sep = "") + genotype_vec <- base::paste("G", 0:(n_genotypes - 1), sep = "") + genotype <- genotype_vec %>% rep(time = n_genes) + gene_id <- rep(genes_vec, each = n_genotypes) + beta.dtf <- beta.matrix %>% data.frame() + colnames(beta.dtf) <- c("(Intercept)", "betaG", "betaE", "betaGE") + res <- cbind(gene_id, genotype, beta.dtf) + ## fixing Genotype and GxE effects to 0 for G0 (reference) + ## -> hide in (Intercept) + res[res$genotype == "G0", c("betaG", "betaGE")] <- 0 + if (fixIntercept) { + res <- res %>% + dplyr::group_by(gene_id) %>% + dplyr::mutate("(Intercept)" = mean(`(Intercept)`)) + } + if (fixBetaE) { + res <- res %>% + dplyr::group_by(gene_id) %>% + dplyr::mutate(betaE = mean(betaE)) + } + return(res) } #' Get model matrix @@ -32,13 +52,13 @@ getBetaforSimulation <- function(n_genes = 100, n_genotypes = 20, mvrnorm.fit){ #' #' @examples getModelMatrix <- function(n_environments = 2) { - environment_vec = base::paste("E", 0:(n_environments-1), sep = "") - ######################################## - m = c(1,1,0,0,1,1,1,1) - model.matrix = matrix(data = m , ncol = 2, byrow = F) - colnames(model.matrix) = environment_vec - rownames(model.matrix) = c("beta0", "betaG", "betaE", "betaGE") - return(model.matrix) + environment_vec <- base::paste("E", 0:(n_environments - 1), sep = "") + ######################################## + m <- c(1, 1, 0, 0, 1, 1, 1, 1) + model.matrix <- matrix(data = m, ncol = 2, byrow = F) + colnames(model.matrix) <- environment_vec + rownames(model.matrix) <- c("(Intercept)", "betaG", "betaE", "betaGE") + return(model.matrix) } @@ -50,15 +70,14 @@ getModelMatrix <- function(n_environments = 2) { #' @export #' #' @examples -getLog_qij <- function( beta.dtf , model.matx){ - beta.matx = beta.dtf[ ,c("beta0", 'betaG', 'betaE', "betaGE") ] %>% as.matrix() - log_qij.matx = beta.matx %*% model.matx ## j samples, i genes - ### Some reshaping ### - log_qij.dtf = log_qij.matx %>% data.frame() - annotations = beta.dtf[ , c("gene_id", 'genotype')] - log_qij.dtf = cbind(annotations, log_qij.dtf) - return( log_qij.dtf ) - +getLog_qij <- function(beta.dtf, model.matx) { + beta.matx <- beta.dtf[, c("(Intercept)", "betaG", "betaE", "betaGE")] %>% as.matrix() + log_qij.matx <- beta.matx %*% model.matx ## j samples, i genes + ### Some reshaping ### + log_qij.dtf <- log_qij.matx %>% data.frame() + annotations <- beta.dtf[, c("gene_id", "genotype")] + log_qij.dtf <- cbind(annotations, log_qij.dtf) + return(log_qij.dtf) } #' Get mu_ij @@ -69,22 +88,24 @@ getLog_qij <- function( beta.dtf , model.matx){ #' @export #' #' @examples -getMu_ij <- function( log_qij.dtf, size_factor ){ - - log_qij.matx = log_qij.dtf[ , c("E0", 'E1') ] %>% as.matrix() - - mu_ij.matx = size_factor * 2^log_qij.matx ## size factor * log(qij) - - mu_ij.dtf = mu_ij.matx %>% data.frame() - annotations = log_qij.dtf[ , c("gene_id", 'genotype')] - mu_ij.dtf = cbind(annotations, mu_ij.dtf) - mu_ij.matx = mu_ij.dtf %>% reshape2::melt(., id.vars = c("gene_id", 'genotype'), - value.name = "mu_ij", variable.name= "environment") %>% - reshape2::dcast(., gene_id ~ genotype + environment , value.var = "mu_ij") %>% - column_to_rownames("gene_id") %>% as.matrix() - - return( mu_ij.matx ) +getMu_ij <- function(log_qij.dtf, size_factor) { + log_qij.matx <- log_qij.dtf[, c("E0", "E1")] %>% as.matrix() + + mu_ij.matx <- size_factor * 2^log_qij.matx ## size factor * log(qij) + mu_ij.dtf <- mu_ij.matx %>% data.frame() + annotations <- log_qij.dtf[, c("gene_id", "genotype")] + mu_ij.dtf <- cbind(annotations, mu_ij.dtf) + mu_ij.matx <- mu_ij.dtf %>% + reshape2::melt(., + id.vars = c("gene_id", "genotype"), + value.name = "mu_ij", variable.name = "environment" + ) %>% + reshape2::dcast(., gene_id ~ genotype + environment, value.var = "mu_ij") %>% + column_to_rownames("gene_id") %>% + as.matrix() + + return(mu_ij.matx) } #' Get genes dispersion @@ -101,34 +122,33 @@ getMu_ij <- function( log_qij.dtf, size_factor ){ #' @export #' #' @examples -getGenesDispersions <- function( n_genes, sample_id_list, - dispersion.vec , dispUniform_btweenCondition = T ) { - - if (dispUniform_btweenCondition == T ) { - message( "Get dispersion for each genes ...\n") - - gene_dispersion.dtf = base::sample( dispersion.vec , - replace = T, - size = n_genes) %>% base::data.frame() - n_rep = length(sample_id_list) - gene_dispersion.dtf = gene_dispersion.dtf[ ,base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)] - rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "") - colnames(gene_dispersion.dtf) = sample_id_list - - } - else { - message( "Get dispersion for each genes within each conditions ...\n") +getGenesDispersions <- function(n_genes, sample_id_list, + dispersion.vec, dispUniform_btweenCondition = T) { + if (dispUniform_btweenCondition == T) { + message("Get dispersion for each genes ...\n") - replication_table = sample_ids %>% - stringr::str_replace(., pattern = "_[0-9]+","" ) %>% table() - gene_dispersion.dtf = replication_table %>% - purrr::map(., ~sample( dispersion.vec, replace = T, size = n_genes) ) %>% data.frame() - gene_dispersion.dtf = gene_dispersion.dtf[ ,rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())] - colnames(gene_dispersion.dtf) = sample_ids - rownames(gene_dispersion.dtf) = base::paste("gene", 1:(n_genes), sep = "") + gene_dispersion.dtf <- base::sample(dispersion.vec, + replace = T, + size = n_genes + ) %>% base::data.frame() + n_rep <- length(sample_id_list) + gene_dispersion.dtf <- gene_dispersion.dtf[, base::rep(base::seq_len(base::ncol(gene_dispersion.dtf)), n_rep)] + rownames(gene_dispersion.dtf) <- base::paste("gene", 1:(n_genes), sep = "") + colnames(gene_dispersion.dtf) <- sample_id_list + } else { + message("Get dispersion for each genes within each conditions ...\n") + replication_table <- sample_ids %>% + stringr::str_replace(., pattern = "_[0-9]+", "") %>% + table() + gene_dispersion.dtf <- replication_table %>% + purrr::map(., ~ sample(dispersion.vec, replace = T, size = n_genes)) %>% + data.frame() + gene_dispersion.dtf <- gene_dispersion.dtf[, rep(seq_len(ncol(gene_dispersion.dtf)), replication_table %>% as.numeric())] + colnames(gene_dispersion.dtf) <- sample_ids + rownames(gene_dispersion.dtf) <- base::paste("gene", 1:(n_genes), sep = "") } - return(gene_dispersion.dtf %>% as.matrix) + return(gene_dispersion.dtf %>% as.matrix()) } @@ -137,25 +157,25 @@ getGenesDispersions <- function( n_genes, sample_id_list, #' @param mu_ij.matx a matrix of mu_ij #' @param dispersion.matx a matrix of gene dispersion #' @param n_genes a matrix of gene dispersion -#' @param sample_id_list list of sample_ids -#' @param idx_replicat +#' @param sample_id_list list of sample_ids +#' @param idx_replicat #' @import stats #' @return a matrix with counts per genes and samples #' @export #' #' @examples -get_kij <- function(mu_ij.matx, dispersion.matx, n_genes, +get_kij <- function(mu_ij.matx, dispersion.matx, n_genes, sample_id_list, idx_replicat) { - - n_sples = length(sample_id_list) - alpha_gene = 1/dispersion.matx - k_ij = stats::rnbinom( length(mu_ij.matx), - size = alpha_gene , - mu = mu_ij.matx) %>% - matrix(. , nrow = n_genes, ncol = n_sples ) - k_ij[is.na(k_ij)] = 0 - colnames(k_ij) = base::paste(sample_id_list, idx_replicat, sep = '_') - rownames(k_ij) = rownames(mu_ij.matx) + n_sples <- length(sample_id_list) + alpha_gene <- 1 / dispersion.matx + k_ij <- stats::rnbinom(length(mu_ij.matx), + size = alpha_gene, + mu = mu_ij.matx + ) %>% + matrix(., nrow = n_genes, ncol = n_sples) + k_ij[is.na(k_ij)] <- 0 + colnames(k_ij) <- base::paste(sample_id_list, idx_replicat, sep = "_") + rownames(k_ij) <- rownames(mu_ij.matx) return(k_ij) } @@ -175,24 +195,26 @@ get_kij <- function(mu_ij.matx, dispersion.matx, n_genes, #' @export #' #' @examples -getCountTable <- function( mu_ij.matx, dispersion.matx, - n_genes, n_genotypes, - n_environments = 2, sample_id_list, - replication.matx ) { - - ### Iterate on each replicates - message( "Get k_ij :") - message( "k_ij ~ NegativBinomial( mu_ij, dispersion_ij )\n") - kij.simu.list = purrr::map(.x = 1:max_n_replicates, - .f = ~get_kij(mu_ij.matx[ ,replication.matx[.x, ]], - dispersion.matx[ ,replication.matx[.x, ]], - n_genes = n_genes, - sample_id_list[replication.matx[.x, ]], - .x) ) - tableCounts.simulated = do.call(cbind, kij.simu.list) - - return(tableCounts.simulated) +getCountTable <- function(mu_ij.matx, dispersion.matx, + n_genes, n_genotypes, + n_environments = 2, sample_id_list, + replication.matx) { + ### Iterate on each replicates + message("Get k_ij :") + message("k_ij ~ NegativBinomial( mu_ij, dispersion_ij )\n") + max_n_replicates <- dim(replication.matx)[1] + kij.simu.list <- purrr::map( + .x = 1:max_n_replicates, + .f = ~ get_kij(mu_ij.matx[, replication.matx[.x, ]], + dispersion.matx[, replication.matx[.x, ]], + n_genes = n_genes, + sample_id_list[replication.matx[.x, ]], + .x + ) + ) + tableCounts.simulated <- do.call(cbind, kij.simu.list) + return(tableCounts.simulated) } @@ -205,9 +227,9 @@ getCountTable <- function( mu_ij.matx, dispersion.matx, #' #' @examples uniform_replication <- function(maxN, n_samples) { - return(rep(T, time = maxN) %>% - rep(., each = n_samples ) %>% - matrix(ncol = n_samples) ) + return(rep(TRUE, time = maxN) %>% + rep(., each = n_samples) %>% + matrix(ncol = n_samples)) } @@ -220,11 +242,13 @@ uniform_replication <- function(maxN, n_samples) { #' @export #' #' @examples -random_replication <- function(maxN, n_samples){ - replicating <- function(maxN) return(sample(x = c(T,F), size = maxN, replace = T)) - res = purrr::map(1:n_samples, ~replicating(maxN-1)) - rep_table = do.call(cbind, res) - rep_table = rbind(rep(T, times = n_samples), rep_table) +random_replication <- function(maxN, n_samples) { + replicating <- function(maxN) { + return(sample(x = c(TRUE, FALSE), size = maxN, replace = T)) + } + res <- purrr::map(1:n_samples, ~ replicating(maxN - 1)) + rep_table <- do.call(cbind, res) + rep_table <- rbind(rep(TRUE, times = n_samples), rep_table) return(rep_table) } @@ -232,16 +256,16 @@ random_replication <- function(maxN, n_samples){ #' #' @param maxN a integer : number of replicates #' @param n_genotypes an integer : number of genotypes -#' @param n_environments +#' @param n_environments #' @param uniformNumberOfReplicates bool #' @return a matrix of 0 and 1 #' @export #' #' @examples -getReplicationDesign <- function(maxN, n_genotypes, n_environments = 2, - uniformNumberOfReplicates = T ) { - nb_sample = n_genotypes*n_environments - if ( uniformNumberOfReplicates == T ) rep.matrix = uniform_replication( maxN, nb_sample ) - if ( uniformNumberOfReplicates == F) rep.matrix = random_replication( maxN, nb_sample ) - return(rep.matrix) +getReplicationDesign <- function(maxN, n_genotypes, n_environments = 2, + uniformNumberOfReplicates = T) { + nb_sample <- n_genotypes * n_environments + if (uniformNumberOfReplicates == T) rep.matrix <- uniform_replication(maxN, nb_sample) + if (uniformNumberOfReplicates == F) rep.matrix <- random_replication(maxN, nb_sample) + return(rep.matrix) } diff --git a/src/v3/HTRsim/R/evaluation.R b/src/v3/HTRsim/R/evaluation.R new file mode 100644 index 0000000000000000000000000000000000000000..0c974a80934660c4c967a92db4198a6852588f86 --- /dev/null +++ b/src/v3/HTRsim/R/evaluation.R @@ -0,0 +1,137 @@ +#' Get deseq results +#' +#' @param dds_simu.mcols dds object obtain on simulation +#' @param threads +#' @import stringr +#' @import dplyr +#' @import future +#' @import DESeq2 +#' @import furrr +#' @return a dataframe of beta obtained with deseq2 +#' @export +#' +#' @examples +deseq_fitExtraction <- function(dds_simu, threads = 4) { + listBeta <- DESeq2::resultsNames(dds_simu) + future::plan(multisession, workers = threads) + res <- listBeta %>% furrr::future_map( + .x = ., + ~ DESeq2::results(dds_simu, + contrast = list(.x), + tidy = TRUE + ) %>% + dplyr::select(-baseMean) %>% + dplyr::mutate(term = .x) %>% + dplyr::rename( + estimate = log2FoldChange, + std.error = lfcSE, + statistic = stat, + p.value = pvalue, + gene_id = row + ), + .options = furrr_options(seed = TRUE) + ) + deseq_inference <- do.call("rbind", res) + deseq_inference.dtf <- deseq_inference %>% mutate(term = term %>% + stringr::str_replace("Intercept", "(Intercept)") %>% + stringr::str_replace("_vs_G0", "") %>% + stringr::str_replace("_vs_E0", "") %>% + stringr::str_replace_all("_", "") %>% + stringr::str_replace("\\.", ":")) + + return(deseq_inference.dtf) +} + + + + +#' Get deseq results +#' +#' @param inference a obj output of [deseq, glm, glmm] +#' @param actual parameters used during simulation +#' @param threads nb of threads +#' @param threshold +#' @param alphaRisk +#' @import reshape2 +#' @import dplyr +#' @import data.table +#' @return a dataframe to compare actual and infered betas +#' @export +#' +#' @examples +getDfComparison <- function(inference, actual, threads = 4) { + actual.dtf <- actual %>% + reshape2::melt( + id = c("gene_id", "genotype"), + value.name = "value", variable.name = "beta" + ) %>% + dplyr::group_by(gene_id, beta) %>% + dplyr::mutate( + actual.value = + dplyr::if_else(beta %in% c("(Intercept)", "betaE"), + mean(value), value + ) + ) %>% + ungroup() %>% + dplyr::select(-value) %>% + dplyr::mutate( + term = + dplyr::case_when( + beta == "(Intercept)" ~ "(Intercept)", + beta == "betaG" ~ paste("genotype", genotype, sep = ""), + beta == "betaE" ~ paste("environment", "E1", sep = ""), + beta == "betaGE" ~ paste("genotype", genotype, ":environmentE1", sep = "") + ) + ) + + actual.dtf <- actual.dtf %>% dplyr::select(-genotype) + actual_undup.dtf <- actual.dtf[!duplicated(actual.dtf), ] + actual2join.dtf <- data.table::data.table(actual_undup.dtf, key = c("gene_id", "term")) + + inference.dtf <- deseq_fitExtraction(inference, threads = threads) + inference2join.dtf <- data.table::data.table(inference.dtf, key = c("gene_id", "term")) + + joined.dtf <- actual2join.dtf[inference2join.dtf] + return(joined.dtf) +} + + +#' Get deseq results +#' +#' @param comparison.Dtf a obj output of getDfComparison +#' @param threshold +#' @param alphaRisk +#' @import dplyr +#' @return a dataframe to compare actual and infered betas with annotations +#' @export +#' +#' @examples +getAnnotation <- function(comparison.dtf, threshold = 0, alphaRisk = 0.05) { + ## Post inference selection + comparison.dtf <- comparison.dtf %>% + dplyr::mutate( + prediction.label = + dplyr::if_else(( + abs(estimate) > threshold & padj < alphaRisk), + "DE", "nonDE" + ) + ) + + comparison.dtf <- comparison.dtf %>% + dplyr::mutate( + actual.label = + dplyr::if_else(abs(actual.value) < threshold, + "nonDE", "DE" + ) + ) + + comparison.dtf <- comparison.dtf %>% dplyr::mutate( + annotation = + dplyr::case_when( + (actual.label == "DE" & prediction.label == "DE") ~ "TRUE", + (actual.label == "nonDE" & prediction.label == "nonDE") ~ "TRUE", + TRUE ~ "FALSE" + ) + ) + return(comparison.dtf) +} diff --git a/src/v3/HTRsim/R/loadEmbeddedFiles.R b/src/v3/HTRsim/R/loadEmbeddedFiles.R index 2ba458dcfc33bfda49c3fa9c2bbe57e13f01d224..da61905c2d4a2be614afa511c275251107b05225 100644 --- a/src/v3/HTRsim/R/loadEmbeddedFiles.R +++ b/src/v3/HTRsim/R/loadEmbeddedFiles.R @@ -1,13 +1,13 @@ #' Load beta dtf embedded in package #' -#' @return an object dds.Extraction +#' @return an object dds.Extraction #' @export #' #' @examples -loadObservedValues <- function(){ - ## Import public beta observed - fn = system.file("extdata/", "SRP217588_YM_observedParams.rds", package = "HTRsim") - dds.extraction = readRDS(file = fn) +loadObservedValues <- function() { + ## Import public beta observed + fn <- system.file("extdata/", "SRP217588_YM_observedParams.rds", package = "HTRsim") + dds.extraction <- readRDS(file = fn) return(dds.extraction) } @@ -19,14 +19,14 @@ loadObservedValues <- function(){ #' @export #' #' @examples -loadPubliCounTable <- function(){ +loadPubliCounTable <- function() { ## Import public counts table - fn = system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRsim") + fn <- system.file("extdata/", "SRP217588_YM_vkallisto.tsv", package = "HTRsim") tabl_cnts <- utils::read.table(file = fn, header = TRUE) rownames(tabl_cnts) <- tabl_cnts$gene_id - tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id) ##suppr colonne GeneID - tabl_cnts = tabl_cnts[ order(tabl_cnts %>% rownames()), ] - tabl_cnts = tabl_cnts %>% dplyr::select(., !matches( "ru_rm_5")) + tabl_cnts <- tabl_cnts %>% dplyr::select(-gene_id) ## suppr colonne GeneID + tabl_cnts <- tabl_cnts[order(tabl_cnts %>% rownames()), ] + tabl_cnts <- tabl_cnts %>% dplyr::select(!matches("ru_rm_5")) return(tabl_cnts) } @@ -38,13 +38,13 @@ loadPubliCounTable <- function(){ #' @export #' #' @examples -loadPublicDesign <- function(){ +loadPublicDesign <- function() { ## DESIGN - fn = system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRsim") - bioDesign <- utils::read.table(file = fn, header = T, sep = ';') + fn <- system.file("extdata/", "SRP217588_YM_bioDesign.csv", package = "HTRsim") + bioDesign <- utils::read.table(file = fn, header = TRUE, sep = ";") ## defining reference - bioDesign$genotype <- factor(x = bioDesign$genotype,levels = c("GSY147", "RM11")) - bioDesign$environment <- factor(x = bioDesign$environment, levels = c( "untreated", "treated")) - bioDesign = bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5") ) + bioDesign$genotype <- factor(x = bioDesign$genotype, levels = c("GSY147", "RM11")) + bioDesign$environment <- factor(x = bioDesign$environment, levels = c("untreated", "treated")) + bioDesign <- bioDesign %>% dplyr::filter(!str_detect(sample, "ru_rm_5")) return(bioDesign) -} \ No newline at end of file +} diff --git a/src/v3/HTRsim/R/manipulationsDDS_obj.R b/src/v3/HTRsim/R/manipulationsDDS_obj.R index 3689f0faf83611321a477df722fe406c32e65d79..c4a758cab14c186cda46ce7f4200e643e4014e9e 100644 --- a/src/v3/HTRsim/R/manipulationsDDS_obj.R +++ b/src/v3/HTRsim/R/manipulationsDDS_obj.R @@ -2,19 +2,19 @@ #' #' @param tabl_cnts table containing counts per genes & samples #' @param bioDesign table describing bioDesgin of input +#' @param model #' @import DESeq2 #' @import dplyr #' @return DESEQ2 object #' @export #' #' @examples -run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment){ - message( "Run DESEQ2 ...\n") - tabl_cnts = tabl_cnts %>% dplyr::mutate(across(where(is.double), as.integer)) - dds = DESeq2::DESeqDataSetFromMatrix( countData = round(tabl_cnts), colData = bioDesign , design = model ) +run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + genotype:environment) { + message("Run DESEQ2 ...\n") + tabl_cnts <- tabl_cnts %>% dplyr::mutate(across(where(is.double), as.integer)) + dds <- DESeq2::DESeqDataSetFromMatrix(countData = round(tabl_cnts), colData = bioDesign, design = model) dds <- DESeq2::DESeq(dds, quiet = TRUE) return(dds) - } @@ -28,39 +28,43 @@ run.deseq <- function(tabl_cnts, bioDesign, model = ~ genotype + environment + g #' @export #' #' @examples -ddsExtraction <- function(dds_obj){ +ddsExtraction <- function(dds_obj) { ## Beta - dds.mcols = S4Vectors::mcols(dds_obj, use.names=TRUE) - beta0 <- dds.mcols$Intercept + dds.mcols <- S4Vectors::mcols(dds_obj, use.names = TRUE) + Intercept <- dds.mcols$Intercept betaG <- dds.mcols$genotype_RM11_vs_GSY147 betaE <- dds.mcols$environment_treated_vs_untreated betaGE <- dds.mcols$genotypeRM11.environmenttreated - beta.dtf = cbind(beta0,betaG,betaE,betaGE) %>% as.data.frame() %>% tidyr::drop_na() + beta.dtf <- cbind(Intercept, betaG, betaE, betaGE) %>% + as.data.frame() %>% + tidyr::drop_na() + colnames(beta.dtf) <- c("(Intercept)", "betaG", "betaE", "betaGE") ## Dispersion - gene_disp = dds.mcols$dispersion %>% stats::na.omit() - + gene_disp <- dds.mcols$dispersion %>% stats::na.omit() - return(list(beta = beta.dtf, - gene_dispersion = gene_disp)) + return(list( + beta = beta.dtf, + gene_dispersion = gene_disp + )) } #' Extract beta distribution from DESEQ2 object #' #' @import dplyr #' @import utils -#' @return output of dds extraction +#' @return output of dds extraction #' @export #' #' @examples -publicDDS_extraction <- function(){ - tabl_cnts = loadPubliCounTable() - bioDesgin = loadPublicDesign() +publicDDS_extraction <- function() { + tabl_cnts <- loadPubliCounTable() + bioDesign <- loadPublicDesign() ## Launch DESEQ2 - dds = run.deseq(tabl_cnts, bioDesign = bioDesign) + dds <- run.deseq(tabl_cnts, bioDesign = bioDesign) ## Extract - dds.extraction = ddsExtraction(dds_obj = dds) + dds.extraction <- ddsExtraction(dds_obj = dds) return(dds.extraction) -} \ No newline at end of file +} diff --git a/src/v3/HTRsim/R/workflow.R b/src/v3/HTRsim/R/workflow.R index d09d2e0ab3002f03e15ad69022764916a50a9695..eeb1ac08f3c76599182e13ca0965068315f3dcc3 100644 --- a/src/v3/HTRsim/R/workflow.R +++ b/src/v3/HTRsim/R/workflow.R @@ -1,58 +1,90 @@ +#' get design +#' @param count_table +#' @import stringr +#' @import dplyr +#' @return dataframe +#' @export +#' +#' @examples +summariseDesign <- function(count_table) { + sample_id_list <- colnames(count_table) + experimental_design <- sample_id_list %>% + stringr::str_split(pattern = "_", simplify = TRUE) %>% .[, c(1, 2)] %>% data.frame() + colnames(experimental_design) <- c("genotype", "environment") + experimental_design <- experimental_design %>% + dplyr::mutate(sample_id = sample_id_list) %>% + dplyr::select("sample_id", "genotype", "environment") + return(experimental_design) +} + + #' perform all workflow #' #' @param n_genes number of genes -#' @param n_genotypes number of genotypes +#' @param n_genotypes number of genotypes #' @param n_environments number of env +#' @param sequencing_factor #' @param dds.extraction output of dds.extraction #' @param max_n_replicates max number of replicates #' @param uniformNumberOfReplicates boolean #' @param uniformDispersion boolean -#' @return list +#' @return list #' @export #' #' @examples -rnaMock <- function(n_genes, - n_genotypes, - n_environments = 2, - max_n_replicates, - uniformNumberOfReplicates = T, +rnaMock <- function(n_genes, + n_genotypes, + n_environments = 2, + max_n_replicates, + sequencing_factor = 2, + uniformNumberOfReplicates = T, uniformDispersion = T, - dds.extraction = loadObservedValues() ) { - - ## Fit mvnorm ## - fit.mvnorm = mvnormFitting(dds.extraction$beta) - - ##### Ground truth ###### - beta.actual = getBetaforSimulation( n_genes, - n_genotypes, - fit.mvnorm ) - - ##### build input for simulation #### - model.matx = getModelMatrix() - log_qij = getLog_qij( beta.actual, model.matx ) - mu_ij = getMu_ij(log_qij.dtf = log_qij, 1) - sample_ids = colnames(mu_ij) - gene_dispersion.vec = dds.extraction$gene_dispersion - dispersion.matrix = getGenesDispersions(n_genes, - sample_ids, - dispersion.vec = gene_dispersion.vec, - uniformDispersion ) - - ##### Design replicates ###### - designReplication.matx = getReplicationDesign( max_n_replicates, - n_genotypes, - n_environments, - uniformNumberOfReplicates) - - ##### build counts table #### - countTable = getCountTable( mu_ij, dispersion.matrix, - n_genes, n_genotypes , - sample_id_list = sample_ids, - replication.matx = designReplication.matx) - - return( list( countTable = countTable, dispersion = dispersion.matrix, - beta = beta.actual, mvnorm = fit.mvnorm )) + dds.extraction = loadObservedValues()) { + ## Fit mvnorm ## + fit.mvnorm <- mvnormFitting(dds.extraction$beta) + + ##### Ground truth ###### + beta.actual <- getBetaforSimulation( + n_genes, + n_genotypes, + fit.mvnorm + ) + + ##### build input for simulation #### + model.matx <- getModelMatrix() + log_qij <- getLog_qij(beta.actual, model.matx) + mu_ij <- getMu_ij(log_qij.dtf = log_qij, sequencing_factor) + sample_ids <- colnames(mu_ij) + gene_dispersion.vec <- dds.extraction$gene_dispersion + dispersion.matrix <- getGenesDispersions(n_genes, + sample_ids, + dispersion.vec = gene_dispersion.vec, + uniformDispersion + ) + + ##### Design replicates ###### + designReplication.matx <- getReplicationDesign( + max_n_replicates, + n_genotypes, + n_environments, + uniformNumberOfReplicates + ) + ##### build counts table #### + countTable <- getCountTable(mu_ij, dispersion.matrix, + n_genes, n_genotypes, + sample_id_list = sample_ids, + replication.matx = designReplication.matx + ) + design <- summariseDesign(countTable) + actualParam <- list( + dispersion = dispersion.matrix, + beta = beta.actual, mvnorm = fit.mvnorm + ) + return(list( + design = design, countTable = countTable, + actualParameters = actualParam + )) } diff --git a/src/v3/HTRsim/devtools_history.R b/src/v3/HTRsim/devtools_history.R index aee52fac6c104d5d505eec6f956e088d401233fa..9cfb13ef81146dfb7f8fe894a6b869a009b8a2a4 100644 --- a/src/v3/HTRsim/devtools_history.R +++ b/src/v3/HTRsim/devtools_history.R @@ -11,5 +11,6 @@ usethis::use_package("base") usethis::use_package("dplyr") usethis::use_package("utils") usethis::use_package("Rfast") +usethis::use_package("data.table")