Skip to content
Snippets Groups Projects
Commit 234086ab authored by Arnaud Duvermy's avatar Arnaud Duvermy
Browse files

Merge branch 'v1.0.2' into 'master'

V1.0.2

See merge request aduvermy/HTRfit!2
parents e92c9dc7 3bd3380e
No related branches found
No related tags found
No related merge requests found
Showing with 283 additions and 137 deletions
......@@ -13,4 +13,15 @@ To do:
- The identity plot's point shape has been modified, enhancing the visual clarity of comparisons between HTRfit and DESeq2 results.
- Similar to the identity plot, the dispersion plot also benefits from improved point shape, contributing to a more effective visual analysis.
- Add xlab/ylab to ROC plot
- The vignette section now includes a new topic, "About mixed model evaluation".
\ No newline at end of file
- The vignette section now includes a new topic, "About mixed model evaluation".
# v1.0.2 : R files reorganization, log file enhancement and versioning
- R files have been restructured to enhance clarity.
- The log file location has been relocated to the R temporary directory.
- A bug that was causing issues with writing the 'group' information in log files during model update has been fixed.
- Fix NOTE "no visible binding for global variable" linked to ggplot2
- Fix bug with packageVersion("HTRfit")
Package: HTRfit
Title: HTRfit
Version: 0.0.0.9000
Version: 1.0.2
Authors@R:
person("First", "Last", , "first.last@example.com", role = c("aut", "cre"),
comment = c(ORCID = "YOUR-ORCID-ID"))
......
# Generated by roxygen2: do not edit by hand
export("%>%")
export(.fitModel)
export(.getColumnWithSampleID)
export(.isDispersionMatrixValid)
export(.parallel_fit)
export(.parallel_update)
export(.replicateByGroup)
export(.replicateMatrix)
export(.replicateRows)
export(.subsetData_andfit)
export(addBasalExpression)
export(add_interaction)
export(already_init_variable)
......@@ -33,15 +24,16 @@ export(drop_randfx)
export(endsWithDigit)
export(evaluateDispersion)
export(exportReportFile)
export(extractDESeqDispersion)
export(extractTMBDispersion)
export(extract_ddsDispersion)
export(extract_fixed_effect)
export(extract_ran_pars)
export(extract_tmbDispersion)
export(fillInCovarMatrice)
export(fillInInteraction)
export(fillInVariable)
export(filter_dataframe)
export(findAttribute)
export(fitModel)
export(fitModelParallel)
export(fitUpdate)
export(generateActualForMainFixEff)
......@@ -50,7 +42,7 @@ export(generateActualInteractionX3_FixEff)
export(generateCountTable)
export(generateGridCombination_fromListVar)
export(generateReplicationMatrix)
export(generate_BE)
export(generate_basal_expression)
export(getActualInteractionFixEff)
export(getActualIntercept)
export(getActualMainFixEff)
......@@ -58,6 +50,7 @@ export(getActualMixed_typeI)
export(getBinExpression)
export(getCategoricalVar_inFixedEffect)
export(getCoefficients)
export(getColumnWithSampleID)
export(getCovarianceMatrix)
export(getData2computeActualFixEffect)
export(getDataFromMvrnorm)
......@@ -66,12 +59,15 @@ export(getDispersionComparison)
export(getDispersionMatrix)
export(getEstimate_df)
export(getGeneMetadata)
export(getGivenAttribute)
export(getGlance)
export(getGridCombination)
export(getGrobTable)
export(getInput2mvrnorm)
export(getInput2simulation)
export(getLabelExpected)
export(getLabels)
export(getListVar)
export(getLog_qij)
export(getMu_ij)
export(getMu_ij_matrix)
......@@ -84,7 +80,7 @@ export(getSettingsTable)
export(getStandardDeviationInCorrelation)
export(getTidyGlmmTMB)
export(getValidDispersion)
export(get_inference)
export(get_inference_dds)
export(glance_tmb)
export(group_logQij_per_genes_and_labels)
export(handleAnovaError)
......@@ -94,6 +90,7 @@ export(inferenceToExpected_withMixedEff)
export(init_variable)
export(inputs_checking)
export(isValidInput2fit)
export(is_dispersionMatrixValid)
export(is_formula_mixedTypeI)
export(is_fullrank)
export(is_mixedEffect_inFormula)
......@@ -104,18 +101,24 @@ export(launchUpdate)
export(medianRatioNormalization)
export(metrics_plot)
export(mock_rnaseq)
export(parallel_fit)
export(parallel_update)
export(prepareData2computeInteraction)
export(prepareData2fit)
export(removeDigitsAtEnd)
export(removeDuplicatedWord)
export(renameColumns)
export(reorderColumns)
export(replicateByGroup)
export(replicateMatrix)
export(replicateRows)
export(roc_plot)
export(samplingFromMvrnorm)
export(scaleCountsTable)
export(set_correlation)
export(simulationReport)
export(subsetByTermLabel)
export(subsetData_andfit)
export(subsetFixEffectInferred)
export(subsetGenes)
export(subset_glance)
......@@ -123,8 +126,9 @@ export(tidy_results)
export(tidy_tmb)
export(updateParallel)
export(wald_test)
export(wrapper_DESeq2)
export(wrap_dds)
export(wrapper_var_cor)
importFrom(MASS,mvrnorm)
importFrom(car,Anova)
importFrom(data.table,data.table)
importFrom(data.table,setDT)
......@@ -158,9 +162,11 @@ importFrom(plotROC,geom_roc)
importFrom(reshape2,dcast)
importFrom(reshape2,melt)
importFrom(rlang,":=")
importFrom(rlang,.data)
importFrom(stats,anova)
importFrom(stats,as.formula)
importFrom(stats,cor)
importFrom(stats,drop.terms)
importFrom(stats,median)
importFrom(stats,model.matrix)
importFrom(stats,p.adjust)
......
File moved
......@@ -19,22 +19,6 @@ averageByGroup <- function(data, column, group_by) {
return(result)
}
#' Convert specified columns to factor
#'
#' @param data The input data frame
#' @param columns The column names to be converted to factors
#' @return The modified data frame with specified columns converted to factors
#' @export
convert2Factor <- function(data, columns) {
if (is.character(columns)) {
columns <- match(columns, colnames(data))
}
if (length(columns) > 1) data[, columns] <- lapply(data[, columns], as.factor )
else data[, columns] <- as.factor(data[, columns])
return(data)
}
#' Subset Fixed Effect Inferred Terms
#'
#' This function subsets the tidy TMB object to extract the fixed effect inferred terms
......@@ -240,7 +224,3 @@ getActualMainFixEff <- function( l_term , fixeEff_dataActual , df_actualIntercep
return(df_actual)
}
......@@ -5,7 +5,7 @@
#'
#' This function handles ANOVA errors and warnings during the ANOVA calculation process.
#'
#' @param l_TMB A list of fitted glmmTMB models.
#' @param list_tmb A list of fitted glmmTMB models.
#' @param group A character string indicating the group for which ANOVA is calculated.
#' @param ... Additional arguments to be passed to the \code{car::Anova} function.
#'
......@@ -13,17 +13,17 @@
#' @export
#'
#' @examples
#' l_tmb <- fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length,
#' list_tmb <- fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length,
#' data = iris, group_by = "Species", n.cores = 1)
#' anova_res <- handleAnovaError(l_tmb, "setosa", type = "III")
#' anova_res <- handleAnovaError(list_tmb, "setosa", type = "III")
#'
#' @importFrom car Anova
#' @export
handleAnovaError <- function(l_TMB, group, ...) {
handleAnovaError <- function(list_tmb, group, ...) {
tryCatch(
expr = {
withCallingHandlers(
car::Anova(l_TMB[[group]], ...),
car::Anova(list_tmb[[group]], ...),
warning = function(w) {
message(paste(Sys.time(), "warning for group", group, ":", conditionMessage(w)))
invokeRestart("muffleWarning")
......@@ -43,7 +43,7 @@ handleAnovaError <- function(l_TMB, group, ...) {
#' models in parallel for different groups specified in the list. It returns a list
#' of ANOVA results for each group.
#'
#' @param l_tmb A list of \code{glmmTMB} models, with model names corresponding to the groups.
#' @param list_tmb A list of \code{glmmTMB} models, with model names corresponding to the groups.
#' @param ... Additional arguments passed to \code{\link[stats]{anova}} function.
#'
#' @return A list of ANOVA results for each group.
......@@ -51,14 +51,14 @@ handleAnovaError <- function(l_TMB, group, ...) {
#' @examples
#' # Perform ANOVA
#' data(iris)
#' l_tmb<- fitModelParallel( Sepal.Length ~ Sepal.Width + Petal.Length,
#' list_tmb<- fitModelParallel( Sepal.Length ~ Sepal.Width + Petal.Length,
#' data = iris, group_by = "Species", n.cores = 1 )
#' anov_res <- anovaParallel(l_tmb , type = "III")
#' anov_res <- anovaParallel(list_tmb , type = "III")
#' @importFrom stats anova
#' @export
anovaParallel <- function(l_tmb, ...) {
l_group <- attributes(l_tmb)$names
lapply(stats::setNames(l_group, l_group), function(group) handleAnovaError(l_tmb, group, ...))
anovaParallel <- function(list_tmb, ...) {
l_group <- attributes(list_tmb)$names
lapply(stats::setNames(l_group, l_group), function(group) handleAnovaError(list_tmb, group, ...))
}
......@@ -30,7 +30,7 @@ getBinExpression <- function(dtf_coef, n_bins){
#' Generate BE data.
#'
#' This function generates BE data for a given number of genes, in a vector of BE values.
#' This function generates basal expression data for a given number of genes, in a vector of basal expression values.
#'
#' @param n_genes The number of genes to generate BE data for.
#' @param basal_expression a numeric vector from which sample BE for eacg genes
......@@ -38,10 +38,10 @@ getBinExpression <- function(dtf_coef, n_bins){
#' @return A data frame containing gene IDs, BE values
#'
#' @examples
#' generate_BE(n_genes = 100, 10)
#' generate_basal_expression(n_genes = 100, 10)
#'
#' @export
generate_BE <- function(n_genes, basal_expression) {
generate_basal_expression <- function(n_genes, basal_expression) {
## --avoid bug if one value in basal_expr
pool2sample <- c(basal_expression, basal_expression)
BE <- sample(x = pool2sample, size = n_genes, replace = T)
......@@ -56,7 +56,7 @@ generate_BE <- function(n_genes, basal_expression) {
#'
#' This function takes the coefficients data frame \code{dtf_coef} and computes
#' basal expression for gene expression. The scaling factors are generated
#' using the function \code{generate_BE}.
#' using the function \code{generate_basal_expression}.
#'
#' @param dtf_coef A data frame containing the coefficients for gene expression.
#' @param n_genes number of genes in simulation
......@@ -72,7 +72,7 @@ generate_BE <- function(n_genes, basal_expression) {
#' dtf_coef <- getLog_qij(dtf_coef)
#' addBasalExpression(dtf_coef, N_GENES, 1)
addBasalExpression <- function(dtf_coef, n_genes, basal_expression){
BE_df <- generate_BE(n_genes, basal_expression )
BE_df <- generate_basal_expression(n_genes, basal_expression )
dtf_coef <- join_dtf(dtf_coef, BE_df, "geneID", "geneID")
return(dtf_coef)
}
......
File moved
......@@ -94,37 +94,6 @@ fillInCovarMatrice <- function(covarMatrice, covar){
return(covarMatrice)
}
#' Check if a matrix is positive definite
#' This function checks whether a given matrix is positive definite, i.e., all of its eigenvalues are positive.
#' @param mat The matrix to be checked.
#' @return A logical value indicating whether the matrix is positive definite.
#' @export
#' @examples
#' # Create a positive definite matrix
#' mat1 <- matrix(c(4, 2, 2, 3), nrow = 2)
#' is_positive_definite(mat1)
#' # Expected output: TRUE
#'
#' # Create a non-positive definite matrix
#' mat2 <- matrix(c(4, 2, 2, -3), nrow = 2)
#' is_positive_definite(mat2)
#' # Expected output: FALSE
#'
#' # Check an empty matrix
#' mat3 <- matrix(nrow = 0, ncol = 0)
#' is_positive_definite(mat3)
#' # Expected output: TRUE
#'
#' @export
is_positive_definite <- function(mat) {
if (nrow(mat) == 0 && ncol(mat) == 0) return(TRUE)
eigenvalues <- eigen(mat)$values
all(eigenvalues > 0)
}
#' getGeneMetadata
#'
#' @inheritParams init_variable
......@@ -141,7 +110,7 @@ is_positive_definite <- function(mat) {
getGeneMetadata <- function(list_var, n_genes) {
metaData <- generateGridCombination_fromListVar(list_var)
n_combinations <- dim(metaData)[1]
genes_vec <- base::paste("gene", 1:n_genes, sep = "")
genes_vec <- paste("gene", 1:n_genes, sep = "")
geneID <- rep(genes_vec, each = n_combinations)
metaData <- cbind(geneID, metaData)
......@@ -190,7 +159,7 @@ getDataFromMvrnorm <- function(list_var, input2mvrnorm, n_genes = 1) {
#' samples generated from multivariate normal distribution
#'
#' @export
#'
#' @importFrom MASS mvrnorm
#' @examples
#' n <- 100
#' mu <- c(0, 0)
......@@ -198,7 +167,6 @@ getDataFromMvrnorm <- function(list_var, input2mvrnorm, n_genes = 1) {
#' samples <- samplingFromMvrnorm(n_samplings = n, l_mu = mu, matx_cov = covMatrix)
samplingFromMvrnorm <- function(n_samplings, l_mu, matx_cov) {
mvrnormSamp <- MASS::mvrnorm(n = n_samplings, mu = l_mu, Sigma = matx_cov, empirical = TRUE)
return(mvrnormSamp)
}
......@@ -40,7 +40,6 @@ evaluateDispersion <- function(TMB_dispersion_df, DESEQ_dispersion_df, color2use
#' @examples
#' \dontrun{
#' dispersion_comparison <- getDispersionComparison(inferred_disp, actual_disp)
#' print(dispersion_comparison)
#' }
getDispersionComparison <- function(inferred_dispersion, actual_dispersion) {
actual_disp <- data.frame(actual_dispersion = actual_dispersion)
......@@ -55,7 +54,7 @@ getDispersionComparison <- function(inferred_dispersion, actual_dispersion) {
#'
#' Extracts inferred dispersion values from a DESeq2 wrapped object.
#'
#' @param deseq_wrapped A DESeq2 wrapped object containing dispersion values.
#' @param dds_wrapped A DESeq2 wrapped object containing dispersion values.
#'
#' @return A data frame containing inferred dispersion values.
#'
......@@ -63,11 +62,10 @@ getDispersionComparison <- function(inferred_dispersion, actual_dispersion) {
#'
#' @examples
#' \dontrun{
#' dispersion_df <- extractDESeqDispersion(deseq2_object)
#' print(dispersion_df)
#' dispersion_df <- extract_ddsDispersion(deseq2_object)
#' }
extractDESeqDispersion <- function(deseq_wrapped) {
inferred_dispersion <- data.frame(inferred_dispersion = deseq_wrapped$dispersion)
extract_ddsDispersion <- function(dds_wrapped) {
inferred_dispersion <- data.frame(inferred_dispersion = dds_wrapped$dispersion)
inferred_dispersion$geneID <- rownames(inferred_dispersion)
rownames(inferred_dispersion) <- NULL
return(inferred_dispersion)
......@@ -78,7 +76,7 @@ extractDESeqDispersion <- function(deseq_wrapped) {
#'
#' Extracts inferred dispersion values from a TMB result object.
#'
#' @param l_tmb A TMB result object containing dispersion values.
#' @param list_tmb A TMB result object containing dispersion values.
#'
#' @return A data frame containing inferred dispersion values.
#'
......@@ -86,11 +84,10 @@ extractDESeqDispersion <- function(deseq_wrapped) {
#'
#' @examples
#' \dontrun{
#' dispersion_df <- extractTMBDispersion(tmb_result)
#' print(dispersion_df)
#' dispersion_df <- extract_tmbDispersion(tmb_result)
#' }
extractTMBDispersion <- function(l_tmb) {
glanceRes <- glance_tmb(l_tmb)
extract_tmbDispersion <- function(list_tmb) {
glanceRes <- glance_tmb(list_tmb)
inferred_dispersion <- data.frame(inferred_dispersion = glanceRes$dispersion)
inferred_dispersion$geneID <- rownames(glanceRes)
rownames(inferred_dispersion) <- NULL
......@@ -106,6 +103,7 @@ extractTMBDispersion <- function(l_tmb) {
#' @param eval_dispersion A data frame containing actual and inferred dispersion values.
#' @param ... Additional arguments to be passed to the ggplot2::aes function.
#' @importFrom ggplot2 ggplot geom_point aes geom_abline theme_bw ggtitle scale_x_log10 scale_y_log10
#' @importFrom rlang .data
#' @return A ggplot2 scatter plot.
#'
#' @export
......@@ -120,7 +118,7 @@ dispersion_plot <- function(eval_dispersion, ...) {
args <- lapply(list(...), function(x) if (!is.null(x)) ggplot2::sym(x))
p <- ggplot2::ggplot(eval_dispersion) +
ggplot2::geom_point(ggplot2::aes(x = actual_dispersion, y = inferred_dispersion, !!!args), size = 3, alpha = 0.6) +
ggplot2::geom_point(ggplot2::aes(x = .data$actual_dispersion, y = .data$inferred_dispersion, !!!args), size = 3, alpha = 0.6) +
ggplot2::geom_abline(intercept = 0, slope = 1, lty = 3, col = 'red', linewidth = 1) +
ggplot2::theme_bw() +
ggplot2::ggtitle("Dispersion evaluation") +
......
File moved
......@@ -13,7 +13,6 @@
#' data(iris)
#' formula <- Sepal.Length ~ Sepal.Width + Petal.Length
#' isValidInput2fit(iris, formula) # Returns TRUE if all required variables are present
#' @keywords internal
#' @export
isValidInput2fit <- function(data2fit, formula){
vec_bool <- all.vars(formula) %in% colnames(data2fit)
......@@ -43,7 +42,7 @@ isValidInput2fit <- function(data2fit, formula){
#' # Drop the random effects related to 'group'
#' modified_formula <- drop_randfx(formula)
#'
#' @importFrom stats terms
#' @importFrom stats terms drop.terms
#' @importFrom stats update
#'
#' @export
......@@ -106,18 +105,19 @@ is_fullrank <- function(metadata, formula) {
#' Fit a model using the fitModel function.
#'
#' @param group group id to save in glmmTMB obj (usefull for update !)
#' @param formula Formula specifying the model formula
#' @param data Data frame containing the data
#' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function
#' @return Fitted model object or NULL if there was an error
#' @export
#' @examples
#' .fitModel(formula = mpg ~ cyl + disp, data = mtcars)
.fitModel <- function(formula, data, ...) {
#' fitModel("mtcars" , formula = mpg ~ cyl + disp, data = mtcars)
fitModel <- function(group, formula, data, ...) {
# Fit the model using glm.nb from the GLmmTMB package
model <- glmmTMB::glmmTMB(formula, ..., data = data )
model$frame <- data
model$groupId <- group
## family in ... => avoid error in future update
additional_args <- list(...)
familyArgs <- additional_args[['family']]
......@@ -140,12 +140,12 @@ is_fullrank <- function(metadata, formula) {
#' @return Fitted model object or NULL if there was an error
#' @export
#' @examples
#' .subsetData_andfit(group = "setosa", group_by = "Species",
#' subsetData_andfit(group = "setosa", group_by = "Species",
#' formula = Sepal.Length ~ Sepal.Width + Petal.Length,
#' data = iris )
.subsetData_andfit <- function(group, group_by, formula, data, ...) {
subsetData_andfit <- function(group, group_by, formula, data, ...) {
subset_data <- data[data[[group_by]] == group, ]
fit_res <- .fitModel(formula, subset_data, ...)
fit_res <- fitModel(group, formula, subset_data, ...)
#glance_df <- glance.negbin(group_by ,group , fit_res)
#tidy_df <- tidy.negbin(group_by ,group,fit_res )
#list(glance = glance_df, summary = tidy_df)
......@@ -174,7 +174,7 @@ launchFit <- function(group, group_by, formula, data, ...) {
tryCatch(
expr = {
withCallingHandlers(
.subsetData_andfit(group, group_by, formula, data, ...),
subsetData_andfit(group, group_by, formula, data, ...),
warning = function(w) {
message(paste(Sys.time(), "warning for group", group, ":", conditionMessage(w)))
invokeRestart("muffleWarning")
......@@ -198,20 +198,22 @@ launchFit <- function(group, group_by, formula, data, ...) {
#' @param data Data frame containing the data
#' @param n.cores The number of CPU cores to use for parallel processing.
#' If set to NULL (default), the number of available CPU cores will be automatically detected.
#' @param log_file File to write log (default : log.txt)
#' @param log_file File to write log (default : Rtmpdir/htrfit.log)
#' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function
#' @return List of fitted model objects or NULL for any errors
#' @importFrom stats setNames
#' @export
#' @examples
#' .parallel_fit(group_by = "Species", "setosa",
#' parallel_fit(group_by = "Species", "setosa",
#' formula = Sepal.Length ~ Sepal.Width + Petal.Length,
#' data = iris, n.cores = 1, log_file = "log.txt" )
.parallel_fit <- function(groups, group_by, formula, data, n.cores = NULL, log_file, ...) {
#' data = iris, n.cores = 1 )
parallel_fit <- function(groups, group_by, formula, data, n.cores = NULL,
log_file = paste(tempdir(check = FALSE), "htrfit.log", sep = "/"), ...) {
if (is.null(n.cores)) n.cores <- parallel::detectCores()
clust <- parallel::makeCluster(n.cores, outfile = log_file)
parallel::clusterExport(clust, c(".subsetData_andfit", ".fitModel"), envir=environment())
parallel::clusterExport(clust, c("subsetData_andfit", "fitModel"), envir=environment())
results_fit <- parallel::parLapply(clust, X = stats::setNames(groups, groups), fun = launchFit,
group_by = group_by, formula = formula, data = data, ...)
......@@ -228,23 +230,25 @@ launchFit <- function(group, group_by, formula, data, ...) {
#' @param group_by Column name in data representing the grouping variable
#' @param n.cores The number of CPU cores to use for parallel processing.
#' If set to NULL (default), the number of available CPU cores will be automatically detected.
#' @param log_file File path to save the log messages (default : log.txt)
#' @param log_file File path to save the log messages (default : Rtmpdir/htrfit.log)
#' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function
#' @return List of fitted model objects or NULL for any errors
#' @export
#' @examples
#' fitModelParallel(formula = Sepal.Length ~ Sepal.Width + Petal.Length,
#' data = iris, group_by = "Species", n.cores = 1)
fitModelParallel <- function(formula, data, group_by, n.cores = NULL, log_file = "log.txt", ...) {
fitModelParallel <- function(formula, data, group_by, n.cores = NULL, log_file = paste(tempdir(check = FALSE), "htrfit.log", sep = "/"), ...) {
## SOme verification
isValidInput2fit(data, formula)
is_fullrank(data, formula)
## -- print log location
message( paste("Log file location", log_file, sep =': ') )
groups <- unique(data[[group_by]])
# Fit models in parallel and capture the results
results <- .parallel_fit(groups, group_by, formula, data, n.cores, log_file, ...)
results <- parallel_fit(groups, group_by, formula, data, n.cores, log_file, ...)
#results <- mergeListDataframes(results)
return(results)
}
......
......@@ -6,7 +6,7 @@
#' This function takes a list of glmmTMB models and extracts the summary statistics (AIC, BIC, logLik, deviance,
#' df.resid, and dispersion) for each model and returns them as a single DataFrame.
#'
#' @param l_tmb A list of glmmTMB models or a unique glmmTMB obj model
#' @param list_tmb A list of glmmTMB models or a unique glmmTMB obj model
#' @return A DataFrame with the summary statistics for all the glmmTMB models in the list.
#' @export
#' @importFrom stats setNames
......@@ -15,10 +15,10 @@
#' models <- fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length,
#' group_by = "Species",n.cores = 1, data = iris)
#' result <- glance_tmb(models)
glance_tmb <- function(l_tmb){
if (identical(class(l_tmb), "glmmTMB")) return(getGlance(l_tmb))
l_group <- attributes(l_tmb)$names
l_glance <- lapply(stats::setNames(l_group, l_group), function(group) getGlance(l_tmb[[group]]))
glance_tmb <- function(list_tmb){
if (identical(class(list_tmb), "glmmTMB")) return(getGlance(list_tmb))
l_group <- attributes(list_tmb)$names
l_glance <- lapply(stats::setNames(l_group, l_group), function(group) getGlance(list_tmb[[group]]))
return(do.call("rbind", l_glance))
}
......
......@@ -10,6 +10,7 @@
#' @return A ggplot2 identity plot.
#'
#' @importFrom ggplot2 sym aes geom_point geom_abline facet_wrap theme_bw ggtitle scale_x_log10 scale_y_log10
#' @importFrom rlang .data
#' @export
#' @examples
#' comparison_data <- data.frame(
......@@ -24,7 +25,7 @@ identity_plot <- function(comparison_df, ...){
ggplot2::ggplot(comparison_df) +
ggplot2::geom_point(ggplot2::aes(x = actual, y = estimate, !!!args), alpha = 0.6, size = 2) +
ggplot2::geom_point(ggplot2::aes(x = .data$actual, y = .data$estimate, !!!args), alpha = 0.6, size = 2) +
ggplot2::geom_abline(intercept = 0, slope = 1, lty = 3, col = 'red', linewidth = 1) +
ggplot2::facet_wrap(~description, scales = "free") +
ggplot2::theme_bw() +
......
......@@ -12,8 +12,8 @@
#' @examples
#' matx_dispersion <- matrix(1:12, nrow = 3, ncol = 4)
#' matx_bool_replication <- matrix(TRUE, nrow = 3, ncol = 4)
#' .isDispersionMatrixValid(matx_dispersion, matx_bool_replication)
.isDispersionMatrixValid <- function(matx_dispersion, matx_bool_replication) {
#' is_dispersionMatrixValid(matx_dispersion, matx_bool_replication)
is_dispersionMatrixValid <- function(matx_dispersion, matx_bool_replication) {
expected_nb_column <- dim(matx_bool_replication)[2]
if (expected_nb_column != dim(matx_dispersion)[2]) {
return(FALSE)
......@@ -81,7 +81,7 @@ mock_rnaseq <- function(list_var, n_genes, min_replicates, max_replicates, seque
matx_Muij <- getMu_ij_matrix(df_inputSimulation)
l_sampleID <- getSampleID(list_var)
matx_bool_replication <- generateReplicationMatrix(list_var, min_replicates, max_replicates)
mu_ij_matx_rep <- .replicateMatrix(matx_Muij, matx_bool_replication)
mu_ij_matx_rep <- replicateMatrix(matx_Muij, matx_bool_replication)
dispersion <- getValidDispersion(dispersion)
......@@ -92,7 +92,7 @@ mock_rnaseq <- function(list_var, n_genes, min_replicates, max_replicates, seque
## same order as mu_ij_matx_rep
matx_dispersion <- matx_dispersion[ order(row.names(matx_dispersion)), ]
matx_dispersion_rep <- .replicateMatrix(matx_dispersion, matx_bool_replication)
matx_dispersion_rep <- replicateMatrix(matx_dispersion, matx_bool_replication)
matx_countsTable <- generateCountTable(mu_ij_matx_rep, matx_dispersion_rep)
message("Counts simulation: Done")
......@@ -196,8 +196,8 @@ generateReplicationMatrix <- function(list_var, min_replicates, max_replicates)
#' @examples
#' matrix <- matrix(1:9, nrow = 3, ncol = 3)
#' replication_matrix <- matrix(TRUE, nrow = 3, ncol = 3)
#' .replicateMatrix(matrix, replication_matrix)
.replicateMatrix <- function(matrix, replication_matrix) {
#' replicateMatrix(matrix, replication_matrix)
replicateMatrix <- function(matrix, replication_matrix) {
n_genes <- dim(matrix)[1]
rep_list <- colSums(replication_matrix)
replicated_indices <- rep(seq_len(ncol(matrix)), times = rep_list)
......
......@@ -35,7 +35,7 @@ subset_glance <- function(glance_df, focus){
#' This function generates a density plot of the specified metrics for the given
#' list of generalized linear mixed models (GLMMs).
#'
#' @param l_tmb A list of GLMM objects to extract metrics from.
#' @param list_tmb A list of GLMM objects to extract metrics from.
#' @param focus A character vector specifying the metrics to focus on. Possible
#' values include "AIC", "BIC", "logLik", "deviance", "df.resid", and
#' "dispersion". If \code{NULL}, all available metrics will be plotted.
......@@ -44,22 +44,22 @@ subset_glance <- function(glance_df, focus){
#'
#' @importFrom reshape2 melt
#' @importFrom ggplot2 aes facet_wrap geom_histogram theme_bw theme ggtitle
#'
#' @importFrom rlang .data
#' @export
#'
#' @examples
#' models_list <- fitModelParallel(Sepal.Length ~ Sepal.Width + Petal.Length,
#' group_by = "Species",n.cores = 1, data = iris)
#' metrics_plot(models_list, focus = c("AIC", "BIC", "deviance"))
metrics_plot <- function(l_tmb, focus = NULL) {
glance_df <- glance_tmb(l_tmb)
metrics_plot <- function(list_tmb, focus = NULL) {
glance_df <- glance_tmb(list_tmb)
glance_df$group_id <- rownames(glance_df)
if (!is.null(focus)) {
glance_df <- subset_glance(glance_df, focus)
}
long_glance_df <- reshape2::melt(glance_df, variable.name = "metric")
p <- ggplot2::ggplot(long_glance_df) +
ggplot2::geom_histogram(ggplot2::aes(x = value, col = metric, fill = metric)) +
ggplot2::geom_histogram(ggplot2::aes(x = .data$value, col = .data$metric, fill = .data$metric)) +
ggplot2::facet_wrap(~metric, scales = "free") +
ggplot2::theme_bw() +
ggplot2::theme(legend.position = 'null') +
......
......@@ -36,8 +36,8 @@ countMatrix_2longDtf <- function(countMatrix, value_name = "kij", id_vars = "gen
#' list_var <- init_variable()
#' mock_data <- mock_rnaseq(list_var, n_genes = 3, 2,2, 2)
#' dtf_countLong <- countMatrix_2longDtf(mock_data$counts)
#' .getColumnWithSampleID(dtf_countLong, mock_data$metadata)
.getColumnWithSampleID <- function(dtf_countsLong, metadata) {
#' getColumnWithSampleID(dtf_countLong, mock_data$metadata)
getColumnWithSampleID <- function(dtf_countsLong, metadata) {
example_spleID <- as.character(dtf_countsLong[1, "sampleID"])
regex <- paste("^", as.character(dtf_countsLong[1, "sampleID"]), "$", sep = "")
for (indice_col in dim(metadata)[2]) {
......@@ -76,7 +76,7 @@ prepareData2fit <- function(countMatrix, metadata, normalization = TRUE , respon
}
dtf_countsLong <- countMatrix_2longDtf(countMatrix, response_name)
metadata_columnForjoining <- .getColumnWithSampleID(dtf_countsLong, metadata)
metadata_columnForjoining <- getColumnWithSampleID(dtf_countsLong, metadata)
if (is.na(metadata_columnForjoining)) {
stop("SampleIDs do not seem to correspond between countMatrix and metadata")
}
......
......@@ -48,7 +48,7 @@ getLabelExpected <- function(comparison_df, coeff_threshold, alt_hypothesis) {
#' @return A ggplot object representing the ROC curve plot.
#' @importFrom plotROC geom_roc
#' @importFrom ggplot2 ggtitle theme_bw aes sym xlab ylab
#'
#' @importFrom rlang .data
#' @examples
#' comparison_data <- data.frame(
#' geneID = c("gene1", "gene2", "gene3"),
......@@ -72,7 +72,7 @@ roc_plot <- function(comparison_df, ...) {
args <- lapply(list(...), function(x) if (!is.null(x)) ggplot2::sym(x))
#comparison_df$isDE <- factor(comparison_df$isDE, levels= c(TRUE, FALSE))
p <- ggplot2::ggplot(comparison_df, ggplot2::aes(d = !isDE , m = p.adj, !!!args )) +
p <- ggplot2::ggplot(comparison_df, ggplot2::aes(d = !.data$isDE , m = .data$p.adj, !!!args )) +
plotROC::geom_roc(n.cuts = 0, labels = FALSE) +
ggplot2::theme_bw() +
ggplot2::ggtitle("ROC curve") +
......
File moved
......@@ -73,6 +73,10 @@ getCoefficients <- function(list_var, l_dataFromMvrnorm, l_dataFromUser, n_genes
#' @param dtf_coef The coefficient data frame.
#' @return The coefficient data frame with log_qij column added.
#' @export
#' @examples
#' list_var <- init_variable()
#' dtf_coef <- getInput2simulation(list_var, 10)
#' dtf_coef <- getLog_qij(dtf_coef)
getLog_qij <- function(dtf_coef) {
dtf_beta_numeric <- dtf_coef[sapply(dtf_coef, is.numeric)]
dtf_coef$log_qij <- rowSums(dtf_beta_numeric, na.rm = TRUE)
......@@ -114,6 +118,13 @@ getMu_ij <- function(dtf_coef) {
#' @export
#' @return A Mu_ij matrix.
#' @examples
#' list_var <- init_variable()
#' dtf_coef <- getInput2simulation(list_var, 10)
#' dtf_coef <- getLog_qij(dtf_coef)
#' dtf_coef <- addBasalExpression(dtf_coef, 10, c(10, 20, 0))
#' dtf_coef<- getMu_ij(dtf_coef)
#' getMu_ij_matrix(dtf_coef)
getMu_ij_matrix <- function(dtf_coef) {
column_names <- colnames(dtf_coef)
idx_var <- grepl(pattern = "label", column_names)
......@@ -167,4 +178,171 @@ getSubCountsTable <- function(matx_Muij, matx_dispersion, replicateID, l_bool_re
return(matx_kij)
}
#' getReplicationMatrix
#'
#' @param minN Minimum number of replicates for each sample
#' @param maxN Maximum number of replicates for each sample
#' @param n_samples Number of samples
#' @export
#' @return A replication matrix indicating which samples are replicated
getReplicationMatrix <- function(minN, maxN, n_samples) {
# Create a list of logical vectors representing the minimum number of replicates
l_replication_minimum = lapply(1:n_samples,
FUN = function(i) rep(TRUE, times = minN) )
# Create a list of random logical vectors representing additional replicates
l_replication_random = lapply(1:n_samples,
FUN = function(i) sample(x = c(TRUE, FALSE), size = maxN-minN, replace = T) )
# Combine the replication vectors into matrices
matx_replication_minimum <- do.call(cbind, l_replication_minimum)
matx_replication_random <- do.call(cbind, l_replication_random)
# Combine the minimum replicates and random replicates into a single matrix
matx_replication <- rbind(matx_replication_minimum, matx_replication_random)
# Sort the columns of the replication matrix in descending order
matx_replication = apply(matx_replication, 2, sort, decreasing = TRUE ) %>% matrix(nrow = maxN)
return(matx_replication)
}
#' getCountsTable
#'
#' @param matx_Muij Matrix of mean expression values for each gene and sample
#' @param matx_dispersion Matrix of dispersion values for each gene and sample
#' @param matx_bool_replication Replication matrix indicating which samples are replicated
#'
#' @return A counts table containing simulated read counts for each gene and sample
getCountsTable <- function(matx_Muij , matx_dispersion, matx_bool_replication ){
max_replicates <- dim(matx_bool_replication)[1]
# Apply the getSubCountsTable function to each row of the replication matrix
l_countsTable = lapply(1:max_replicates, function(i) getSubCountsTable(matx_Muij , matx_dispersion, i, matx_bool_replication[i,] ))
# Combine the counts tables into a single matrix
countsTable = do.call(cbind, l_countsTable)
return(countsTable %>% as.data.frame())
}
#' getDispersionMatrix
#'
#' @param list_var A list of variables (already initialized)
#' @param n_genes Number of genes
#' @param dispersion Vector of dispersion values for each gene
#' @export
#'
#' @return A matrix of dispersion values for each gene and sample
getDispersionMatrix <- function(list_var, n_genes, dispersion = stats::runif(n_genes, min = 0, max = 1000)){
l_geneID = paste("gene", 1:n_genes, sep = "")
l_sampleID = getSampleID(list_var)
n_samples = length(l_sampleID)
l_dispersion <- dispersion
# Create a data frame for the dispersion values
dtf_dispersion = list(dispersion = l_dispersion) %>% as.data.frame()
dtf_dispersion <- dtf_dispersion[, rep("dispersion", n_samples)]
rownames(dtf_dispersion) = l_geneID
colnames(dtf_dispersion) = l_sampleID
matx_dispersion = dtf_dispersion %>% as.matrix()
return(matx_dispersion)
}
#' Replicate rows of a data frame by group
#'
#' Replicates the rows of a data frame based on a grouping variable and replication counts for each group.
#'
#' @param df Data frame to replicate
#' @param group_var Name of the grouping variable in the data frame
#' @param rep_list Vector of replication counts for each group
#' @return Data frame with replicated rows
#' @examples
#' df <- data.frame(group = c("A", "B"), value = c(1, 2))
#' replicateByGroup(df, "group", c(2, 3))
#'
#' @export
replicateByGroup <- function(df, group_var, rep_list) {
l_group_var <- df[[group_var]]
group_levels <- unique(l_group_var)
names(rep_list) <- group_levels
group_indices <- rep_list[l_group_var]
replicated_indices <- rep(seq_len(nrow(df)), times = group_indices)
replicated_df <- df[replicated_indices, ]
suffix_sampleID <- sequence(group_indices)
replicated_df[["sampleID"]] <- paste(replicated_df[["sampleID"]], suffix_sampleID, sep = "_")
rownames(replicated_df) <- NULL
return(replicated_df)
}
#' Replicate rows of a data frame
#'
#' Replicates the rows of a data frame by a specified factor.
#'
#' @param df Data frame to replicate
#' @param n Replication factor for each row
#' @return Data frame with replicated rows
#' @export
#' @examples
#' df <- data.frame(a = 1:3, b = letters[1:3])
#' replicateRows(df, 2)
#'
replicateRows <- function(df, n) {
indices <- rep(seq_len(nrow(df)), each = n)
replicated_df <- df[indices, , drop = FALSE]
rownames(replicated_df) <- NULL
return(replicated_df)
}
#' Get sample metadata
#'
#' Generates sample metadata based on the input variables, replication matrix, and number of genes.
#'
#' @param list_var A list of variables (already initialized)
#' @param replicationMatrix Replication matrix
#' @param n_genes Number of genes
#' @return Data frame of sample metadata
#' @importFrom data.table setorderv
#' @export
#' @examples
#' list_var <- init_variable()
#' n_genes <- 10
#' replicationMatrix <- generateReplicationMatrix(list_var ,2, 3)
#' getSampleMetadata(list_var, n_genes, replicationMatrix)
getSampleMetadata <- function(list_var, n_genes, replicationMatrix) {
l_sampleIDs = getSampleID(list_var)
metaData <- generateGridCombination_fromListVar(list_var)
metaData[] <- lapply(metaData, as.character) ## before reordering
data.table::setorderv(metaData, cols = colnames(metaData))
metaData[] <- lapply(metaData, as.factor)
metaData$sampleID <- l_sampleIDs
rep_list <- colSums(replicationMatrix)
metaData$sampleID <- as.character(metaData$sampleID) ## before replicating
sampleMetadata <- replicateByGroup(metaData, "sampleID", rep_list)
colnames(sampleMetadata) <- gsub("label_", "", colnames(sampleMetadata))
return(sampleMetadata)
}
#' getSampleID
#'
#' @param list_var A list of variables (already initialized)
#' @export
#' @return A sorted vector of sample IDs
#' @examples
#' getSampleID(init_variable())
getSampleID <- function(list_var){
gridCombination <- generateGridCombination_fromListVar(list_var)
l_sampleID <- apply( gridCombination , 1 , paste , collapse = "_" ) %>% unname()
return(sort(l_sampleID))
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment