From f664caecf682cc9692606bf41b12a50a742e0083 Mon Sep 17 00:00:00 2001 From: aduvermy <arnaud.duvermy@ens-lyon.fr> Date: Thu, 26 Oct 2023 15:48:54 +0200 Subject: [PATCH] fix issues with writing the 'group' information in log files during model update --- R/basal_expression__scaling.R | 82 ------------------------ R/fitmodel.R | 9 +-- R/update_fittedmodel.R | 14 ++-- dev/flat_full.Rmd | 49 ++++++++------ man/addBasalExpression.Rd | 17 +---- man/fitModel.Rd | 6 +- man/fitUpdate.Rd | 6 +- man/generate_basal_expression.Rd | 11 +--- man/getBinExpression.Rd | 13 +--- tests/testthat/test-fitmodel.R | 17 +++-- tests/testthat/test-update_fittedmodel.R | 9 ++- vignettes/htrfit.Rmd | 1 - 12 files changed, 71 insertions(+), 163 deletions(-) delete mode 100644 R/basal_expression__scaling.R diff --git a/R/basal_expression__scaling.R b/R/basal_expression__scaling.R deleted file mode 100644 index c2ddc5d..0000000 --- a/R/basal_expression__scaling.R +++ /dev/null @@ -1,82 +0,0 @@ -# WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand - - - - - -#' Get bin expression for a data frame. -#' -#' This function divides the values of a specified column in a data frame into \code{n_bins} bins of equal width. -#' The bin labels are then added as a new column in the data frame. -#' -#' @param dtf_coef A data frame containing the values to be binned. -#' @param n_bins The number of bins to create. -#' -#' @return A data frame with an additional column named \code{binExpression}, containing the bin labels. -#' @export -#' @examples -#' dtf <- data.frame(mu_ij = c(10, 20, 30, 15, 25, 35, 40, 5, 12, 22)) -#' dtf_with_bins <- getBinExpression(dtf, n_bins = 3) -#' -getBinExpression <- function(dtf_coef, n_bins){ - col2bin <- "mu_ij" - bin_labels <- cut(dtf_coef[[col2bin]], n_bins, labels = paste("BinExpression", 1:n_bins, sep = "_")) - dtf_coef$binExpression <- bin_labels - return(dtf_coef) -} - - - - -#' Generate BE data. -#' -#' This function generates basal expression data for a given number of genes, in a vector of basal expression values. -#' -#' @param n_genes The number of genes to generate BE data for. -#' @param basal_expression a numeric vector from which sample BE for eacg genes -#' -#' @return A data frame containing gene IDs, BE values -#' -#' @examples -#' generate_basal_expression(n_genes = 100, 10) -#' -#' @export -generate_basal_expression <- function(n_genes, basal_expression) { - ## --avoid bug if one value in basal_expr - pool2sample <- c(basal_expression, basal_expression) - BE <- sample(x = pool2sample, size = n_genes, replace = T) - l_geneID <- base::paste("gene", 1:n_genes, sep = "") - ret <- list(geneID = l_geneID, basalExpr = BE) %>% as.data.frame() - return(ret) -} - - - -#' Compute basal expresion for gene expression based on the coefficients data frame. -#' -#' This function takes the coefficients data frame \code{dtf_coef} and computes -#' basal expression for gene expression. The scaling factors are generated -#' using the function \code{generate_basal_expression}. -#' -#' @param dtf_coef A data frame containing the coefficients for gene expression. -#' @param n_genes number of genes in simulation -#' @param basal_expression gene basal expression vector -#' -#' @return A modified data frame \code{dtf_coef} with an additional column containing -#' the scaling factors for gene expression. -#' @export -#' @examples -#' list_var <- init_variable() -#' N_GENES <- 5 -#' dtf_coef <- getInput2simulation(list_var, N_GENES) -#' dtf_coef <- getLog_qij(dtf_coef) -#' addBasalExpression(dtf_coef, N_GENES, 1) -addBasalExpression <- function(dtf_coef, n_genes, basal_expression){ - BE_df <- generate_basal_expression(n_genes, basal_expression ) - dtf_coef <- join_dtf(dtf_coef, BE_df, "geneID", "geneID") - return(dtf_coef) -} - - - - diff --git a/R/fitmodel.R b/R/fitmodel.R index de0d84b..b38b1df 100644 --- a/R/fitmodel.R +++ b/R/fitmodel.R @@ -105,18 +105,19 @@ is_fullrank <- function(metadata, formula) { #' Fit a model using the fitModel function. -#' +#' @param group group id to save in glmmTMB obj (usefull for update !) #' @param formula Formula specifying the model formula #' @param data Data frame containing the data #' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function #' @return Fitted model object or NULL if there was an error #' @export #' @examples -#' fitModel(formula = mpg ~ cyl + disp, data = mtcars) -fitModel <- function(formula, data, ...) { +#' fitModel("mtcars" , formula = mpg ~ cyl + disp, data = mtcars) +fitModel <- function(group, formula, data, ...) { # Fit the model using glm.nb from the GLmmTMB package model <- glmmTMB::glmmTMB(formula, ..., data = data ) model$frame <- data + model$groupId <- group ## family in ... => avoid error in future update additional_args <- list(...) familyArgs <- additional_args[['family']] @@ -144,7 +145,7 @@ fitModel <- function(formula, data, ...) { #' data = iris ) subsetData_andfit <- function(group, group_by, formula, data, ...) { subset_data <- data[data[[group_by]] == group, ] - fit_res <- fitModel(formula, subset_data, ...) + fit_res <- fitModel(group, formula, subset_data, ...) #glance_df <- glance.negbin(group_by ,group , fit_res) #tidy_df <- tidy.negbin(group_by ,group,fit_res ) #list(glance = glance_df, summary = tidy_df) diff --git a/R/update_fittedmodel.R b/R/update_fittedmodel.R index 5184432..ffafac2 100644 --- a/R/update_fittedmodel.R +++ b/R/update_fittedmodel.R @@ -74,7 +74,7 @@ parallel_update <- function(formula, list_tmb, n.cores = NULL, #' Fit and update a GLMNB model. #' #' This function fits and updates a GLMNB model using the provided formula. -#' +#' @param group group id to save in glmmTMB obj (usefull for update !) #' @param glmm_obj A glmmTMB object to be updated. #' @param formula Formula for the updated GLMNB model. #' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function. @@ -88,11 +88,13 @@ parallel_update <- function(formula, list_tmb, n.cores = NULL, #' formula <- Sepal.Length ~ Sepal.Width + Petal.Length #' fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1) #' new_formula <- Sepal.Length ~ Sepal.Width -#' updated_model <- fitUpdate(fitted_models[[1]], new_formula) -fitUpdate <- function(glmm_obj, formula , ...){ - data = glmm_obj$frame +#' updated_model <- fitUpdate("setosa", fitted_models[[1]], new_formula) +fitUpdate <- function(group, glmm_obj, formula , ...){ + data <- glmm_obj$frame resUpdt <- stats::update(glmm_obj, formula, ...) resUpdt$frame <- data + ## save groupID => avoid error in future update + resUpdt$groupId <- group ## family in ... => avoid error in future update additional_args <- list(...) familyArgs <- additional_args[['family']] @@ -123,11 +125,11 @@ fitUpdate <- function(glmm_obj, formula , ...){ #' new_formula <- Sepal.Length ~ Sepal.Width #' updated_model <- launchUpdate(fitted_models[[1]], new_formula) launchUpdate <- function(glmm_obj, formula, ...) { - group = deparse(substitute(glmm_obj)) + group <- glmm_obj$groupId tryCatch( expr = { withCallingHandlers( - fitUpdate(glmm_obj, formula, ...), + fitUpdate(group ,glmm_obj, formula, ...), warning = function(w) { message(paste(Sys.time(), "warning for group", group ,":", conditionMessage(w))) invokeRestart("muffleWarning") diff --git a/dev/flat_full.Rmd b/dev/flat_full.Rmd index c6694dc..d634a49 100644 --- a/dev/flat_full.Rmd +++ b/dev/flat_full.Rmd @@ -2517,18 +2517,19 @@ is_fullrank <- function(metadata, formula) { #' Fit a model using the fitModel function. -#' +#' @param group group id to save in glmmTMB obj (usefull for update !) #' @param formula Formula specifying the model formula #' @param data Data frame containing the data #' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function #' @return Fitted model object or NULL if there was an error #' @export #' @examples -#' fitModel(formula = mpg ~ cyl + disp, data = mtcars) -fitModel <- function(formula, data, ...) { +#' fitModel("mtcars" , formula = mpg ~ cyl + disp, data = mtcars) +fitModel <- function(group, formula, data, ...) { # Fit the model using glm.nb from the GLmmTMB package model <- glmmTMB::glmmTMB(formula, ..., data = data ) model$frame <- data + model$groupId <- group ## family in ... => avoid error in future update additional_args <- list(...) familyArgs <- additional_args[['family']] @@ -2556,7 +2557,7 @@ fitModel <- function(formula, data, ...) { #' data = iris ) subsetData_andfit <- function(group, group_by, formula, data, ...) { subset_data <- data[data[[group_by]] == group, ] - fit_res <- fitModel(formula, subset_data, ...) + fit_res <- fitModel(group, formula, subset_data, ...) #glance_df <- glance.negbin(group_by ,group , fit_res) #tidy_df <- tidy.negbin(group_by ,group,fit_res ) #list(glance = glance_df, summary = tidy_df) @@ -2688,25 +2689,30 @@ test_that("isValidInput2fit raises an error for missing variable", { test_that("fitModel returns a fitted model object", { data(mtcars) formula <- mpg ~ cyl + disp - fitted_model <- suppressWarnings(fitModel(formula, mtcars)) + fitted_model <- suppressWarnings(fitModel("mtcars", formula, mtcars)) #expect_warning(fitModel(formula, mtcars)) expect_s3_class(fitted_model, "glmmTMB") # Test with invalid formula invalid_formula <- mpg ~ cyl + disp + invalid_var - expect_error(fitModel(invalid_formula, mtcars)) + expect_error(fitModel("mtcars", invalid_formula, mtcars)) + + ## check groupID attr + expect_equal(fitted_model$groupId, "mtcars") # Additional parameters: #change family + formula formula <- Sepal.Length ~ Sepal.Width + Petal.Length + (1 | Species) - fitted_models <- suppressWarnings(fitModel(formula = formula, - data = iris, - family = glmmTMB::nbinom1(link = "log") )) + fitted_models <- suppressWarnings(fitModel("mtcars", + formula = formula, + data = iris, + family = glmmTMB::nbinom1(link = "log") )) expect_s3_class(fitted_models$call$family, "family") expect_equal(fitted_models$call$formula, formula) #change control settings - fitted_models <- suppressWarnings(fitModel(formula = formula, + fitted_models <- suppressWarnings(fitModel("mtcars", + formula = formula, data = iris, family = glmmTMB::nbinom1(link = "log"), control = glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3, @@ -3014,7 +3020,7 @@ parallel_update <- function(formula, list_tmb, n.cores = NULL, #' Fit and update a GLMNB model. #' #' This function fits and updates a GLMNB model using the provided formula. -#' +#' @param group group id to save in glmmTMB obj (usefull for update !) #' @param glmm_obj A glmmTMB object to be updated. #' @param formula Formula for the updated GLMNB model. #' @param ... Additional arguments to be passed to the glmmTMB::glmmTMB function. @@ -3028,11 +3034,13 @@ parallel_update <- function(formula, list_tmb, n.cores = NULL, #' formula <- Sepal.Length ~ Sepal.Width + Petal.Length #' fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1) #' new_formula <- Sepal.Length ~ Sepal.Width -#' updated_model <- fitUpdate(fitted_models[[1]], new_formula) -fitUpdate <- function(glmm_obj, formula , ...){ - data = glmm_obj$frame +#' updated_model <- fitUpdate("setosa", fitted_models[[1]], new_formula) +fitUpdate <- function(group, glmm_obj, formula , ...){ + data <- glmm_obj$frame resUpdt <- stats::update(glmm_obj, formula, ...) resUpdt$frame <- data + ## save groupID => avoid error in future update + resUpdt$groupId <- group ## family in ... => avoid error in future update additional_args <- list(...) familyArgs <- additional_args[['family']] @@ -3063,11 +3071,11 @@ fitUpdate <- function(glmm_obj, formula , ...){ #' new_formula <- Sepal.Length ~ Sepal.Width #' updated_model <- launchUpdate(fitted_models[[1]], new_formula) launchUpdate <- function(glmm_obj, formula, ...) { - group = deparse(substitute(glmm_obj)) + group <- glmm_obj$groupId tryCatch( expr = { withCallingHandlers( - fitUpdate(glmm_obj, formula, ...), + fitUpdate(group ,glmm_obj, formula, ...), warning = function(w) { message(paste(Sys.time(), "warning for group", group ,":", conditionMessage(w))) invokeRestart("muffleWarning") @@ -3178,17 +3186,20 @@ test_that("fitUpdate function returns correct results", { fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1) new_formula <- Sepal.Length ~ Sepal.Width - updated_model <- fitUpdate(fitted_models[[1]], new_formula) + updated_model <- fitUpdate("setosa",fitted_models[[1]], new_formula) expect_is(updated_model, "glmmTMB") + ## -- check groupId presence + expect_equal(updated_model$groupId, "setosa") + # Additional parameters: #change family + formula - updated_model <- suppressWarnings(fitUpdate(fitted_models[[1]], new_formula, + updated_model <- suppressWarnings(fitUpdate("setosa", fitted_models[[1]], new_formula, family = glmmTMB::nbinom1(link = "log") )) expect_s3_class(updated_model$call$family, "family") expect_equal(updated_model$call$formula, new_formula) #change control - updated_model <- suppressWarnings(fitUpdate(fitted_models[[1]], + updated_model <- suppressWarnings(fitUpdate("setosa", fitted_models[[1]], new_formula, family = glmmTMB::nbinom1(link = "log"), control = glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3, diff --git a/man/addBasalExpression.Rd b/man/addBasalExpression.Rd index fec7fd9..e3c969f 100644 --- a/man/addBasalExpression.Rd +++ b/man/addBasalExpression.Rd @@ -1,12 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/basal_expression__scaling.R, -% R/basal_expression_scaling.R +% Please edit documentation in R/basal_expression_scaling.R \name{addBasalExpression} \alias{addBasalExpression} \title{Compute basal expresion for gene expression based on the coefficients data frame.} \usage{ -addBasalExpression(dtf_coef, n_genes, basal_expression) - addBasalExpression(dtf_coef, n_genes, basal_expression) } \arguments{ @@ -17,17 +14,10 @@ addBasalExpression(dtf_coef, n_genes, basal_expression) \item{basal_expression}{gene basal expression vector} } \value{ -A modified data frame \code{dtf_coef} with an additional column containing -the scaling factors for gene expression. - A modified data frame \code{dtf_coef} with an additional column containing the scaling factors for gene expression. } \description{ -This function takes the coefficients data frame \code{dtf_coef} and computes -basal expression for gene expression. The scaling factors are generated -using the function \code{generate_basal_expression}. - This function takes the coefficients data frame \code{dtf_coef} and computes basal expression for gene expression. The scaling factors are generated using the function \code{generate_basal_expression}. @@ -38,9 +28,4 @@ N_GENES <- 5 dtf_coef <- getInput2simulation(list_var, N_GENES) dtf_coef <- getLog_qij(dtf_coef) addBasalExpression(dtf_coef, N_GENES, 1) -list_var <- init_variable() -N_GENES <- 5 -dtf_coef <- getInput2simulation(list_var, N_GENES) -dtf_coef <- getLog_qij(dtf_coef) -addBasalExpression(dtf_coef, N_GENES, 1) } diff --git a/man/fitModel.Rd b/man/fitModel.Rd index a54618e..56f945e 100644 --- a/man/fitModel.Rd +++ b/man/fitModel.Rd @@ -4,9 +4,11 @@ \alias{fitModel} \title{Fit a model using the fitModel function.} \usage{ -fitModel(formula, data, ...) +fitModel(group, formula, data, ...) } \arguments{ +\item{group}{group id to save in glmmTMB obj (usefull for update !)} + \item{formula}{Formula specifying the model formula} \item{data}{Data frame containing the data} @@ -20,5 +22,5 @@ Fitted model object or NULL if there was an error Fit a model using the fitModel function. } \examples{ -fitModel(formula = mpg ~ cyl + disp, data = mtcars) +fitModel("mtcars" , formula = mpg ~ cyl + disp, data = mtcars) } diff --git a/man/fitUpdate.Rd b/man/fitUpdate.Rd index b904948..b4e7af9 100644 --- a/man/fitUpdate.Rd +++ b/man/fitUpdate.Rd @@ -4,9 +4,11 @@ \alias{fitUpdate} \title{Fit and update a GLMNB model.} \usage{ -fitUpdate(glmm_obj, formula, ...) +fitUpdate(group, glmm_obj, formula, ...) } \arguments{ +\item{group}{group id to save in glmmTMB obj (usefull for update !)} + \item{glmm_obj}{A glmmTMB object to be updated.} \item{formula}{Formula for the updated GLMNB model.} @@ -26,5 +28,5 @@ group_by <- "Species" formula <- Sepal.Length ~ Sepal.Width + Petal.Length fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1) new_formula <- Sepal.Length ~ Sepal.Width -updated_model <- fitUpdate(fitted_models[[1]], new_formula) +updated_model <- fitUpdate("setosa", fitted_models[[1]], new_formula) } diff --git a/man/generate_basal_expression.Rd b/man/generate_basal_expression.Rd index 3c96870..25d7446 100644 --- a/man/generate_basal_expression.Rd +++ b/man/generate_basal_expression.Rd @@ -1,12 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/basal_expression__scaling.R, -% R/basal_expression_scaling.R +% Please edit documentation in R/basal_expression_scaling.R \name{generate_basal_expression} \alias{generate_basal_expression} \title{Generate BE data.} \usage{ -generate_basal_expression(n_genes, basal_expression) - generate_basal_expression(n_genes, basal_expression) } \arguments{ @@ -15,18 +12,12 @@ generate_basal_expression(n_genes, basal_expression) \item{basal_expression}{a numeric vector from which sample BE for eacg genes} } \value{ -A data frame containing gene IDs, BE values - A data frame containing gene IDs, BE values } \description{ -This function generates basal expression data for a given number of genes, in a vector of basal expression values. - This function generates basal expression data for a given number of genes, in a vector of basal expression values. } \examples{ generate_basal_expression(n_genes = 100, 10) -generate_basal_expression(n_genes = 100, 10) - } diff --git a/man/getBinExpression.Rd b/man/getBinExpression.Rd index f0c234e..14063ff 100644 --- a/man/getBinExpression.Rd +++ b/man/getBinExpression.Rd @@ -1,12 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/basal_expression__scaling.R, -% R/basal_expression_scaling.R +% Please edit documentation in R/basal_expression_scaling.R \name{getBinExpression} \alias{getBinExpression} \title{Get bin expression for a data frame.} \usage{ -getBinExpression(dtf_coef, n_bins) - getBinExpression(dtf_coef, n_bins) } \arguments{ @@ -15,14 +12,9 @@ getBinExpression(dtf_coef, n_bins) \item{n_bins}{The number of bins to create.} } \value{ -A data frame with an additional column named \code{binExpression}, containing the bin labels. - A data frame with an additional column named \code{binExpression}, containing the bin labels. } \description{ -This function divides the values of a specified column in a data frame into \code{n_bins} bins of equal width. -The bin labels are then added as a new column in the data frame. - This function divides the values of a specified column in a data frame into \code{n_bins} bins of equal width. The bin labels are then added as a new column in the data frame. } @@ -30,7 +22,4 @@ The bin labels are then added as a new column in the data frame. dtf <- data.frame(mu_ij = c(10, 20, 30, 15, 25, 35, 40, 5, 12, 22)) dtf_with_bins <- getBinExpression(dtf, n_bins = 3) -dtf <- data.frame(mu_ij = c(10, 20, 30, 15, 25, 35, 40, 5, 12, 22)) -dtf_with_bins <- getBinExpression(dtf, n_bins = 3) - } diff --git a/tests/testthat/test-fitmodel.R b/tests/testthat/test-fitmodel.R index f1a3d80..edd210a 100644 --- a/tests/testthat/test-fitmodel.R +++ b/tests/testthat/test-fitmodel.R @@ -19,25 +19,30 @@ test_that("isValidInput2fit raises an error for missing variable", { test_that("fitModel returns a fitted model object", { data(mtcars) formula <- mpg ~ cyl + disp - fitted_model <- suppressWarnings(fitModel(formula, mtcars)) + fitted_model <- suppressWarnings(fitModel("mtcars", formula, mtcars)) #expect_warning(fitModel(formula, mtcars)) expect_s3_class(fitted_model, "glmmTMB") # Test with invalid formula invalid_formula <- mpg ~ cyl + disp + invalid_var - expect_error(fitModel(invalid_formula, mtcars)) + expect_error(fitModel("mtcars", invalid_formula, mtcars)) + + ## check groupID attr + expect_equal(fitted_model$groupId, "mtcars") # Additional parameters: #change family + formula formula <- Sepal.Length ~ Sepal.Width + Petal.Length + (1 | Species) - fitted_models <- suppressWarnings(fitModel(formula = formula, - data = iris, - family = glmmTMB::nbinom1(link = "log") )) + fitted_models <- suppressWarnings(fitModel("mtcars", + formula = formula, + data = iris, + family = glmmTMB::nbinom1(link = "log") )) expect_s3_class(fitted_models$call$family, "family") expect_equal(fitted_models$call$formula, formula) #change control settings - fitted_models <- suppressWarnings(fitModel(formula = formula, + fitted_models <- suppressWarnings(fitModel("mtcars", + formula = formula, data = iris, family = glmmTMB::nbinom1(link = "log"), control = glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3, diff --git a/tests/testthat/test-update_fittedmodel.R b/tests/testthat/test-update_fittedmodel.R index f38b308..74702cd 100644 --- a/tests/testthat/test-update_fittedmodel.R +++ b/tests/testthat/test-update_fittedmodel.R @@ -94,17 +94,20 @@ test_that("fitUpdate function returns correct results", { fitted_models <- fitModelParallel(formula, iris, group_by, n.cores = 1) new_formula <- Sepal.Length ~ Sepal.Width - updated_model <- fitUpdate(fitted_models[[1]], new_formula) + updated_model <- fitUpdate("setosa",fitted_models[[1]], new_formula) expect_is(updated_model, "glmmTMB") + ## -- check groupId presence + expect_equal(updated_model$groupId, "setosa") + # Additional parameters: #change family + formula - updated_model <- suppressWarnings(fitUpdate(fitted_models[[1]], new_formula, + updated_model <- suppressWarnings(fitUpdate("setosa", fitted_models[[1]], new_formula, family = glmmTMB::nbinom1(link = "log") )) expect_s3_class(updated_model$call$family, "family") expect_equal(updated_model$call$formula, new_formula) #change control - updated_model <- suppressWarnings(fitUpdate(fitted_models[[1]], + updated_model <- suppressWarnings(fitUpdate("setosa", fitted_models[[1]], new_formula, family = glmmTMB::nbinom1(link = "log"), control = glmmTMB::glmmTMBControl(optCtrl=list(iter.max=1e3, diff --git a/vignettes/htrfit.Rmd b/vignettes/htrfit.Rmd index cbd246a..f299228 100644 --- a/vignettes/htrfit.Rmd +++ b/vignettes/htrfit.Rmd @@ -15,7 +15,6 @@ knitr::opts_chunk$set( ``` ```{r setup} -devtools::load_all() library(HTRfit) ``` -- GitLab