diff --git a/DESCRIPTION b/DESCRIPTION index 4b689be38dcb09ef2a07c591d00271754d8dc6e4..c630ea9b26c4868b61e9f453dc027a59c78489bb 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -25,7 +25,6 @@ Imports: data.table, ggplot2, glmmTMB, - gridExtra, magrittr, MASS, parallel, diff --git a/NAMESPACE b/NAMESPACE index b212e1747bc643e98dddaa1a94812372296ffcae..7430465129f64a0bfc34573f81a4aa43000154ad 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -37,7 +37,6 @@ export(drop_randfx) export(endsWithDigit) export(eval_identityTerm) export(evaluation_report) -export(exportReportFile) export(extract_ddsDispersion) export(extract_fixed_effect) export(extract_ran_pars) @@ -76,7 +75,6 @@ export(getGeneMetadata) export(getGivenAttribute) export(getGlance) export(getGridCombination) -export(getGrobTable) export(getInput2mvrnorm) export(getInput2simulation) export(getLabelExpected) @@ -97,7 +95,6 @@ export(getValidDispersion) export(get_eval_data) export(get_eval_data_from_dds) export(get_eval_data_from_ltmb) -export(get_eval_metrics) export(get_inference_dds) export(get_label_y_position) export(get_ml_metrics_obj) @@ -180,7 +177,6 @@ importFrom(ggplot2,geom_path) importFrom(ggplot2,geom_point) importFrom(ggplot2,geom_text) importFrom(ggplot2,ggplot) -importFrom(ggplot2,ggsave) importFrom(ggplot2,ggtitle) importFrom(ggplot2,scale_color_manual) importFrom(ggplot2,scale_shape_manual) @@ -188,13 +184,8 @@ importFrom(ggplot2,scale_x_log10) importFrom(ggplot2,sym) importFrom(ggplot2,theme) importFrom(ggplot2,theme_bw) -importFrom(ggplot2,unit) importFrom(ggplot2,xlim) importFrom(ggplot2,ylim) -importFrom(gridExtra,arrangeGrob) -importFrom(gridExtra,grid.arrange) -importFrom(gridExtra,tableGrob) -importFrom(gridExtra,ttheme_minimal) importFrom(magrittr,"%>%") importFrom(reshape2,dcast) importFrom(reshape2,melt) diff --git a/R/simulation_report.R b/R/simulation_report.R index 29b2c5e12efe7c5671144ab0c8b621f8bbeee858..ddff1209a1f847078e680c487667aed237010555 100644 --- a/R/simulation_report.R +++ b/R/simulation_report.R @@ -1,67 +1,6 @@ # WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand -#' Export the Analysis Report to a File -#' -#' This function generates an analysis report by arranging and combining various plots -#' and tables, and then exports the report to a specified file. -#' -#' @param report_file Path to the file where the report will be exported. -#' @param table_settings A table containing settings and parameters used in the analysis. -#' @param roc_curve A plot displaying the Receiver Operating Characteristic (ROC) curve. -#' @param id_plot A plot displaying unique identifiers. -#' @param counts_plot A plot displaying the gene counts. -#' -#' -#' @importFrom gridExtra arrangeGrob grid.arrange -#' @importFrom ggplot2 ggsave -#' -#' -#' @return report -#' @export -exportReportFile <- function(report_file, table_settings, roc_curve, id_plot, counts_plot){ - - left_part <- gridExtra::arrangeGrob(table_settings, roc_curve, counts_plot ,heights = c(1, 1, 1)) - p2export <- gridExtra::grid.arrange(left_part, id_plot ,ncol = 3, widths = c(1,1,2)) - - if (!is.null(report_file)) ggplot2::ggsave(report_file, p2export, height = 10, width = 15) - - return(p2export) -} - - -#' Generate a Formatted Table as a Grid Graphics Object -#' -#' This function generates a formatted table using the provided data frame and returns -#' it as a grid graphics object. -#' -#' @param df The data frame to be converted into a formatted table. -#' -#' @return A grid graphics object representing the formatted table. -#' @export -#' @importFrom ggplot2 unit -#' @importFrom gridExtra tableGrob ttheme_minimal -#' @examples -#' # Create a sample data frame -#' sample_data <- data.frame( -#' Name = c("Alice", "Bob", "Charlie"), -#' Age = c(25, 30, 28) -#' ) -#' -#' # Generate the formatted table -#' table_grob <- getGrobTable(sample_data) -getGrobTable <- function(df){ - theme_custom <- gridExtra::ttheme_minimal( - core = list(bg_params = list(fill = c("#F8F9F9", "#E5E8E8"), col=NA)), - colhead = list(fg_params=list(col="white", fontface=4L), - bg_params = list(fill = "#5D6D7E", col=NA)), base_size = 15) - grob_df <- gridExtra::tableGrob(df, rows=NULL, - theme = theme_custom, - widths = ggplot2::unit(x = c(0.4,0.3), "npc" ) ) - return(grob_df) -} - - #' Check Validity of Truth Labels #' @@ -141,6 +80,12 @@ is_truthLabels_valid <- function(eval_data, col_param = "description", col_truth #' \item{counts}{A counts plot generated from mock object.} #' \item{performances}{A summary of the performances obtained.} #' @export +#' @examples +#' \dontrun{ +#' report <- evaluation_report(l_res, NULL, mock_data, +#' coeff_threshold = 0.67, alt_hypothesis = "greaterAbs") +#' } +#' evaluation_report <- function(list_tmb, dds, mock_obj, coeff_threshold, alt_hypothesis, alpha_risk = 0.05, palette_color = c(DESeq2 = "#500472", HTRfit ="#79cbb8"), palette_shape = c(DESeq2 = 17, HTRfit = 19), @@ -288,9 +233,8 @@ compute_metrics_summary <- function(dt) { #' Get classification metrics for evaluation object #' -#' This function extracts the classification metrics from an evaluation object generated by -#' \code{get_eval_metrics} function. It takes as input the identity term of the evaluation object -#' (\code{evaldata_params}) and an optional risk level for the alpha risk (\code{alpha_risk}). +#' This function extracts the classification metrics. It takes as input (\code{evaldata_params}) +#' and an optional risk level for the alpha risk (\code{alpha_risk}). #' It retrieves the p-values from the identity term and computes the binary classification #' result by thresholding with the alpha risk level. It then computes the classification metrics #' using \code{compute_metrics_summary} function separately for each parameter value as well as @@ -324,41 +268,6 @@ get_ml_metrics_obj <- function(evaldata_params, alpha_risk = 0.05, col_param = " return(list( byparams = as.data.frame(byparam_metrics), aggregate = as.data.frame(agg_metrics))) } -#' Compute evaluation metrics from evaluation object -#' -#' This function computes the evaluation metrics from the evaluation object generated by -#' \code{evaluation_report} function. It retrieves the R2 values from the identity plot, -#' ROC AUC and PR AUC from ROC and precision-recall curves and combines them into a single -#' data frame for easier analysis and interpretation. -#' -#' @param eval_obj Evaluation object generated from \code{evaluation_report} function. -#' -#' @return A data frame containing the R2 values from identity plot, ROC AUC and PR AUC values -#' from ROC and precision-recall curves. -#' @export -get_eval_metrics <- function(eval_obj){ - data_metrics <- rbind(eval_obj$identity$params$R2, eval_obj$identity$dispersion$R2) - - ## -- rename ROC AUC - idx <- names(eval_obj$roc$byparams$roc_auc) == 'AUC' - names(eval_obj$roc$byparams$roc_auc)[idx] <- 'roc_AUC' - - ## -- rename precisionrecall AUC - idx <- names(eval_obj$precision_recall$byparams$pr_auc) == 'AUC' - names(eval_obj$precision_recall$byparams$pr_auc)[idx] <- 'pr_AUC' - - ## -- join data frames - data_auc <- join_dtf(eval_obj$roc$byparams$roc_auc, eval_obj$precision_recall$byparams$pr_auc, - k1 = c("from", "description"), k2 = c("from", "description") ) - data_metrics <- join_dtf(data_metrics, data_auc , - k1 = c("from", "description"), k2 = c("from", "description") ) - return(data_metrics) -} - - - - - diff --git a/R/subsetgenes.R b/R/subsetgenes.R index af6dd190b9748bb45a5f515ba16c4559ced27fe4..74171bcfa0f0adfdf418c4decf7ea67136747eda 100644 --- a/R/subsetgenes.R +++ b/R/subsetgenes.R @@ -17,20 +17,16 @@ # The function also replaces the total number of genes in 'settings$values' with the length of 'l_genes'. # The result is a more focused and tailored genomic dataset, facilitating precision in subsequent analyses. #' -#' @examples -#' \dontrun{ -#' # Example list of genes to be retained -#' selected_genes <- c("GeneA", "GeneB", "GeneC") -#' -#' # Example data object 'mockObj' (simplified structure) -#' mockObj <- list( -#' # ... (mockObj structure) -#' ) -#' -#' # Using the subsetGenes function to filter 'mockObj' -#' filtered_mockObj <- subsetGenes(selected_genes, mockObj) -#' } #' @export +#' @examples +#' N_GENES = 100 +#' MAX_REPLICATES = 5 +#' MIN_REPLICATES = 5 +#' input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3) +#' mock_data <- mock_rnaseq(input_var_list, N_GENES, +#' min_replicates = MIN_REPLICATES, +#' max_replicates = MAX_REPLICATES) +#' subset_mockobj <- subsetGenes(mock_data, l_genes = c("gene1", "gene4")) subsetGenes <- function(l_genes, mockObj) { # Selects the indices of genes in 'groundTruth$effects$geneID' that are present in 'l_genes'. idx_gt_effects <- mockObj$groundTruth$effects$geneID %in% l_genes diff --git a/README.md b/README.md index 675c8049ec27459560c24c73b2087b216f3d4f30..4750a562a1cdd8fb4cc29d06a1eaf845b7276ca3 100644 --- a/README.md +++ b/README.md @@ -3,9 +3,7 @@    - -[](https://gitbio.ens-lyon.fr/LBMC/yvertlab/vortex/plasticity_mutation/HTRfit/-/commits/master) - + ## Why use HTRfit diff --git a/dev/flat_full.Rmd b/dev/flat_full.Rmd index b1707eb91b2bc1ba6b318be2b9f8a99b29b9cea2..17ebd0e194da258e96fbcf18827aac835a9da030 100644 --- a/dev/flat_full.Rmd +++ b/dev/flat_full.Rmd @@ -7610,67 +7610,6 @@ performance <- function(prediction.obj, ```{r function-simulation_report, filename = "simulation_report" } -#' Export the Analysis Report to a File -#' -#' This function generates an analysis report by arranging and combining various plots -#' and tables, and then exports the report to a specified file. -#' -#' @param report_file Path to the file where the report will be exported. -#' @param table_settings A table containing settings and parameters used in the analysis. -#' @param roc_curve A plot displaying the Receiver Operating Characteristic (ROC) curve. -#' @param id_plot A plot displaying unique identifiers. -#' @param counts_plot A plot displaying the gene counts. -#' -#' -#' @importFrom gridExtra arrangeGrob grid.arrange -#' @importFrom ggplot2 ggsave -#' -#' -#' @return report -#' @export -exportReportFile <- function(report_file, table_settings, roc_curve, id_plot, counts_plot){ - - left_part <- gridExtra::arrangeGrob(table_settings, roc_curve, counts_plot ,heights = c(1, 1, 1)) - p2export <- gridExtra::grid.arrange(left_part, id_plot ,ncol = 3, widths = c(1,1,2)) - - if (!is.null(report_file)) ggplot2::ggsave(report_file, p2export, height = 10, width = 15) - - return(p2export) -} - - -#' Generate a Formatted Table as a Grid Graphics Object -#' -#' This function generates a formatted table using the provided data frame and returns -#' it as a grid graphics object. -#' -#' @param df The data frame to be converted into a formatted table. -#' -#' @return A grid graphics object representing the formatted table. -#' @export -#' @importFrom ggplot2 unit -#' @importFrom gridExtra tableGrob ttheme_minimal -#' @examples -#' # Create a sample data frame -#' sample_data <- data.frame( -#' Name = c("Alice", "Bob", "Charlie"), -#' Age = c(25, 30, 28) -#' ) -#' -#' # Generate the formatted table -#' table_grob <- getGrobTable(sample_data) -getGrobTable <- function(df){ - theme_custom <- gridExtra::ttheme_minimal( - core = list(bg_params = list(fill = c("#F8F9F9", "#E5E8E8"), col=NA)), - colhead = list(fg_params=list(col="white", fontface=4L), - bg_params = list(fill = "#5D6D7E", col=NA)), base_size = 15) - grob_df <- gridExtra::tableGrob(df, rows=NULL, - theme = theme_custom, - widths = ggplot2::unit(x = c(0.4,0.3), "npc" ) ) - return(grob_df) -} - - #' Check Validity of Truth Labels #' @@ -7750,6 +7689,12 @@ is_truthLabels_valid <- function(eval_data, col_param = "description", col_truth #' \item{counts}{A counts plot generated from mock object.} #' \item{performances}{A summary of the performances obtained.} #' @export +#' @examples +#' \dontrun{ +#' report <- evaluation_report(l_res, NULL, mock_data, +#' coeff_threshold = 0.67, alt_hypothesis = "greaterAbs") +#' } +#' evaluation_report <- function(list_tmb, dds, mock_obj, coeff_threshold, alt_hypothesis, alpha_risk = 0.05, palette_color = c(DESeq2 = "#500472", HTRfit ="#79cbb8"), palette_shape = c(DESeq2 = 17, HTRfit = 19), @@ -7897,9 +7842,8 @@ compute_metrics_summary <- function(dt) { #' Get classification metrics for evaluation object #' -#' This function extracts the classification metrics from an evaluation object generated by -#' \code{get_eval_metrics} function. It takes as input the identity term of the evaluation object -#' (\code{evaldata_params}) and an optional risk level for the alpha risk (\code{alpha_risk}). +#' This function extracts the classification metrics. It takes as input (\code{evaldata_params}) +#' and an optional risk level for the alpha risk (\code{alpha_risk}). #' It retrieves the p-values from the identity term and computes the binary classification #' result by thresholding with the alpha risk level. It then computes the classification metrics #' using \code{compute_metrics_summary} function separately for each parameter value as well as @@ -7933,72 +7877,154 @@ get_ml_metrics_obj <- function(evaldata_params, alpha_risk = 0.05, col_param = " return(list( byparams = as.data.frame(byparam_metrics), aggregate = as.data.frame(agg_metrics))) } -#' Compute evaluation metrics from evaluation object -#' -#' This function computes the evaluation metrics from the evaluation object generated by -#' \code{evaluation_report} function. It retrieves the R2 values from the identity plot, -#' ROC AUC and PR AUC from ROC and precision-recall curves and combines them into a single -#' data frame for easier analysis and interpretation. -#' -#' @param eval_obj Evaluation object generated from \code{evaluation_report} function. -#' -#' @return A data frame containing the R2 values from identity plot, ROC AUC and PR AUC values -#' from ROC and precision-recall curves. -#' @export -get_eval_metrics <- function(eval_obj){ - data_metrics <- rbind(eval_obj$identity$params$R2, eval_obj$identity$dispersion$R2) - - ## -- rename ROC AUC - idx <- names(eval_obj$roc$byparams$roc_auc) == 'AUC' - names(eval_obj$roc$byparams$roc_auc)[idx] <- 'roc_AUC' - - ## -- rename precisionrecall AUC - idx <- names(eval_obj$precision_recall$byparams$pr_auc) == 'AUC' - names(eval_obj$precision_recall$byparams$pr_auc)[idx] <- 'pr_AUC' - - ## -- join data frames - data_auc <- join_dtf(eval_obj$roc$byparams$roc_auc, eval_obj$precision_recall$byparams$pr_auc, - k1 = c("from", "description"), k2 = c("from", "description") ) - data_metrics <- join_dtf(data_metrics, data_auc , - k1 = c("from", "description"), k2 = c("from", "description") ) - return(data_metrics) -} +``` + +```{r test-simulation_report} -``` +# Test invalid data +test_that("is_truthLabels_valid returns TRUE for valid data with both labels present", { + eval_data <- data.frame(description = c("A", "B", "C"), isDE = c(TRUE, FALSE, FALSE), effect = c("fixed", "fixed", "fixed")) + result <- is_truthLabels_valid(eval_data) + expect_equal(result, FALSE) +}) +# Test valid data +test_that("is_truthLabels_valid returns FALSE for valid data with only one label present", { +eval_data <- data.frame(description = c("A", "A", "A"), isDE = c(TRUE, NA, FALSE), effect = c("fixed", "fixed", "fixed")) + result <- is_truthLabels_valid(eval_data) + expect_equal(result, TRUE) +}) -```{r test-simulation_report} -# Test case 1: Testing with a sample data frame -test_that("Generating a formatted table works correctly", { - sample_data <- data.frame( - Name = c("Alice", "Bob", "Charlie"), - Age = c(25, 30, 28) - ) + +test_that("evaluation_report returns correct output", { - table_grob <- getGrobTable(sample_data) + + N_GENES <- 100 + MAX_REPLICATES <- 5 + MIN_REPLICATES <- 5 + input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3) + mock_data <- mock_rnaseq(input_var_list, N_GENES, + min_replicates = MIN_REPLICATES, max_replicates = MAX_REPLICATES) - expect_s3_class(table_grob, "gtable") -}) - -# Test case 4: Testing with non-numeric values -test_that("Handling non-numeric values in the data frame", { - non_numeric_data <- data.frame( - Name = c("Alice", "Bob", "Charlie"), - Age = c(25, "N/A", 28) -) + data2fit <- prepareData2fit(countMatrix = mock_data$counts, metadata = mock_data$metadata) + l_res <- fitModelParallel(formula = kij ~ varA, + data = data2fit, group_by = "geneID", + family = glmmTMB::nbinom2(link = "log"), n.cores = 1) + - table_grob <- getGrobTable(non_numeric_data) + # Tests here + report <- evaluation_report(l_res, NULL, mock_data, coeff_threshold = 0.01, alt_hypothesis = "greater") + + expect_true(is.list(report)) + expect_true("data" %in% names(report)) + expect_true("identity" %in% names(report)) + expect_true("precision_recall" %in% names(report)) + expect_true("roc" %in% names(report)) + expect_true("counts" %in% names(report)) + expect_true("performances" %in% names(report)) +}) + + + + +test_that("get_performances_metrics_obj returns correct output", { + # Define the input data + r2_params <- data.frame(from = c("Glm", "Hglm"), + description = c("query_1|Intercept", "query_1|Intercept", "query_1|varA", "query_1|varA"), + R2 = c(0.9, 0.7)) + r2_dispersion <- data.frame(from = c("Glm", "Hglm"), + description = c("dispersion"), + R2 = c(0.8, 0.6)) + pr_obj <- list(byparams = list(pr_auc = data.frame(from = c("Glm", "Hglm"), + description = c("query_1|Intercept", "query_1|Intercept", "query_1|varA", "query_1|varA"), + pr_auc = 0.9)), + aggregate = list(pr_auc = data.frame(from = c("Glm", "Hglm"), + pr_auc = 0.8))) + roc_obj <- list(byparams = list(roc_auc = data.frame(from = c("Glm", "Hglm"), + description = c("query_1|Intercept", "query_1|Intercept", "query_1|varA", "query_1|varA"), + roc_auc = 0.7)), + aggregate = list(roc_auc = data.frame(from = c("Glm", "Hglm"), + roc_auc = 0.6))) + ml_metrics_obj <- list(byparams = data.frame(from = c("Glm", "Hglm"), + description = c("query_1|Intercept", "query_1|Intercept", "query_1|varA", "query_1|varA"), + accuracy = c(0.9, 0.8), + recall = c(0.09, 0.88)), + aggregate = data.frame(from = c("Glm", "Hglm"), + accuracy = c(0.7, 0.6), recall = c(0.1, 0.8))) + # Call the function + result <- get_performances_metrics_obj(r2_params, r2_dispersion, + pr_obj, roc_obj, ml_metrics_obj) - expect_s3_class(table_grob, "gtable") + # Test the output + expect_true(is.list(result)) + expect_true("byparams" %in% names(result)) + expect_true("aggregate" %in% names(result)) + expect_equal(nrow(result$byparams), 6) + expect_equal(ncol(result$byparams), 7) + expect_equal(nrow(result$aggregate), 2) + expect_equal(ncol(result$aggregate), 5) +}) + + + + + +test_that("compute_metrics_summary returns correct output",{ + # Define input data + dt <- data.frame( + y_pred = c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE), + isDE = c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE) + ) + # Compute expected output + acc_exp <- 0.375 + prec_exp <- 0.4 + rec_exp <- 0.5 + sens_exp <- 0.5 + spec_exp <- 0.25 + exp_df <- data.frame( + accuracy = acc_exp, + precision = prec_exp, + recall = rec_exp, + sensitivity = sens_exp, + specificity = spec_exp + ) + # Compute actual output with tested function + act_df <- compute_metrics_summary(dt) + # Test that compute_metrics_summary returns expected output + expect_identical(act_df, exp_df) +}) + + +test_that("get_ml_metrics_obj returns correct output", { + # Define input data + seed = 100 + evaldata_params <- data.frame( + from = rep(c("Glm", "Hglm", "Hglm", "Glm"), 1000), + description = rep(c("intercept", "varA", "intercept", "varA"), 1000), + p.adj = sort(runif(1000), decreasing = T), + isDE = sort(sample(c(TRUE, FALSE), 1000, replace = T)), + effect = rep("fixed", 1000) + ) + + # Compute actual output with tested function + act_output <- get_ml_metrics_obj(evaldata_params, alpha_risk = 0.05, col_param = "description") + # Test actual output against expected output + expect_identical(names(act_output$byparams), c( "from","description","accuracy","precision","recall","sensitivity","specificity")) + expect_identical(names(act_output$aggregate), c( "from","accuracy","precision","recall","sensitivity","specificity")) + }) + + + + ``` @@ -8755,20 +8781,16 @@ test_that("anovaParallel returns valid ANOVA results", { # The function also replaces the total number of genes in 'settings$values' with the length of 'l_genes'. # The result is a more focused and tailored genomic dataset, facilitating precision in subsequent analyses. #' -#' @examples -#' \dontrun{ -#' # Example list of genes to be retained -#' selected_genes <- c("GeneA", "GeneB", "GeneC") -#' -#' # Example data object 'mockObj' (simplified structure) -#' mockObj <- list( -#' # ... (mockObj structure) -#' ) -#' -#' # Using the subsetGenes function to filter 'mockObj' -#' filtered_mockObj <- subsetGenes(selected_genes, mockObj) -#' } #' @export +#' @examples +#' N_GENES = 100 +#' MAX_REPLICATES = 5 +#' MIN_REPLICATES = 5 +#' input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3) +#' mock_data <- mock_rnaseq(input_var_list, N_GENES, +#' min_replicates = MIN_REPLICATES, +#' max_replicates = MAX_REPLICATES) +#' subset_mockobj <- subsetGenes(mock_data, l_genes = c("gene1", "gene4")) subsetGenes <- function(l_genes, mockObj) { # Selects the indices of genes in 'groundTruth$effects$geneID' that are present in 'l_genes'. idx_gt_effects <- mockObj$groundTruth$effects$geneID %in% l_genes @@ -8793,6 +8815,35 @@ subsetGenes <- function(l_genes, mockObj) { ``` +```{r test-subsetGenes} +test_that("subsetGenes return correct ouptut", { + N_GENES = 100 + MAX_REPLICATES = 5 + MIN_REPLICATES = 5 + input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3) + mock_data <- mock_rnaseq(input_var_list, N_GENES, + min_replicates = MIN_REPLICATES, max_replicates = MAX_REPLICATES) + + subset_mockobj <- subsetGenes(mock_data, l_genes = c("gene1", "gene4")) + + mock_data$settings$values[1] <- 2 + expect_equal(mock_data$settings, subset_mockobj$settings ) + expect_equal(mock_data$init, subset_mockobj$init ) + + expect_equal(subset(mock_data$groundTruth$effects, geneID %in% c("gene1", "gene4")) , subset_mockobj$groundTruth$effects ) + expect_equal(mock_data$groundTruth$gene_dispersion[c("gene1", "gene4")] , subset_mockobj$groundTruth$gene_dispersion ) + expect_equal(mock_data$counts[c("gene1", "gene4"),] , subset_mockobj$counts ) + expect_equal(mock_data$metadata , subset_mockobj$metadata ) +}) + + + + + +``` + + + ```{r function-evaluation_withmixedeffect, filename = "evaluation_withmixedeffect"} #' Check if the formula contains a mixed effect structure. @@ -9293,7 +9344,7 @@ test_that("calculate_actualMixed calculates actual mixed effects as expected", { setwd("/Users/ex_dya/Documents/LBMC/HTRfit/") #usethis::create_package(path = "/Users/ex_dya/Documents/LBMC/HTRfit/") fusen::fill_description(fields = list(Title = "HTRfit"), overwrite = T) -use_gpl_license(version = 3, include_future = TRUE) +usethis::use_gpl_license(version = 3, include_future = TRUE) usethis::use_pipe(export = TRUE) devtools::document() # Keep eval=FALSE to avoid infinite loop in case you hit the knit button diff --git a/man/evaluation_report.Rd b/man/evaluation_report.Rd index cb5f2f20d4642c2094bb95998188ef558920d7ad..f65aa4b66d167288a2914a15bad89b316da315b8 100644 --- a/man/evaluation_report.Rd +++ b/man/evaluation_report.Rd @@ -64,3 +64,10 @@ and others. It takes as input several parameters like TMB results (\code{l_tmb}) result (\code{dds}), mock object (\code{mock_obj}), coefficient threshold (\code{coeff_threshold}) and alternative hypothesis (\code{alt_hypothesis}). } +\examples{ +\dontrun{ +report <- evaluation_report(l_res, NULL, mock_data, + coeff_threshold = 0.67, alt_hypothesis = "greaterAbs") +} + +} diff --git a/man/exportReportFile.Rd b/man/exportReportFile.Rd deleted file mode 100644 index b3dd70f3042b42f057b5ca60ad47a811a6dae36b..0000000000000000000000000000000000000000 --- a/man/exportReportFile.Rd +++ /dev/null @@ -1,26 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/simulation_report.R -\name{exportReportFile} -\alias{exportReportFile} -\title{Export the Analysis Report to a File} -\usage{ -exportReportFile(report_file, table_settings, roc_curve, id_plot, counts_plot) -} -\arguments{ -\item{report_file}{Path to the file where the report will be exported.} - -\item{table_settings}{A table containing settings and parameters used in the analysis.} - -\item{roc_curve}{A plot displaying the Receiver Operating Characteristic (ROC) curve.} - -\item{id_plot}{A plot displaying unique identifiers.} - -\item{counts_plot}{A plot displaying the gene counts.} -} -\value{ -report -} -\description{ -This function generates an analysis report by arranging and combining various plots -and tables, and then exports the report to a specified file. -} diff --git a/man/getGrobTable.Rd b/man/getGrobTable.Rd deleted file mode 100644 index 2fe06cb85b6f18774b827d9e320b7cae0123538b..0000000000000000000000000000000000000000 --- a/man/getGrobTable.Rd +++ /dev/null @@ -1,28 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/simulation_report.R -\name{getGrobTable} -\alias{getGrobTable} -\title{Generate a Formatted Table as a Grid Graphics Object} -\usage{ -getGrobTable(df) -} -\arguments{ -\item{df}{The data frame to be converted into a formatted table.} -} -\value{ -A grid graphics object representing the formatted table. -} -\description{ -This function generates a formatted table using the provided data frame and returns -it as a grid graphics object. -} -\examples{ -# Create a sample data frame -sample_data <- data.frame( - Name = c("Alice", "Bob", "Charlie"), - Age = c(25, 30, 28) -) - -# Generate the formatted table -table_grob <- getGrobTable(sample_data) -} diff --git a/man/get_eval_metrics.Rd b/man/get_eval_metrics.Rd deleted file mode 100644 index a3bc74ed7d5238e22e964843341eeea46d9067df..0000000000000000000000000000000000000000 --- a/man/get_eval_metrics.Rd +++ /dev/null @@ -1,21 +0,0 @@ -% Generated by roxygen2: do not edit by hand -% Please edit documentation in R/simulation_report.R -\name{get_eval_metrics} -\alias{get_eval_metrics} -\title{Compute evaluation metrics from evaluation object} -\usage{ -get_eval_metrics(eval_obj) -} -\arguments{ -\item{eval_obj}{Evaluation object generated from \code{evaluation_report} function.} -} -\value{ -A data frame containing the R2 values from identity plot, ROC AUC and PR AUC values -from ROC and precision-recall curves. -} -\description{ -This function computes the evaluation metrics from the evaluation object generated by -\code{evaluation_report} function. It retrieves the R2 values from the identity plot, -ROC AUC and PR AUC from ROC and precision-recall curves and combines them into a single -data frame for easier analysis and interpretation. -} diff --git a/man/get_ml_metrics_obj.Rd b/man/get_ml_metrics_obj.Rd index fa6dca2a9fb80007a2028949ac47f0493af41223..c5cf3cc22e5107faec5d5f16b8709562f6d69ca7 100644 --- a/man/get_ml_metrics_obj.Rd +++ b/man/get_ml_metrics_obj.Rd @@ -22,9 +22,8 @@ A list containing separate data frames for classification metrics by parameter v and for aggregated classification metrics. } \description{ -This function extracts the classification metrics from an evaluation object generated by -\code{get_eval_metrics} function. It takes as input the identity term of the evaluation object -(\code{evaldata_params}) and an optional risk level for the alpha risk (\code{alpha_risk}). +This function extracts the classification metrics. It takes as input (\code{evaldata_params}) +and an optional risk level for the alpha risk (\code{alpha_risk}). It retrieves the p-values from the identity term and computes the binary classification result by thresholding with the alpha risk level. It then computes the classification metrics using \code{compute_metrics_summary} function separately for each parameter value as well as diff --git a/man/subsetGenes.Rd b/man/subsetGenes.Rd index 1b48bd6d0a0636cd024703a6c997bc4edde00aa0..bd14d1e44948213908666763a35d1ab96f8e3e81 100644 --- a/man/subsetGenes.Rd +++ b/man/subsetGenes.Rd @@ -21,16 +21,12 @@ The 'subsetGenes' function selects and retains genes from 'mockObj' that match t This function filters and adjusts genomic data within the Roxygeb project, based on a specified list of genes. } \examples{ -\dontrun{ -# Example list of genes to be retained -selected_genes <- c("GeneA", "GeneB", "GeneC") - -# Example data object 'mockObj' (simplified structure) -mockObj <- list( - # ... (mockObj structure) -) - -# Using the subsetGenes function to filter 'mockObj' -filtered_mockObj <- subsetGenes(selected_genes, mockObj) -} +N_GENES = 100 +MAX_REPLICATES = 5 +MIN_REPLICATES = 5 +input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3) +mock_data <- mock_rnaseq(input_var_list, N_GENES, + min_replicates = MIN_REPLICATES, + max_replicates = MAX_REPLICATES) +subset_mockobj <- subsetGenes(mock_data, l_genes = c("gene1", "gene4")) } diff --git a/tests/testthat/test-simulation_report.R b/tests/testthat/test-simulation_report.R index 99899f9b59c70414f4aaabe68a281c5f4c6efd4b..822e642fa8a5ee7c8f5d1f68334cd17b288b5763 100644 --- a/tests/testthat/test-simulation_report.R +++ b/tests/testthat/test-simulation_report.R @@ -2,33 +2,150 @@ -# Test case 1: Testing with a sample data frame -test_that("Generating a formatted table works correctly", { - sample_data <- data.frame( - Name = c("Alice", "Bob", "Charlie"), - Age = c(25, 30, 28) - ) - - table_grob <- getGrobTable(sample_data) - - expect_s3_class(table_grob, "gtable") + +# Test invalid data +test_that("is_truthLabels_valid returns TRUE for valid data with both labels present", { + eval_data <- data.frame(description = c("A", "B", "C"), isDE = c(TRUE, FALSE, FALSE), effect = c("fixed", "fixed", "fixed")) + result <- is_truthLabels_valid(eval_data) + expect_equal(result, FALSE) }) -# Test case 4: Testing with non-numeric values -test_that("Handling non-numeric values in the data frame", { - non_numeric_data <- data.frame( - Name = c("Alice", "Bob", "Charlie"), - Age = c(25, "N/A", 28) -) - - table_grob <- getGrobTable(non_numeric_data) +# Test valid data +test_that("is_truthLabels_valid returns FALSE for valid data with only one label present", { +eval_data <- data.frame(description = c("A", "A", "A"), isDE = c(TRUE, NA, FALSE), effect = c("fixed", "fixed", "fixed")) + result <- is_truthLabels_valid(eval_data) + expect_equal(result, TRUE) +}) + + + + + +test_that("evaluation_report returns correct output", { - expect_s3_class(table_grob, "gtable") + + N_GENES <- 100 + MAX_REPLICATES <- 5 + MIN_REPLICATES <- 5 + input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3) + mock_data <- mock_rnaseq(input_var_list, N_GENES, + min_replicates = MIN_REPLICATES, max_replicates = MAX_REPLICATES) + + data2fit <- prepareData2fit(countMatrix = mock_data$counts, metadata = mock_data$metadata) + l_res <- fitModelParallel(formula = kij ~ varA, + data = data2fit, group_by = "geneID", + family = glmmTMB::nbinom2(link = "log"), n.cores = 1) + + + # Tests here + report <- evaluation_report(l_res, NULL, mock_data, coeff_threshold = 0.01, alt_hypothesis = "greater") + + expect_true(is.list(report)) + expect_true("data" %in% names(report)) + expect_true("identity" %in% names(report)) + expect_true("precision_recall" %in% names(report)) + expect_true("roc" %in% names(report)) + expect_true("counts" %in% names(report)) + expect_true("performances" %in% names(report)) +}) + + + + +test_that("get_performances_metrics_obj returns correct output", { + # Define the input data + r2_params <- data.frame(from = c("Glm", "Hglm"), + description = c("query_1|Intercept", "query_1|Intercept", "query_1|varA", "query_1|varA"), + R2 = c(0.9, 0.7)) + r2_dispersion <- data.frame(from = c("Glm", "Hglm"), + description = c("dispersion"), + R2 = c(0.8, 0.6)) + pr_obj <- list(byparams = list(pr_auc = data.frame(from = c("Glm", "Hglm"), + description = c("query_1|Intercept", "query_1|Intercept", "query_1|varA", "query_1|varA"), + pr_auc = 0.9)), + aggregate = list(pr_auc = data.frame(from = c("Glm", "Hglm"), + pr_auc = 0.8))) + roc_obj <- list(byparams = list(roc_auc = data.frame(from = c("Glm", "Hglm"), + description = c("query_1|Intercept", "query_1|Intercept", "query_1|varA", "query_1|varA"), + roc_auc = 0.7)), + aggregate = list(roc_auc = data.frame(from = c("Glm", "Hglm"), + roc_auc = 0.6))) + ml_metrics_obj <- list(byparams = data.frame(from = c("Glm", "Hglm"), + description = c("query_1|Intercept", "query_1|Intercept", "query_1|varA", "query_1|varA"), + accuracy = c(0.9, 0.8), + recall = c(0.09, 0.88)), + aggregate = data.frame(from = c("Glm", "Hglm"), + accuracy = c(0.7, 0.6), recall = c(0.1, 0.8))) + # Call the function + result <- get_performances_metrics_obj(r2_params, r2_dispersion, + pr_obj, roc_obj, ml_metrics_obj) + + # Test the output + expect_true(is.list(result)) + expect_true("byparams" %in% names(result)) + expect_true("aggregate" %in% names(result)) + expect_equal(nrow(result$byparams), 6) + expect_equal(ncol(result$byparams), 7) + expect_equal(nrow(result$aggregate), 2) + expect_equal(ncol(result$aggregate), 5) }) + +test_that("compute_metrics_summary returns correct output",{ + # Define input data + dt <- data.frame( + y_pred = c(TRUE, TRUE, FALSE, TRUE, FALSE, FALSE, TRUE, TRUE), + isDE = c(TRUE, FALSE, TRUE, TRUE, FALSE, TRUE, FALSE, FALSE) + ) + # Compute expected output + acc_exp <- 0.375 + prec_exp <- 0.4 + rec_exp <- 0.5 + sens_exp <- 0.5 + spec_exp <- 0.25 + exp_df <- data.frame( + accuracy = acc_exp, + precision = prec_exp, + recall = rec_exp, + sensitivity = sens_exp, + specificity = spec_exp + ) + # Compute actual output with tested function + act_df <- compute_metrics_summary(dt) + # Test that compute_metrics_summary returns expected output + expect_identical(act_df, exp_df) +}) + + +test_that("get_ml_metrics_obj returns correct output", { + # Define input data + seed = 100 + evaldata_params <- data.frame( + from = rep(c("Glm", "Hglm", "Hglm", "Glm"), 1000), + description = rep(c("intercept", "varA", "intercept", "varA"), 1000), + p.adj = sort(runif(1000), decreasing = T), + isDE = sort(sample(c(TRUE, FALSE), 1000, replace = T)), + effect = rep("fixed", 1000) + ) + + # Compute actual output with tested function + act_output <- get_ml_metrics_obj(evaldata_params, alpha_risk = 0.05, col_param = "description") + # Test actual output against expected output + expect_identical(names(act_output$byparams), c( "from","description","accuracy","precision","recall","sensitivity","specificity")) + expect_identical(names(act_output$aggregate), c( "from","accuracy","precision","recall","sensitivity","specificity")) + +}) + + + + + + + + # Test get_eval_data_from_ltmb test_that("get_eval_data_from_ltmb returns correct output", { diff --git a/tests/testthat/test-subsetgenes.R b/tests/testthat/test-subsetgenes.R new file mode 100644 index 0000000000000000000000000000000000000000..40ebcb88574d2213a4b0a90be2952a7daafddc72 --- /dev/null +++ b/tests/testthat/test-subsetgenes.R @@ -0,0 +1,26 @@ +# WARNING - Generated by {fusen} from /dev/flat_full.Rmd: do not edit by hand + +test_that("subsetGenes return correct ouptut", { + N_GENES = 100 + MAX_REPLICATES = 5 + MIN_REPLICATES = 5 + input_var_list <- init_variable(name = "varA", mu = 10, sd = 0.1, level = 3) + mock_data <- mock_rnaseq(input_var_list, N_GENES, + min_replicates = MIN_REPLICATES, max_replicates = MAX_REPLICATES) + + subset_mockobj <- subsetGenes(mock_data, l_genes = c("gene1", "gene4")) + + mock_data$settings$values[1] <- 2 + expect_equal(mock_data$settings, subset_mockobj$settings ) + expect_equal(mock_data$init, subset_mockobj$init ) + + expect_equal(subset(mock_data$groundTruth$effects, geneID %in% c("gene1", "gene4")) , subset_mockobj$groundTruth$effects ) + expect_equal(mock_data$groundTruth$gene_dispersion[c("gene1", "gene4")] , subset_mockobj$groundTruth$gene_dispersion ) + expect_equal(mock_data$counts[c("gene1", "gene4"),] , subset_mockobj$counts ) + expect_equal(mock_data$metadata , subset_mockobj$metadata ) +}) + + + + +