diff --git a/NAMESPACE b/NAMESPACE index 79ff882843a9b0406a119a83810ee06ef48641a7..fb7c5ffd0af868a4933a391d19c223c3cc403ed4 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -25,6 +25,7 @@ export(compute_covariation) export(compute_metrics_summary) export(compute_pr_auc) export(compute_pr_curve) +export(compute_rmse) export(compute_roc_auc) export(compute_roc_curve) export(compute_rsquare) @@ -106,13 +107,13 @@ export(get_label_y_position) export(get_mad_left_threshold) export(get_mad_user_message) export(get_messages_sequencing_depth) +export(get_metrics_2plot) export(get_ml_metrics_obj) export(get_performances_metrics_obj) export(get_pr_curve) export(get_pr_object) export(get_roc_curve) export(get_roc_object) -export(get_rsquare_2plot) export(get_scaling_factor) export(glance_tmb) export(group_logQij_per_genes_and_labels) @@ -160,6 +161,7 @@ export(reorderColumns) export(replicateByGroup) export(replicateMatrix) export(replicateRows) +export(rmse) export(samplingFromMvrnorm) export(scaleCountsTable) export(sensitivity) diff --git a/R/evaluation_identity.R b/R/evaluation_identity.R index fb74451540a6f0d445a2848a28f4412a6d3325b7..74c1b957107f318c12febec380b9c4ebdd950dc5 100644 --- a/R/evaluation_identity.R +++ b/R/evaluation_identity.R @@ -1,62 +1,28 @@ # WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand - - - -#' Compute R-squared values for linear regression on grouped data -#' -#' This function takes a data frame, performs linear regression on specified grouping variables, -#' and computes R-squared values for each group. -#' -#' @param data A data frame containing the variables 'actual' and 'estimate' for regression. -#' @param grouping_by A character vector specifying the grouping variables for regression. -#' @return A data frame with columns 'from', 'term', and 'R2' representing the grouping variables -#' and the corresponding R-squared values. -#' @export -#' @examples -#' data <- data.frame(from = c("A", "A", "A", "A"), -#' term = c("X", "Y", "X", "Y"), -#' actual = c(1, 2, 3, 4), -#' estimate = c(1.5, 2.5, 3.5, 4.5)) -#' compute_rsquare(data, grouping_by = c("from", "term")) -#' -#' @importFrom data.table data.table -compute_rsquare <- function(data, grouping_by = c("from", "description") ){ - ## -- convert to data.table - dat <- data.table::data.table(data) - ## -- calculate the regression coefficient r^2 - r_square_df <- as.data.frame( - dat[ , summary(lm(actual~estimate))$r.squared, - by = grouping_by ] - ) - names(r_square_df)[names(r_square_df) == "V1"] <- "R2" - return(r_square_df) -} - - #' Gets R-squared values for plotting. #' -#' This function takes a data frame with R-squared values, +#' This function takes a data frame with R-squared and RMSE values, #' computes position coordinates, and prepares data for plotting. -#' @param data_rsquare Data frame with R-squared values. +#' @param data Data frame with R-squared values and RMSE values. #' @return A data frame with additional columns for labeling in the plot. #' @export #' @examples -#' data_rsquare <- data.frame(from = c("A", "B", "C"), +#' data_metrics <- data.frame(from = c("A", "B", "C"), #' description = c("Desc1", "Desc2", "Desc3"), -#' R2 = c(0.9, 0.8, 0.7)) -#' result <- get_rsquare_2plot(data_rsquare) -get_rsquare_2plot <- function(data_rsquare){ - data_rsquare$pos_x <- -Inf - data_rsquare$pos_y <- Inf - data_rsquare$label_italic <- sprintf("italic(R^2) == %.2f", round(data_rsquare$R2, 3)) - data_rsquare$label_vjust <- as.numeric(factor(data_rsquare$from)) - return(data_rsquare) +#' R2 = c(0.9, 0.8, 0.7), +#' RMSE = c(0.6, 0.2, 0.1)) +#' result <- get_metrics_2plot(data_metrics) +get_metrics_2plot <- function(data){ + data$pos_x <- -Inf + data$pos_y <- Inf + data$label_italic <- sprintf("italic(R^2) == %1.2f ~ phantom() ~ phantom() ~ italic(RMSE) == %2.2f", round(data$R2, 3), round(data$RMSE, 3)) + data$label_vjust <- as.numeric(factor(data$from)) + return(data) } - #' Generate an identity term plot and get metrics associated #' #' This function generates an identity plot for comparing actual values with estimates @@ -71,25 +37,29 @@ get_rsquare_2plot <- function(data_rsquare){ #' @importFrom rlang .data new_environment #' @export #' @examples -#' comparison_data <- data.frame( -#' actual = c(1, 2, 3, 4, 5), -#' estimate = c(0.9, 2.2, 2.8, 4.1, 5.2), -#' description = rep("Category A", 5), -#' term = rep("Category A", 5), -#' from = c("A", "B", "B", "A", "B")) -#' eval_identityTerm(comparison_data) +#' comparison_data <- data.frame( +#' actual = c(1, 2, 3, 4, 5), +#' estimate = c(0.9, 2.2, 2.8, 4.1, 5.2), +#' description = rep("Category A", 5), +#' term = rep("Category A", 5), +#' from = c("A", "B", "B", "A", "B")) +#' eval_identityTerm(comparison_data, +#' palette_color = c(A = "#500472", B ="#79cbb8"), +#' palette_shape = c(A = 17, B = 19)) eval_identityTerm <- function(data_identity, palette_color = c(DESeq2 = "#500472", HTRfit ="#79cbb8"), palette_shape = c(DESeq2 = 17, HTRfit = 19), ...){ data_rsquare <- compute_rsquare(data_identity) - data_rsquare2plot <- get_rsquare_2plot(data_rsquare) - + data_rmse <- compute_rmse(data_identity) + data_metrics <- join_dtf(data_rsquare, data_rmse, k1 = c("from", "description"), k2 = c("from", "description")) + data_metrics2plot <- get_metrics_2plot(data_metrics) + p <- ggplot2::ggplot(data_identity, mapping = ggplot2::aes(x = .data$actual, y = .data$estimate, col = from, shape = from, ...) )+ ggplot2::geom_point(alpha = 0.6, size = 2) + ggplot2::geom_abline(intercept = 0, slope = 1, lty = 3, col = 'red', linewidth = 1) + ggplot2::facet_wrap(~description, scales = "free") + ggplot2::theme_bw() + - ggplot2::geom_text(data = data_rsquare2plot, + ggplot2::geom_text(data = data_metrics2plot, mapping = ggplot2::aes(x = pos_x, y = pos_y, label = label_italic, col = from, vjust = label_vjust), parse = TRUE, hjust = -0.3 ) + ggplot2::ggtitle("Identity plot") + @@ -98,7 +68,7 @@ eval_identityTerm <- function(data_identity, palette_color = c(DESeq2 = "#500472 p$plot_env <- rlang::new_environment() - obj_idTerm <- list(R2 = data_rsquare, p = p ) + obj_idTerm <- list(R2 = data_metrics, p = p ) return(obj_idTerm) } diff --git a/R/rmse.R b/R/rmse.R new file mode 100644 index 0000000000000000000000000000000000000000..c65f581f8091d2ea37736e470522b4b28682487d --- /dev/null +++ b/R/rmse.R @@ -0,0 +1,53 @@ +# WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand + + +#' Root Mean Squared Error (RMSE) +#' +#' Calculates the root mean squared error (RMSE) between two vectors. +#' +#' @param y Vector of actual values. +#' @param y_hat Vector of estimates/predicted values. +#' @return RMSE value. +#' @export +rmse <- function(y, y_hat) { + if (length(y) != length(y_hat)) { + stop("RMSE: Vectors y and y_hat must have the same length.") + } + + rmse <- sqrt(mean((y - y_hat)^2, na.rm = T)) + return(rmse) +} + + + + +#' Compute RMSE values on grouped data +#' +#' This function takes a data frame, performs RMSE between estimate and actual values on specified grouping variables, +#' +#' @param data A data frame containing the variables 'actual' and 'estimate' for regression. +#' @param grouping_by A character vector specifying the grouping variables +#' @return A data frame with columns 'from', 'term', and 'RMSE' representing the grouping variables +#' and the corresponding RMSE values. +#' @export +#' @examples +#' data <- data.frame(from = c("A", "A", "A", "A"), +#' term = c("X", "Y", "X", "Y"), +#' actual = c(1, 2, 3, 4), +#' estimate = c(1.5, 2.5, 3.5, 4.5)) +#' compute_rmse(data, grouping_by = c("from", "term")) +#' +#' @importFrom data.table data.table +compute_rmse <- function(data, grouping_by = c("from", "description")) { + ## -- convert to data.table + dat <- data.table::data.table(data) + ## -- calculate the RMSE + rmse_df <- as.data.frame( + dat[ , rmse(actual, estimate), + by = grouping_by ] + ) + names(rmse_df)[names(rmse_df) == "V1"] <- "RMSE" + return(rmse_df) +} + + diff --git a/R/rsquare.R b/R/rsquare.R new file mode 100644 index 0000000000000000000000000000000000000000..0dbec7567915456bd2bfa1c77797d517cfbd9252 --- /dev/null +++ b/R/rsquare.R @@ -0,0 +1,36 @@ +# WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand + + + + +#' Compute R-squared values for linear regression on grouped data +#' +#' This function takes a data frame, performs linear regression on specified grouping variables, +#' and computes R-squared values for each group. +#' +#' @param data A data frame containing the variables 'actual' and 'estimate' for regression. +#' @param grouping_by A character vector specifying the grouping variables for regression. +#' @return A data frame with columns 'from', 'term', and 'R2' representing the grouping variables +#' and the corresponding R-squared values. +#' @export +#' @examples +#' data <- data.frame(from = c("A", "A", "A", "A"), +#' term = c("X", "Y", "X", "Y"), +#' actual = c(1, 2, 3, 4), +#' estimate = c(1.5, 2.5, 3.5, 4.5)) +#' compute_rsquare(data, grouping_by = c("from", "term")) +#' +#' @importFrom data.table data.table +compute_rsquare <- function(data, grouping_by = c("from", "description") ){ + ## -- convert to data.table + dat <- data.table::data.table(data) + ## -- calculate the regression coefficient r^2 + r_square_df <- as.data.frame( + dat[ , summary(lm(actual~estimate))$r.squared, + by = grouping_by ] + ) + names(r_square_df)[names(r_square_df) == "V1"] <- "R2" + return(r_square_df) +} + + diff --git a/dev/flat_full.Rmd b/dev/flat_full.Rmd index b15c08e7ed2e3e56b48ae332642a933413e987c6..d070e85983cc738b84845b9e047f1791c29bcc56 100644 --- a/dev/flat_full.Rmd +++ b/dev/flat_full.Rmd @@ -6891,62 +6891,28 @@ test_that("Counts plot is generated correctly", { ```{r function-evaluation_identity, filename = "evaluation_identity"} - - - -#' Compute R-squared values for linear regression on grouped data -#' -#' This function takes a data frame, performs linear regression on specified grouping variables, -#' and computes R-squared values for each group. -#' -#' @param data A data frame containing the variables 'actual' and 'estimate' for regression. -#' @param grouping_by A character vector specifying the grouping variables for regression. -#' @return A data frame with columns 'from', 'term', and 'R2' representing the grouping variables -#' and the corresponding R-squared values. -#' @export -#' @examples -#' data <- data.frame(from = c("A", "A", "A", "A"), -#' term = c("X", "Y", "X", "Y"), -#' actual = c(1, 2, 3, 4), -#' estimate = c(1.5, 2.5, 3.5, 4.5)) -#' compute_rsquare(data, grouping_by = c("from", "term")) -#' -#' @importFrom data.table data.table -compute_rsquare <- function(data, grouping_by = c("from", "description") ){ - ## -- convert to data.table - dat <- data.table::data.table(data) - ## -- calculate the regression coefficient r^2 - r_square_df <- as.data.frame( - dat[ , summary(lm(actual~estimate))$r.squared, - by = grouping_by ] - ) - names(r_square_df)[names(r_square_df) == "V1"] <- "R2" - return(r_square_df) -} - - #' Gets R-squared values for plotting. #' -#' This function takes a data frame with R-squared values, +#' This function takes a data frame with R-squared and RMSE values, #' computes position coordinates, and prepares data for plotting. -#' @param data_rsquare Data frame with R-squared values. +#' @param data Data frame with R-squared values and RMSE values. #' @return A data frame with additional columns for labeling in the plot. #' @export #' @examples -#' data_rsquare <- data.frame(from = c("A", "B", "C"), +#' data_metrics <- data.frame(from = c("A", "B", "C"), #' description = c("Desc1", "Desc2", "Desc3"), -#' R2 = c(0.9, 0.8, 0.7)) -#' result <- get_rsquare_2plot(data_rsquare) -get_rsquare_2plot <- function(data_rsquare){ - data_rsquare$pos_x <- -Inf - data_rsquare$pos_y <- Inf - data_rsquare$label_italic <- sprintf("italic(R^2) == %.2f", round(data_rsquare$R2, 3)) - data_rsquare$label_vjust <- as.numeric(factor(data_rsquare$from)) - return(data_rsquare) +#' R2 = c(0.9, 0.8, 0.7), +#' RMSE = c(0.6, 0.2, 0.1)) +#' result <- get_metrics_2plot(data_metrics) +get_metrics_2plot <- function(data){ + data$pos_x <- -Inf + data$pos_y <- Inf + data$label_italic <- sprintf("italic(R^2) == %1.2f ~ phantom() ~ phantom() ~ italic(RMSE) == %2.2f", round(data$R2, 3), round(data$RMSE, 3)) + data$label_vjust <- as.numeric(factor(data$from)) + return(data) } - #' Generate an identity term plot and get metrics associated #' #' This function generates an identity plot for comparing actual values with estimates @@ -6961,25 +6927,29 @@ get_rsquare_2plot <- function(data_rsquare){ #' @importFrom rlang .data new_environment #' @export #' @examples -#' comparison_data <- data.frame( -#' actual = c(1, 2, 3, 4, 5), -#' estimate = c(0.9, 2.2, 2.8, 4.1, 5.2), -#' description = rep("Category A", 5), -#' term = rep("Category A", 5), -#' from = c("A", "B", "B", "A", "B")) -#' eval_identityTerm(comparison_data) +#' comparison_data <- data.frame( +#' actual = c(1, 2, 3, 4, 5), +#' estimate = c(0.9, 2.2, 2.8, 4.1, 5.2), +#' description = rep("Category A", 5), +#' term = rep("Category A", 5), +#' from = c("A", "B", "B", "A", "B")) +#' eval_identityTerm(comparison_data, +#' palette_color = c(A = "#500472", B ="#79cbb8"), +#' palette_shape = c(A = 17, B = 19)) eval_identityTerm <- function(data_identity, palette_color = c(DESeq2 = "#500472", HTRfit ="#79cbb8"), palette_shape = c(DESeq2 = 17, HTRfit = 19), ...){ data_rsquare <- compute_rsquare(data_identity) - data_rsquare2plot <- get_rsquare_2plot(data_rsquare) - + data_rmse <- compute_rmse(data_identity) + data_metrics <- join_dtf(data_rsquare, data_rmse, k1 = c("from", "description"), k2 = c("from", "description")) + data_metrics2plot <- get_metrics_2plot(data_metrics) + p <- ggplot2::ggplot(data_identity, mapping = ggplot2::aes(x = .data$actual, y = .data$estimate, col = from, shape = from, ...) )+ ggplot2::geom_point(alpha = 0.6, size = 2) + ggplot2::geom_abline(intercept = 0, slope = 1, lty = 3, col = 'red', linewidth = 1) + ggplot2::facet_wrap(~description, scales = "free") + ggplot2::theme_bw() + - ggplot2::geom_text(data = data_rsquare2plot, + ggplot2::geom_text(data = data_metrics2plot, mapping = ggplot2::aes(x = pos_x, y = pos_y, label = label_italic, col = from, vjust = label_vjust), parse = TRUE, hjust = -0.3 ) + ggplot2::ggtitle("Identity plot") + @@ -6988,7 +6958,7 @@ eval_identityTerm <- function(data_identity, palette_color = c(DESeq2 = "#500472 p$plot_env <- rlang::new_environment() - obj_idTerm <- list(R2 = data_rsquare, p = p ) + obj_idTerm <- list(R2 = data_metrics, p = p ) return(obj_idTerm) } @@ -7012,34 +6982,16 @@ test_that("Identity plot is generated correctly", { idTerm_obj <- eval_identityTerm(comparison_data) expect_true("gg" %in% class(idTerm_obj$p)) - expect_equal(c("from", "description", "R2"), colnames(idTerm_obj$R2)) + expect_equal(c("from", "description", "R2", "RMSE"), colnames(idTerm_obj$R2)) }) -# Test case 1: Check if the function returns a data frame -test_that("compute_rsquare returns a data frame", { - data <- data.frame(from = c("A", "A", "A", "A"), - description = c("X", "Y", "X", "Y"), - actual = c(1, 2, 3, 4), - estimate = c(10, 20, 30, 40)) - df_rsquare <- compute_rsquare(data, grouping_by = c("from", "description")) - expect_s3_class(df_rsquare, "data.frame") - expect_equal(df_rsquare$from, c("A", "A")) - expect_equal(df_rsquare$description, c("X", "Y")) - expect_equal(df_rsquare$R2, c(1, 1)) - -}) - - - - - #' Unit Test for get_rsquare_2plot function. -test_that("get_rsquare_2plot returns expected result", { - data_rsquare <- data.frame(from = c("A", "B", "C"), description = c("Desc1", "Desc2", "Desc3"), R2 = c(0.9, 0.8, 0.7)) - result <- get_rsquare_2plot(data_rsquare) - expect_equal(names(result), c("from","description","R2" ,"pos_x", "pos_y", "label_italic","label_vjust")) +test_that("get_metrics_2plot returns expected result", { + data_metrics <- data.frame(from = c("A", "B", "C"), description = c("Desc1", "Desc2", "Desc3"), R2 = c(0.9, 0.8, 0.7), RMSE = c(0.03, 0.9, 0.18)) + result <- get_metrics_2plot(data_metrics) + expect_equal(names(result), c("from","description","R2" , "RMSE", "pos_x", "pos_y", "label_italic","label_vjust")) expect_equal(result$from, c("A","B", "C")) expect_equal(result$description, c("Desc1","Desc2", "Desc3")) expect_equal(result$label_vjust, c(1,2, 3)) @@ -8869,6 +8821,183 @@ test_that("anovaParallel returns valid ANOVA results", { ``` +```{r function-R2, filename = "rsquare"} + + + +#' Compute R-squared values for linear regression on grouped data +#' +#' This function takes a data frame, performs linear regression on specified grouping variables, +#' and computes R-squared values for each group. +#' +#' @param data A data frame containing the variables 'actual' and 'estimate' for regression. +#' @param grouping_by A character vector specifying the grouping variables for regression. +#' @return A data frame with columns 'from', 'term', and 'R2' representing the grouping variables +#' and the corresponding R-squared values. +#' @export +#' @examples +#' data <- data.frame(from = c("A", "A", "A", "A"), +#' term = c("X", "Y", "X", "Y"), +#' actual = c(1, 2, 3, 4), +#' estimate = c(1.5, 2.5, 3.5, 4.5)) +#' compute_rsquare(data, grouping_by = c("from", "term")) +#' +#' @importFrom data.table data.table +compute_rsquare <- function(data, grouping_by = c("from", "description") ){ + ## -- convert to data.table + dat <- data.table::data.table(data) + ## -- calculate the regression coefficient r^2 + r_square_df <- as.data.frame( + dat[ , summary(lm(actual~estimate))$r.squared, + by = grouping_by ] + ) + names(r_square_df)[names(r_square_df) == "V1"] <- "R2" + return(r_square_df) +} + + +``` + + +```{r test-rsquare} + + +# Test case 1: Check if the function returns a data frame +test_that("compute_rsquare returns a data frame", { + data <- data.frame(from = c("A", "A", "A", "A"), + description = c("X", "Y", "X", "Y"), + actual = c(1, 2, 3, 4), + estimate = c(10, 20, 30, 40)) + df_rsquare <- compute_rsquare(data, grouping_by = c("from", "description")) + expect_s3_class(df_rsquare, "data.frame") + expect_equal(df_rsquare$from, c("A", "A")) + expect_equal(df_rsquare$description, c("X", "Y")) + expect_equal(df_rsquare$R2, c(1, 1)) + +}) + + +``` + + +```{r function-RMSE, filename = "rmse"} + +#' Root Mean Squared Error (RMSE) +#' +#' Calculates the root mean squared error (RMSE) between two vectors. +#' +#' @param y Vector of actual values. +#' @param y_hat Vector of estimates/predicted values. +#' @return RMSE value. +#' @export +rmse <- function(y, y_hat) { + if (length(y) != length(y_hat)) { + stop("RMSE: Vectors y and y_hat must have the same length.") + } + + rmse <- sqrt(mean((y - y_hat)^2, na.rm = T)) + return(rmse) +} + + + + +#' Compute RMSE values on grouped data +#' +#' This function takes a data frame, performs RMSE between estimate and actual values on specified grouping variables, +#' +#' @param data A data frame containing the variables 'actual' and 'estimate' for regression. +#' @param grouping_by A character vector specifying the grouping variables +#' @return A data frame with columns 'from', 'term', and 'RMSE' representing the grouping variables +#' and the corresponding RMSE values. +#' @export +#' @examples +#' data <- data.frame(from = c("A", "A", "A", "A"), +#' term = c("X", "Y", "X", "Y"), +#' actual = c(1, 2, 3, 4), +#' estimate = c(1.5, 2.5, 3.5, 4.5)) +#' compute_rmse(data, grouping_by = c("from", "term")) +#' +#' @importFrom data.table data.table +compute_rmse <- function(data, grouping_by = c("from", "description")) { + ## -- convert to data.table + dat <- data.table::data.table(data) + ## -- calculate the RMSE + rmse_df <- as.data.frame( + dat[ , rmse(actual, estimate), + by = grouping_by ] + ) + names(rmse_df)[names(rmse_df) == "V1"] <- "RMSE" + return(rmse_df) +} + + +``` + + + +```{r test-rmse} + + + +# Test case 1: Check if the function returns a data frame +test_that("compute_rmse returns a data frame", { + data <- data.frame(from = c("A", "A", "A", "A"), + description = c("X", "Y", "X", "Y"), + actual = c(1, 2, 3, 4), + estimate = c(10, 20, 30, 40)) + df_rmse <- compute_rmse(data, grouping_by = c("from", "description")) + expect_s3_class(df_rmse, "data.frame") + expect_equal(df_rmse$from, c("A", "A")) + expect_equal(df_rmse$description, c("X", "Y")) + expect_equal(df_rmse$RMSE, c(20.12461, 28.46050), tolerance = 1e-3) + + + ## -- exact match + data <- data.frame(from = c("A", "A", "A", "A"), + description = c("X", "Y", "X", "Y"), + actual = c(1, 2, 3, 4), + estimate = c(1, 2, 3, 4)) + df_rmse <- compute_rmse(data, grouping_by = c("from", "description")) + expect_s3_class(df_rmse, "data.frame") + expect_equal(df_rmse$from, c("A", "A")) + expect_equal(df_rmse$description, c("X", "Y")) + expect_equal(df_rmse$RMSE, c(0, 0), tolerance = 0) + +}) + + + +test_that("RMSE function calculates correct RMSE values", { + # Test case 1: Same vectors + y1 <- c(1, 2, 3, 4, 5) + y_hat1 <- c(1, 2, 3, 4, 5) + expect_equal(rmse(y1, y_hat1), 0, + info = "RMSE between identical vectors should be 0.") + + # Test case 2: Different vectors + y2 <- c(1, 2, 3, 4, 5) + y_hat2 <- c(2, 3, 4, 5, 6) + expect_equal(round(rmse(y2, y_hat2), 2), 1, + info = "RMSE between different vectors should be approximately 1") + + # Test case 3: vector with NA + y3 <- c(1, 2, 3, 4) + y_hat3 <- c(1, 2, NA, 4) + expect_equal(rmse(y3, y_hat3), 0) + + # Test case 4: Vectors with different lengths + y4 <- c(1, 2, 3, 4, 5) + y_hat4 <- c(1, 2, 3, 4) + expect_error(rmse(y4, y_hat4), + info = "RMSE should throw an error for vectors with different lengths.") +}) + + + +``` + + ```{r function-subsetGenes, filename = "subsetGenes"} diff --git a/man/compute_rmse.Rd b/man/compute_rmse.Rd new file mode 100644 index 0000000000000000000000000000000000000000..f8ce3263a4e3dce99435dda3913f9108308dfd06 --- /dev/null +++ b/man/compute_rmse.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rmse.R +\name{compute_rmse} +\alias{compute_rmse} +\title{Compute RMSE values on grouped data} +\usage{ +compute_rmse(data, grouping_by = c("from", "description")) +} +\arguments{ +\item{data}{A data frame containing the variables 'actual' and 'estimate' for regression.} + +\item{grouping_by}{A character vector specifying the grouping variables} +} +\value{ +A data frame with columns 'from', 'term', and 'RMSE' representing the grouping variables +and the corresponding RMSE values. +} +\description{ +This function takes a data frame, performs RMSE between estimate and actual values on specified grouping variables, +} +\examples{ +data <- data.frame(from = c("A", "A", "A", "A"), + term = c("X", "Y", "X", "Y"), + actual = c(1, 2, 3, 4), + estimate = c(1.5, 2.5, 3.5, 4.5)) +compute_rmse(data, grouping_by = c("from", "term")) + +} diff --git a/man/compute_rsquare.Rd b/man/compute_rsquare.Rd index b5a1b669c18096065c59169bc7010c51093ea970..9b56a4cadd43c312afc82054ce743b852ea67b90 100644 --- a/man/compute_rsquare.Rd +++ b/man/compute_rsquare.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/evaluation_identity.R +% Please edit documentation in R/rsquare.R \name{compute_rsquare} \alias{compute_rsquare} \title{Compute R-squared values for linear regression on grouped data} diff --git a/man/eval_identityTerm.Rd b/man/eval_identityTerm.Rd index faa44035d54e810d6e6f4a044f520c943b7965b4..faf0cf5af8dbc0ab0cf8653f717c16128ce6dcaf 100644 --- a/man/eval_identityTerm.Rd +++ b/man/eval_identityTerm.Rd @@ -27,11 +27,13 @@ A ggplot2 identity plot and R2 metric associated This function generates an identity plot for comparing actual values with estimates } \examples{ - comparison_data <- data.frame( - actual = c(1, 2, 3, 4, 5), - estimate = c(0.9, 2.2, 2.8, 4.1, 5.2), - description = rep("Category A", 5), - term = rep("Category A", 5), - from = c("A", "B", "B", "A", "B")) -eval_identityTerm(comparison_data) +comparison_data <- data.frame( + actual = c(1, 2, 3, 4, 5), + estimate = c(0.9, 2.2, 2.8, 4.1, 5.2), + description = rep("Category A", 5), + term = rep("Category A", 5), + from = c("A", "B", "B", "A", "B")) +eval_identityTerm(comparison_data, + palette_color = c(A = "#500472", B ="#79cbb8"), + palette_shape = c(A = 17, B = 19)) } diff --git a/man/get_rsquare_2plot.Rd b/man/get_metrics_2plot.Rd similarity index 50% rename from man/get_rsquare_2plot.Rd rename to man/get_metrics_2plot.Rd index 37bb6647f511a5e30f26f068e8544eee74dc4038..e534524c44c41647b0b584d88c36b47c51c8124d 100644 --- a/man/get_rsquare_2plot.Rd +++ b/man/get_metrics_2plot.Rd @@ -1,24 +1,25 @@ % Generated by roxygen2: do not edit by hand % Please edit documentation in R/evaluation_identity.R -\name{get_rsquare_2plot} -\alias{get_rsquare_2plot} +\name{get_metrics_2plot} +\alias{get_metrics_2plot} \title{Gets R-squared values for plotting.} \usage{ -get_rsquare_2plot(data_rsquare) +get_metrics_2plot(data) } \arguments{ -\item{data_rsquare}{Data frame with R-squared values.} +\item{data}{Data frame with R-squared values and RMSE values.} } \value{ A data frame with additional columns for labeling in the plot. } \description{ -This function takes a data frame with R-squared values, +This function takes a data frame with R-squared and RMSE values, computes position coordinates, and prepares data for plotting. } \examples{ -data_rsquare <- data.frame(from = c("A", "B", "C"), +data_metrics <- data.frame(from = c("A", "B", "C"), description = c("Desc1", "Desc2", "Desc3"), - R2 = c(0.9, 0.8, 0.7)) -result <- get_rsquare_2plot(data_rsquare) + R2 = c(0.9, 0.8, 0.7), + RMSE = c(0.6, 0.2, 0.1)) +result <- get_metrics_2plot(data_metrics) } diff --git a/man/rmse.Rd b/man/rmse.Rd new file mode 100644 index 0000000000000000000000000000000000000000..13795387b3e94069ba0e109b98e6ea254c467693 --- /dev/null +++ b/man/rmse.Rd @@ -0,0 +1,19 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/rmse.R +\name{rmse} +\alias{rmse} +\title{Root Mean Squared Error (RMSE)} +\usage{ +rmse(y, y_hat) +} +\arguments{ +\item{y}{Vector of actual values.} + +\item{y_hat}{Vector of estimates/predicted values.} +} +\value{ +RMSE value. +} +\description{ +Calculates the root mean squared error (RMSE) between two vectors. +} diff --git a/tests/testthat/test-evaluation_identity.R b/tests/testthat/test-evaluation_identity.R index 793bee512296e253a52be40d35c496fb089d9694..4e2fc87086949fdf46e12b3e23aafcf82935e226 100644 --- a/tests/testthat/test-evaluation_identity.R +++ b/tests/testthat/test-evaluation_identity.R @@ -15,34 +15,16 @@ test_that("Identity plot is generated correctly", { idTerm_obj <- eval_identityTerm(comparison_data) expect_true("gg" %in% class(idTerm_obj$p)) - expect_equal(c("from", "description", "R2"), colnames(idTerm_obj$R2)) + expect_equal(c("from", "description", "R2", "RMSE"), colnames(idTerm_obj$R2)) }) -# Test case 1: Check if the function returns a data frame -test_that("compute_rsquare returns a data frame", { - data <- data.frame(from = c("A", "A", "A", "A"), - description = c("X", "Y", "X", "Y"), - actual = c(1, 2, 3, 4), - estimate = c(10, 20, 30, 40)) - df_rsquare <- compute_rsquare(data, grouping_by = c("from", "description")) - expect_s3_class(df_rsquare, "data.frame") - expect_equal(df_rsquare$from, c("A", "A")) - expect_equal(df_rsquare$description, c("X", "Y")) - expect_equal(df_rsquare$R2, c(1, 1)) - -}) - - - - - #' Unit Test for get_rsquare_2plot function. -test_that("get_rsquare_2plot returns expected result", { - data_rsquare <- data.frame(from = c("A", "B", "C"), description = c("Desc1", "Desc2", "Desc3"), R2 = c(0.9, 0.8, 0.7)) - result <- get_rsquare_2plot(data_rsquare) - expect_equal(names(result), c("from","description","R2" ,"pos_x", "pos_y", "label_italic","label_vjust")) +test_that("get_metrics_2plot returns expected result", { + data_metrics <- data.frame(from = c("A", "B", "C"), description = c("Desc1", "Desc2", "Desc3"), R2 = c(0.9, 0.8, 0.7), RMSE = c(0.03, 0.9, 0.18)) + result <- get_metrics_2plot(data_metrics) + expect_equal(names(result), c("from","description","R2" , "RMSE", "pos_x", "pos_y", "label_italic","label_vjust")) expect_equal(result$from, c("A","B", "C")) expect_equal(result$description, c("Desc1","Desc2", "Desc3")) expect_equal(result$label_vjust, c(1,2, 3)) diff --git a/tests/testthat/test-rmse.R b/tests/testthat/test-rmse.R new file mode 100644 index 0000000000000000000000000000000000000000..405b42750bb6fb81dec629efa9c8cc6a1ad410f7 --- /dev/null +++ b/tests/testthat/test-rmse.R @@ -0,0 +1,60 @@ +# WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand + + + + +# Test case 1: Check if the function returns a data frame +test_that("compute_rmse returns a data frame", { + data <- data.frame(from = c("A", "A", "A", "A"), + description = c("X", "Y", "X", "Y"), + actual = c(1, 2, 3, 4), + estimate = c(10, 20, 30, 40)) + df_rmse <- compute_rmse(data, grouping_by = c("from", "description")) + expect_s3_class(df_rmse, "data.frame") + expect_equal(df_rmse$from, c("A", "A")) + expect_equal(df_rmse$description, c("X", "Y")) + expect_equal(df_rmse$RMSE, c(20.12461, 28.46050), tolerance = 1e-3) + + + ## -- exact match + data <- data.frame(from = c("A", "A", "A", "A"), + description = c("X", "Y", "X", "Y"), + actual = c(1, 2, 3, 4), + estimate = c(1, 2, 3, 4)) + df_rmse <- compute_rmse(data, grouping_by = c("from", "description")) + expect_s3_class(df_rmse, "data.frame") + expect_equal(df_rmse$from, c("A", "A")) + expect_equal(df_rmse$description, c("X", "Y")) + expect_equal(df_rmse$RMSE, c(0, 0), tolerance = 0) + +}) + + + +test_that("RMSE function calculates correct RMSE values", { + # Test case 1: Same vectors + y1 <- c(1, 2, 3, 4, 5) + y_hat1 <- c(1, 2, 3, 4, 5) + expect_equal(rmse(y1, y_hat1), 0, + info = "RMSE between identical vectors should be 0.") + + # Test case 2: Different vectors + y2 <- c(1, 2, 3, 4, 5) + y_hat2 <- c(2, 3, 4, 5, 6) + expect_equal(round(rmse(y2, y_hat2), 2), 1, + info = "RMSE between different vectors should be approximately 1") + + # Test case 3: vector with NA + y3 <- c(1, 2, 3, 4) + y_hat3 <- c(1, 2, NA, 4) + expect_equal(rmse(y3, y_hat3), 0) + + # Test case 4: Vectors with different lengths + y4 <- c(1, 2, 3, 4, 5) + y_hat4 <- c(1, 2, 3, 4) + expect_error(rmse(y4, y_hat4), + info = "RMSE should throw an error for vectors with different lengths.") +}) + + + diff --git a/tests/testthat/test-rsquare.R b/tests/testthat/test-rsquare.R new file mode 100644 index 0000000000000000000000000000000000000000..ae6eb26ec64fc9df6e132f7c9d6c5f394903feb6 --- /dev/null +++ b/tests/testthat/test-rsquare.R @@ -0,0 +1,19 @@ +# WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand + + + +# Test case 1: Check if the function returns a data frame +test_that("compute_rsquare returns a data frame", { + data <- data.frame(from = c("A", "A", "A", "A"), + description = c("X", "Y", "X", "Y"), + actual = c(1, 2, 3, 4), + estimate = c(10, 20, 30, 40)) + df_rsquare <- compute_rsquare(data, grouping_by = c("from", "description")) + expect_s3_class(df_rsquare, "data.frame") + expect_equal(df_rsquare$from, c("A", "A")) + expect_equal(df_rsquare$description, c("X", "Y")) + expect_equal(df_rsquare$R2, c(1, 1)) + +}) + +