diff --git a/.dockerfile/v2.0.5/HTRfit_2.0.5.tar.gz.REMOVED.git-id b/.dockerfile/v2.0.5/HTRfit_2.0.5.tar.gz.REMOVED.git-id new file mode 100644 index 0000000000000000000000000000000000000000..b2e8b5810d9b9d930e872408cf025f7539d932c4 --- /dev/null +++ b/.dockerfile/v2.0.5/HTRfit_2.0.5.tar.gz.REMOVED.git-id @@ -0,0 +1 @@ +695e8e39fdf4a2a1d592e1297ee21bdf45db28c6 \ No newline at end of file diff --git a/.dockerfile/v2.0.5/dockerfile b/.dockerfile/v2.0.5/dockerfile new file mode 100644 index 0000000000000000000000000000000000000000..45c787a6e744be8a7048f5e85ba2e0e7f6e8308f --- /dev/null +++ b/.dockerfile/v2.0.5/dockerfile @@ -0,0 +1,14 @@ +FROM rocker/r-base:4.3.1 + +RUN apt-get -y update && \ + apt-get -y install cmake libxml2-dev libcurl4-gnutls-dev + +COPY HTRfit_2.0.5.tar.gz /HTRfit_2.0.5.tar.gz + +RUN R -e "install.packages(c('parallel', 'data.table', 'ggplot2', 'gridExtra', 'glmmTMB', 'magrittr', 'MASS', 'reshape2', 'rlang', 'stats', 'utils', 'BiocManager', 'car'))" + +RUN R -e "BiocManager::install('S4Vectors', update = FALSE)" + +RUN R -e "BiocManager::install('DESeq2', update = FALSE)" + +RUN R -e "install.packages('/HTRfit_2.0.5.tar.gz', repos = NULL, type='source')" diff --git a/R/mlmetrics.R b/R/mlmetrics.R index c2778d35cba79c277edc9013d4813bbad6c87111..86df85ac8c67f3133d30ef92327896257a1a04f1 100644 --- a/R/mlmetrics.R +++ b/R/mlmetrics.R @@ -254,6 +254,3 @@ Area_Under_Curve <- function(x, y, method = "trapezoid"){ if (complement==TRUE) match.bool <- !match.bool return( arglist[ match.bool] ) } - - - diff --git a/R/rocr_functions.R b/R/rocr_functions.R deleted file mode 100644 index 920b403baabd3322270132652c2a3a0ba1a07fd7..0000000000000000000000000000000000000000 --- a/R/rocr_functions.R +++ /dev/null @@ -1,157 +0,0 @@ -# WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand - - - -#' Computes the precision-recall curve (AUC). -#' -#' -#' @param dt A data frame with columns truth (first column) and score (second column). -#' @return A dataframe with precision recall. -#' @export -compute_pr_curve <- function(dt){ - ## -- replace 0 by minimum machine - dt$p.adj[ dt$p.adj == 0 ] <- 1e-217 - ## --see .SDcols for order - pred_obj <- prediction( -log10(dt$p.adj) , dt$isDE) - perf_obj = performance(pred_obj, measure = "prec", x.measure = "rec") - data2curve <- data.frame(x.name = perf_obj@x.values[[1]], y.name = perf_obj@y.values[[1]]) - names(data2curve) <- c(unname(perf_obj@x.name), unname(perf_obj@y.name)) - ## -- drop NA - data2curve <- na.omit(data2curve) - ## -- add start point - data2curve <- rbind(c(0,1), data2curve) - return(data2curve) -} - - -#' Computes area under the precision-recall curve (AUC). -#' -#' This function calculates the area under the precision-recall curve (AUC). -#' -#' @param dt A data table with columns for recall and precision. -#' @return A numeric value representing the AUC. -#' @export -compute_pr_auc <- function(dt) Area_Under_Curve( dt$recall, dt$precision ) - - -#' Gets precision-recall objects for a given parameter. -#' -#' This function takes a data table of evaluation parameters and returns precision-recall curves -#' for each term and an aggregate precision-recall curve. -#' -#' @param evaldata_params Data table containing evaluation parameters. -#' @param col_param Column name specifying the parameter for grouping. -#' @param col_truth Column name for binary ground truth values. -#' @param col_score Column name for predicted scores. 
-#' @return A list containing precision-recall curves and AUCs for each group and an aggregate precision-recall curve and AUC. -#' @importFrom data.table setDT -#' @export -get_pr_object <- function(evaldata_params, col_param = "description", col_truth = "isDE", col_score = "p.adj" ) { - - ## -- subset fixed eff - evaldata_params <- subset(evaldata_params, effect == "fixed") - - ## -- by params -- random class AUC - prop_table <- table(evaldata_params[[col_param]], evaldata_params[[col_truth]]) - random_classifier_auc_params <- prop_table[,"TRUE"]/rowSums(prop_table) - random_classifier_auc_params <- as.data.frame(random_classifier_auc_params) - random_classifier_auc_params[col_param] <- rownames(random_classifier_auc_params) - - ## -- aggregate -- random class AUC - prop_table <- table(evaldata_params[[col_truth]]) - random_classifier_auc_agg <- unname(prop_table["TRUE"]/sum(prop_table)) - - ## -- data.table conversion - dt_evaldata_params <- data.table::setDT(evaldata_params) - - ## -- by params - pr_curve_params <- dt_evaldata_params[, compute_pr_curve(.SD), by=c("from", col_param), .SDcols=c(col_truth, col_score)] - pr_auc_params <- pr_curve_params[, compute_pr_auc(.SD), by=c("from", col_param), .SDcols=c("recall", "precision")] - names(pr_auc_params)[ names(pr_auc_params) == "V1" ] <- "pr_AUC" - pr_auc_params <- join_dtf(pr_auc_params, random_classifier_auc_params , - k1 = col_param, k2 = col_param) - names(pr_auc_params)[ names(pr_auc_params) == "random_classifier_auc_params" ] <- "pr_randm_AUC" - pr_auc_params$pr_performance_ratio <- pr_auc_params$pr_AUC/pr_auc_params$pr_randm_AUC - - ## -- aggregate - pr_curve_agg <- dt_evaldata_params[, compute_pr_curve(.SD), by = "from", .SDcols=c(col_truth, col_score)] - pr_auc_agg <- pr_curve_agg[, compute_pr_auc(.SD), by = "from", .SDcols=c("recall", "precision")] - names(pr_auc_agg)[ names(pr_auc_agg) == "V1" ] <- "pr_AUC" - pr_auc_agg$pr_randm_AUC <- random_classifier_auc_agg - pr_auc_agg$pr_performance_ratio <- pr_auc_agg$pr_AUC/pr_auc_agg$pr_randm_AUC - - return(list(byparams = list(pr_curve = as.data.frame(pr_curve_params), - pr_auc = as.data.frame(pr_auc_params)), - aggregate = list(pr_curve = as.data.frame(pr_curve_agg), - pr_auc = as.data.frame(pr_auc_agg))) - ) - -} - - - -#' Builds a ggplot precision-recall curve. -#' -#' This function takes data frames for precision-recall curve and AUC and builds a ggplot precision-recall curve. -#' -#' @param data_curve Data frame with precision-recall curve. -#' @param data_auc Data frame with AUC. -#' @param palette_color list of colors used. -#' @param ... Additional arguments to be passed to \code{ggplot2::geom_path}. -#' @return A ggplot object representing the precision-recall curve. -#' @importFrom ggplot2 ggplot geom_path geom_text theme_bw xlim ylim scale_color_manual aes sym -#' @export -build_gg_pr_curve <- function(data_curve, data_auc, palette_color = c("#500472", "#79cbb8"), ...){ - - #print(list(...)) - #print(ggplot2::sym(list(...))) - #args <- lapply(list(...), function(x) if(!is.null(x)) ggplot2::sym(x) ) - - - data_auc <- get_label_y_position(data_auc) - - - ggplot2::ggplot(data_curve) + - ggplot2::geom_path(ggplot2::aes(x = recall, y = precision , ... 
), linewidth = 1) + - ggplot2::geom_text(data_auc, - mapping = ggplot2::aes(x = 0.75, y = pos_y, - label = paste("AUC :", round(pr_AUC, 2) , sep = ""), col = from ) - ) + - ggplot2::theme_bw() + - ggplot2::xlim( 0, 1 ) + - ggplot2::ylim( 0, 1 ) + - ggplot2::scale_color_manual(values = palette_color) - -} - - - -#' Gets precision-recall curves and AUC for both aggregated and individual parameters. -#' -#' This function takes a precision-recall object and returns precision-recall curves and AUCs for both aggregated and individual parameters. -#' -#' @param pr_obj precision-recall object. -#' @param ... Additional arguments to be passed to \code{ggplot2::geom_path}. -#' @return precision-recall curves and AUCs for both aggregated and individual parameters. -#' @importFrom ggplot2 facet_wrap coord_fixed -#' @export -get_pr_curve <- function(pr_obj, ...){ - - ## -- aggreg - data_curve <- pr_obj$aggregate$pr_curve - data_auc <- pr_obj$aggregate$pr_auc - pr_obj$aggregate$pr_curve <- build_gg_pr_curve(data_curve, data_auc, col = from , ... ) - - ## -- indiv - data_curve <- pr_obj$byparams$pr_curve - data_auc <- pr_obj$byparams$pr_auc - pr_obj$byparams$pr_curve <- build_gg_pr_curve(data_curve, data_auc, col = from , ... ) + - ggplot2::facet_wrap(~description) + - ggplot2::coord_fixed() - - return(pr_obj) -} - - - - diff --git a/R/fake-section-title.R b/R/rocr_pkg_classes.R similarity index 99% rename from R/fake-section-title.R rename to R/rocr_pkg_classes.R index 4da9d9a29f8503a46497c662c39a4375ad5ace84..9100697ba39da13f3d68d9bfaac3025aec7cbd69 100644 --- a/R/fake-section-title.R +++ b/R/rocr_pkg_classes.R @@ -1,4 +1,6 @@ -# WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand +# WARNING - This file is NOT generated by {fusen} from dev/flat_full.Rmd ! +# Do not erase it, or errors will appear ! +# All content of this file comes from the ROCR package #' @name prediction-class diff --git a/R/rocr_pkg_functions.R b/R/rocr_pkg_functions.R deleted file mode 100644 index 920b403baabd3322270132652c2a3a0ba1a07fd7..0000000000000000000000000000000000000000 --- a/R/rocr_pkg_functions.R +++ /dev/null @@ -1,157 +0,0 @@ -# WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand - - - -#' Computes the precision-recall curve (AUC). -#' -#' -#' @param dt A data frame with columns truth (first column) and score (second column). -#' @return A dataframe with precision recall. -#' @export -compute_pr_curve <- function(dt){ - ## -- replace 0 by minimum machine - dt$p.adj[ dt$p.adj == 0 ] <- 1e-217 - ## --see .SDcols for order - pred_obj <- prediction( -log10(dt$p.adj) , dt$isDE) - perf_obj = performance(pred_obj, measure = "prec", x.measure = "rec") - data2curve <- data.frame(x.name = perf_obj@x.values[[1]], y.name = perf_obj@y.values[[1]]) - names(data2curve) <- c(unname(perf_obj@x.name), unname(perf_obj@y.name)) - ## -- drop NA - data2curve <- na.omit(data2curve) - ## -- add start point - data2curve <- rbind(c(0,1), data2curve) - return(data2curve) -} - - -#' Computes area under the precision-recall curve (AUC). -#' -#' This function calculates the area under the precision-recall curve (AUC). -#' -#' @param dt A data table with columns for recall and precision. -#' @return A numeric value representing the AUC. -#' @export -compute_pr_auc <- function(dt) Area_Under_Curve( dt$recall, dt$precision ) - - -#' Gets precision-recall objects for a given parameter.
-#' -#' This function takes a data table of evaluation parameters and returns precision-recall curves -#' for each term and an aggregate precision-recall curve. -#' -#' @param evaldata_params Data table containing evaluation parameters. -#' @param col_param Column name specifying the parameter for grouping. -#' @param col_truth Column name for binary ground truth values. -#' @param col_score Column name for predicted scores. -#' @return A list containing precision-recall curves and AUCs for each group and an aggregate precision-recall curve and AUC. -#' @importFrom data.table setDT -#' @export -get_pr_object <- function(evaldata_params, col_param = "description", col_truth = "isDE", col_score = "p.adj" ) { - - ## -- subset fixed eff - evaldata_params <- subset(evaldata_params, effect == "fixed") - - ## -- by params -- random class AUC - prop_table <- table(evaldata_params[[col_param]], evaldata_params[[col_truth]]) - random_classifier_auc_params <- prop_table[,"TRUE"]/rowSums(prop_table) - random_classifier_auc_params <- as.data.frame(random_classifier_auc_params) - random_classifier_auc_params[col_param] <- rownames(random_classifier_auc_params) - - ## -- aggregate -- random class AUC - prop_table <- table(evaldata_params[[col_truth]]) - random_classifier_auc_agg <- unname(prop_table["TRUE"]/sum(prop_table)) - - ## -- data.table conversion - dt_evaldata_params <- data.table::setDT(evaldata_params) - - ## -- by params - pr_curve_params <- dt_evaldata_params[, compute_pr_curve(.SD), by=c("from", col_param), .SDcols=c(col_truth, col_score)] - pr_auc_params <- pr_curve_params[, compute_pr_auc(.SD), by=c("from", col_param), .SDcols=c("recall", "precision")] - names(pr_auc_params)[ names(pr_auc_params) == "V1" ] <- "pr_AUC" - pr_auc_params <- join_dtf(pr_auc_params, random_classifier_auc_params , - k1 = col_param, k2 = col_param) - names(pr_auc_params)[ names(pr_auc_params) == "random_classifier_auc_params" ] <- "pr_randm_AUC" - pr_auc_params$pr_performance_ratio <- pr_auc_params$pr_AUC/pr_auc_params$pr_randm_AUC - - ## -- aggregate - pr_curve_agg <- dt_evaldata_params[, compute_pr_curve(.SD), by = "from", .SDcols=c(col_truth, col_score)] - pr_auc_agg <- pr_curve_agg[, compute_pr_auc(.SD), by = "from", .SDcols=c("recall", "precision")] - names(pr_auc_agg)[ names(pr_auc_agg) == "V1" ] <- "pr_AUC" - pr_auc_agg$pr_randm_AUC <- random_classifier_auc_agg - pr_auc_agg$pr_performance_ratio <- pr_auc_agg$pr_AUC/pr_auc_agg$pr_randm_AUC - - return(list(byparams = list(pr_curve = as.data.frame(pr_curve_params), - pr_auc = as.data.frame(pr_auc_params)), - aggregate = list(pr_curve = as.data.frame(pr_curve_agg), - pr_auc = as.data.frame(pr_auc_agg))) - ) - -} - - - -#' Builds a ggplot precision-recall curve. -#' -#' This function takes data frames for precision-recall curve and AUC and builds a ggplot precision-recall curve. -#' -#' @param data_curve Data frame with precision-recall curve. -#' @param data_auc Data frame with AUC. -#' @param palette_color list of colors used. -#' @param ... Additional arguments to be passed to \code{ggplot2::geom_path}. -#' @return A ggplot object representing the precision-recall curve. 
-#' @importFrom ggplot2 ggplot geom_path geom_text theme_bw xlim ylim scale_color_manual aes sym -#' @export -build_gg_pr_curve <- function(data_curve, data_auc, palette_color = c("#500472", "#79cbb8"), ...){ - - #print(list(...)) - #print(ggplot2::sym(list(...))) - #args <- lapply(list(...), function(x) if(!is.null(x)) ggplot2::sym(x) ) - - - data_auc <- get_label_y_position(data_auc) - - - ggplot2::ggplot(data_curve) + - ggplot2::geom_path(ggplot2::aes(x = recall, y = precision , ... ), linewidth = 1) + - ggplot2::geom_text(data_auc, - mapping = ggplot2::aes(x = 0.75, y = pos_y, - label = paste("AUC :", round(pr_AUC, 2) , sep = ""), col = from ) - ) + - ggplot2::theme_bw() + - ggplot2::xlim( 0, 1 ) + - ggplot2::ylim( 0, 1 ) + - ggplot2::scale_color_manual(values = palette_color) - -} - - - -#' Gets precision-recall curves and AUC for both aggregated and individual parameters. -#' -#' This function takes a precision-recall object and returns precision-recall curves and AUCs for both aggregated and individual parameters. -#' -#' @param pr_obj precision-recall object. -#' @param ... Additional arguments to be passed to \code{ggplot2::geom_path}. -#' @return precision-recall curves and AUCs for both aggregated and individual parameters. -#' @importFrom ggplot2 facet_wrap coord_fixed -#' @export -get_pr_curve <- function(pr_obj, ...){ - - ## -- aggreg - data_curve <- pr_obj$aggregate$pr_curve - data_auc <- pr_obj$aggregate$pr_auc - pr_obj$aggregate$pr_curve <- build_gg_pr_curve(data_curve, data_auc, col = from , ... ) - - ## -- indiv - data_curve <- pr_obj$byparams$pr_curve - data_auc <- pr_obj$byparams$pr_auc - pr_obj$byparams$pr_curve <- build_gg_pr_curve(data_curve, data_auc, col = from , ... ) + - ggplot2::facet_wrap(~description) + - ggplot2::coord_fixed() - - return(pr_obj) -} - - - - diff --git a/dev/flat_full.Rmd b/dev/flat_full.Rmd index f38ce2eea7ee73f7670d886f65ed9e935ff3b562..7af307ee486c87e38b49208f0d7c77b28df4b86e 100644 --- a/dev/flat_full.Rmd +++ b/dev/flat_full.Rmd @@ -7054,9 +7054,6 @@ Area_Under_Curve <- function(x, y, method = "trapezoid"){ if (complement==TRUE) match.bool <- !match.bool return( arglist[ match.bool] ) } - - - ``` @@ -7347,938 +7344,6 @@ test_that("get_pr_curve gets precision-recall curves and AUCs", { ``` -```{r function-rocr_pkg, filename = "rocr_pkg_functions"} - -#' @name prediction-class -#' @aliases prediction-class -#' -#' @title Class \code{prediction} -#' -#' @description -#' Object to encapsulate numerical predictions together with the -#' corresponding true class labels, optionally collecting predictions and -#' labels for several cross-validation or bootstrapping runs. -#' -#' @section Objects from the Class: -#' Objects can be created by using the \code{prediction} function. -#' -#' @note -#' Every \code{prediction} object contains information about the 2x2 -#' contingency table consisting of tp,tn,fp, and fn, along with the -#' marginal sums n.pos,n.neg,n.pos.pred,n.neg.pred, because these form -#' the basis for many derived performance measures. -#' -#' @slot predictions A list, in which each element is a vector of predictions -#' (the list has length > 1 for x-validation data. -#' @slot labels Analogously, a list in which each element is a vector of true -#' class labels. -#' @slot cutoffs A list in which each element is a vector of all necessary -#' cutoffs. Each cutoff vector consists of the predicted scores (duplicates -#' removed), in descending order. 
-#' @slot fp A list in which each element is a vector of the number (not the -#' rate!) of false positives induced by the cutoffs given in the corresponding -#' 'cutoffs' list entry. -#' @slot tp As fp, but for true positives. -#' @slot tn As fp, but for true negatives. -#' @slot fn As fp, but for false negatives. -#' @slot n.pos A list in which each element contains the number of positive -#' samples in the given x-validation run. -#' @slot n.neg As n.pos, but for negative samples. -#' @slot n.pos.pred A list in which each element is a vector of the number of -#' samples predicted as positive at the cutoffs given in the corresponding -#' 'cutoffs' entry. -#' @slot n.neg.pred As n.pos.pred, but for negatively predicted samples. -#' -#' -#' @author -#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander -#' \email{osander@gmail.com} -#' -#' @seealso -#' \code{\link{prediction}}, -#' \code{\link{performance}}, -#' \code{\link{performance-class}}, -#' \code{\link{plot.performance}} -#' -#' @export -setClass("prediction", - representation(predictions = "list", - labels = "list", - cutoffs = "list", - fp = "list", - tp = "list", - tn = "list", - fn = "list", - n.pos = "list", - n.neg = "list", - n.pos.pred = "list", - n.neg.pred = "list")) - -setMethod("show","prediction", - function(object){ - cat("A ", class(object), " instance\n", sep = "") - if(length(object@predictions) > 1L){ - cat(" with ", length(object@predictions)," cross ", - "validation runs ", sep = "") - if(length(unique(vapply(object@predictions,length,integer(1))))){ - cat("(equal lengths)", sep = "") - } else { - cat("(different lengths)", sep = "") - } - } else { - cat(" with ", length(object@predictions[[1L]]), - " data points", sep = "") - } - }) - -#' @name performance-class -#' @aliases performance-class -#' -#' @title Class \code{performance} -#' -#' @description -#' Object to capture the result of a performance evaluation, optionally -#' collecting evaluations from several cross-validation or bootstrapping runs. -#' -#' @section Objects from the Class: -#' Objects can be created by using the \code{performance} function. -#' -#' @details -#' A \code{performance} object can capture information from four -#' different evaluation scenarios: -#' \itemize{ -#' \item The behaviour of a cutoff-dependent performance measure across -#' the range of all cutoffs (e.g. \code{performance( predObj, 'acc' )} ). Here, -#' \code{x.values} contains the cutoffs, \code{y.values} the -#' corresponding values of the performance measure, and -#' \code{alpha.values} is empty.\cr -#' \item The trade-off between two performance measures across the -#' range of all cutoffs (e.g. \code{performance( predObj, -#' 'tpr', 'fpr' )} ). In this case, the cutoffs are stored in -#' \code{alpha.values}, while \code{x.values} and \code{y.values} -#' contain the corresponding values of the two performance measures.\cr -#' \item A performance measure that comes along with an obligatory -#' second axis (e.g. \code{performance( predObj, 'ecost' )} ). Here, the measure values are -#' stored in \code{y.values}, while the corresponding values of the -#' obligatory axis are stored in \code{x.values}, and \code{alpha.values} -#' is empty.\cr -#' \item A performance measure whose value is just a scalar -#' (e.g. \code{performance( predObj, 'auc' )} ). The value is then stored in -#' \code{y.values}, while \code{x.values} and \code{alpha.values} are -#' empty. -#' } -#' -#' @slot x.name Performance measure used for the x axis. 
-#' @slot y.name Performance measure used for the y axis. -#' @slot alpha.name Name of the unit that is used to create the parametrized -#' curve. Currently, curves can only be parametrized by cutoff, so -#' \code{alpha.name} is either \code{none} or \code{cutoff}. -#' @slot x.values A list in which each entry contains the x values of the curve -#' of this particular cross-validation run. \code{x.values[[i]]}, -#' \code{y.values[[i]]}, and \code{alpha.values[[i]]} correspond to each -#' other. -#' @slot y.values A list in which each entry contains the y values of the curve -#' of this particular cross-validation run. -#' @slot alpha.values A list in which each entry contains the cutoff values of -#' the curve of this particular cross-validation run. -#' -#' @references -#' A detailed list of references can be found on the ROCR homepage at -#' \url{http://rocr.bioinf.mpi-sb.mpg.de}. -#' -#' @author -#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander -#' \email{osander@gmail.com} -#' -#' @seealso -#' \code{\link{prediction}} -#' \code{\link{performance}}, -#' \code{\link{prediction-class}}, -#' \code{\link{plot.performance}} -#' -#' @export -setClass("performance", - representation(x.name = "character", - y.name = "character", - alpha.name = "character", - x.values = "list", - y.values = "list", - alpha.values = "list" )) - -setMethod("show","performance", - function(object){ - cat("A ", class(object), " instance\n", sep = "") - if(length(object@y.values[[1L]]) > 1L){ - cat(" '", object@x.name, "' vs. '", object@y.name, - "' (alpha: '",object@alpha.name,"')\n", sep = "") - } else { - cat(" '", object@y.name, "'\n", sep = "") - } - if(length(object@y.values) > 1L){ - cat(" for ", length(object@y.values)," cross ", - "validation runs ", sep = "") - } else { - if(length(object@y.values[[1L]]) > 1L){ - cat(" with ", length(object@y.values[[1L]])," data points", - sep = "") - } - } - }) - - - -#' @name prediction -#' -#' @title Function to create prediction objects -#' -#' @description -#' Every classifier evaluation using ROCR starts with creating a -#' \code{prediction} object. This function is used to transform the input data -#' (which can be in vector, matrix, data frame, or list form) into a -#' standardized format. -#' -#' @details -#' \code{predictions} and \code{labels} can simply be vectors of the same -#' length. However, in the case of cross-validation data, different -#' cross-validation runs can be provided as the *columns* of a matrix or -#' data frame, or as the entries of a list. In the case of a matrix or -#' data frame, all cross-validation runs must have the same length, whereas -#' in the case of a list, the lengths can vary across the cross-validation -#' runs. Internally, as described in section 'Value', all of these input -#' formats are converted to list representation. -#' -#' Since scoring classifiers give relative tendencies towards a negative -#' (low scores) or positive (high scores) class, it has to be declared -#' which class label denotes the negative, and which the positive class. -#' Ideally, labels should be supplied as ordered factor(s), the lower -#' level corresponding to the negative class, the upper level to the -#' positive class. If the labels are factors (unordered), numeric, -#' logical or characters, ordering of the labels is inferred from -#' R's built-in \code{<} relation (e.g. 0 < 1, -1 < 1, 'a' < 'b', -#' FALSE < TRUE). Use \code{label.ordering} to override this default -#' ordering. 
Please note that the ordering can be locale-dependent -#' e.g. for character labels '-1' and '1'. -#' -#' Currently, ROCR supports only binary classification (extensions toward -#' multiclass classification are scheduled for the next release, -#' however). If there are more than two distinct label symbols, execution -#' stops with an error message. If all predictions use the same two -#' symbols that are used for the labels, categorical predictions are -#' assumed. If there are more than two predicted values, but all numeric, -#' continuous predictions are assumed (i.e. a scoring -#' classifier). Otherwise, if more than two symbols occur in the -#' predictions, and not all of them are numeric, execution stops with an -#' error message. -#' -#' @param predictions A vector, matrix, list, or data frame containing the -#' predictions. -#' @param labels A vector, matrix, list, or data frame containing the true class -#' labels. Must have the same dimensions as \code{predictions}. -#' @param label.ordering The default ordering (cf.details) of the classes can -#' be changed by supplying a vector containing the negative and the positive -#' class label. -#' -#' @return An S4 object of class \code{prediction}. -#' -#' @author -#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander -#' \email{osander@gmail.com} -#' @export -prediction <- function(predictions, labels, label.ordering=NULL) { - - ## bring 'predictions' and 'labels' into list format, - ## each list entry representing one x-validation run - - ## convert predictions into canonical list format - if (is.data.frame(predictions)) { - names(predictions) <- c() - predictions <- as.list(predictions) - } else if (is.matrix(predictions)) { - predictions <- as.list(data.frame(predictions)) - names(predictions) <- c() - } else if (is.vector(predictions) && !is.list(predictions)) { - predictions <- list(predictions) - } else if (!is.list(predictions)) { - stop("Format of predictions is invalid. It couldn't be coerced to a list.", - call. = FALSE) - } - - ## convert labels into canonical list format - if (is.data.frame(labels)) { - names(labels) <- c() - labels <- as.list( labels) - } else if (is.matrix(labels)) { - labels <- as.list( data.frame( labels)) - names(labels) <- c() - } else if ((is.vector(labels) || - is.ordered(labels) || - is.factor(labels)) && - !is.list(labels)) { - labels <- list( labels) - } else if (!is.list(labels)) { - stop("Format of labels is invalid. It couldn't be coerced to a list.", - call. = FALSE) - } - - - if(any(vapply(predictions,anyNA, logical(1)))){ - warnings("'predictions' contains NA. These missing predictions will be removed from evaluation") - nonNA_pred <- !is.na(predictions) - predictions <- predictions[nonNA_pred] - labels <- labels[nonNA_pred] - } - - - ## Length consistency checks - if (length(predictions) != length(labels)) - stop(paste("Number of cross-validation runs must be equal", - "for predictions and labels.")) - if (! 
all(sapply(predictions, length) == sapply(labels, length))) - stop(paste("Number of predictions in each run must be equal", - "to the number of labels for each run.")) - - ## replace infinite numbers by max values - ## avoid prob with infinite values - #for (i in 1:length(predictions)) { - # epsilon <- max(predictions[[i]][is.finite(predictions[[i]] )]) - # idx_inf_values <- !is.finite( predictions[[i]] ) - # predictions[[i]][idx_inf_values] <- epsilon - #} - - ## only keep prediction/label pairs that are finite numbers - for (i in 1:length(predictions)) { - finite.bool <- is.finite( predictions[[i]] ) - predictions[[i]] <- predictions[[i]][ finite.bool ] - labels[[i]] <- labels[[i]][ finite.bool ] - } - - - - - ## abort if 'labels' format is inconsistent across - ## different cross-validation runs - label.format="" ## one of 'normal','factor','ordered' - if (all(sapply( labels, is.factor)) && - !any(sapply(labels, is.ordered))) { - label.format <- "factor" - } else if (all(sapply( labels, is.ordered))) { - label.format <- "ordered" - } else if (all(sapply( labels, is.character)) || - all(sapply( labels, is.numeric)) || - all(sapply( labels, is.logical))) { - label.format <- "normal" - } else { - stop(paste("Inconsistent label data type across different", - "cross-validation runs.")) - } - - ## abort if levels are not consistent across different - ## cross-validation runs - if (! all(sapply(labels, levels)==levels(labels[[1]])) ) { - stop(paste("Inconsistent factor levels across different", - "cross-validation runs.")) - } - - ## convert 'labels' into ordered factors, aborting if the number - ## of classes is not equal to 2. - levels <- c() - if ( label.format == "ordered" ) { - if (!is.null(label.ordering)) { - stop(paste("'labels' is already ordered. 
No additional", - "'label.ordering' must be supplied.")) - } else { - levels <- levels(labels[[1]]) - } - } else { - if ( is.null( label.ordering )) { - if ( label.format == "factor" ) levels <- sort(levels(labels[[1]])) - else levels <- sort( unique( unlist( labels))) - } else { - ## if (!setequal( levels, label.ordering)) { - if (!setequal( unique(unlist(labels)), label.ordering )) { - stop("Label ordering does not match class labels.") - } - levels <- label.ordering - } - for (i in 1:length(labels)) { - if (is.factor(labels)) - labels[[i]] <- ordered(as.character(labels[[i]]), - levels=levels) - else labels[[i]] <- ordered( labels[[i]], levels=levels) - } - - } - #print(levels) - #print(labels) - if (length(levels) != 2) { - message <- paste("Number of classes is not equal to 2.\n", - "HTRfit currently supports only evaluation of ", - "binary classification tasks.",sep="") - stop(message) - } - - ## determine whether predictions are continuous or categorical - ## (in the latter case stop - if (!is.numeric( unlist( predictions ))) { - stop("Currently, only continuous predictions are supported by HTRfit") - } - - ## compute cutoff/fp/tp data - - cutoffs <- list() - fp <- list() - tp <- list() - fn <- list() - tn <- list() - n.pos <- list() - n.neg <- list() - n.pos.pred <- list() - n.neg.pred <- list() - for (i in 1:length(predictions)) { - n.pos <- c( n.pos, sum( labels[[i]] == levels[2] )) - n.neg <- c( n.neg, sum( labels[[i]] == levels[1] )) - ans <- .compute.unnormalized.roc.curve( predictions[[i]], labels[[i]] ) - cutoffs <- c( cutoffs, list( ans$cutoffs )) - fp <- c( fp, list( ans$fp )) - tp <- c( tp, list( ans$tp )) - fn <- c( fn, list( n.pos[[i]] - tp[[i]] )) - tn <- c( tn, list( n.neg[[i]] - fp[[i]] )) - n.pos.pred <- c(n.pos.pred, list(tp[[i]] + fp[[i]]) ) - n.neg.pred <- c(n.neg.pred, list(tn[[i]] + fn[[i]]) ) - } - - - return( new("prediction", predictions=predictions, - labels=labels, - cutoffs=cutoffs, - fp=fp, - tp=tp, - fn=fn, - tn=tn, - n.pos=n.pos, - n.neg=n.neg, - n.pos.pred=n.pos.pred, - n.neg.pred=n.neg.pred)) -} - -## fast fp/tp computation based on cumulative summing -.compute.unnormalized.roc.curve <- function( predictions, labels ) { - ## determine the labels that are used for the pos. resp. neg. class : - pos.label <- levels(labels)[2] - neg.label <- levels(labels)[1] - - pred.order <- order(predictions, decreasing=TRUE) - predictions.sorted <- predictions[pred.order] - tp <- cumsum(labels[pred.order]==pos.label) - fp <- cumsum(labels[pred.order]==neg.label) - - ## remove fp & tp for duplicated predictions - ## as duplicated keeps the first occurrence, but we want the last, two - ## rev are used. - ## Highest cutoff (Infinity) corresponds to tp=0, fp=0 - dups <- rev(duplicated(rev(predictions.sorted))) - tp <- c(0, tp[!dups]) - fp <- c(0, fp[!dups]) - cutoffs <- c(Inf, predictions.sorted[!dups]) - - return(list( cutoffs=cutoffs, fp=fp, tp=tp )) -} - -#' @name performance -#' -#' @title Function to create performance objects -#' -#' @description -#' All kinds of predictor evaluations are performed using this function. -#' -#' @details -#' Here is the list of available performance measures. Let Y and -#' \eqn{\hat{Y}}{Yhat} be random variables representing the class and the prediction for -#' a randomly drawn sample, respectively. We denote by -#' \eqn{\oplus}{+} and \eqn{\ominus}{-} the positive and -#' negative class, respectively. 
Further, we use the following -#' abbreviations for empirical quantities: P (\# positive -#' samples), N (\# negative samples), TP (\# true positives), TN (\# true -#' negatives), FP (\# false positives), FN (\# false negatives). -#' \describe{ -#' \item{\code{acc}:}{accuracy. \eqn{P(\hat{Y}=Y)}{P(Yhat = Y)}. Estimated -#' as: \eqn{\frac{TP+TN}{P+N}}{(TP+TN)/(P+N)}.} -#' \item{\code{err}:}{Error rate. \eqn{P(\hat{Y}\ne Y)}{P(Yhat != -#' Y)}. Estimated as: \eqn{\frac{FP+FN}{P+N}}{(FP+FN)/(P+N)}.} -#' \item{\code{fpr}:}{False positive rate. \eqn{P(\hat{Y}=\oplus | Y = -#' \ominus)}{P(Yhat = + | Y = -)}. Estimated as: -#' \eqn{\frac{FP}{N}}{FP/N}.} -#' \item{\code{fall}:}{Fallout. Same as \code{fpr}.} -#' \item{\code{tpr}:}{True positive -#' rate. \eqn{P(\hat{Y}=\oplus|Y=\oplus)}{P(Yhat = + | Y = +)}. Estimated -#' as: \eqn{\frac{TP}{P}}{TP/P}.} -#' \item{\code{rec}:}{recall. Same as \code{tpr}.} -#' \item{\code{sens}:}{sensitivity. Same as \code{tpr}.} -#' \item{\code{fnr}:}{False negative -#' rate. \eqn{P(\hat{Y}=\ominus|Y=\oplus)}{P(Yhat = - | Y = -#' +)}. Estimated as: \eqn{\frac{FN}{P}}{FN/P}.} -#' \item{\code{miss}:}{Miss. Same as \code{fnr}.} -#' \item{\code{tnr}:}{True negative rate. \eqn{P(\hat{Y} = -#' \ominus|Y=\ominus)}{P(Yhat = - | Y = -)}.} -#' \item{\code{spec}:}{specificity. Same as \code{tnr}.} -#' \item{\code{ppv}:}{Positive predictive -#' value. \eqn{P(Y=\oplus|\hat{Y}=\oplus)}{P(Y = + | Yhat = -#' +)}. Estimated as: \eqn{\frac{TP}{TP+FP}}{TP/(TP+FP)}.} -#' \item{\code{prec}:}{precision. Same as \code{ppv}.} -#' \item{\code{npv}:}{Negative predictive -#' value. \eqn{P(Y=\ominus|\hat{Y}=\ominus)}{P(Y = - | Yhat = -#' -)}. Estimated as: \eqn{\frac{TN}{TN+FN}}{TN/(TN+FN)}.} -#' \item{\code{pcfall}:}{Prediction-conditioned -#' fallout. \eqn{P(Y=\ominus|\hat{Y}=\oplus)}{P(Y = - | Yhat = -#' +)}. Estimated as: \eqn{\frac{FP}{TP+FP}}{FP/(TP+FP)}.} -#' \item{\code{pcmiss}:}{Prediction-conditioned -#' miss. \eqn{P(Y=\oplus|\hat{Y}=\ominus)}{P(Y = + | Yhat = -#' -)}. Estimated as: \eqn{\frac{FN}{TN+FN}}{FN/(TN+FN)}.} -#' \item{\code{rpp}:}{Rate of positive predictions. \eqn{P( \hat{Y} = -#' \oplus)}{P(Yhat = +)}. Estimated as: (TP+FP)/(TP+FP+TN+FN).} -#' \item{\code{rnp}:}{Rate of negative predictions. \eqn{P( \hat{Y} = -#' \ominus)}{P(Yhat = -)}. Estimated as: (TN+FN)/(TP+FP+TN+FN).} -#' \item{\code{phi}:}{Phi correlation coefficient. \eqn{\frac{TP \cdot -#' TN - FP \cdot FN}{\sqrt{ (TP+FN) \cdot (TN+FP) \cdot (TP+FP) -#' \cdot (TN+FN)}}}{(TP*TN - -#' FP*FN)/(sqrt((TP+FN)*(TN+FP)*(TP+FP)*(TN+FN)))}. Yields a -#' number between -1 and 1, with 1 indicating a perfect -#' prediction, 0 indicating a random prediction. Values below 0 -#' indicate a worse than random prediction.} -#' \item{\code{mat}:}{Matthews correlation coefficient. Same as \code{phi}.} -#' \item{\code{mi}:}{Mutual information. \eqn{I(\hat{Y},Y) := H(Y) - -#' H(Y|\hat{Y})}{I(Yhat, Y) := H(Y) - H(Y | Yhat)}, where H is the -#' (conditional) entropy. Entropies are estimated naively (no bias -#' correction).} -#' \item{\code{chisq}:}{Chi square test statistic. \code{?chisq.test} -#' for details. Note that R might raise a warning if the sample size -#' is too small.} -#' \item{\code{odds}:}{Odds ratio. \eqn{\frac{TP \cdot TN}{FN \cdot -#' FP}}{(TP*TN)/(FN*FP)}. Note that odds ratio produces -#' Inf or NA values for all cutoffs corresponding to FN=0 or -#' FP=0. This can substantially decrease the plotted cutoff region.} -#' \item{\code{lift}:}{Lift -#' value. 
\eqn{\frac{P(\hat{Y}=\oplus|Y=\oplus)}{P(\hat{Y}=\oplus)}}{P(Yhat = + | -#' Y = +)/P(Yhat = +)}.} -#' \item{\code{f}:}{precision-recall F measure (van Rijsbergen, 1979). Weighted -#' harmonic mean of precision (P) and recall (R). \eqn{F = -#' \frac{1}{\alpha \frac{1}{P} + (1-\alpha)\frac{1}{R}}}{F = 1/ -#' (alpha*1/P + (1-alpha)*1/R)}. If -#' \eqn{\alpha=\frac{1}{2}}{alpha=1/2}, the mean is balanced. A -#' frequent equivalent formulation is -#' \eqn{F = \frac{(\beta^2+1) \cdot P \cdot R}{R + \beta^2 \cdot -#' P}}{F = (beta^2+1) * P * R / (R + beta^2 * P)}. In this formulation, the -#' mean is balanced if \eqn{\beta=1}{beta=1}. Currently, ROCR only accepts -#' the alpha version as input (e.g. \eqn{\alpha=0.5}{alpha=0.5}). If no -#' value for alpha is given, the mean will be balanced by default.} -#' \item{\code{rch}:}{ROC convex hull. A ROC (=\code{tpr} vs \code{fpr}) curve -#' with concavities (which represent suboptimal choices of cutoff) removed -#' (Fawcett 2001). Since the result is already a parametric performance -#' curve, it cannot be used in combination with other measures.} -#' \item{\code{auc}:}{Area under the ROC curve. This is equal to the value of the -#' Wilcoxon-Mann-Whitney test statistic and also the probability that the -#' classifier will score are randomly drawn positive sample higher than a -#' randomly drawn negative sample. Since the output of -#' \code{auc} is cutoff-independent, this -#' measure cannot be combined with other measures into a parametric -#' curve. The partial area under the ROC curve up to a given false -#' positive rate can be calculated by passing the optional parameter -#' \code{fpr.stop=0.5} (or any other value between 0 and 1) to -#' \code{performance}.} -#' \item{\code{aucpr}:}{Area under the precision/recall curve. Since the output -#' of \code{aucpr} is cutoff-independent, this measure cannot be combined -#' with other measures into a parametric curve.} -#' \item{\code{prbe}:}{precision-recall break-even point. The cutoff(s) where -#' precision and recall are equal. At this point, positive and negative -#' predictions are made at the same rate as their prevalence in the -#' data. Since the output of -#' \code{prbe} is just a cutoff-independent scalar, this -#' measure cannot be combined with other measures into a parametric curve.} -#' \item{\code{cal}:}{Calibration error. The calibration error is the -#' absolute difference between predicted confidence and actual reliability. This -#' error is estimated at all cutoffs by sliding a window across the -#' range of possible cutoffs. The default window size of 100 can be -#' adjusted by passing the optional parameter \code{window.size=200} -#' to \code{performance}. E.g., if for several -#' positive samples the output of the classifier is around 0.75, you might -#' expect from a well-calibrated classifier that the fraction of them -#' which is correctly predicted as positive is also around 0.75. In a -#' well-calibrated classifier, the probabilistic confidence estimates -#' are realistic. Only for use with -#' probabilistic output (i.e. scores between 0 and 1).} -#' \item{\code{mxe}:}{Mean cross-entropy. Only for use with -#' probabilistic output. \eqn{MXE :=-\frac{1}{P+N}( \sum_{y_i=\oplus} -#' ln(\hat{y}_i) + \sum_{y_i=\ominus} ln(1-\hat{y}_i))}{MXE := - 1/(P+N) \sum_{y_i=+} -#' ln(yhat_i) + \sum_{y_i=-} ln(1-yhat_i)}. 
Since the output of -#' \code{mxe} is just a cutoff-independent scalar, this -#' measure cannot be combined with other measures into a parametric curve.} -#' \item{\code{rmse}:}{Root-mean-squared error. Only for use with -#' numerical class labels. \eqn{RMSE:=\sqrt{\frac{1}{P+N}\sum_i (y_i -#' - \hat{y}_i)^2}}{RMSE := sqrt(1/(P+N) \sum_i (y_i - -#' yhat_i)^2)}. Since the output of -#' \code{rmse} is just a cutoff-independent scalar, this -#' measure cannot be combined with other measures into a parametric curve.} -#' \item{\code{sar}:}{Score combinining performance measures of different -#' characteristics, in the attempt of creating a more "robust" -#' measure (cf. Caruana R., ROCAI2004): -#' SAR = 1/3 * ( accuracy + Area under the ROC curve + Root -#' mean-squared error ).} -#' \item{\code{ecost}:}{Expected cost. For details on cost curves, -#' cf. Drummond&Holte 2000,2004. \code{ecost} has an obligatory x -#' axis, the so-called 'probability-cost function'; thus it cannot be -#' combined with other measures. While using \code{ecost} one is -#' interested in the lower envelope of a set of lines, it might be -#' instructive to plot the whole set of lines in addition to the lower -#' envelope. An example is given in \code{demo(ROCR)}.} -#' \item{\code{cost}:}{Cost of a classifier when -#' class-conditional misclassification costs are explicitly given. -#' Accepts the optional parameters \code{cost.fp} and -#' \code{cost.fn}, by which the costs for false positives and -#' negatives can be adjusted, respectively. By default, both are set -#' to 1.} -#' } -#' -#' @note -#' Here is how to call \code{performance()} to create some standard -#' evaluation plots: -#' \describe{ -#' \item{ROC curves:}{measure="tpr", x.measure="fpr".} -#' \item{precision/recall graphs:}{measure="prec", x.measure="rec".} -#' \item{sensitivity/specificity plots:}{measure="sens", x.measure="spec".} -#' \item{Lift charts:}{measure="lift", x.measure="rpp".} -#' } -#' -#' @param prediction.obj An object of class \code{prediction}. -#' @param measure Performance measure to use for the evaluation. A complete list -#' of the performance measures that are available for \code{measure} and -#' \code{x.measure} is given in the 'Details' section. -#' @param x.measure A second performance measure. If different from the default, -#' a two-dimensional curve, with \code{x.measure} taken to be the unit in -#' direction of the x axis, and \code{measure} to be the unit in direction of -#' the y axis, is created. This curve is parametrized with the cutoff. -#' @param ... Optional arguments (specific to individual performance measures). -#' -#' @return An S4 object of class \code{performance}. -#' -#' @author -#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander -#' \email{osander@gmail.com} -#' -#' @export -performance <- function(prediction.obj, - measure, - x.measure="cutoff", - ...) 
{ - - ## define the needed environments - envir.list <- .define.environments() - long.unit.names <- envir.list$long.unit.names - function.names <- envir.list$function.names - obligatory.x.axis <- envir.list$obligatory.x.axis - optional.arguments <- envir.list$optional.arguments - default.values <- envir.list$default.values - - ## abort in case of misuse - if (class(prediction.obj) != 'prediction' || - !exists(measure, where=long.unit.names, inherits=FALSE) || - !exists(x.measure, where=long.unit.names, inherits=FALSE)) { - stop(paste("Wrong argument types: First argument must be of type", - "'prediction'; second and optional third argument must", - "be available performance measures!")) - } - - ## abort, if attempt is made to use a measure that has an obligatory - ## x.axis as the x.measure (cannot be combined) - if (exists( x.measure, where=obligatory.x.axis, inherits=FALSE )) { - message <- paste("The performance measure", - x.measure, - "can only be used as 'measure', because it has", - "the following obligatory 'x.measure':\n", - get( x.measure, envir=obligatory.x.axis)) - stop(message) - } - - ## if measure is a performance measure with obligatory x.axis, then - ## enforce this axis: - if (exists( measure, where=obligatory.x.axis, inherits=FALSE )) { - x.measure <- get( measure, envir=obligatory.x.axis ) - } - - if (x.measure == "cutoff" || - exists( measure, where=obligatory.x.axis, inherits=FALSE )) { - - ## fetch from '...' any optional arguments for the performance - ## measure at hand that are given, otherwise fill up the default values - optional.args <- list(...) - argnames <- c() - if ( exists( measure, where=optional.arguments, inherits=FALSE )) { - argnames <- get( measure, envir=optional.arguments ) - default.arglist <- list() - for (i in 1:length(argnames)) { - default.arglist <- c(default.arglist, - get(paste(measure,":",argnames[i],sep=""), - envir=default.values, inherits=FALSE)) - } - names(default.arglist) <- argnames - - for (i in 1:length(argnames)) { - templist <- list(optional.args, - default.arglist[[i]]) - names(templist) <- c('arglist', argnames[i]) - - optional.args <- do.call('.farg', templist) - } - } - optional.args <- .select.args( optional.args, argnames ) - - ## determine function name - function.name <- get( measure, envir=function.names ) - - ## for each x-validation run, compute the requested performance measure - x.values <- list() - y.values <- list() - for (i in 1:length( prediction.obj@predictions )) { - argumentlist <- .sarg(optional.args, - predictions= prediction.obj@predictions[[i]], - labels= prediction.obj@labels[[i]], - cutoffs= prediction.obj@cutoffs[[i]], - fp= prediction.obj@fp[[i]], - tp= prediction.obj@tp[[i]], - fn= prediction.obj@fn[[i]], - tn= prediction.obj@tn[[i]], - n.pos= prediction.obj@n.pos[[i]], - n.neg= prediction.obj@n.neg[[i]], - n.pos.pred= prediction.obj@n.pos.pred[[i]], - n.neg.pred= prediction.obj@n.neg.pred[[i]]) - - ans <- do.call( function.name, argumentlist ) - - if (!is.null(ans[[1]])) x.values <- c( x.values, list( ans[[1]] )) - y.values <- c( y.values, list( ans[[2]] )) - } - - if (! 
(length(x.values)==0 || length(x.values)==length(y.values)) ) { - stop("Consistency error.") - } - - ## create a new performance object - return( new("performance", - x.name = get( x.measure, envir=long.unit.names ), - y.name = get( measure, envir=long.unit.names ), - alpha.name = "none", - x.values = x.values, - y.values = y.values, - alpha.values = list() )) - } else { - perf.obj.1 <- performance( prediction.obj, measure=x.measure, ... ) - perf.obj.2 <- performance( prediction.obj, measure=measure, ... ) - return( .combine.performance.objects( perf.obj.1, perf.obj.2 ) ) - } -} - -#' @importFrom stats approxfun -.combine.performance.objects <- function( p.obj.1, p.obj.2 ) { - ## some checks for misusage (in any way, this function is - ## only for internal use) - if ( p.obj.1@x.name != p.obj.2@x.name ) { - stop("Error: Objects need to have identical x axis.") - } - if ( p.obj.1@alpha.name != "none" || p.obj.2@alpha.name != "none") { - stop("Error: At least one of the two objects has already been merged.") - } - if (length(p.obj.1@x.values) != length(p.obj.2@x.values)) { - stop(paste("Only performance objects with identical number of", - "cross-validation runs can be combined.")) - } - - x.values <- list() - x.name <- p.obj.1@y.name - y.values <- list() - y.name <- p.obj.2@y.name - alpha.values <- list() - alpha.name <- p.obj.1@x.name - - for (i in 1:length( p.obj.1@x.values )) { - x.values.1 <- p.obj.1@x.values[[i]] - y.values.1 <- p.obj.1@y.values[[i]] - x.values.2 <- p.obj.2@x.values[[i]] - y.values.2 <- p.obj.2@y.values[[i]] - - ## cutoffs of combined object = merged cutoffs of simple objects - cutoffs <- sort( unique( c(x.values.1, x.values.2)), decreasing=TRUE ) - - ## calculate y.values at cutoffs using step function - y.values.int.1 <- stats::approxfun(x.values.1, y.values.1, - method="constant",f=1,rule=2)(cutoffs) - y.values.int.2 <- stats::approxfun(x.values.2, y.values.2, - method="constant",f=1,rule=2)(cutoffs) - - ## 'approxfun' ignores NA and NaN - objs <- list( y.values.int.1, y.values.int.2) - objs.x <- list( x.values.1, x.values.2 ) - na.cutoffs.1.bool <- is.na( y.values.1) & !is.nan( y.values.1 ) - nan.cutoffs.1.bool <- is.nan( y.values.1) - na.cutoffs.2.bool <- is.na( y.values.2) & !is.nan( y.values.2 ) - nan.cutoffs.2.bool <- is.nan( y.values.2) - bools <- list(na.cutoffs.1.bool, nan.cutoffs.1.bool, - na.cutoffs.2.bool, nan.cutoffs.2.bool) - values <- c(NA,NaN,NA,NaN) - - for (j in 1:4) { - for (k in which(bools[[j]])) { - interval.max <- objs.x[[ ceiling(j/2) ]][k] - interval.min <- -Inf - if (k < length(objs.x[[ ceiling(j/2) ]])) { - interval.min <- objs.x[[ ceiling(j/2) ]][k+1] - } - objs[[ ceiling(j/2) ]][cutoffs <= interval.max & - cutoffs > interval.min ] <- values[j] - } - } - - alpha.values <- c(alpha.values, list(cutoffs)) - x.values <- c(x.values, list(objs[[1]])) - y.values <- c(y.values, list(objs[[2]])) - } - - return( new("performance", - x.name=x.name, y.name=y.name, - alpha.name=alpha.name, x.values=x.values, - y.values=y.values, alpha.values=alpha.values)) -} - -.define.environments <- function() { - ## There are five environments: long.unit.names, function.names, - ## obligatory.x.axis, optional.arguments, default.values - - ## Define long names corresponding to the measure abbreviations. 
- long.unit.names <- new.env() - assign("none","None", envir=long.unit.names) - assign("cutoff", "Cutoff", envir=long.unit.names) - assign("acc", "accuracy", envir=long.unit.names) - assign("err", "Error Rate", envir=long.unit.names) - assign("fpr", "False positive rate", envir=long.unit.names) - assign("tpr", "True positive rate", envir=long.unit.names) - assign("rec", "recall", envir=long.unit.names) - assign("sens", "sensitivity", envir=long.unit.names) - assign("fnr", "False negative rate", envir=long.unit.names) - assign("tnr", "True negative rate", envir=long.unit.names) - assign("spec", "specificity", envir=long.unit.names) - assign("ppv", "Positive predictive value", envir=long.unit.names) - assign("prec", "precision", envir=long.unit.names) - assign("npv", "Negative predictive value", envir=long.unit.names) - assign("fall", "Fallout", envir=long.unit.names) - assign("miss", "Miss", envir=long.unit.names) - assign("pcfall", "Prediction-conditioned fallout", envir=long.unit.names) - assign("pcmiss", "Prediction-conditioned miss", envir=long.unit.names) - assign("rpp", "Rate of positive predictions", envir=long.unit.names) - assign("rnp", "Rate of negative predictions", envir=long.unit.names) - assign("auc","Area under the ROC curve", envir=long.unit.names) - assign("aucpr","Area under the precision/recall curve", envir=long.unit.names) - assign("cal", "Calibration error", envir=long.unit.names) - assign("mwp", "Median window position", envir=long.unit.names) - assign("prbe","precision/recall break-even point", envir=long.unit.names) - assign("rch", "ROC convex hull", envir=long.unit.names) - assign("mxe", "Mean cross-entropy", envir=long.unit.names) - assign("rmse","Root-mean-square error", envir=long.unit.names) - assign("phi", "Phi correlation coefficient", envir=long.unit.names) - assign("mat","Matthews correlation coefficient", envir=long.unit.names) - assign("mi", "Mutual information", envir=long.unit.names) - assign("chisq", "Chi-square test statistic", envir=long.unit.names) - assign("odds","Odds ratio", envir=long.unit.names) - assign("lift", "Lift value", envir=long.unit.names) - assign("f","precision-recall F measure", envir=long.unit.names) - assign("sar", "SAR", envir=long.unit.names) - assign("ecost", "Expected cost", envir=long.unit.names) - assign("cost", "Explicit cost", envir=long.unit.names) - - ## Define function names corresponding to the measure abbreviations. 
- function.names <- new.env() - assign("acc", ".performance.accuracy", envir=function.names) - assign("err", ".performance.error.rate", envir=function.names) - assign("fpr", ".performance.false.positive.rate", envir=function.names) - assign("tpr", ".performance.true.positive.rate", envir=function.names) - assign("rec", ".performance.true.positive.rate", envir=function.names) - assign("sens", ".performance.true.positive.rate", envir=function.names) - assign("fnr", ".performance.false.negative.rate", envir=function.names) - assign("tnr", ".performance.true.negative.rate", envir=function.names) - assign("spec", ".performance.true.negative.rate", envir=function.names) - assign("ppv", ".performance.positive.predictive.value", - envir=function.names) - assign("prec", ".performance.positive.predictive.value", - envir=function.names) - assign("npv", ".performance.negative.predictive.value", - envir=function.names) - assign("fall", ".performance.false.positive.rate", envir=function.names) - assign("miss", ".performance.false.negative.rate", envir=function.names) - assign("pcfall", ".performance.prediction.conditioned.fallout", - envir=function.names) - assign("pcmiss", ".performance.prediction.conditioned.miss", - envir=function.names) - assign("rpp", ".performance.rate.of.positive.predictions", - envir=function.names) - assign("rnp", ".performance.rate.of.negative.predictions", - envir=function.names) - assign("auc", ".performance.auc", envir=function.names) - assign("aucpr", ".performance.aucpr", envir=function.names) - assign("cal", ".performance.calibration.error", envir=function.names) - assign("prbe", ".performance.precision.recall.break.even.point", - envir=function.names) - assign("rch", ".performance.rocconvexhull", envir=function.names) - assign("mxe", ".performance.mean.cross.entropy", envir=function.names) - assign("rmse", ".performance.root.mean.squared.error", - envir=function.names) - assign("phi", ".performance.phi", envir=function.names) - assign("mat", ".performance.phi", envir=function.names) - assign("mi", ".performance.mutual.information", envir=function.names) - assign("chisq", ".performance.chisq", envir=function.names) - assign("odds", ".performance.odds.ratio", envir=function.names) - assign("lift", ".performance.lift", envir=function.names) - assign("f", ".performance.f", envir=function.names) - assign("sar", ".performance.sar", envir=function.names) - assign("ecost", ".performance.expected.cost", envir=function.names) - assign("cost", ".performance.cost", envir=function.names) - - ## If a measure comes along with an obligatory x axis (including "none"), - ## list it here. - obligatory.x.axis <- new.env() - assign("mxe", "none", envir=obligatory.x.axis) - assign("rmse", "none", envir=obligatory.x.axis) - assign("prbe", "none", envir=obligatory.x.axis) - assign("auc", "none", envir=obligatory.x.axis) - assign("aucpr", "none", envir=obligatory.x.axis) - assign("rch","none", envir=obligatory.x.axis) - ## ecost requires probability cost function as x axis, which is handled - ## implicitly, not as an explicit performance measure. - assign("ecost","none", envir=obligatory.x.axis) - - ## If a measure has optional arguments, list the names of the - ## arguments here. 
- optional.arguments <- new.env() - assign("cal", "window.size", envir=optional.arguments) - assign("f", "alpha", envir=optional.arguments) - assign("cost", c("cost.fp", "cost.fn"), envir=optional.arguments) - assign("auc", "fpr.stop", envir=optional.arguments) - - ## If a measure has additional arguments, list the default values - ## for them here. Naming convention: e.g. "cal" has an optional - ## argument "window.size" the key to use here is "cal:window.size" - ## (colon as separator) - default.values <- new.env() - assign("cal:window.size", 100, envir=default.values) - assign("f:alpha", 0.5, envir=default.values) - assign("cost:cost.fp", 1, envir=default.values) - assign("cost:cost.fn", 1, envir=default.values) - assign("auc:fpr.stop", 1, envir=default.values) - - list(long.unit.names=long.unit.names, function.names=function.names, - obligatory.x.axis=obligatory.x.axis, - optional.arguments=optional.arguments, - default.values=default.values) -} - -``` - ```{r function-simulation_report, filename = "simulation_report" } diff --git a/man/build_gg_pr_curve.Rd b/man/build_gg_pr_curve.Rd index 9120210020b6b1eaca73481756cde7101a996703..38afcbe7f66bce70241191a14665234ac1de7660 100644 --- a/man/build_gg_pr_curve.Rd +++ b/man/build_gg_pr_curve.Rd @@ -1,24 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/precision_recall.R, R/rocr_functions.R, -% R/rocr_pkg_functions.R +% Please edit documentation in R/precision_recall.R \name{build_gg_pr_curve} \alias{build_gg_pr_curve} \title{Builds a ggplot precision-recall curve.} \usage{ -build_gg_pr_curve( - data_curve, - data_auc, - palette_color = c("#500472", "#79cbb8"), - ... -) - -build_gg_pr_curve( - data_curve, - data_auc, - palette_color = c("#500472", "#79cbb8"), - ... -) - build_gg_pr_curve( data_curve, data_auc, @@ -36,16 +21,8 @@ build_gg_pr_curve( \item{...}{Additional arguments to be passed to \code{ggplot2::geom_path}.} } \value{ -A ggplot object representing the precision-recall curve. - -A ggplot object representing the precision-recall curve. - A ggplot object representing the precision-recall curve. } \description{ -This function takes data frames for precision-recall curve and AUC and builds a ggplot precision-recall curve. - -This function takes data frames for precision-recall curve and AUC and builds a ggplot precision-recall curve. - This function takes data frames for precision-recall curve and AUC and builds a ggplot precision-recall curve. } diff --git a/man/compute_pr_auc.Rd b/man/compute_pr_auc.Rd index 69a7287bada6125676492372a6d2b94b8ff15899..55c72ee0be3b74db9ffcec84343fd42189d858eb 100644 --- a/man/compute_pr_auc.Rd +++ b/man/compute_pr_auc.Rd @@ -1,30 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/precision_recall.R, R/rocr_functions.R, -% R/rocr_pkg_functions.R +% Please edit documentation in R/precision_recall.R \name{compute_pr_auc} \alias{compute_pr_auc} \title{Computes area under the precision-recall curve (AUC).} \usage{ -compute_pr_auc(dt) - -compute_pr_auc(dt) - compute_pr_auc(dt) } \arguments{ \item{dt}{A data table with columns for recall and precision.} } \value{ -A numeric value representing the AUC. - -A numeric value representing the AUC. - A numeric value representing the AUC. } \description{ -This function calculates the area under the precision-recall curve (AUC). - -This function calculates the area under the precision-recall curve (AUC). - This function calculates the area under the precision-recall curve (AUC). 
} diff --git a/man/compute_pr_curve.Rd b/man/compute_pr_curve.Rd index 0726f7ab391ed7a5d9fd427c2b2a94d2fe4494b0..92263e1043bedc3506cf844b632ef187f33ef5a0 100644 --- a/man/compute_pr_curve.Rd +++ b/man/compute_pr_curve.Rd @@ -1,30 +1,17 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/precision_recall.R, R/rocr_functions.R, -% R/rocr_pkg_functions.R +% Please edit documentation in R/precision_recall.R \name{compute_pr_curve} \alias{compute_pr_curve} \title{Computes the precision-recall curve (AUC).} \usage{ -compute_pr_curve(dt) - -compute_pr_curve(dt) - compute_pr_curve(dt) } \arguments{ \item{dt}{A data frame with columns truth (first column) and score (second column).} } \value{ -A dataframe with precision recall. - -A dataframe with precision recall. - A dataframe with precision recall. } \description{ -Computes the precision-recall curve (AUC). - -Computes the precision-recall curve (AUC). - Computes the precision-recall curve (AUC). } diff --git a/man/get_pr_curve.Rd b/man/get_pr_curve.Rd index 4ac37a2babe24d9368395b4364057d9fdbbd463c..815667a44dc289985171cb156cfda37b460b1673 100644 --- a/man/get_pr_curve.Rd +++ b/man/get_pr_curve.Rd @@ -1,14 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/precision_recall.R, R/rocr_functions.R, -% R/rocr_pkg_functions.R +% Please edit documentation in R/precision_recall.R \name{get_pr_curve} \alias{get_pr_curve} \title{Gets precision-recall curves and AUC for both aggregated and individual parameters.} \usage{ -get_pr_curve(pr_obj, ...) - -get_pr_curve(pr_obj, ...) - get_pr_curve(pr_obj, ...) } \arguments{ @@ -17,16 +12,8 @@ get_pr_curve(pr_obj, ...) \item{...}{Additional arguments to be passed to \code{ggplot2::geom_path}.} } \value{ -precision-recall curves and AUCs for both aggregated and individual parameters. - -precision-recall curves and AUCs for both aggregated and individual parameters. - precision-recall curves and AUCs for both aggregated and individual parameters. } \description{ -This function takes a precision-recall object and returns precision-recall curves and AUCs for both aggregated and individual parameters. - -This function takes a precision-recall object and returns precision-recall curves and AUCs for both aggregated and individual parameters. - This function takes a precision-recall object and returns precision-recall curves and AUCs for both aggregated and individual parameters. } diff --git a/man/get_pr_object.Rd b/man/get_pr_object.Rd index 539408f23d63d1e594d91541a77a9c4408e1f03c..2033414e19bda0bd44ed78787c0d0ee6b90767f4 100644 --- a/man/get_pr_object.Rd +++ b/man/get_pr_object.Rd @@ -1,24 +1,9 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/precision_recall.R, R/rocr_functions.R, -% R/rocr_pkg_functions.R +% Please edit documentation in R/precision_recall.R \name{get_pr_object} \alias{get_pr_object} \title{Gets precision-recall objects for a given parameter.} \usage{ -get_pr_object( - evaldata_params, - col_param = "description", - col_truth = "isDE", - col_score = "p.adj" -) - -get_pr_object( - evaldata_params, - col_param = "description", - col_truth = "isDE", - col_score = "p.adj" -) - get_pr_object( evaldata_params, col_param = "description", @@ -36,19 +21,9 @@ get_pr_object( \item{col_score}{Column name for predicted scores.} } \value{ -A list containing precision-recall curves and AUCs for each group and an aggregate precision-recall curve and AUC. 
- -A list containing precision-recall curves and AUCs for each group and an aggregate precision-recall curve and AUC. - A list containing precision-recall curves and AUCs for each group and an aggregate precision-recall curve and AUC. } \description{ -This function takes a data table of evaluation parameters and returns precision-recall curves -for each term and an aggregate precision-recall curve. - -This function takes a data table of evaluation parameters and returns precision-recall curves -for each term and an aggregate precision-recall curve. - This function takes a data table of evaluation parameters and returns precision-recall curves for each term and an aggregate precision-recall curve. } diff --git a/man/performance-class.Rd b/man/performance-class.Rd index 3d3bc11f542186cdbd9bd6ffef695ab637853778..3f939de7388653b07b3ea10973945d29c166f89c 100644 --- a/man/performance-class.Rd +++ b/man/performance-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fake-section-title.R +% Please edit documentation in R/rocr_pkg_classes.R \docType{class} \name{performance-class} \alias{performance-class} diff --git a/man/performance.Rd b/man/performance.Rd index 886aee2b7bccdf04404eb1f60278cd14d4fabf34..5fc9a1844e907c2fbc907d55c444b72d114c83f2 100644 --- a/man/performance.Rd +++ b/man/performance.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fake-section-title.R +% Please edit documentation in R/rocr_pkg_classes.R \name{performance} \alias{performance} \title{Function to create performance objects} diff --git a/man/prediction-class.Rd b/man/prediction-class.Rd index 472b9af76bed7e16d3fa0ae6e855ed89da7c76e5..3c430b472eaa658196792e615e54d9985b940781 100644 --- a/man/prediction-class.Rd +++ b/man/prediction-class.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fake-section-title.R +% Please edit documentation in R/rocr_pkg_classes.R \docType{class} \name{prediction-class} \alias{prediction-class} diff --git a/man/prediction.Rd b/man/prediction.Rd index 1d20b7865e3971ae541a0d0062d7d3fdd3186dcb..417d98d283fce79adee7d6614ea79b0ecc3751d9 100644 --- a/man/prediction.Rd +++ b/man/prediction.Rd @@ -1,5 +1,5 @@ % Generated by roxygen2: do not edit by hand -% Please edit documentation in R/fake-section-title.R +% Please edit documentation in R/rocr_pkg_classes.R \name{prediction} \alias{prediction} \title{Function to create prediction objects}
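
A minimal usage sketch of the precision-recall API touched by this patch. It assumes the canonical copies of `get_pr_object()` and `get_pr_curve()` remain exported from `R/precision_recall.R` (the regenerated man pages above now point there, and the deleted `R/rocr_functions.R` / `R/rocr_pkg_functions.R` copies were byte-identical duplicates, blob 920b403). The input data frame is hypothetical; its column names follow the defaults in the function signatures (`from`, `description`, `effect`, `isDE`, `p.adj`).

```r
library(HTRfit)

## Toy evaluation table: two terms ("description"), ground truth in isDE,
## adjusted p-values as scores. get_pr_object() keeps only effect == "fixed".
set.seed(42)
evaldata_params <- data.frame(
  from        = "HTRfit",
  description = rep(c("genotype", "environment"), each = 50),
  effect      = "fixed",
  isDE        = rep(c(TRUE, FALSE), 50),
  p.adj       = runif(100)
)

pr <- get_pr_object(evaldata_params)  # PR curves + AUCs, per term and aggregated
pr$aggregate$pr_auc                   # pr_AUC, pr_randm_AUC, pr_performance_ratio
pr <- get_pr_curve(pr)                # attach ggplot objects
pr$byparams$pr_curve                  # faceted PR curves, one panel per term
```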