From e3ae7b8effd42ab76efdf3c819ed02dcf811cb07 Mon Sep 17 00:00:00 2001
From: aduvermy <arnaud.duvermy@ens-lyon.fr>
Date: Thu, 14 Mar 2024 14:31:19 +0100
Subject: [PATCH] missing file

---
 R/fake-section-title.R | 931 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 931 insertions(+)
 create mode 100644 R/fake-section-title.R

diff --git a/R/fake-section-title.R b/R/fake-section-title.R
new file mode 100644
index 0000000..4da9d9a
--- /dev/null
+++ b/R/fake-section-title.R
@@ -0,0 +1,931 @@
+# WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand
+
+
+#' @name prediction-class
+#' @aliases prediction-class
+#'
+#' @title Class \code{prediction}
+#'
+#' @description
+#' Object to encapsulate numerical predictions together with the
+#' corresponding true class labels, optionally collecting predictions and
+#' labels for several cross-validation or bootstrapping runs.
+#'
+#' @section Objects from the Class:
+#' Objects can be created by using the \code{prediction} function.
+#'
+#' @note
+#' Every \code{prediction} object contains information about the 2x2
+#' contingency table consisting of tp, tn, fp, and fn, along with the
+#' marginal sums n.pos, n.neg, n.pos.pred, and n.neg.pred, because these
+#' form the basis for many derived performance measures.
+#'
+#' @slot predictions A list, in which each element is a vector of predictions
+#'   (the list has length > 1 for x-validation data).
+#' @slot labels Analogously, a list in which each element is a vector of true
+#'   class labels.
+#' @slot cutoffs A list in which each element is a vector of all necessary
+#'   cutoffs. Each cutoff vector consists of the predicted scores (duplicates
+#'   removed), in descending order.
+#' @slot fp A list in which each element is a vector of the number (not the
+#'   rate!) of false positives induced by the cutoffs given in the corresponding
+#'   'cutoffs' list entry.
+#' @slot tp As fp, but for true positives.
+#' @slot tn As fp, but for true negatives.
+#' @slot fn As fp, but for false negatives.
+#' @slot n.pos A list in which each element contains the number of positive
+#'   samples in the given x-validation run.
+#' @slot n.neg As n.pos, but for negative samples.
+#' @slot n.pos.pred A list in which each element is a vector of the number of
+#'   samples predicted as positive at the cutoffs given in the corresponding
+#'   'cutoffs' entry.
+#' @slot n.neg.pred As n.pos.pred, but for negatively predicted samples.
+#'
+#'
+#' @author
+#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander
+#' \email{osander@gmail.com}
+#'
+#' @seealso
+#' \code{\link{prediction}},
+#' \code{\link{performance}},
+#' \code{\link{performance-class}},
+#' \code{\link{plot.performance}}
+#'
+#' @export
+setClass("prediction",
+         representation(predictions = "list",
+                        labels      = "list",
+                        cutoffs     = "list",
+                        fp          = "list",
+                        tp          = "list",
+                        tn          = "list",
+                        fn          = "list",
+                        n.pos       = "list",
+                        n.neg       = "list",
+                        n.pos.pred  = "list",
+                        n.neg.pred  = "list"))
+
+setMethod("show","prediction",
+          function(object){
+              cat("A ", class(object), " instance\n", sep = "")
+              if(length(object@predictions) > 1L){
+                  cat("  with ", length(object@predictions)," cross ",
+                      "validation runs ", sep = "")
+                  if(length(unique(vapply(object@predictions,length,integer(1)))) == 1L){
+                      cat("(equal lengths)", sep = "")
+                  } else {
+                      cat("(different lengths)", sep = "")
+                  }
+              } else {
+                  cat("  with ", length(object@predictions[[1L]]),
+                      " data points", sep = "")
+              }
+          })
+
+#' @name performance-class
+#' @aliases performance-class
+#'
+#' @title Class \code{performance}
+#'
+#' @description
+#' Object to capture the result of a performance evaluation, optionally
+#' collecting evaluations from several cross-validation or bootstrapping runs.
+#'
+#' @section Objects from the Class:
+#' Objects can be created by using the \code{performance} function.
+#'
+#' @details
+#' A \code{performance} object can capture information from four
+#' different evaluation scenarios:
+#'   \itemize{
+#'     \item The behaviour of a cutoff-dependent performance measure across
+#'     the range of all cutoffs (e.g. \code{performance( predObj, 'acc' )} ). Here,
+#'     \code{x.values} contains the cutoffs, \code{y.values} the
+#'     corresponding values of the performance measure, and
+#'     \code{alpha.values} is empty.\cr
+#'     \item The trade-off between two performance measures across the
+#'     range of all cutoffs (e.g. \code{performance( predObj, 'tpr', 'fpr' )} ).
+#'     In this case, the cutoffs are stored in \code{alpha.values}, while
+#'     \code{x.values} and \code{y.values} contain the corresponding values
+#'     of the two performance measures.\cr
+#'     \item A performance measure that comes along with an obligatory
+#'     second axis (e.g. \code{performance( predObj, 'ecost' )} ). Here, the measure values are
+#'     stored in \code{y.values}, while the corresponding values of the
+#'     obligatory axis are stored in \code{x.values}, and \code{alpha.values}
+#'     is empty.\cr
+#'     \item A performance measure whose value is just a scalar
+#'     (e.g. \code{performance( predObj, 'auc' )} ). The value is then stored in
+#'     \code{y.values}, while \code{x.values} and \code{alpha.values} are
+#'     empty.
+#'   }
+#'
+#' @slot x.name Performance measure used for the x axis.
+#' @slot y.name Performance measure used for the y axis.
+#' @slot alpha.name Name of the unit that is used to create the parametrized
+#'   curve. Currently, curves can only be parametrized by cutoff, so
+#'   \code{alpha.name} is either \code{none} or \code{cutoff}.
+#' @slot x.values A list in which each entry contains the x values of the curve
+#'   of this particular cross-validation run. \code{x.values[[i]]},
+#'   \code{y.values[[i]]}, and \code{alpha.values[[i]]} correspond to each
+#'   other.
+#' @slot y.values A list in which each entry contains the y values of the curve
+#'   of this particular cross-validation run.
+#' @slot alpha.values A list in which each entry contains the cutoff values of
+#'   the curve of this particular cross-validation run.
+#'
+#' @references
+#' A detailed list of references can be found on the ROCR homepage at
+#' \url{http://rocr.bioinf.mpi-sb.mpg.de}.
+#'
+#' @author
+#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander
+#' \email{osander@gmail.com}
+#'
+#' @seealso
+#' \code{\link{prediction}},
+#' \code{\link{performance}},
+#' \code{\link{prediction-class}},
+#' \code{\link{plot.performance}}
+#'
+#' @export
+setClass("performance",
+         representation(x.name       = "character",
+                        y.name       = "character",
+                        alpha.name   = "character",
+                        x.values     = "list",
+                        y.values     = "list",
+                        alpha.values = "list" ))
+
+setMethod("show","performance",
+          function(object){
+              cat("A ", class(object), " instance\n", sep = "")
+              if(length(object@y.values[[1L]]) > 1L){
+                  cat("  '", object@x.name, "' vs. '", object@y.name,
+                      "' (alpha: '",object@alpha.name,"')\n", sep = "")
+              } else {
+                  cat("  '", object@y.name, "'\n", sep = "")
+              }
+              if(length(object@y.values) > 1L){
+                  cat("  for ", length(object@y.values)," cross ",
+                      "validation runs ", sep = "")
+              } else {
+                  if(length(object@y.values[[1L]]) > 1L){
+                      cat("  with ", length(object@y.values[[1L]])," data points",
+                          sep = "")
+                  }
+              }
+          })
+
+
+
+#' @name prediction
+#'
+#' @title Function to create prediction objects
+#'
+#' @description
+#' Every classifier evaluation using ROCR starts with creating a
+#' \code{prediction} object. This function is used to transform the input data
+#' (which can be in vector, matrix, data frame, or list form) into a
+#' standardized format.
+#'
+#' @details
+#' \code{predictions} and \code{labels} can simply be vectors of the same
+#' length. However, in the case of cross-validation data, different
+#' cross-validation runs can be provided as the *columns* of a matrix or
+#' data frame, or as the entries of a list. In the case of a matrix or
+#' data frame, all cross-validation runs must have the same length, whereas
+#' in the case of a list, the lengths can vary across the cross-validation
+#' runs. Internally, as described in section 'Value', all of these input
+#' formats are converted to list representation.
+#'
+#' Since scoring classifiers give relative tendencies towards a negative
+#' (low scores) or positive (high scores) class, it has to be declared
+#' which class label denotes the negative, and which the positive class.
+#' Ideally, labels should be supplied as ordered factor(s), the lower
+#' level corresponding to the negative class, the upper level to the
+#' positive class. If the labels are factors (unordered), numeric,
+#' logical or characters, ordering of the labels is inferred from
+#' R's built-in \code{<} relation (e.g. 0 < 1, -1 < 1, 'a' < 'b',
+#' FALSE < TRUE). Use \code{label.ordering} to override this default
+#' ordering. Please note that the ordering can be locale-dependent
+#' e.g. for character labels '-1' and '1'.
+#'
+#' Currently, ROCR supports only binary classification (extensions toward
+#' multiclass classification are scheduled for the next release,
+#' however). If there are more than two distinct label symbols, execution
+#' stops with an error message. If all predictions use the same two
+#' symbols that are used for the labels, categorical predictions are
+#' assumed. If there are more than two predicted values, but all numeric,
+#' continuous predictions are assumed (i.e. a scoring
+#' classifier). Otherwise, if more than two symbols occur in the
+#' predictions, and not all of them are numeric, execution stops with an
+#' error message.
+#'
+#' @param predictions A vector, matrix, list, or data frame containing the
+#'   predictions.
+#' @param labels A vector, matrix, list, or data frame containing the true class
+#'   labels. Must have the same dimensions as \code{predictions}.
+#' @param label.ordering The default ordering (cf. details) of the classes can
+#'   be changed by supplying a vector containing the negative and the positive
+#'   class label.
+#'
+#' @return An S4 object of class \code{prediction}.
+#'
+#' @author
+#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander
+#' \email{osander@gmail.com}
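+#'
+#' @examples
+#' # Illustrative sketch only: the scores and labels below are made up and
+#' # serve just to show the expected input shapes.
+#' \dontrun{
+#' ## a single run, supplied as plain vectors
+#' pred <- prediction(
+#'   predictions = c(0.9, 0.8, 0.35, 0.6, 0.1),
+#'   labels = c(1, 1, 0, 1, 0),
+#'   label.ordering = c(0, 1)  # 0 = negative class, 1 = positive class
+#' )
+#' slot(pred, "n.pos")[[1]]  # number of positive samples in the single run
+#'
+#' ## several cross-validation runs, supplied as a list (lengths may differ)
+#' pred_cv <- prediction(
+#'   predictions = list(runif(20), runif(30)),
+#'   labels = list(rep(c(0, 1), 10), rep(c(0, 1), 15))
+#' )
+#' }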
+#' @export
+prediction <- function(predictions, labels, label.ordering=NULL) {
+
+  ## bring 'predictions' and 'labels' into list format,
+  ## each list entry representing one x-validation run
+
+  ## convert predictions into canonical list format
+  if (is.data.frame(predictions)) {
+    names(predictions) <- c()
+    predictions <- as.list(predictions)
+  } else if (is.matrix(predictions)) {
+    predictions <- as.list(data.frame(predictions))
+    names(predictions) <- c()
+  } else if (is.vector(predictions) && !is.list(predictions)) {
+    predictions <- list(predictions)
+  } else if (!is.list(predictions)) {
+    stop("Format of predictions is invalid. It couldn't be coerced to a list.",
+         call. = FALSE)
+  }
+
+  ## convert labels into canonical list format
+  if (is.data.frame(labels)) {
+    names(labels) <- c()
+    labels <- as.list( labels)
+  } else if (is.matrix(labels)) {
+    labels <- as.list( data.frame( labels))
+    names(labels) <- c()
+  } else if ((is.vector(labels) ||
+              is.ordered(labels) ||
+              is.factor(labels)) &&
+             !is.list(labels)) {
+    labels <- list( labels)
+  } else if (!is.list(labels)) {
+    stop("Format of labels is invalid. It couldn't be coerced to a list.",
+         call. = FALSE)
+  }
+
+  
+  ## drop NA predictions (and the matching labels) within each run
+  if(any(vapply(predictions, anyNA, logical(1)))){
+    warning("'predictions' contains NA. These missing predictions will be removed from evaluation")
+    for (i in seq_along(predictions)) {
+      nonNA_pred <- !is.na(predictions[[i]])
+      predictions[[i]] <- predictions[[i]][nonNA_pred]
+      labels[[i]] <- labels[[i]][nonNA_pred]
+    }
+  }
+  
+  
+  ## Length consistency checks
+  if (length(predictions) != length(labels))
+    stop(paste("Number of cross-validation runs must be equal",
+               "for predictions and labels."))
+  if (! all(sapply(predictions, length) == sapply(labels, length)))
+    stop(paste("Number of predictions in each run must be equal",
+               "to the number of labels for each run."))
+
+  ## Alternative (disabled): replace infinite predictions by the largest
+  ## finite value instead of dropping them, to avoid problems with Inf.
+  #for (i in 1:length(predictions)) {
+  #  epsilon <- max(predictions[[i]][is.finite(predictions[[i]] )])
+  #  idx_inf_values <- !is.finite( predictions[[i]]  )
+  #  predictions[[i]][idx_inf_values] <- epsilon
+  #}
+  
+  ## only keep prediction/label pairs that are finite numbers
+  for (i in 1:length(predictions)) {
+    finite.bool <- is.finite( predictions[[i]] )
+    predictions[[i]] <- predictions[[i]][ finite.bool ]
+    labels[[i]] <- labels[[i]][ finite.bool ]
+  }
+
+  ## abort if 'labels' format is inconsistent across
+  ## different cross-validation runs
+  label.format=""  ## one of 'normal','factor','ordered'
+  if (all(sapply( labels, is.factor)) &&
+      !any(sapply(labels, is.ordered))) {
+    label.format <- "factor"
+  } else if (all(sapply( labels, is.ordered))) {
+    label.format <- "ordered"
+  } else if (all(sapply( labels, is.character)) ||
+             all(sapply( labels, is.numeric)) ||
+             all(sapply( labels, is.logical))) {
+    label.format <- "normal"
+  } else {
+    stop(paste("Inconsistent label data type across different",
+               "cross-validation runs."))
+  }
+
+  ## abort if levels are not consistent across different
+  ## cross-validation runs
+  if (! all(sapply(labels, levels)==levels(labels[[1]])) ) {
+    stop(paste("Inconsistent factor levels across different",
+               "cross-validation runs."))
+  }
+
+  ## convert 'labels' into ordered factors, aborting if the number
+  ## of classes is not equal to 2.
+  levels <- c()
+  if ( label.format == "ordered" ) {
+    if (!is.null(label.ordering)) {
+      stop(paste("'labels' is already ordered. No additional",
+                 "'label.ordering' must be supplied."))
+    } else {
+      levels <- levels(labels[[1]])
+    }
+  } else {
+    if ( is.null( label.ordering )) {
+      if ( label.format == "factor" ) levels <- sort(levels(labels[[1]]))
+      else levels <- sort( unique( unlist( labels)))
+    } else {
+      ## if (!setequal( levels, label.ordering)) {
+      if (!setequal( unique(unlist(labels)), label.ordering )) {
+        stop("Label ordering does not match class labels.")
+      }
+      levels <- label.ordering
+    }
+    for (i in 1:length(labels)) {
+      if (is.factor(labels[[i]]))
+        labels[[i]] <- ordered(as.character(labels[[i]]),
+                               levels=levels)
+      else labels[[i]] <- ordered( labels[[i]], levels=levels)
+    }
+
+  }
+  if (length(levels) != 2) {
+    message <- paste("Number of classes is not equal to 2.\n",
+                     "HTRfit currently supports only evaluation of ",
+                     "binary classification tasks.",sep="")
+    stop(message)
+  }
+
+  ## determine whether predictions are continuous or categorical
+  ## (in the latter case, stop with an error)
+  if (!is.numeric( unlist( predictions ))) {
+    stop("Currently, only continuous predictions are supported by HTRfit")
+  }
+
+  ## compute cutoff/fp/tp data
+
+  cutoffs <- list()
+  fp <- list()
+  tp <- list()
+  fn <- list()
+  tn <- list()
+  n.pos <- list()
+  n.neg <- list()
+  n.pos.pred <- list()
+  n.neg.pred <- list()
+  for (i in 1:length(predictions)) {
+    n.pos <- c( n.pos, sum( labels[[i]] == levels[2] ))
+    n.neg <- c( n.neg, sum( labels[[i]] == levels[1] ))
+    ans <- .compute.unnormalized.roc.curve( predictions[[i]], labels[[i]] )
+    cutoffs <- c( cutoffs, list( ans$cutoffs ))
+    fp <- c( fp, list( ans$fp ))
+    tp <- c( tp, list( ans$tp ))
+    fn <- c( fn, list( n.pos[[i]] - tp[[i]] ))
+    tn <- c( tn, list( n.neg[[i]] - fp[[i]] ))
+    n.pos.pred <- c(n.pos.pred, list(tp[[i]] + fp[[i]]) )
+    n.neg.pred <- c(n.neg.pred, list(tn[[i]] + fn[[i]]) )
+  }
+
+
+  return( new("prediction", predictions=predictions,
+              labels=labels,
+              cutoffs=cutoffs,
+              fp=fp,
+              tp=tp,
+              fn=fn,
+              tn=tn,
+              n.pos=n.pos,
+              n.neg=n.neg,
+              n.pos.pred=n.pos.pred,
+              n.neg.pred=n.neg.pred))
+}
+
+## fast fp/tp computation based on cumulative summing
+.compute.unnormalized.roc.curve <- function( predictions, labels ) {
+  ## determine the labels that are used for the pos. resp. neg. class :
+  pos.label <- levels(labels)[2]
+  neg.label <- levels(labels)[1]
+
+  pred.order <- order(predictions, decreasing=TRUE)
+  predictions.sorted <- predictions[pred.order]
+  tp <- cumsum(labels[pred.order]==pos.label)
+  fp <- cumsum(labels[pred.order]==neg.label)
+
+  ## remove fp & tp for duplicated predictions:
+  ## duplicated() keeps the first occurrence, but we want the last,
+  ## so two rev() calls are used.
+  ## Highest cutoff (Infinity) corresponds to tp=0, fp=0
+  dups <- rev(duplicated(rev(predictions.sorted)))
+  tp <- c(0, tp[!dups])
+  fp <- c(0, fp[!dups])
+  cutoffs <- c(Inf, predictions.sorted[!dups])
+
+  return(list( cutoffs=cutoffs, fp=fp, tp=tp ))
+}
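+
+## Worked illustration of .compute.unnormalized.roc.curve (made-up numbers):
+## predictions 0.9 0.8 0.8 0.3 with labels +,-,+,- sorted by decreasing score
+## give running sums tp = 1 1 2 2 and fp = 0 1 1 2; keeping only the last
+## entry of the tied score 0.8 and prepending the Inf cutoff yields
+## cutoffs = Inf 0.9 0.8 0.3, tp = 0 1 2 2, fp = 0 0 1 2.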
+
+#' @name performance
+#'
+#' @title Function to create performance objects
+#'
+#' @description
+#' All kinds of predictor evaluations are performed using this function.
+#'
+#' @details
+#' Here is the list of available performance measures. Let Y and
+#' \eqn{\hat{Y}}{Yhat} be random variables representing the class and the prediction for
+#' a randomly drawn sample, respectively. We denote by
+#' \eqn{\oplus}{+} and \eqn{\ominus}{-} the positive and
+#' negative class, respectively. Further, we use the following
+#' abbreviations for empirical quantities: P (\# positive
+#' samples), N (\# negative samples), TP (\# true positives), TN (\# true
+#' negatives), FP (\# false positives), FN (\# false negatives).
+#' \describe{
+#'  \item{\code{acc}:}{accuracy. \eqn{P(\hat{Y}=Y)}{P(Yhat = Y)}. Estimated
+#'    as: \eqn{\frac{TP+TN}{P+N}}{(TP+TN)/(P+N)}.}
+#'  \item{\code{err}:}{Error rate. \eqn{P(\hat{Y}\ne Y)}{P(Yhat != Y)}.
+#'    Estimated as: \eqn{\frac{FP+FN}{P+N}}{(FP+FN)/(P+N)}.}
+#'  \item{\code{fpr}:}{False positive rate.
+#'    \eqn{P(\hat{Y}=\oplus | Y = \ominus)}{P(Yhat = + | Y = -)}.
+#'    Estimated as: \eqn{\frac{FP}{N}}{FP/N}.}
+#'  \item{\code{fall}:}{Fallout. Same as \code{fpr}.}
+#'  \item{\code{tpr}:}{True positive rate.
+#'    \eqn{P(\hat{Y}=\oplus|Y=\oplus)}{P(Yhat = + | Y = +)}.
+#'    Estimated as: \eqn{\frac{TP}{P}}{TP/P}.}
+#'  \item{\code{rec}:}{recall. Same as \code{tpr}.}
+#'  \item{\code{sens}:}{sensitivity. Same as \code{tpr}.}
+#'  \item{\code{fnr}:}{False negative rate.
+#'    \eqn{P(\hat{Y}=\ominus|Y=\oplus)}{P(Yhat = - | Y = +)}.
+#'    Estimated as: \eqn{\frac{FN}{P}}{FN/P}.}
+#'  \item{\code{miss}:}{Miss. Same as \code{fnr}.}
+#'  \item{\code{tnr}:}{True negative rate.
+#'    \eqn{P(\hat{Y}=\ominus|Y=\ominus)}{P(Yhat = - | Y = -)}.}
+#'  \item{\code{spec}:}{specificity. Same as \code{tnr}.}
+#'  \item{\code{ppv}:}{Positive predictive value.
+#'    \eqn{P(Y=\oplus|\hat{Y}=\oplus)}{P(Y = + | Yhat = +)}.
+#'    Estimated as: \eqn{\frac{TP}{TP+FP}}{TP/(TP+FP)}.}
+#'  \item{\code{prec}:}{precision. Same as \code{ppv}.}
+#'  \item{\code{npv}:}{Negative predictive value.
+#'    \eqn{P(Y=\ominus|\hat{Y}=\ominus)}{P(Y = - | Yhat = -)}.
+#'    Estimated as: \eqn{\frac{TN}{TN+FN}}{TN/(TN+FN)}.}
+#'  \item{\code{pcfall}:}{Prediction-conditioned fallout.
+#'    \eqn{P(Y=\ominus|\hat{Y}=\oplus)}{P(Y = - | Yhat = +)}.
+#'    Estimated as: \eqn{\frac{FP}{TP+FP}}{FP/(TP+FP)}.}
+#'  \item{\code{pcmiss}:}{Prediction-conditioned miss.
+#'    \eqn{P(Y=\oplus|\hat{Y}=\ominus)}{P(Y = + | Yhat = -)}.
+#'    Estimated as: \eqn{\frac{FN}{TN+FN}}{FN/(TN+FN)}.}
+#'  \item{\code{rpp}:}{Rate of positive predictions.
+#'    \eqn{P(\hat{Y} = \oplus)}{P(Yhat = +)}.
+#'    Estimated as: (TP+FP)/(TP+FP+TN+FN).}
+#'  \item{\code{rnp}:}{Rate of negative predictions.
+#'    \eqn{P(\hat{Y} = \ominus)}{P(Yhat = -)}.
+#'    Estimated as: (TN+FN)/(TP+FP+TN+FN).}
+#'  \item{\code{phi}:}{Phi correlation coefficient.
+#'    \eqn{\frac{TP \cdot TN - FP \cdot FN}{\sqrt{(TP+FN) \cdot (TN+FP) \cdot (TP+FP) \cdot (TN+FN)}}}{(TP*TN - FP*FN)/(sqrt((TP+FN)*(TN+FP)*(TP+FP)*(TN+FN)))}.
+#'    Yields a number between -1 and 1, with 1 indicating a perfect
+#'    prediction, 0 indicating a random prediction. Values below 0
+#'    indicate a worse than random prediction.}
+#'  \item{\code{mat}:}{Matthews correlation coefficient. Same as \code{phi}.}
+#'  \item{\code{mi}:}{Mutual information.
+#'    \eqn{I(\hat{Y},Y) := H(Y) - H(Y|\hat{Y})}{I(Yhat, Y) := H(Y) - H(Y | Yhat)},
+#'    where H is the (conditional) entropy. Entropies are estimated naively
+#'    (no bias correction).}
+#'  \item{\code{chisq}:}{Chi square test statistic. \code{?chisq.test}
+#'    for details. Note that R might raise a warning if the sample size
+#'    is too small.}
+#'  \item{\code{odds}:}{Odds ratio. \eqn{\frac{TP \cdot TN}{FN \cdot
+#'    FP}}{(TP*TN)/(FN*FP)}. Note that odds ratio produces
+#'    Inf or NA values for all cutoffs corresponding to FN=0 or
+#'    FP=0. This can substantially decrease the plotted cutoff region.}
+#'  \item{\code{lift}:}{Lift value.
+#'    \eqn{\frac{P(\hat{Y}=\oplus|Y=\oplus)}{P(\hat{Y}=\oplus)}}{P(Yhat = + | Y = +)/P(Yhat = +)}.}
+#'  \item{\code{f}:}{precision-recall F measure (van Rijsbergen, 1979). Weighted
+#'    harmonic mean of precision (P) and recall (R).
+#'    \eqn{F = \frac{1}{\alpha \frac{1}{P} + (1-\alpha)\frac{1}{R}}}{F = 1/(alpha*1/P + (1-alpha)*1/R)}.
+#'    If \eqn{\alpha=\frac{1}{2}}{alpha=1/2}, the mean is balanced. A
+#'    frequent equivalent formulation is
+#'    \eqn{F = \frac{(\beta^2+1) \cdot P \cdot R}{R + \beta^2 \cdot P}}{F = (beta^2+1) * P * R / (R + beta^2 * P)}.
+#'    In this formulation, the mean is balanced if \eqn{\beta=1}{beta=1}.
+#'    Currently, ROCR only accepts the alpha version as input
+#'    (e.g. \eqn{\alpha=0.5}{alpha=0.5}). If no value for alpha is given,
+#'    the mean will be balanced by default.}
+#'  \item{\code{rch}:}{ROC convex hull. A ROC (=\code{tpr} vs \code{fpr}) curve 
+#'    with concavities (which represent suboptimal choices of cutoff) removed 
+#'    (Fawcett 2001). Since the result is already a parametric performance 
+#'    curve, it cannot be used in combination with other measures.}
+#'  \item{\code{auc}:}{Area under the ROC curve. This is equal to the value of the
+#'    Wilcoxon-Mann-Whitney test statistic and also the probability that the
+#'    classifier will score a randomly drawn positive sample higher than a
+#'    randomly drawn negative sample. Since the output of
+#'    \code{auc} is cutoff-independent, this
+#'    measure cannot be combined with other measures into a parametric
+#'    curve. The partial area under the ROC curve up to a given false
+#'    positive rate can be calculated by passing the optional parameter
+#'    \code{fpr.stop=0.5} (or any other value between 0 and 1) to 
+#'    \code{performance}.}
+#'  \item{\code{aucpr}:}{Area under the precision/recall curve. Since the output
+#'    of \code{aucpr} is cutoff-independent, this measure cannot be combined 
+#'    with other measures into a parametric curve.}
+#'  \item{\code{prbe}:}{precision-recall break-even point. The cutoff(s) where
+#'    precision and recall are equal. At this point, positive and negative
+#'    predictions are made at the same rate as their prevalence in the
+#'    data. Since the output of
+#'    \code{prbe} is just a cutoff-independent scalar, this
+#'    measure cannot be combined with other measures into a parametric curve.}
+#'  \item{\code{cal}:}{Calibration error. The calibration error is the
+#'    absolute difference between predicted confidence and actual reliability. This
+#'    error is estimated at all cutoffs by sliding a window across the
+#'    range of possible cutoffs. The default window size of 100 can be
+#'    adjusted by passing the optional parameter \code{window.size=200}
+#'    to \code{performance}. E.g., if for several
+#'    positive samples the output of the classifier is around 0.75, you might
+#'    expect from a well-calibrated classifier that the fraction of them
+#'    which is correctly predicted as positive is also around 0.75. In a
+#'    well-calibrated classifier, the probabilistic confidence estimates
+#'    are realistic. Only for use with
+#'    probabilistic output (i.e. scores between 0 and 1).}
+#'  \item{\code{mxe}:}{Mean cross-entropy. Only for use with
+#'    probabilistic output.
+#'    \eqn{MXE := -\frac{1}{P+N}( \sum_{y_i=\oplus} ln(\hat{y}_i) + \sum_{y_i=\ominus} ln(1-\hat{y}_i))}{MXE := - 1/(P+N) (\sum_{y_i=+} ln(yhat_i) + \sum_{y_i=-} ln(1-yhat_i))}.
+#'    Since the output of \code{mxe} is just a cutoff-independent scalar, this
+#'    measure cannot be combined with other measures into a parametric curve.}
+#'  \item{\code{rmse}:}{Root-mean-squared error. Only for use with
+#'    numerical class labels.
+#'    \eqn{RMSE := \sqrt{\frac{1}{P+N}\sum_i (y_i - \hat{y}_i)^2}}{RMSE := sqrt(1/(P+N) \sum_i (y_i - yhat_i)^2)}.
+#'    Since the output of
+#'    \code{rmse} is just a cutoff-independent scalar, this
+#'    measure cannot be combined with other measures into a parametric curve.}
+#'  \item{\code{sar}:}{Score combining performance measures of different
+#'    characteristics, in an attempt to create a more "robust"
+#'    measure (cf. Caruana R., ROCAI2004):
+#'      SAR = 1/3 * ( accuracy + Area under the ROC curve + Root
+#'                    mean-squared error ).}
+#'  \item{\code{ecost}:}{Expected cost. For details on cost curves,
+#'    cf. Drummond&Holte 2000,2004. \code{ecost} has an obligatory x
+#'    axis, the so-called 'probability-cost function'; thus it cannot be
+#'    combined with other measures. While using \code{ecost} one is
+#'    interested in the lower envelope of a set of lines, it might be
+#'    instructive to plot the whole set of lines in addition to the lower
+#'    envelope. An example is given in \code{demo(ROCR)}.}
+#'  \item{\code{cost}:}{Cost of a classifier when
+#'    class-conditional misclassification costs are explicitly given.
+#'    Accepts the optional parameters \code{cost.fp} and
+#'    \code{cost.fn}, by which the costs for false positives and
+#'    negatives can be adjusted, respectively. By default, both are set
+#'    to 1.}
+#' }
+#'
+#' @note
+#' Here is how to call \code{performance()} to create some standard
+#' evaluation plots:
+#' \describe{
+#'   \item{ROC curves:}{measure="tpr", x.measure="fpr".}
+#'   \item{precision/recall graphs:}{measure="prec", x.measure="rec".}
+#'   \item{sensitivity/specificity plots:}{measure="sens", x.measure="spec".}
+#'   \item{Lift charts:}{measure="lift", x.measure="rpp".}
+#' }
+#'
+#' @param prediction.obj An object of class \code{prediction}.
+#' @param measure Performance measure to use for the evaluation. A complete list
+#'   of the performance measures that are available for \code{measure} and
+#'   \code{x.measure} is given in the 'Details' section.
+#' @param x.measure A second performance measure. If different from the default,
+#'   a two-dimensional curve, with \code{x.measure} taken to be the unit in
+#'   direction of the x axis, and \code{measure} to be the unit in direction of
+#'   the y axis, is created. This curve is parametrized with the cutoff.
+#' @param ... Optional arguments (specific to individual performance measures).
+#'
+#' @return An S4 object of class \code{performance}.
+#'
+#' @author
+#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander
+#' \email{osander@gmail.com}
+#'
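+#' @examples
+#' # Illustrative sketch only (not run): 'pred' is a prediction object built
+#' # from made-up scores via prediction().
+#' \dontrun{
+#' pred <- prediction(runif(50), rep(c(0, 1), 25))
+#'
+#' ## ROC curve: true positive rate vs. false positive rate
+#' roc <- performance(pred, measure = "tpr", x.measure = "fpr")
+#'
+#' ## scalar measure: area under the ROC curve
+#' auc <- performance(pred, measure = "auc")
+#' slot(auc, "y.values")[[1]]
+#' }
+#'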
+#' @export
+performance <- function(prediction.obj,
+                        measure,
+                        x.measure="cutoff",
+                        ...) {
+
+  ## define the needed environments
+  envir.list <- .define.environments()
+  long.unit.names <- envir.list$long.unit.names
+  function.names <- envir.list$function.names
+  obligatory.x.axis <- envir.list$obligatory.x.axis
+  optional.arguments <- envir.list$optional.arguments
+  default.values <- envir.list$default.values
+
+  ## abort in case of misuse
+  if (!is(prediction.obj, "prediction") ||
+      !exists(measure, where=long.unit.names, inherits=FALSE) ||
+      !exists(x.measure, where=long.unit.names, inherits=FALSE)) {
+    stop(paste("Wrong argument types: First argument must be of type",
+               "'prediction'; second and optional third argument must",
+               "be available performance measures!"))
+  }
+
+  ## abort, if attempt is made to use a measure that has an obligatory
+  ## x.axis as the x.measure (cannot be combined)
+  if (exists( x.measure, where=obligatory.x.axis, inherits=FALSE )) {
+    message <- paste("The performance measure",
+                     x.measure,
+                     "can only be used as 'measure', because it has",
+                     "the following obligatory 'x.measure':\n",
+                     get( x.measure, envir=obligatory.x.axis))
+    stop(message)
+  }
+
+  ## if measure is a performance measure with obligatory x.axis, then
+  ## enforce this axis:
+  if (exists( measure, where=obligatory.x.axis, inherits=FALSE )) {
+    x.measure <- get( measure, envir=obligatory.x.axis )
+  }
+
+  if (x.measure == "cutoff" ||
+      exists( measure, where=obligatory.x.axis, inherits=FALSE )) {
+
+    ## fetch from '...' any optional arguments for the performance
+    ## measure at hand that are given, otherwise fill up the default values
+    optional.args <- list(...)
+    argnames <- c()
+    if ( exists( measure, where=optional.arguments, inherits=FALSE )) {
+      argnames <- get( measure, envir=optional.arguments )
+      default.arglist <- list()
+      for (i in 1:length(argnames)) {
+        default.arglist <- c(default.arglist,
+                             get(paste(measure,":",argnames[i],sep=""),
+                                 envir=default.values, inherits=FALSE))
+      }
+      names(default.arglist) <- argnames
+
+      for (i in 1:length(argnames)) {
+        templist <- list(optional.args,
+                         default.arglist[[i]])
+        names(templist) <- c('arglist', argnames[i])
+
+        optional.args <- do.call('.farg', templist)
+      }
+    }
+    optional.args <- .select.args( optional.args, argnames )
+
+    ## determine function name
+    function.name <- get( measure, envir=function.names )
+
+    ## for each x-validation run, compute the requested performance measure
+    x.values <- list()
+    y.values <- list()
+    for (i in 1:length( prediction.obj@predictions )) {
+      argumentlist <- .sarg(optional.args,
+                            predictions= prediction.obj@predictions[[i]],
+                            labels= prediction.obj@labels[[i]],
+                            cutoffs= prediction.obj@cutoffs[[i]],
+                            fp= prediction.obj@fp[[i]],
+                            tp= prediction.obj@tp[[i]],
+                            fn= prediction.obj@fn[[i]],
+                            tn= prediction.obj@tn[[i]],
+                            n.pos= prediction.obj@n.pos[[i]],
+                            n.neg= prediction.obj@n.neg[[i]],
+                            n.pos.pred= prediction.obj@n.pos.pred[[i]],
+                            n.neg.pred= prediction.obj@n.neg.pred[[i]])
+
+      ans <- do.call( function.name, argumentlist )
+
+      if (!is.null(ans[[1]])) x.values <- c( x.values, list( ans[[1]] ))
+      y.values <- c( y.values, list( ans[[2]] ))
+    }
+
+    if (! (length(x.values)==0 || length(x.values)==length(y.values)) ) {
+      stop("Consistency error.")
+    }
+
+    ## create a new performance object
+    return( new("performance",
+                x.name       = get( x.measure, envir=long.unit.names ),
+                y.name       = get( measure, envir=long.unit.names ),
+                alpha.name   = "none",
+                x.values     = x.values,
+                y.values     = y.values,
+                alpha.values = list() ))
+  } else {
+    perf.obj.1 <- performance( prediction.obj, measure=x.measure, ... )
+    perf.obj.2 <- performance( prediction.obj, measure=measure, ... )
+    return( .combine.performance.objects( perf.obj.1, perf.obj.2 ) )
+  }
+}
+
+#' @importFrom stats approxfun
+.combine.performance.objects <- function( p.obj.1, p.obj.2 ) {
+  ## some checks against misuse (in any case, this function is
+  ## only for internal use)
+  if ( p.obj.1@x.name != p.obj.2@x.name ) {
+    stop("Error: Objects need to have identical x axis.")
+  }
+  if ( p.obj.1@alpha.name != "none" || p.obj.2@alpha.name != "none") {
+    stop("Error: At least one of the two objects has already been merged.")
+  }
+  if (length(p.obj.1@x.values) != length(p.obj.2@x.values)) {
+    stop(paste("Only performance objects with identical number of",
+               "cross-validation runs can be combined."))
+  }
+
+  x.values <- list()
+  x.name <- p.obj.1@y.name
+  y.values <- list()
+  y.name <- p.obj.2@y.name
+  alpha.values <- list()
+  alpha.name <- p.obj.1@x.name
+
+  for (i in 1:length( p.obj.1@x.values )) {
+    x.values.1 <- p.obj.1@x.values[[i]]
+    y.values.1 <- p.obj.1@y.values[[i]]
+    x.values.2 <- p.obj.2@x.values[[i]]
+    y.values.2 <- p.obj.2@y.values[[i]]
+
+    ## cutoffs of combined object = merged cutoffs of simple objects
+    cutoffs <- sort( unique( c(x.values.1, x.values.2)), decreasing=TRUE )
+
+    ## calculate y.values at cutoffs using step function
+    y.values.int.1 <- stats::approxfun(x.values.1, y.values.1,
+                                       method="constant",f=1,rule=2)(cutoffs)
+    y.values.int.2 <- stats::approxfun(x.values.2, y.values.2,
+                                       method="constant",f=1,rule=2)(cutoffs)
+
+    ## 'approxfun' ignores NA and NaN
+    objs <- list( y.values.int.1, y.values.int.2)
+    objs.x <- list( x.values.1, x.values.2 )
+    na.cutoffs.1.bool <- is.na( y.values.1) & !is.nan( y.values.1 )
+    nan.cutoffs.1.bool <- is.nan( y.values.1)
+    na.cutoffs.2.bool <- is.na( y.values.2) & !is.nan( y.values.2 )
+    nan.cutoffs.2.bool <- is.nan( y.values.2)
+    bools <- list(na.cutoffs.1.bool, nan.cutoffs.1.bool,
+                  na.cutoffs.2.bool, nan.cutoffs.2.bool)
+    values <- c(NA,NaN,NA,NaN)
+
+    for (j in 1:4) {
+      for (k in which(bools[[j]])) {
+        interval.max <- objs.x[[ ceiling(j/2) ]][k]
+        interval.min <- -Inf
+        if (k < length(objs.x[[ ceiling(j/2) ]])) {
+          interval.min <- objs.x[[ ceiling(j/2) ]][k+1]
+        }
+        objs[[ ceiling(j/2) ]][cutoffs <= interval.max &
+                                 cutoffs > interval.min ] <- values[j]
+      }
+    }
+
+    alpha.values <- c(alpha.values, list(cutoffs))
+    x.values <- c(x.values, list(objs[[1]]))
+    y.values <- c(y.values, list(objs[[2]]))
+  }
+
+  return( new("performance",
+              x.name=x.name, y.name=y.name,
+              alpha.name=alpha.name, x.values=x.values,
+              y.values=y.values, alpha.values=alpha.values))
+}
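+
+## Sketch of the step interpolation used in .combine.performance.objects
+## (made-up numbers): with cutoffs x = c(0.8, 0.5, 0.2) and values
+## y = c(0.1, 0.4, 0.9),
+##   stats::approxfun(x, y, method = "constant", f = 1, rule = 2)(0.6)
+## returns 0.1, i.e. the value attached to the next larger cutoff (0.8);
+## queries outside the cutoff range return the value at the nearest endpoint
+## (rule = 2).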
+
+.define.environments <- function() {
+  ## There are five environments: long.unit.names, function.names,
+  ## obligatory.x.axis, optional.arguments, default.values
+
+  ## Define long names corresponding to the measure abbreviations.
+  long.unit.names <- new.env()
+  assign("none","None", envir=long.unit.names)
+  assign("cutoff", "Cutoff", envir=long.unit.names)
+  assign("acc", "accuracy", envir=long.unit.names)
+  assign("err", "Error Rate", envir=long.unit.names)
+  assign("fpr", "False positive rate", envir=long.unit.names)
+  assign("tpr", "True positive rate", envir=long.unit.names)
+  assign("rec", "recall", envir=long.unit.names)
+  assign("sens", "sensitivity", envir=long.unit.names)
+  assign("fnr", "False negative rate", envir=long.unit.names)
+  assign("tnr", "True negative rate", envir=long.unit.names)
+  assign("spec", "specificity", envir=long.unit.names)
+  assign("ppv", "Positive predictive value", envir=long.unit.names)
+  assign("prec", "precision", envir=long.unit.names)
+  assign("npv", "Negative predictive value", envir=long.unit.names)
+  assign("fall", "Fallout", envir=long.unit.names)
+  assign("miss", "Miss", envir=long.unit.names)
+  assign("pcfall", "Prediction-conditioned fallout", envir=long.unit.names)
+  assign("pcmiss", "Prediction-conditioned miss", envir=long.unit.names)
+  assign("rpp", "Rate of positive predictions", envir=long.unit.names)
+  assign("rnp", "Rate of negative predictions", envir=long.unit.names)
+  assign("auc","Area under the ROC curve", envir=long.unit.names)
+  assign("aucpr","Area under the precision/recall curve", envir=long.unit.names)
+  assign("cal", "Calibration error", envir=long.unit.names)
+  assign("mwp", "Median window position", envir=long.unit.names)
+  assign("prbe","precision/recall break-even point", envir=long.unit.names)
+  assign("rch", "ROC convex hull", envir=long.unit.names)
+  assign("mxe", "Mean cross-entropy", envir=long.unit.names)
+  assign("rmse","Root-mean-square error", envir=long.unit.names)
+  assign("phi", "Phi correlation coefficient", envir=long.unit.names)
+  assign("mat","Matthews correlation coefficient", envir=long.unit.names)
+  assign("mi", "Mutual information", envir=long.unit.names)
+  assign("chisq", "Chi-square test statistic", envir=long.unit.names)
+  assign("odds","Odds ratio", envir=long.unit.names)
+  assign("lift", "Lift value", envir=long.unit.names)
+  assign("f","precision-recall F measure", envir=long.unit.names)
+  assign("sar", "SAR", envir=long.unit.names)
+  assign("ecost", "Expected cost", envir=long.unit.names)
+  assign("cost", "Explicit cost", envir=long.unit.names)
+
+  ## Define function names corresponding to the measure abbreviations.
+  function.names <- new.env()
+  assign("acc", ".performance.accuracy", envir=function.names)
+  assign("err", ".performance.error.rate", envir=function.names)
+  assign("fpr", ".performance.false.positive.rate", envir=function.names)
+  assign("tpr", ".performance.true.positive.rate", envir=function.names)
+  assign("rec", ".performance.true.positive.rate", envir=function.names)
+  assign("sens", ".performance.true.positive.rate", envir=function.names)
+  assign("fnr", ".performance.false.negative.rate", envir=function.names)
+  assign("tnr", ".performance.true.negative.rate", envir=function.names)
+  assign("spec", ".performance.true.negative.rate", envir=function.names)
+  assign("ppv", ".performance.positive.predictive.value",
+         envir=function.names)
+  assign("prec", ".performance.positive.predictive.value",
+         envir=function.names)
+  assign("npv", ".performance.negative.predictive.value",
+         envir=function.names)
+  assign("fall", ".performance.false.positive.rate", envir=function.names)
+  assign("miss", ".performance.false.negative.rate", envir=function.names)
+  assign("pcfall", ".performance.prediction.conditioned.fallout",
+         envir=function.names)
+  assign("pcmiss", ".performance.prediction.conditioned.miss",
+         envir=function.names)
+  assign("rpp", ".performance.rate.of.positive.predictions",
+         envir=function.names)
+  assign("rnp", ".performance.rate.of.negative.predictions",
+         envir=function.names)
+  assign("auc", ".performance.auc", envir=function.names)
+  assign("aucpr", ".performance.aucpr", envir=function.names)
+  assign("cal", ".performance.calibration.error", envir=function.names)
+  assign("prbe", ".performance.precision.recall.break.even.point",
+         envir=function.names)
+  assign("rch", ".performance.rocconvexhull", envir=function.names)
+  assign("mxe", ".performance.mean.cross.entropy", envir=function.names)
+  assign("rmse", ".performance.root.mean.squared.error",
+         envir=function.names)
+  assign("phi", ".performance.phi", envir=function.names)
+  assign("mat", ".performance.phi", envir=function.names)
+  assign("mi", ".performance.mutual.information", envir=function.names)
+  assign("chisq", ".performance.chisq", envir=function.names)
+  assign("odds", ".performance.odds.ratio", envir=function.names)
+  assign("lift", ".performance.lift", envir=function.names)
+  assign("f", ".performance.f", envir=function.names)
+  assign("sar", ".performance.sar", envir=function.names)
+  assign("ecost", ".performance.expected.cost", envir=function.names)
+  assign("cost", ".performance.cost", envir=function.names)
+
+  ## If a measure comes along with an obligatory x axis (including "none"),
+  ## list it here.
+  obligatory.x.axis <- new.env()
+  assign("mxe", "none", envir=obligatory.x.axis)
+  assign("rmse", "none", envir=obligatory.x.axis)
+  assign("prbe", "none", envir=obligatory.x.axis)
+  assign("auc", "none", envir=obligatory.x.axis)
+  assign("aucpr", "none", envir=obligatory.x.axis)
+  assign("rch","none", envir=obligatory.x.axis)
+  ## ecost requires probability cost function as x axis, which is handled
+  ## implicitly, not as an explicit performance measure.
+  assign("ecost","none", envir=obligatory.x.axis)
+
+  ## If a measure has optional arguments, list the names of the
+  ## arguments here.
+  optional.arguments <- new.env()
+  assign("cal", "window.size", envir=optional.arguments)
+  assign("f", "alpha", envir=optional.arguments)
+  assign("cost", c("cost.fp", "cost.fn"), envir=optional.arguments)
+  assign("auc", "fpr.stop", envir=optional.arguments)
+
+  ## If a measure has additional arguments, list the default values
+  ## for them here. Naming convention: e.g. "cal" has an optional
+  ## argument "window.size" the key to use here is "cal:window.size"
+  ## (colon as separator)
+  default.values <- new.env()
+  assign("cal:window.size", 100, envir=default.values)
+  assign("f:alpha", 0.5, envir=default.values)
+  assign("cost:cost.fp", 1, envir=default.values)
+  assign("cost:cost.fn", 1, envir=default.values)
+  assign("auc:fpr.stop", 1, envir=default.values)
+
+  list(long.unit.names=long.unit.names, function.names=function.names,
+       obligatory.x.axis=obligatory.x.axis,
+       optional.arguments=optional.arguments,
+       default.values=default.values)
+}
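+
+## Lookup sketch (illustration only): inside performance(), the environments
+## returned above are queried roughly like this:
+##   envs <- .define.environments()
+##   get("auc", envir = envs$function.names)          # ".performance.auc"
+##   get("auc:fpr.stop", envir = envs$default.values) # default fpr.stop of 1
+##   exists("auc", where = envs$obligatory.x.axis)    # TRUE; its x axis is "none"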
+
-- 
GitLab