# WARNING - Generated by {fusen} from dev/flat_full.Rmd: do not edit by hand
#' @name prediction-class
#' @aliases prediction-class
#'
#' @title Class \code{prediction}
#'
#' @description
#' Object to encapsulate numerical predictions together with the
#' corresponding true class labels, optionally collecting predictions and
#' labels for several cross-validation or bootstrapping runs.
#'
#' @section Objects from the Class:
#' Objects can be created by using the \code{prediction} function.
#'
#' @note
#' Every \code{prediction} object contains information about the 2x2
#' contingency table consisting of tp, tn, fp, and fn, along with the
#' marginal sums n.pos, n.neg, n.pos.pred, and n.neg.pred, because these form
#' the basis for many derived performance measures.
#'
#' @slot predictions A list, in which each element is a vector of predictions
#' (the list has length > 1 for x-validation data).
#' @slot labels Analogously, a list in which each element is a vector of true
#' class labels.
#' @slot cutoffs A list in which each element is a vector of all necessary
#' cutoffs. Each cutoff vector consists of the predicted scores (duplicates
#' removed), in descending order.
#' @slot fp A list in which each element is a vector of the number (not the
#' rate!) of false positives induced by the cutoffs given in the corresponding
#' 'cutoffs' list entry.
#' @slot tp As fp, but for true positives.
#' @slot tn As fp, but for true negatives.
#' @slot fn As fp, but for false negatives.
#' @slot n.pos A list in which each element contains the number of positive
#' samples in the given x-validation run.
#' @slot n.neg As n.pos, but for negative samples.
#' @slot n.pos.pred A list in which each element is a vector of the number of
#' samples predicted as positive at the cutoffs given in the corresponding
#' 'cutoffs' entry.
#' @slot n.neg.pred As n.pos.pred, but for negatively predicted samples.
#'
#'
#' @author
#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander
#' \email{osander@gmail.com}
#'
#' @seealso
#' \code{\link{prediction}},
#' \code{\link{performance}},
#' \code{\link{performance-class}},
#' \code{\link{plot.performance}}
#'
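#' @examples
#' ## A minimal sketch with simulated scores and labels (illustrative only):
#' pred <- prediction(c(0.9, 0.8, 0.4, 0.1), c(1, 1, 0, 0))
#' ## per-run cutoffs and true-positive counts are stored in list slots
#' slot(pred, "cutoffs")[[1]]
#' slot(pred, "tp")[[1]]
#'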
#' @export
setClass("prediction",
representation(predictions = "list",
labels = "list",
cutoffs = "list",
fp = "list",
tp = "list",
tn = "list",
fn = "list",
n.pos = "list",
n.neg = "list",
n.pos.pred = "list",
n.neg.pred = "list"))
setMethod("show","prediction",
function(object){
cat("A ", class(object), " instance\n", sep = "")
if(length(object@predictions) > 1L){
cat(" with ", length(object@predictions)," cross ",
"validation runs ", sep = "")
if(length(unique(vapply(object@predictions,length,integer(1)))) == 1L){
cat("(equal lengths)", sep = "")
} else {
cat("(different lengths)", sep = "")
}
} else {
cat(" with ", length(object@predictions[[1L]]),
" data points", sep = "")
}
})
#' @name performance-class
#' @aliases performance-class
#'
#' @title Class \code{performance}
#'
#' @description
#' Object to capture the result of a performance evaluation, optionally
#' collecting evaluations from several cross-validation or bootstrapping runs.
#'
#' @section Objects from the Class:
#' Objects can be created by using the \code{performance} function.
#'
#' @details
#' A \code{performance} object can capture information from four
#' different evaluation scenarios:
#' \itemize{
#' \item The behaviour of a cutoff-dependent performance measure across
#' the range of all cutoffs (e.g. \code{performance( predObj, 'acc' )} ). Here,
#' \code{x.values} contains the cutoffs, \code{y.values} the
#' corresponding values of the performance measure, and
#' \code{alpha.values} is empty.\cr
#' \item The trade-off between two performance measures across the
#' range of all cutoffs (e.g. \code{performance( predObj,
#' 'tpr', 'fpr' )} ). In this case, the cutoffs are stored in
#' \code{alpha.values}, while \code{x.values} and \code{y.values}
#' contain the corresponding values of the two performance measures.\cr
#' \item A performance measure that comes along with an obligatory
#' second axis (e.g. \code{performance( predObj, 'ecost' )} ). Here, the measure values are
#' stored in \code{y.values}, while the corresponding values of the
#' obligatory axis are stored in \code{x.values}, and \code{alpha.values}
#' is empty.\cr
#' \item A performance measure whose value is just a scalar
#' (e.g. \code{performance( predObj, 'auc' )} ). The value is then stored in
#' \code{y.values}, while \code{x.values} and \code{alpha.values} are
#' empty.
#' }
#'
#' @slot x.name Performance measure used for the x axis.
#' @slot y.name Performance measure used for the y axis.
#' @slot alpha.name Name of the unit that is used to create the parametrized
#' curve. Currently, curves can only be parametrized by cutoff, so
#' \code{alpha.name} is either \code{none} or \code{cutoff}.
#' @slot x.values A list in which each entry contains the x values of the curve
#' of this particular cross-validation run. \code{x.values[[i]]},
#' \code{y.values[[i]]}, and \code{alpha.values[[i]]} correspond to each
#' other.
#' @slot y.values A list in which each entry contains the y values of the curve
#' of this particular cross-validation run.
#' @slot alpha.values A list in which each entry contains the cutoff values of
#' the curve of this particular cross-validation run.
#'
#' @references
#' A detailed list of references can be found on the ROCR homepage at
#' \url{http://rocr.bioinf.mpi-sb.mpg.de}.
#'
#' @author
#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander
#' \email{osander@gmail.com}
#'
#' @seealso
#' \code{\link{prediction}}
#' \code{\link{performance}},
#' \code{\link{prediction-class}},
#' \code{\link{plot.performance}}
#'
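#' @examples
#' ## A minimal sketch with simulated data (illustrative only):
#' pred <- prediction(c(0.9, 0.8, 0.4, 0.1), c(1, 1, 0, 0))
#' perf <- performance(pred, measure = "tpr", x.measure = "fpr")
#' ## the cutoffs parametrize the curve, so they end up in alpha.values
#' str(slot(perf, "alpha.values"))
#'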
#' @export
setClass("performance",
representation(x.name = "character",
y.name = "character",
alpha.name = "character",
x.values = "list",
y.values = "list",
alpha.values = "list" ))
setMethod("show","performance",
function(object){
cat("A ", class(object), " instance\n", sep = "")
if(length(object@y.values[[1L]]) > 1L){
cat(" '", object@x.name, "' vs. '", object@y.name,
"' (alpha: '",object@alpha.name,"')\n", sep = "")
} else {
cat(" '", object@y.name, "'\n", sep = "")
}
if(length(object@y.values) > 1L){
cat(" for ", length(object@y.values)," cross ",
"validation runs ", sep = "")
} else {
if(length(object@y.values[[1L]]) > 1L){
cat(" with ", length(object@y.values[[1L]])," data points",
sep = "")
}
}
})
#' @name prediction
#'
#' @title Function to create prediction objects
#'
#' @description
#' Every classifier evaluation using ROCR starts with creating a
#' \code{prediction} object. This function is used to transform the input data
#' (which can be in vector, matrix, data frame, or list form) into a
#' standardized format.
#'
#' @details
#' \code{predictions} and \code{labels} can simply be vectors of the same
#' length. However, in the case of cross-validation data, different
#' cross-validation runs can be provided as the *columns* of a matrix or
#' data frame, or as the entries of a list. In the case of a matrix or
#' data frame, all cross-validation runs must have the same length, whereas
#' in the case of a list, the lengths can vary across the cross-validation
#' runs. Internally, as described in section 'Value', all of these input
#' formats are converted to list representation.
#'
#' Since scoring classifiers give relative tendencies towards a negative
#' (low scores) or positive (high scores) class, it has to be declared
#' which class label denotes the negative, and which the positive class.
#' Ideally, labels should be supplied as ordered factor(s), the lower
#' level corresponding to the negative class, the upper level to the
#' positive class. If the labels are factors (unordered), numeric,
#' logical or characters, ordering of the labels is inferred from
#' R's built-in \code{<} relation (e.g. 0 < 1, -1 < 1, 'a' < 'b',
#' FALSE < TRUE). Use \code{label.ordering} to override this default
#' ordering. Please note that the ordering can be locale-dependent
#' e.g. for character labels '-1' and '1'.
#'
#' Currently, ROCR supports only binary classification (extensions toward
#' multiclass classification are scheduled for the next release,
#' however). If there are more than two distinct label symbols, execution
#' stops with an error message. If all predictions use the same two
#' symbols that are used for the labels, categorical predictions are
#' assumed. If there are more than two predicted values, but all numeric,
#' continuous predictions are assumed (i.e. a scoring
#' classifier). Otherwise, if more than two symbols occur in the
#' predictions, and not all of them are numeric, execution stops with an
#' error message.
#'
#' @param predictions A vector, matrix, list, or data frame containing the
#' predictions.
#' @param labels A vector, matrix, list, or data frame containing the true class
#' labels. Must have the same dimensions as \code{predictions}.
#' @param label.ordering The default ordering (cf. Details) of the classes can
#' be changed by supplying a vector containing the negative and the positive
#' class label.
#'
#' @return An S4 object of class \code{prediction}.
#'
#' @author
#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander
#' \email{osander@gmail.com}
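#'
#' @examples
#' ## A minimal sketch with simulated scores and binary labels
#' ## (values are illustrative only):
#' scores <- c(0.62, 0.91, 0.36, 0.79, 0.15, 0.54)
#' truth  <- c(1, 1, 0, 1, 0, 0)
#' pred <- prediction(scores, truth)
#' ## cross-validation runs can be supplied as a list of vectors
#' pred_cv <- prediction(list(scores, rev(scores)), list(truth, rev(truth)))
#' ## the negative/positive class order can be forced explicitly
#' pred_ord <- prediction(scores, truth, label.ordering = c(0, 1))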
#' @export
prediction <- function(predictions, labels, label.ordering=NULL) {
## bring 'predictions' and 'labels' into list format,
## each list entry representing one x-validation run
## convert predictions into canonical list format
if (is.data.frame(predictions)) {
names(predictions) <- c()
predictions <- as.list(predictions)
} else if (is.matrix(predictions)) {
predictions <- as.list(data.frame(predictions))
names(predictions) <- c()
} else if (is.vector(predictions) && !is.list(predictions)) {
predictions <- list(predictions)
} else if (!is.list(predictions)) {
stop("Format of predictions is invalid. It couldn't be coerced to a list.",
call. = FALSE)
}
## convert labels into canonical list format
if (is.data.frame(labels)) {
names(labels) <- c()
labels <- as.list( labels)
} else if (is.matrix(labels)) {
labels <- as.list( data.frame( labels))
names(labels) <- c()
} else if ((is.vector(labels) ||
is.ordered(labels) ||
is.factor(labels)) &&
!is.list(labels)) {
labels <- list( labels)
} else if (!is.list(labels)) {
stop("Format of labels is invalid. It couldn't be coerced to a list.",
call. = FALSE)
}
if(any(vapply(predictions, anyNA, logical(1)))){
warning("'predictions' contains NA. These missing predictions will be removed from evaluation.")
## the actual removal happens below, where non-finite prediction/label
## pairs (including NA/NaN) are dropped for each run
}
## Length consistency checks
if (length(predictions) != length(labels))
stop(paste("Number of cross-validation runs must be equal",
"for predictions and labels."))
if (! all(sapply(predictions, length) == sapply(labels, length)))
stop(paste("Number of predictions in each run must be equal",
"to the number of labels for each run."))
## NOTE: an earlier revision replaced infinite predictions by the largest
## finite value; instead, non-finite prediction/label pairs are dropped below.
## only keep prediction/label pairs that are finite numbers
for (i in 1:length(predictions)) {
finite.bool <- is.finite( predictions[[i]] )
predictions[[i]] <- predictions[[i]][ finite.bool ]
labels[[i]] <- labels[[i]][ finite.bool ]
}
## abort if 'labels' format is inconsistent across
## different cross-validation runs
label.format="" ## one of 'normal','factor','ordered'
if (all(sapply( labels, is.factor)) &&
!any(sapply(labels, is.ordered))) {
label.format <- "factor"
} else if (all(sapply( labels, is.ordered))) {
label.format <- "ordered"
} else if (all(sapply( labels, is.character)) ||
all(sapply( labels, is.numeric)) ||
all(sapply( labels, is.logical))) {
label.format <- "normal"
} else {
stop(paste("Inconsistent label data type across different",
"cross-validation runs."))
}
## abort if levels are not consistent across different
## cross-validation runs
if (! all(sapply(labels, levels)==levels(labels[[1]])) ) {
stop(paste("Inconsistent factor levels across different",
"cross-validation runs."))
}
## convert 'labels' into ordered factors, aborting if the number
## of classes is not equal to 2.
levels <- c()
if ( label.format == "ordered" ) {
if (!is.null(label.ordering)) {
stop(paste("'labels' is already ordered. No additional",
"'label.ordering' must be supplied."))
} else {
levels <- levels(labels[[1]])
}
} else {
if ( is.null( label.ordering )) {
if ( label.format == "factor" ) levels <- sort(levels(labels[[1]]))
else levels <- sort( unique( unlist( labels)))
} else {
## if (!setequal( levels, label.ordering)) {
if (!setequal( unique(unlist(labels)), label.ordering )) {
stop("Label ordering does not match class labels.")
}
levels <- label.ordering
}
for (i in 1:length(labels)) {
if (is.factor(labels[[i]]))
labels[[i]] <- ordered(as.character(labels[[i]]),
levels=levels)
else labels[[i]] <- ordered( labels[[i]], levels=levels)
}
}
if (length(levels) != 2) {
message <- paste("Number of classes is not equal to 2.\n",
"HTRfit currently supports only evaluation of ",
"binary classification tasks.",sep="")
stop(message)
}
## determine whether predictions are continuous or categorical
## (in the latter case, stop)
if (!is.numeric( unlist( predictions ))) {
stop("Currently, only continuous predictions are supported by HTRfit")
}
## compute cutoff/fp/tp data
cutoffs <- list()
fp <- list()
tp <- list()
fn <- list()
tn <- list()
n.pos <- list()
n.neg <- list()
n.pos.pred <- list()
n.neg.pred <- list()
for (i in 1:length(predictions)) {
n.pos <- c( n.pos, sum( labels[[i]] == levels[2] ))
n.neg <- c( n.neg, sum( labels[[i]] == levels[1] ))
ans <- .compute.unnormalized.roc.curve( predictions[[i]], labels[[i]] )
cutoffs <- c( cutoffs, list( ans$cutoffs ))
fp <- c( fp, list( ans$fp ))
tp <- c( tp, list( ans$tp ))
fn <- c( fn, list( n.pos[[i]] - tp[[i]] ))
tn <- c( tn, list( n.neg[[i]] - fp[[i]] ))
n.pos.pred <- c(n.pos.pred, list(tp[[i]] + fp[[i]]) )
n.neg.pred <- c(n.neg.pred, list(tn[[i]] + fn[[i]]) )
}
return( new("prediction", predictions=predictions,
labels=labels,
cutoffs=cutoffs,
fp=fp,
tp=tp,
fn=fn,
tn=tn,
n.pos=n.pos,
n.neg=n.neg,
n.pos.pred=n.pos.pred,
n.neg.pred=n.neg.pred))
}
## fast fp/tp computation based on cumulative summing
.compute.unnormalized.roc.curve <- function( predictions, labels ) {
## determine the labels that are used for the pos. resp. neg. class :
pos.label <- levels(labels)[2]
neg.label <- levels(labels)[1]
pred.order <- order(predictions, decreasing=TRUE)
predictions.sorted <- predictions[pred.order]
tp <- cumsum(labels[pred.order]==pos.label)
fp <- cumsum(labels[pred.order]==neg.label)
## remove fp & tp entries for duplicated predictions: duplicated() keeps
## the first occurrence, but we want the last, so two rev() calls are used.
## Highest cutoff (Infinity) corresponds to tp=0, fp=0
dups <- rev(duplicated(rev(predictions.sorted)))
tp <- c(0, tp[!dups])
fp <- c(0, fp[!dups])
cutoffs <- c(Inf, predictions.sorted[!dups])
return(list( cutoffs=cutoffs, fp=fp, tp=tp ))
}
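## Illustrative sketch (for documentation only): with
##   predictions = c(0.9, 0.8, 0.8, 0.1)
##   labels      = ordered(c(1, 0, 1, 0), levels = c(0, 1))
## the cumulative counts along the decreasing score order are
##   tp = 1, 1, 2, 2 and fp = 0, 1, 1, 2;
## collapsing the duplicated score 0.8 (keeping its last position) and
## prepending the Inf cutoff gives
##   cutoffs = c(Inf, 0.9, 0.8, 0.1), tp = c(0, 1, 2, 2), fp = c(0, 0, 1, 2).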
#' @name performance
#'
#' @title Function to create performance objects
#'
#' @description
#' All kinds of predictor evaluations are performed using this function.
#'
#' @details
#' Here is the list of available performance measures. Let Y and
#' \eqn{\hat{Y}}{Yhat} be random variables representing the class and the prediction for
#' a randomly drawn sample, respectively. We denote by
#' \eqn{\oplus}{+} and \eqn{\ominus}{-} the positive and
#' negative class, respectively. Further, we use the following
#' abbreviations for empirical quantities: P (\# positive
#' samples), N (\# negative samples), TP (\# true positives), TN (\# true
#' negatives), FP (\# false positives), FN (\# false negatives).
#' \describe{
#' \item{\code{acc}:}{Accuracy. \eqn{P(\hat{Y}=Y)}{P(Yhat = Y)}. Estimated
#' as: \eqn{\frac{TP+TN}{P+N}}{(TP+TN)/(P+N)}.}
#' \item{\code{err}:}{Error rate. \eqn{P(\hat{Y}\ne Y)}{P(Yhat !=
#' Y)}. Estimated as: \eqn{\frac{FP+FN}{P+N}}{(FP+FN)/(P+N)}.}
#' \item{\code{fpr}:}{False positive rate. \eqn{P(\hat{Y}=\oplus | Y =
#' \ominus)}{P(Yhat = + | Y = -)}. Estimated as:
#' \eqn{\frac{FP}{N}}{FP/N}.}
#' \item{\code{fall}:}{Fallout. Same as \code{fpr}.}
#' \item{\code{tpr}:}{True positive
#' rate. \eqn{P(\hat{Y}=\oplus|Y=\oplus)}{P(Yhat = + | Y = +)}. Estimated
#' as: \eqn{\frac{TP}{P}}{TP/P}.}
#' \item{\code{rec}:}{Recall. Same as \code{tpr}.}
#' \item{\code{sens}:}{Sensitivity. Same as \code{tpr}.}
#' \item{\code{fnr}:}{False negative
#' rate. \eqn{P(\hat{Y}=\ominus|Y=\oplus)}{P(Yhat = - | Y =
#' +)}. Estimated as: \eqn{\frac{FN}{P}}{FN/P}.}
#' \item{\code{miss}:}{Miss. Same as \code{fnr}.}
#' \item{\code{tnr}:}{True negative rate. \eqn{P(\hat{Y} =
#' \ominus|Y=\ominus)}{P(Yhat = - | Y = -)}.}
#' \item{\code{spec}:}{Specificity. Same as \code{tnr}.}
#' \item{\code{ppv}:}{Positive predictive
#' value. \eqn{P(Y=\oplus|\hat{Y}=\oplus)}{P(Y = + | Yhat =
#' +)}. Estimated as: \eqn{\frac{TP}{TP+FP}}{TP/(TP+FP)}.}
#' \item{\code{prec}:}{Precision. Same as \code{ppv}.}
#' \item{\code{npv}:}{Negative predictive
#' value. \eqn{P(Y=\ominus|\hat{Y}=\ominus)}{P(Y = - | Yhat =
#' -)}. Estimated as: \eqn{\frac{TN}{TN+FN}}{TN/(TN+FN)}.}
#' \item{\code{pcfall}:}{Prediction-conditioned
#' fallout. \eqn{P(Y=\ominus|\hat{Y}=\oplus)}{P(Y = - | Yhat =
#' +)}. Estimated as: \eqn{\frac{FP}{TP+FP}}{FP/(TP+FP)}.}
#' \item{\code{pcmiss}:}{Prediction-conditioned
#' miss. \eqn{P(Y=\oplus|\hat{Y}=\ominus)}{P(Y = + | Yhat =
#' -)}. Estimated as: \eqn{\frac{FN}{TN+FN}}{FN/(TN+FN)}.}
#' \item{\code{rpp}:}{Rate of positive predictions. \eqn{P( \hat{Y} =
#' \oplus)}{P(Yhat = +)}. Estimated as: (TP+FP)/(TP+FP+TN+FN).}
#' \item{\code{rnp}:}{Rate of negative predictions. \eqn{P( \hat{Y} =
#' \ominus)}{P(Yhat = -)}. Estimated as: (TN+FN)/(TP+FP+TN+FN).}
#' \item{\code{phi}:}{Phi correlation coefficient. \eqn{\frac{TP \cdot
#' TN - FP \cdot FN}{\sqrt{ (TP+FN) \cdot (TN+FP) \cdot (TP+FP)
#' \cdot (TN+FN)}}}{(TP*TN -
#' FP*FN)/(sqrt((TP+FN)*(TN+FP)*(TP+FP)*(TN+FN)))}. Yields a
#' number between -1 and 1, with 1 indicating a perfect
#' prediction, 0 indicating a random prediction. Values below 0
#' indicate a worse than random prediction.}
#' \item{\code{mat}:}{Matthews correlation coefficient. Same as \code{phi}.}
#' \item{\code{mi}:}{Mutual information. \eqn{I(\hat{Y},Y) := H(Y) -
#' H(Y|\hat{Y})}{I(Yhat, Y) := H(Y) - H(Y | Yhat)}, where H is the
#' (conditional) entropy. Entropies are estimated naively (no bias
#' correction).}
#' \item{\code{chisq}:}{Chi square test statistic. \code{?chisq.test}
#' for details. Note that R might raise a warning if the sample size
#' is too small.}
#' \item{\code{odds}:}{Odds ratio. \eqn{\frac{TP \cdot TN}{FN \cdot
#' FP}}{(TP*TN)/(FN*FP)}. Note that odds ratio produces
#' Inf or NA values for all cutoffs corresponding to FN=0 or
#' FP=0. This can substantially decrease the plotted cutoff region.}
#' \item{\code{lift}:}{Lift
#' value. \eqn{\frac{P(\hat{Y}=\oplus|Y=\oplus)}{P(\hat{Y}=\oplus)}}{P(Yhat = + |
#' Y = +)/P(Yhat = +)}.}
#' \item{\code{f}:}{Precision-recall F measure (van Rijsbergen, 1979). Weighted
#' harmonic mean of precision (P) and recall (R). \eqn{F =
#' \frac{1}{\alpha \frac{1}{P} + (1-\alpha)\frac{1}{R}}}{F = 1/
#' (alpha*1/P + (1-alpha)*1/R)}. If
#' \eqn{\alpha=\frac{1}{2}}{alpha=1/2}, the mean is balanced. A
#' frequent equivalent formulation is
#' \eqn{F = \frac{(\beta^2+1) \cdot P \cdot R}{R + \beta^2 \cdot
#' P}}{F = (beta^2+1) * P * R / (R + beta^2 * P)}. In this formulation, the
#' mean is balanced if \eqn{\beta=1}{beta=1}. Currently, ROCR only accepts
#' the alpha version as input (e.g. \eqn{\alpha=0.5}{alpha=0.5}). If no
#' value for alpha is given, the mean will be balanced by default.}
#' \item{\code{rch}:}{ROC convex hull. A ROC (=\code{tpr} vs \code{fpr}) curve
#' with concavities (which represent suboptimal choices of cutoff) removed
#' (Fawcett 2001). Since the result is already a parametric performance
#' curve, it cannot be used in combination with other measures.}
#' \item{\code{auc}:}{Area under the ROC curve. This is equal to the value of the
#' Wilcoxon-Mann-Whitney test statistic and also the probability that the
#' classifier will score a randomly drawn positive sample higher than a
#' randomly drawn negative sample. Since the output of
#' \code{auc} is cutoff-independent, this
#' measure cannot be combined with other measures into a parametric
#' curve. The partial area under the ROC curve up to a given false
#' positive rate can be calculated by passing the optional parameter
#' \code{fpr.stop=0.5} (or any other value between 0 and 1) to
#' \code{performance}.}
#' \item{\code{aucpr}:}{Area under the precision/recall curve. Since the output
#' of \code{aucpr} is cutoff-independent, this measure cannot be combined
#' with other measures into a parametric curve.}
#' \item{\code{prbe}:}{Precision-recall break-even point. The cutoff(s) where
#' precision and recall are equal. At this point, positive and negative
#' predictions are made at the same rate as their prevalence in the
#' data. Since the output of
#' \code{prbe} is just a cutoff-independent scalar, this
#' measure cannot be combined with other measures into a parametric curve.}
#' \item{\code{cal}:}{Calibration error. The calibration error is the
#' absolute difference between predicted confidence and actual reliability. This
#' error is estimated at all cutoffs by sliding a window across the
#' range of possible cutoffs. The default window size of 100 can be
#' adjusted by passing the optional parameter \code{window.size=200}
#' to \code{performance}. E.g., if for several
#' positive samples the output of the classifier is around 0.75, you might
#' expect from a well-calibrated classifier that the fraction of them
#' which is correctly predicted as positive is also around 0.75. In a
#' well-calibrated classifier, the probabilistic confidence estimates
#' are realistic. Only for use with
#' probabilistic output (i.e. scores between 0 and 1).}
#' \item{\code{mxe}:}{Mean cross-entropy. Only for use with
#' probabilistic output. \eqn{MXE :=-\frac{1}{P+N}( \sum_{y_i=\oplus}
#' ln(\hat{y}_i) + \sum_{y_i=\ominus} ln(1-\hat{y}_i))}{MXE := - 1/(P+N) \sum_{y_i=+}
#' ln(yhat_i) + \sum_{y_i=-} ln(1-yhat_i)}. Since the output of
#' \code{mxe} is just a cutoff-independent scalar, this
#' measure cannot be combined with other measures into a parametric curve.}
#' \item{\code{rmse}:}{Root-mean-squared error. Only for use with
#' numerical class labels. \eqn{RMSE:=\sqrt{\frac{1}{P+N}\sum_i (y_i
#' - \hat{y}_i)^2}}{RMSE := sqrt(1/(P+N) \sum_i (y_i -
#' yhat_i)^2)}. Since the output of
#' \code{rmse} is just a cutoff-independent scalar, this
#' measure cannot be combined with other measures into a parametric curve.}
#' \item{\code{sar}:}{Score combining performance measures of different
#' characteristics, in an attempt to create a more "robust"
#' measure (cf. Caruana R., ROCAI2004):
#' SAR = 1/3 * ( accuracy + Area under the ROC curve + Root
#' mean-squared error ).}
#' \item{\code{ecost}:}{Expected cost. For details on cost curves,
#' cf. Drummond & Holte 2000, 2004. \code{ecost} has an obligatory x
#' axis, the so-called 'probability-cost function'; thus it cannot be
#' combined with other measures. While \code{ecost} is ultimately concerned
#' with the lower envelope of a set of lines, it might be
#' instructive to plot the whole set of lines in addition to the lower
#' envelope. An example is given in \code{demo(ROCR)}.}
#' \item{\code{cost}:}{Cost of a classifier when
#' class-conditional misclassification costs are explicitly given.
#' Accepts the optional parameters \code{cost.fp} and
#' \code{cost.fn}, by which the costs for false positives and
#' negatives can be adjusted, respectively. By default, both are set
#' to 1.}
#' }
#'
#' @note
#' Here is how to call \code{performance()} to create some standard
#' evaluation plots:
#' \describe{
#' \item{ROC curves:}{measure="tpr", x.measure="fpr".}
#' \item{Precision/recall graphs:}{measure="prec", x.measure="rec".}
#' \item{Sensitivity/specificity plots:}{measure="sens", x.measure="spec".}
#' \item{Lift charts:}{measure="lift", x.measure="rpp".}
#' }
#'
#' @param prediction.obj An object of class \code{prediction}.
#' @param measure Performance measure to use for the evaluation. A complete list
#' of the performance measures that are available for \code{measure} and
#' \code{x.measure} is given in the 'Details' section.
#' @param x.measure A second performance measure. If different from the default,
#' a two-dimensional curve, with \code{x.measure} taken to be the unit in
#' direction of the x axis, and \code{measure} to be the unit in direction of
#' the y axis, is created. This curve is parametrized with the cutoff.
#' @param ... Optional arguments (specific to individual performance measures).
#'
#' @return An S4 object of class \code{performance}.
#'
#' @author
#' Tobias Sing \email{tobias.sing@gmail.com}, Oliver Sander
#' \email{osander@gmail.com}
#'
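#' @examples
#' ## A minimal sketch with simulated data (illustrative only):
#' pred <- prediction(c(0.9, 0.8, 0.7, 0.4, 0.3, 0.1), c(1, 1, 0, 1, 0, 0))
#' ## ROC curve: tpr vs. fpr, parametrized by the cutoff
#' perf_roc <- performance(pred, measure = "tpr", x.measure = "fpr")
#' ## cutoff-independent scalar measure: area under the ROC curve
#' perf_auc <- performance(pred, measure = "auc")
#'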
#' @export
performance <- function(prediction.obj,
measure,
x.measure="cutoff",
...) {
## define the needed environments
envir.list <- .define.environments()
long.unit.names <- envir.list$long.unit.names
function.names <- envir.list$function.names
obligatory.x.axis <- envir.list$obligatory.x.axis
optional.arguments <- envir.list$optional.arguments
default.values <- envir.list$default.values
## abort in case of misuse
if (!is(prediction.obj, "prediction") ||
!exists(measure, where=long.unit.names, inherits=FALSE) ||
!exists(x.measure, where=long.unit.names, inherits=FALSE)) {
stop(paste("Wrong argument types: First argument must be of type",
"'prediction'; second and optional third argument must",
"be available performance measures!"))
}
## abort, if attempt is made to use a measure that has an obligatory
## x.axis as the x.measure (cannot be combined)
if (exists( x.measure, where=obligatory.x.axis, inherits=FALSE )) {
message <- paste("The performance measure",
x.measure,
"can only be used as 'measure', because it has",
"the following obligatory 'x.measure':\n",
get( x.measure, envir=obligatory.x.axis))
stop(message)
}
## if measure is a performance measure with obligatory x.axis, then
## enforce this axis:
if (exists( measure, where=obligatory.x.axis, inherits=FALSE )) {
x.measure <- get( measure, envir=obligatory.x.axis )
}
if (x.measure == "cutoff" ||
exists( measure, where=obligatory.x.axis, inherits=FALSE )) {
## fetch from '...' any optional arguments for the performance
## measure at hand that are given, otherwise fill up the default values
optional.args <- list(...)
argnames <- c()
if ( exists( measure, where=optional.arguments, inherits=FALSE )) {
argnames <- get( measure, envir=optional.arguments )
default.arglist <- list()
for (i in 1:length(argnames)) {
default.arglist <- c(default.arglist,
get(paste(measure,":",argnames[i],sep=""),
envir=default.values, inherits=FALSE))
}
names(default.arglist) <- argnames
for (i in 1:length(argnames)) {
templist <- list(optional.args,
default.arglist[[i]])
names(templist) <- c('arglist', argnames[i])
optional.args <- do.call('.farg', templist)
}
}
optional.args <- .select.args( optional.args, argnames )
## determine function name
function.name <- get( measure, envir=function.names )
## for each x-validation run, compute the requested performance measure
x.values <- list()
y.values <- list()
for (i in 1:length( prediction.obj@predictions )) {
argumentlist <- .sarg(optional.args,
predictions= prediction.obj@predictions[[i]],
labels= prediction.obj@labels[[i]],
cutoffs= prediction.obj@cutoffs[[i]],
fp= prediction.obj@fp[[i]],
tp= prediction.obj@tp[[i]],
fn= prediction.obj@fn[[i]],
tn= prediction.obj@tn[[i]],
n.pos= prediction.obj@n.pos[[i]],
n.neg= prediction.obj@n.neg[[i]],
n.pos.pred= prediction.obj@n.pos.pred[[i]],
n.neg.pred= prediction.obj@n.neg.pred[[i]])
ans <- do.call( function.name, argumentlist )
if (!is.null(ans[[1]])) x.values <- c( x.values, list( ans[[1]] ))
y.values <- c( y.values, list( ans[[2]] ))
}
if (! (length(x.values)==0 || length(x.values)==length(y.values)) ) {
stop("Consistency error.")
}
## create a new performance object
return( new("performance",
x.name = get( x.measure, envir=long.unit.names ),
y.name = get( measure, envir=long.unit.names ),
alpha.name = "none",
x.values = x.values,
y.values = y.values,
alpha.values = list() ))
} else {
perf.obj.1 <- performance( prediction.obj, measure=x.measure, ... )
perf.obj.2 <- performance( prediction.obj, measure=measure, ... )
return( .combine.performance.objects( perf.obj.1, perf.obj.2 ) )
}
}
#' @importFrom stats approxfun
.combine.performance.objects <- function( p.obj.1, p.obj.2 ) {
## some checks for misusage (in any way, this function is
## only for internal use)
if ( p.obj.1@x.name != p.obj.2@x.name ) {
stop("Error: Objects need to have identical x axis.")
}
if ( p.obj.1@alpha.name != "none" || p.obj.2@alpha.name != "none") {
stop("Error: At least one of the two objects has already been merged.")
}
if (length(p.obj.1@x.values) != length(p.obj.2@x.values)) {
stop(paste("Only performance objects with identical number of",
"cross-validation runs can be combined."))
}
x.values <- list()
x.name <- p.obj.1@y.name
y.values <- list()
y.name <- p.obj.2@y.name
alpha.values <- list()
alpha.name <- p.obj.1@x.name
for (i in 1:length( p.obj.1@x.values )) {
x.values.1 <- p.obj.1@x.values[[i]]
y.values.1 <- p.obj.1@y.values[[i]]
x.values.2 <- p.obj.2@x.values[[i]]
y.values.2 <- p.obj.2@y.values[[i]]
## cutoffs of combined object = merged cutoffs of simple objects
cutoffs <- sort( unique( c(x.values.1, x.values.2)), decreasing=TRUE )
## calculate y.values at cutoffs using step function
y.values.int.1 <- stats::approxfun(x.values.1, y.values.1,
method="constant",f=1,rule=2)(cutoffs)
y.values.int.2 <- stats::approxfun(x.values.2, y.values.2,
method="constant",f=1,rule=2)(cutoffs)
## 'approxfun' ignores NA and NaN
objs <- list( y.values.int.1, y.values.int.2)
objs.x <- list( x.values.1, x.values.2 )
na.cutoffs.1.bool <- is.na( y.values.1) & !is.nan( y.values.1 )
nan.cutoffs.1.bool <- is.nan( y.values.1)
na.cutoffs.2.bool <- is.na( y.values.2) & !is.nan( y.values.2 )
nan.cutoffs.2.bool <- is.nan( y.values.2)
bools <- list(na.cutoffs.1.bool, nan.cutoffs.1.bool,
na.cutoffs.2.bool, nan.cutoffs.2.bool)
values <- c(NA,NaN,NA,NaN)
for (j in 1:4) {
for (k in which(bools[[j]])) {
interval.max <- objs.x[[ ceiling(j/2) ]][k]
interval.min <- -Inf
if (k < length(objs.x[[ ceiling(j/2) ]])) {
interval.min <- objs.x[[ ceiling(j/2) ]][k+1]
}
objs[[ ceiling(j/2) ]][cutoffs <= interval.max &
cutoffs > interval.min ] <- values[j]
}
}
alpha.values <- c(alpha.values, list(cutoffs))
x.values <- c(x.values, list(objs[[1]]))
y.values <- c(y.values, list(objs[[2]]))
}
return( new("performance",
x.name=x.name, y.name=y.name,
alpha.name=alpha.name, x.values=x.values,
y.values=y.values, alpha.values=alpha.values))
}
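## Illustrative sketch (for documentation only, assuming a prediction
## object 'pred' created with prediction()):
##   p.fpr <- performance(pred, measure = "fpr")   # fpr vs. cutoff
##   p.tpr <- performance(pred, measure = "tpr")   # tpr vs. cutoff
##   .combine.performance.objects(p.fpr, p.tpr)    # same as performance(pred, "tpr", "fpr")
## The combined object stores the merged cutoffs in alpha.values and the
## fpr/tpr values, step-interpolated at those cutoffs, in x.values/y.values.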
.define.environments <- function() {
## There are five environments: long.unit.names, function.names,
## obligatory.x.axis, optional.arguments, default.values
## Define long names corresponding to the measure abbreviations.
long.unit.names <- new.env()
assign("none","None", envir=long.unit.names)
assign("cutoff", "Cutoff", envir=long.unit.names)
assign("acc", "accuracy", envir=long.unit.names)
assign("err", "Error Rate", envir=long.unit.names)
assign("fpr", "False positive rate", envir=long.unit.names)
assign("tpr", "True positive rate", envir=long.unit.names)
assign("rec", "recall", envir=long.unit.names)
assign("sens", "sensitivity", envir=long.unit.names)
assign("fnr", "False negative rate", envir=long.unit.names)
assign("tnr", "True negative rate", envir=long.unit.names)
assign("spec", "specificity", envir=long.unit.names)
assign("ppv", "Positive predictive value", envir=long.unit.names)
assign("prec", "precision", envir=long.unit.names)
assign("npv", "Negative predictive value", envir=long.unit.names)
assign("fall", "Fallout", envir=long.unit.names)
assign("miss", "Miss", envir=long.unit.names)
assign("pcfall", "Prediction-conditioned fallout", envir=long.unit.names)
assign("pcmiss", "Prediction-conditioned miss", envir=long.unit.names)
assign("rpp", "Rate of positive predictions", envir=long.unit.names)
assign("rnp", "Rate of negative predictions", envir=long.unit.names)
assign("auc","Area under the ROC curve", envir=long.unit.names)
assign("aucpr","Area under the precision/recall curve", envir=long.unit.names)
assign("cal", "Calibration error", envir=long.unit.names)
assign("mwp", "Median window position", envir=long.unit.names)
assign("prbe","precision/recall break-even point", envir=long.unit.names)
assign("rch", "ROC convex hull", envir=long.unit.names)
assign("mxe", "Mean cross-entropy", envir=long.unit.names)
assign("rmse","Root-mean-square error", envir=long.unit.names)
assign("phi", "Phi correlation coefficient", envir=long.unit.names)
assign("mat","Matthews correlation coefficient", envir=long.unit.names)
assign("mi", "Mutual information", envir=long.unit.names)
assign("chisq", "Chi-square test statistic", envir=long.unit.names)
assign("odds","Odds ratio", envir=long.unit.names)
assign("lift", "Lift value", envir=long.unit.names)
assign("f","precision-recall F measure", envir=long.unit.names)
assign("sar", "SAR", envir=long.unit.names)
assign("ecost", "Expected cost", envir=long.unit.names)
assign("cost", "Explicit cost", envir=long.unit.names)
## Define function names corresponding to the measure abbreviations.
function.names <- new.env()
assign("acc", ".performance.accuracy", envir=function.names)
assign("err", ".performance.error.rate", envir=function.names)
assign("fpr", ".performance.false.positive.rate", envir=function.names)
assign("tpr", ".performance.true.positive.rate", envir=function.names)
assign("rec", ".performance.true.positive.rate", envir=function.names)
assign("sens", ".performance.true.positive.rate", envir=function.names)
assign("fnr", ".performance.false.negative.rate", envir=function.names)
assign("tnr", ".performance.true.negative.rate", envir=function.names)
assign("spec", ".performance.true.negative.rate", envir=function.names)
assign("ppv", ".performance.positive.predictive.value",
envir=function.names)
assign("prec", ".performance.positive.predictive.value",
envir=function.names)
assign("npv", ".performance.negative.predictive.value",
envir=function.names)
assign("fall", ".performance.false.positive.rate", envir=function.names)
assign("miss", ".performance.false.negative.rate", envir=function.names)
assign("pcfall", ".performance.prediction.conditioned.fallout",
envir=function.names)
assign("pcmiss", ".performance.prediction.conditioned.miss",
envir=function.names)
assign("rpp", ".performance.rate.of.positive.predictions",
envir=function.names)
assign("rnp", ".performance.rate.of.negative.predictions",
envir=function.names)
assign("auc", ".performance.auc", envir=function.names)
assign("aucpr", ".performance.aucpr", envir=function.names)
assign("cal", ".performance.calibration.error", envir=function.names)
assign("prbe", ".performance.precision.recall.break.even.point",
envir=function.names)
assign("rch", ".performance.rocconvexhull", envir=function.names)
assign("mxe", ".performance.mean.cross.entropy", envir=function.names)
assign("rmse", ".performance.root.mean.squared.error",
envir=function.names)
assign("phi", ".performance.phi", envir=function.names)
assign("mat", ".performance.phi", envir=function.names)
assign("mi", ".performance.mutual.information", envir=function.names)
assign("chisq", ".performance.chisq", envir=function.names)
assign("odds", ".performance.odds.ratio", envir=function.names)
assign("lift", ".performance.lift", envir=function.names)
assign("f", ".performance.f", envir=function.names)
assign("sar", ".performance.sar", envir=function.names)
assign("ecost", ".performance.expected.cost", envir=function.names)
assign("cost", ".performance.cost", envir=function.names)
## If a measure comes along with an obligatory x axis (including "none"),
## list it here.
obligatory.x.axis <- new.env()
assign("mxe", "none", envir=obligatory.x.axis)
assign("rmse", "none", envir=obligatory.x.axis)
assign("prbe", "none", envir=obligatory.x.axis)
assign("auc", "none", envir=obligatory.x.axis)
assign("aucpr", "none", envir=obligatory.x.axis)
assign("rch","none", envir=obligatory.x.axis)
## ecost requires probability cost function as x axis, which is handled
## implicitly, not as an explicit performance measure.
assign("ecost","none", envir=obligatory.x.axis)
## If a measure has optional arguments, list the names of the
## arguments here.
optional.arguments <- new.env()
assign("cal", "window.size", envir=optional.arguments)
assign("f", "alpha", envir=optional.arguments)
assign("cost", c("cost.fp", "cost.fn"), envir=optional.arguments)
assign("auc", "fpr.stop", envir=optional.arguments)
## If a measure has additional arguments, list the default values
## for them here. Naming convention: e.g. "cal" has an optional
## argument "window.size" the key to use here is "cal:window.size"
## (colon as separator)
default.values <- new.env()
assign("cal:window.size", 100, envir=default.values)
assign("f:alpha", 0.5, envir=default.values)
assign("cost:cost.fp", 1, envir=default.values)
assign("cost:cost.fn", 1, envir=default.values)
assign("auc:fpr.stop", 1, envir=default.values)
list(long.unit.names=long.unit.names, function.names=function.names,
obligatory.x.axis=obligatory.x.axis,
optional.arguments=optional.arguments,
default.values=default.values)
}
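## Illustrative lookups (for documentation only):
##   envs <- .define.environments()
##   get("auc", envir = envs$long.unit.names)          # "Area under the ROC curve"
##   get("auc", envir = envs$function.names)           # ".performance.auc"
##   get("auc:fpr.stop", envir = envs$default.values)  # default fpr.stop of 1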