% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/methods.R
\name{plot.vsel}
\alias{plot.vsel}
\title{Plot predictive performance}
\usage{
\method{plot}{vsel}(
  x,
  nterms_max = NULL,
  stats = "elpd",
  deltas = FALSE,
  alpha = 2 * pnorm(-1),
  baseline = if (!inherits(x$refmodel, "datafit")) "ref" else "best",
  thres_elpd = NA,
  resp_oscale = TRUE,
  point_size = getOption("projpred.plot_vsel_point_size", 3),
  bar_thickness = getOption("projpred.plot_vsel_bar_thickness", 1),
  ranking_nterms_max = getOption("projpred.plot_vsel_ranking_nterms_max", NULL),
  ranking_abbreviate = getOption("projpred.plot_vsel_ranking_abbreviate", FALSE),
  ranking_abbreviate_args = getOption("projpred.plot_vsel_ranking_abbreviate_args",
    list()),
  ranking_repel = getOption("projpred.plot_vsel_ranking_repel", NULL),
  ranking_repel_args = getOption("projpred.plot_vsel_ranking_repel_args", list()),
  ranking_colored = getOption("projpred.plot_vsel_ranking_colored", FALSE),
  show_cv_proportions = getOption("projpred.plot_vsel_show_cv_proportions", FALSE),
  cumulate = FALSE,
  text_angle = getOption("projpred.plot_vsel_text_angle", 45),
  size_position = getOption("projpred.plot_vsel_size_position", "primary_x_top"),
  ...
)
}
\arguments{
\item{x}{An object of class \code{vsel} (returned by \code{\link[=varsel]{varsel()}} or \code{\link[=cv_varsel]{cv_varsel()}}).}

\item{nterms_max}{Maximum submodel size (number of predictor terms) for which
the performance statistics are calculated. Using \code{NULL} is effectively the
same as \code{length(ranking(x)$fulldata)}. Note that \code{nterms_max} does not
count the intercept, so use \code{nterms_max = 0} for the intercept-only model.
For \code{\link[=plot.vsel]{plot.vsel()}}, \code{nterms_max} must be at least \code{1}.}

\item{stats}{One or more character strings determining which performance
statistics (i.e., utilities or losses) to estimate based on the
observations in the evaluation (or "test") set (in case of
cross-validation, these are all observations because they are partitioned
into multiple test sets; in case of \code{\link[=varsel]{varsel()}} with \code{d_test = NULL}, these
are again all observations because the test set is the same as the training
set). Available statistics are:
\itemize{
\item \code{"elpd"}: expected log (pointwise) predictive density (for a new
dataset) (ELPD). Estimated by the sum of the observation-specific log
predictive density values (with each of these predictive density values
being a---possibly weighted---average across the parameter draws). For the
corresponding uncertainty interval, a normal approximation is used.
\item \code{"mlpd"}: mean log predictive density (MLPD), that is, the ELPD divided
by the number of observations. For the corresponding uncertainty interval,
a normal approximation is used.
\item \code{"gmpd"}: geometric mean predictive density (GMPD), that is, \code{\link[=exp]{exp()}} of
the MLPD. The GMPD is especially helpful for discrete response families
(because there, the GMPD is bounded by zero and one). For the corresponding
standard error, the delta method is used. The corresponding uncertainty
interval type is "exponentiated normal approximation" because the
uncertainty interval bounds are the exponentiated uncertainty interval
bounds of the MLPD.
\item \code{"mse"}: mean squared error (only available in the situations mentioned
in section "Details" below). For the corresponding uncertainty interval, a
log-normal approximation is used if \code{deltas} is \code{FALSE} and a normal
approximation is used if \code{deltas} is \code{TRUE} (or \code{"mixed"}, in case of
\code{\link[=plot.vsel]{plot.vsel()}}).
\item \code{"rmse"}: root mean squared error (only available in the situations
mentioned in section "Details" below). For the corresponding standard
error, the delta method is used. For the corresponding uncertainty
interval, a log-normal approximation is used if \code{deltas} is \code{FALSE} and a
normal approximation is used if \code{deltas} is \code{TRUE} (or \code{"mixed"}, in case
of \code{\link[=plot.vsel]{plot.vsel()}}).
\item \code{"R2"}: R-squared, i.e., coefficient of determination (only available in
the situations mentioned in section "Details" below). For the corresponding
standard error, the delta method is used. For the corresponding uncertainty
interval, a normal approximation is used.
\item \code{"acc"} (or its alias, \code{"pctcorr"}): classification accuracy (only
available in the situations mentioned in section "Details" below). By
"classification accuracy", we mean the proportion of correctly classified
observations. For this, the response category ("class") with highest
probability (the probabilities are model-based) is taken as the prediction
("classification") for an observation. For the corresponding uncertainty
interval, a normal approximation is used.
\item \code{"auc"}: area under the ROC curve (only available in the situations
mentioned in section "Details" below). For the corresponding standard error
and lower and upper uncertainty interval bounds, bootstrapping is used. Not
supported in case of subsampled LOO-CV (see argument \code{nloo} of
\code{\link[=cv_varsel]{cv_varsel()}}).
}}

\item{deltas}{May be set to \code{FALSE}, \code{TRUE}, or \code{"mixed"}. If \code{FALSE}, the
submodel performance statistics are plotted on their actual scale and the
uncertainty bars match this scale. If \code{TRUE}, the submodel statistics are
plotted relatively to the baseline model (see argument \code{baseline}) and the
uncertainty bars match this scale. For the GMPD, the term "relatively"
refers to the \emph{ratio} vs. the baseline model (i.e., the submodel statistic
divided by the baseline model statistic). For all other \code{stats},
"relatively" refers to the \emph{difference} from the baseline model (i.e., the
submodel statistic minus the baseline model statistic). If set to
\code{"mixed"}, the \code{deltas = FALSE} point estimates are combined with the
uncertainty bars from the \code{deltas = TRUE} plot.}

\item{alpha}{A number determining the (nominal) coverage \code{1 - alpha} of the
uncertainty intervals. For example, in case of a normal-approximation
uncertainty interval, \code{alpha = 2 * pnorm(-1)} corresponds to an uncertainty
interval stretching by one standard error on either side of the point
estimate.}

\item{baseline}{For \code{\link[=summary.vsel]{summary.vsel()}}: Only relevant if \code{deltas} is \code{TRUE}.
For \code{\link[=plot.vsel]{plot.vsel()}}: Always relevant. Either \code{"ref"} or \code{"best"}, indicating
whether the baseline is the reference model or the best submodel found (in
terms of \code{stats[1]}), respectively. In case of subsampled LOO-CV, \code{baseline = "best"} is not supported.}

\item{thres_elpd}{Only relevant if \code{any(stats \%in\% c("elpd", "mlpd", "gmpd"))}. The threshold for the ELPD difference (taking the submodel's
ELPD minus the baseline model's ELPD) above which the submodel's ELPD is
considered to be close enough to the baseline model's ELPD. An equivalent
rule is applied in case of the MLPD and the GMPD. See \code{\link[=suggest_size]{suggest_size()}} for
a formalization. Supplying \code{NA} deactivates this.}

\item{resp_oscale}{Only relevant for the latent projection. A single logical
value indicating whether to calculate the performance statistics on the
original response scale (\code{TRUE}) or on the latent scale (\code{FALSE}).}

\item{point_size}{Passed to argument \code{size} of \code{\link[ggplot2:geom_point]{ggplot2::geom_point()}} and
controls the size of the points.}

\item{bar_thickness}{Passed to argument \code{linewidth} of
\code{\link[ggplot2:geom_linerange]{ggplot2::geom_linerange()}} and controls the thickness of the uncertainty
bars.}

\item{ranking_nterms_max}{Maximum submodel size (number of predictor terms)
for which the predictor names and the corresponding ranking proportions are
added on the x-axis. Using \code{NULL} is effectively the same as using
\code{nterms_max}. Using \code{NA} causes the predictor names and the corresponding
ranking proportions to be omitted, which requires \code{size_position = "primary_x_bottom"}. Note that \code{ranking_nterms_max} does not count the
intercept, so \code{ranking_nterms_max = 1} corresponds to the submodel
consisting of the first (non-intercept) predictor term.}

\item{ranking_abbreviate}{A single logical value indicating whether the
predictor names in the full-data predictor ranking should be abbreviated by
\code{\link[=abbreviate]{abbreviate()}} (\code{TRUE}) or not (\code{FALSE}). See also argument
\code{ranking_abbreviate_args} and section "Value".}

\item{ranking_abbreviate_args}{A \code{list} of arguments (except for \code{names.arg})
to be passed to \code{\link[=abbreviate]{abbreviate()}} in case of \code{ranking_abbreviate = TRUE}.}

\item{ranking_repel}{Either \code{NULL}, \code{"text"}, or \code{"label"}. By \code{NULL}, the
full-data predictor ranking and the corresponding ranking proportions are
placed below the x-axis. By \code{"text"} or \code{"label"}, they are placed within
the plotting area, using \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_text_repel()}} or
\code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}}, respectively. See also argument
\code{ranking_repel_args}.}

\item{ranking_repel_args}{A \code{list} of arguments (except for \code{mapping}) to be
passed to \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_text_repel()}} or \code{\link[ggrepel:geom_text_repel]{ggrepel::geom_label_repel()}} in
case of \code{ranking_repel = "text"} or \code{ranking_repel = "label"},
respectively.}

\item{ranking_colored}{A single logical value indicating whether the points
and the uncertainty bars should be gradient-colored according to the CV
ranking proportions (\code{TRUE}, currently only works if \code{show_cv_proportions}
is \code{TRUE} as well) or not (\code{FALSE}). The CV ranking proportions may be
cumulated (see argument \code{cumulate}). Note that the point and the
uncertainty bar at submodel size 0 (i.e., at the intercept-only model) are
always colored in gray because the intercept is forced to be selected
before any predictors are selected (in other words, the reason is that for
submodel size 0, the question of variability across CV folds is not
appropriate in the first place).}

\item{show_cv_proportions}{A single logical value indicating whether the CV
ranking proportions (see \code{\link[=cv_proportions]{cv_proportions()}}) should be displayed (\code{TRUE})
or not (\code{FALSE}).}

\item{cumulate}{Passed to argument \code{cumulate} of \code{\link[=cv_proportions]{cv_proportions()}}. Affects
the ranking proportions given on the x-axis (below the full-data predictor
ranking).}

\item{text_angle}{Passed to argument \code{angle} of \code{\link[ggplot2:element]{ggplot2::element_text()}} for
the x-axis tick labels. Note that the default of argument \code{angle} in
\code{\link[ggplot2:element]{ggplot2::element_text()}} is \code{NULL} (which implies no rotation) whereas we
use a default of \code{text_angle = 45} here. If \code{text_angle > 0} (\verb{< 0}), the
x-axis text is automatically right-aligned (left-aligned). If \code{-90 < text_angle && text_angle < 90 && text_angle != 0}, the x-axis text is also
top-aligned. When controlling \code{text_angle} via global option
\code{projpred.plot_vsel_text_angle}, keep in mind that a global option set to
\code{NULL} is treated like an unset global option, so
\code{options(projpred.plot_vsel_text_angle = NULL)} would result in \code{text_angle = 45}, not \code{text_angle = 0}.}

\item{size_position}{A single character string specifying the position of the
submodel sizes. Either \code{"primary_x_bottom"} for including them in the
x-axis tick labels, \code{"primary_x_top"} for putting them above the x-axis
(the current default), or \code{"secondary_x"} for putting them into a secondary
x-axis. Currently, \code{"primary_x_top"} and \code{"secondary_x"} may not be
combined with \code{ranking_nterms_max = NA} (i.e., only \code{"primary_x_bottom"}
works with \code{ranking_nterms_max = NA}).}

\item{...}{Arguments passed to the internal function which is used for
bootstrapping (if applicable; see argument \code{stats}). Currently, relevant
arguments are \code{B} (the number of bootstrap samples, defaulting to \code{2000})
and \code{seed} (see \code{\link[=set.seed]{set.seed()}}, but defaulting to \code{NA} so that \code{\link[=set.seed]{set.seed()}}
is not called within that function at all).}
}
\value{
A \pkg{ggplot2} plotting object (of class \code{gg} and \code{ggplot}). If
\code{ranking_abbreviate} is \code{TRUE}, the output of \code{\link[=abbreviate]{abbreviate()}} is stored in
an attribute called \code{projpred_ranking_abbreviated} (to allow the
abbreviations to be easily mapped back to the original predictor names).
}
\description{
This is the \code{\link[=plot]{plot()}} method for \code{vsel} objects (returned by \code{\link[=varsel]{varsel()}} or
\code{\link[=cv_varsel]{cv_varsel()}}). It visualizes the predictive performance of the reference
model (possibly also that of some other "baseline" model) and that of the
submodels along the full-data predictor ranking. Basic information about the
(CV) variability in the ranking of the predictors is included as well (if
available; inferred from \code{\link[=cv_proportions]{cv_proportions()}}). For a tabular representation,
see \code{\link[=summary.vsel]{summary.vsel()}} and \code{\link[=performances]{performances()}}.
}
\details{
The \code{stats} options \code{"mse"}, \code{"rmse"}, and \code{"R2"} are only available
for:
\itemize{
\item the traditional projection,
\item the latent projection with \code{resp_oscale = FALSE},
\item the latent projection with \code{resp_oscale = TRUE} in combination with
\verb{<refmodel>$family$cats} being \code{NULL}.
}

The \code{stats} option \code{"acc"} (= \code{"pctcorr"}) is only available for:
\itemize{
\item the \code{\link[=binomial]{binomial()}} family in case of the traditional projection,
\item all families in case of the augmented-data projection,
\item the \code{\link[=binomial]{binomial()}} family (on the original response scale) in case of the
latent projection with \code{resp_oscale = TRUE} in combination with
\verb{<refmodel>$family$cats} being \code{NULL},
\item all families (on the original response scale) in case of the latent
projection with \code{resp_oscale = TRUE} in combination with
\verb{<refmodel>$family$cats} not being \code{NULL}.
}

The \code{stats} option \code{"auc"} is only available for:
\itemize{
\item the \code{\link[=binomial]{binomial()}} family in case of the traditional projection,
\item the \code{\link[=binomial]{binomial()}} family (on the original response scale) in case of the
latent projection with \code{resp_oscale = TRUE} in combination with
\verb{<refmodel>$family$cats} being \code{NULL}.
}

Note that the \code{stats} option \code{"auc"} is not supported in case of subsampled
LOO-CV (see argument \code{nloo} of \code{\link[=cv_varsel]{cv_varsel()}}).
}
\section{Horizontal lines}{
As long as the reference model's performance is computable, it is always
shown in the plot as a dashed red horizontal line. If \code{baseline = "best"},
the baseline model's performance is shown as a dotted black horizontal line.
If \code{!is.na(thres_elpd)} and \code{any(stats \%in\% c("elpd", "mlpd", "gmpd"))}, the
value supplied to \code{thres_elpd} (which is automatically adapted internally in
case of the MLPD or the GMPD or \code{deltas = FALSE} or \code{deltas = "mixed"}) is
shown as a dot-dashed gray horizontal line for the reference model and, if
\code{baseline = "best"}, as a long-dashed green horizontal line for the baseline
model.
}

\examples{
\dontshow{if (requireNamespace("rstanarm", quietly = TRUE)) (if (getRversion() >= "3.4") withAutoprint else force)(\{ # examplesIf}
# Data:
dat_gauss <- data.frame(y = df_gaussian$y, df_gaussian$x)

# The `stanreg` fit which will be used as the reference model (with small
# values for `chains` and `iter`, but only for technical reasons in this
# example; this is not recommended in general):
fit <- rstanarm::stan_glm(
  y ~ X1 + X2 + X3 + X4 + X5, family = gaussian(), data = dat_gauss,
  QR = TRUE, chains = 2, iter = 500, refresh = 0, seed = 9876
)

# Run varsel() (here without cross-validation, with L1 search, and with small
# values for `nterms_max` and `nclusters_pred`, but only for the sake of
# speed in this example; this is not recommended in general):
vs <- varsel(fit, method = "L1", nterms_max = 3, nclusters_pred = 10,
             seed = 5555)
print(plot(vs))
\dontshow{\}) # examplesIf}
}
