% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/light_profile.R
\name{light_profile}
\alias{light_profile}
\alias{light_profile.default}
\alias{light_profile.flashlight}
\alias{light_profile.multiflashlight}
\title{Partial Dependence and other Profiles}
\usage{
light_profile(x, ...)

\method{light_profile}{default}(x, ...)

\method{light_profile}{flashlight}(
  x,
  v = NULL,
  data = NULL,
  by = x$by,
  type = c("partial dependence", "ale", "predicted", "response", "residual", "shap"),
  stats = "mean",
  breaks = NULL,
  n_bins = 11L,
  cut_type = c("equal", "quantile"),
  use_linkinv = TRUE,
  counts = TRUE,
  counts_weighted = FALSE,
  v_labels = TRUE,
  pred = NULL,
  pd_evaluate_at = NULL,
  pd_grid = NULL,
  pd_indices = NULL,
  pd_n_max = 1000L,
  pd_seed = NULL,
  pd_center = c("no", "first", "middle", "last", "mean", "0"),
  ale_two_sided = FALSE,
  ...
)

\method{light_profile}{multiflashlight}(
  x,
  v = NULL,
  data = NULL,
  type = c("partial dependence", "ale", "predicted", "response", "residual", "shap"),
  breaks = NULL,
  n_bins = 11L,
  cut_type = c("equal", "quantile"),
  pd_evaluate_at = NULL,
  pd_grid = NULL,
  ...
)
}
\arguments{
\item{x}{An object of class "flashlight" or "multiflashlight".}

\item{...}{Further arguments passed to \code{\link[=formatC]{formatC()}} in forming the
cut breaks of the \code{v} variable.}

\item{v}{The variable name to be profiled.}

\item{data}{An optional \code{data.frame}.}

\item{by}{An optional vector of column names used to additionally group the results.}

\item{type}{Type of the profile: Either "partial dependence", "ale", "predicted",
"response", or "residual".}

\item{stats}{Deprecated. Will be removed in version 1.1.0.}

\item{breaks}{Cut breaks for a numeric \code{v}. Used to overwrite automatic binning via
\code{n_bins} and \code{cut_type}. Ignored if \code{v} is not numeric.}

\item{n_bins}{Approximate number of unique values to evaluate for numeric \code{v}.
Ignored if \code{v} is not numeric or if \code{breaks} is specified.}

\item{cut_type}{Should a numeric \code{v} be cut into "equal" or "quantile" bins?
Ignored if \code{v} is not numeric or if \code{breaks} is specified.}

\item{use_linkinv}{Should the retransformation function be applied? Default is \code{TRUE}.}

\item{counts}{Should observation counts be added?}

\item{counts_weighted}{If \code{counts = TRUE}: Should counts be weighted by the
case weights? If \code{TRUE}, the sum of \code{w} is returned by group.}

\item{v_labels}{If \code{FALSE}, return group centers of \code{v} instead of labels.
Only relevant for types "response", "predicted" or "residual" and if \code{v}
is being binned. In that case useful, for instance, if different flashlights
use different data sets and bin labels would not match.}

\item{pred}{Optional vector with predictions (after application of inverse link).
Can be used to avoid recalculation of predictions over and over if the function
is to be repeatedly called for different \code{v} and predictions are computationally
expensive to make. Not implemented for multiflashlight.}

\item{pd_evaluate_at}{Vector with values of \code{v} used to evaluate the profile.
Only relevant for type = "partial dependence" and "ale".}

\item{pd_grid}{A \code{data.frame} with grid values, e.g., generated by \code{\link[=expand.grid]{expand.grid()}}.
Only used for type = "partial dependence".}

\item{pd_indices}{A vector of row numbers to consider in calculating
partial dependence and ALE profiles.}

\item{pd_n_max}{Maximum number of ICE profiles to calculate (will be randomly
picked from \code{data}) for partial dependence and ALE.}

\item{pd_seed}{Integer random seed used to select ICE profiles for partial dependence
and ALE.}

\item{pd_center}{How should ICE curves be centered?
\itemize{
\item Default is "no".
\item Choose "first", "middle", or "last" to 0-center at specific evaluation points.
\item Choose "mean" to center all profiles at the within-group means.
\item Choose "0" to mean-center curves at 0. Only relevant for partial dependence.
}}

\item{ale_two_sided}{If \code{TRUE}, \code{v} is continuous and \code{breaks}
are passed or being calculated, then two-sided derivatives are calculated
for ALE instead of left derivatives. More specifically: Usually, local effects
at value x are calculated using points in \eqn{[x-e, x]}.
Set \code{ale_two_sided = TRUE} to use points in \eqn{[x-e/2, x+e/2]}.}
}
\value{
An object of class "light_profile" with the following elements:
\itemize{
\item \code{data} A tibble containing results.
\item \code{by} Names of the group-by variable(s).
\item \code{v} The variable(s) evaluated.
\item \code{type} Same as input \code{type}. For information only.
}
}
\description{
Calculates different types of profiles across covariable values.
By default, partial dependence profiles are calculated (see Friedman).
Other options are profiles of ALE (accumulated local effects, see Apley),
response, predicted values ("M plots" or "marginal plots", see Apley), and residuals.
The results are aggregated either by (weighted) means or by (weighted) quartiles.

Note that ALE profiles are calibrated by (weighted) average predictions.
In contrast to the suggestions in Apley, we calculate ALE profiles of factors
in the same order as the factor levels.
They are not reordered based on the similarity of other variables.
}
\details{
Numeric covariables \code{v} with more than \code{n_bins} distinct values
are binned into \code{n_bins} bins. Alternatively, \code{breaks} can be provided
to specify the binning. For partial dependence profiles
(and partly also ALE profiles), this behaviour can be overridden either
by providing a vector of evaluation points (\code{pd_evaluate_at}) or an
evaluation \code{pd_grid}. By the latter we mean a data frame whose named
column(s) hold a (multi-)variate evaluation grid.

For partial dependence, ALE, and prediction profiles, "model", "predict_function",
"linkinv" and "data" are required. For response profiles, these are "y", "linkinv"
and "data". "data" can also be passed on the fly.
}
\section{Methods (by class)}{
\itemize{
\item \code{light_profile(default)}: Default method not implemented yet.

\item \code{light_profile(flashlight)}: Profiles for flashlight.

\item \code{light_profile(multiflashlight)}: Profiles for multiflashlight.

}}
\examples{
fit_lin <- lm(Sepal.Length ~ ., data = iris)
fl_lin <- flashlight(model = fit_lin, label = "lin", data = iris, y = "Sepal.Length")

# PDP by Species
plot(light_profile(fl_lin, v = "Petal.Length", by = "Species"))

# Average predicted
plot(light_profile(fl_lin, v = "Petal.Length", type = "pred"))

# Second model with non-linear Petal.Length effect
fit_nonlin <- lm(Sepal.Length ~ . + I(Petal.Length^2), data = iris)
fl_nonlin <- flashlight(
  model = fit_nonlin, label = "nonlin", data = iris, y = "Sepal.Length"
)
fls <- multiflashlight(list(fl_lin, fl_nonlin))

# PDP by Species
plot(light_profile(fls, v = "Petal.Length", by = "Species"))
plot(light_profile(fls, v = "Petal.Length", by = "Species"), swap_dim = TRUE)

# Average residuals (calibration)
plot(light_profile(fls, v = "Petal.Length", type = "residual"))
}
\references{
\itemize{
\item Friedman J. H. (2001). Greedy function approximation: A gradient boosting machine.
The Annals of Statistics, 29:1189–1232.
\item Apley D. W. (2016). Visualizing the effects of predictor variables in black box
supervised learning models.
}
}
\seealso{
\code{\link[=light_effects]{light_effects()}}, \code{\link[=plot.light_profile]{plot.light_profile()}}
}
