% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/diagnostics-stability.R
\name{evaluate_subgroup_stability}
\alias{evaluate_subgroup_stability}
\title{Subgroup stability diagnostics}
\usage{
evaluate_subgroup_stability(
  estimator,
  fit,
  X,
  y,
  Z = NULL,
  rpart_control = NULL,
  B = 100,
  max_depth = NULL
)
}
\arguments{
\item{estimator}{Function used to estimate subgroups of individuals and their
corresponding estimated treatment effects. The function should take in
\code{X}, \code{y}, and optionally \code{Z} (if \code{Z} is not \code{NULL}) and return a
model fit (e.g., output of \code{rpart}) that can be coerced into a \code{party}
object via \code{partykit::as_party()}. Typically, \code{student_rpart}
will be used as the \code{estimator}.}

\item{fit}{Fitted subgroup model (often, the output of \code{estimator()}). Mainly
used to determine an appropriate \code{max_depth} for the stability diagnostics.
If \code{fit} is not an \code{rpart} object, stability diagnostics will be skipped.}

\item{X}{A tibble, data.frame, or matrix of covariates.}

\item{y}{A vector of responses to predict.}

\item{Z}{An optional vector of treatments. Default is \code{NULL}.}

\item{rpart_control}{A list of control parameters for the \code{rpart} algorithm.
See \code{? rpart.control} for details.}

\item{B}{Number of bootstrap samples to use in evaluating stability
diagnostics. Default is 100.}

\item{max_depth}{Maximum depth of the tree to consider when evaluating
stability diagnostics. If \code{NULL}, the default is
max(4, max depth of \code{fit}).}
}
\value{
A list with the following elements:
\item{jaccard_mean}{Vector of mean Jaccard similarity indices, one for each tree depth. The tree depth is given by the vector index.}
\item{jaccard_distribution}{List of Jaccard similarity indices across all bootstraps for each tree depth.}
\item{bootstrap_predictions}{List of mean student model predictions for the training (non-holdout) data across all bootstraps for each tree depth.}
\item{bootstrap_predictions_var}{List of variances of student model predictions for the training (non-holdout) data across all bootstraps for each tree depth.}
\item{leaf_ids}{List of leaf node identifiers, indicating the leaf membership of each training sample in the (original) fitted student model.}
}
\description{
This function evaluates the stability of the estimated
subgroups from causal distillation trees (CDT) using the Jaccard subgroup
stability index (SSI), developed in Huang et al. (2025). It is generally
recommended to choose teacher models in CDT that result in the most stable
subgroups, as indicated by high SSI values.
}
\examples{
\donttest{
n <- 200
p <- 10
X <- matrix(rnorm(n * p), nrow = n, ncol = p)
Z <- rbinom(n, 1, 0.5)
Y <- 2 * Z * (X[, 1] > 0) + X[, 2] + rnorm(n, 0.1)

# run causal distillation trees without stability diagnostics
out <- causalDT(X, Y, Z, B_stability = 0)
# run stability diagnostics
stability_out <- evaluate_subgroup_stability(
  estimator = student_rpart,
  fit = out$student_fit$fit,
  X = X[-out$holdout_idxs, , drop = FALSE],
  y = out$student_fit$predictions
)
}

}
\references{
Huang, M., Tang, T. M., and Kenney, A. M. (2025). Distilling heterogeneous treatment effects: Stable subgroup estimation in causal inference. \emph{arXiv preprint arXiv:2502.07275}.
}
