% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/kcmeans.R
\name{kcmeans}
\alias{kcmeans}
\title{K-Conditional-Means Estimator}
\usage{
kcmeans(y, X, which_is_cat = 1, K = 2)
}
\arguments{
\item{y}{The outcome variable, a numerical vector.}

\item{X}{A (sparse) feature matrix where one column is the categorical
predictor.}

\item{which_is_cat}{An integer indicating which column of \code{X}
corresponds to the categorical predictor.}

\item{K}{The number of support points, an integer of at least 2 (the
default).}
}
\value{
\code{kcmeans} returns an object of S3 class \code{kcmeans}. An
object of class \code{kcmeans} is a list containing the following
components:
\describe{
\item{\code{cluster_map}}{A matrix that characterizes the estimated
predictor of the residualized outcome
\eqn{\tilde{Y} \equiv Y - X_{2:}^\top \hat{\pi}}. Each row
corresponds to one value of the categorical variable. The first
column \code{x} gives that value; the remaining columns give the
corresponding unrestricted sample mean \code{mean_x} of
\eqn{\tilde{Y}}, the sample share \code{p_x}, the estimated
cluster \code{cluster_x}, and the estimated restricted sample mean
\code{mean_xK} of \eqn{\tilde{Y}} with just \code{K} support
points.}
\item{\code{mean_y}}{The unconditional sample mean of
\eqn{\tilde{Y}}.}
\item{\code{pi}}{The best linear prediction coefficients of \eqn{Y}
on \eqn{X} corresponding to the non-categorical predictors
\eqn{X_{2:}}.}
\item{\code{which_is_cat}, \code{K}}{Pass-through of the
user-provided arguments. See above for details.}
}
}
\description{
Implementation of the K-Conditional-Means estimator.
}
\examples{
# Simulate simple dataset with n=800 observations
X <- rnorm(800) # continuous predictor
Z <- sample(1:20, 800, replace = TRUE) # categorical predictor
Z0 <- Z \%\% 4 # lower-dimensional latent categorical variable
y <- Z0 + X + rnorm(800) # outcome
# Compute kcmeans with four support points
kcmeans_fit <- kcmeans(y, cbind(Z, X), K = 4)
# Print the estimated support points of the categorical predictor
print(unique(kcmeans_fit$cluster_map[, "mean_xK"]))
}
\references{
Wang H and Song M (2011). "Ckmeans.1d.dp: optimal k-means clustering in one
dimension by dynamic programming." The R Journal 3(2), 29--33.

Wiemann T (2023). "Optimal Categorical Instruments." \url{https://arxiv.org/abs/2311.17021}
}
