% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/mixgb.R
\name{mixgb}
\alias{mixgb}
\title{Multiple imputation through XGBoost}
\usage{
mixgb(
  data,
  m = 5,
  maxit = 1,
  ordinalAsInteger = FALSE,
  pmm.type = NULL,
  pmm.k = 5,
  pmm.link = "prob",
  initial.num = "normal",
  initial.int = "mode",
  initial.fac = "mode",
  save.models = FALSE,
  save.vars = NULL,
  save.models.folder = NULL,
  verbose = F,
  xgb.params = list(),
  nrounds = 100,
  early_stopping_rounds = NULL,
  print_every_n = 10L,
  xgboost_verbose = 0,
  ...
)
}
\arguments{
\item{data}{A data.frame or data.table with missing values}

\item{m}{The number of imputed datasets. Default: 5}

\item{maxit}{The number of imputation iterations. Default: 1}

\item{ordinalAsInteger}{Whether to convert ordinal factors to integers. By default, \code{ordinalAsInteger = FALSE}. Setting \code{ordinalAsInteger = TRUE} may speed up the imputation process for large datasets.}

\item{pmm.type}{The type of predictive mean matching (PMM). Possible values:
\itemize{
 \item \code{NULL} (default): Imputations without PMM;
 \item \code{0}: Imputations with PMM type 0;
 \item \code{1}: Imputations with PMM type 1;
 \item \code{2}: Imputations with PMM type 2;
 \item \code{"auto"}: Imputations with PMM type 2 for numeric/integer variables; imputations without PMM for categorical variables.
}}

\item{pmm.k}{The number of donors for predictive mean matching. Default: 5}

\item{pmm.link}{The link for predictive mean matching in binary variables. Possible values:
\itemize{
 \item \code{"prob"} (default): use probabilities;
 \item \code{"logit"}: use logit values.
}}

\item{initial.num}{Initial imputation method for numeric type data:
\itemize{
 \item \code{"normal"} (default);
 \item \code{"mean"};
 \item \code{"median"};
 \item \code{"mode"};
 \item \code{"sample"}.
}}

\item{initial.int}{Initial imputation method for integer type data:
\itemize{
 \item \code{"mode"} (default);
 \item \code{"sample"}.
}}

\item{initial.fac}{Initial imputation method for factor type data:
\itemize{
 \item \code{"mode"} (default);
 \item \code{"sample"}.
}}

\item{save.models}{Whether to save imputation models for imputing new data later on. Default: \code{FALSE}}

\item{save.vars}{For the purpose of imputing new data, the imputation models for response variables specified in \code{save.vars} will be saved. The values in \code{save.vars} can be a vector of names or indices. By default, only the imputation models for variables with missing values in the original data will be saved (\code{save.vars = NULL}). To save imputation models for all variables, users can specify \code{save.vars = colnames(data)}.}

\item{save.models.folder}{Users can specify a directory to save all imputation models. Models will be saved in JSON format by internally calling \code{xgb.save()}, which is recommended by XGBoost.}

\item{verbose}{Verbose setting for mixgb. If \code{TRUE}, will print out the progress of imputation. Default: \code{FALSE}.}

\item{xgb.params}{A list of XGBoost parameters. For more details, please check \href{https://xgboost.readthedocs.io/en/stable/parameter.html}{XGBoost documentation on parameters}.}

\item{nrounds}{The maximum number of boosting iterations for XGBoost. Default: 100}

\item{early_stopping_rounds}{An integer value \code{k}. XGBoost training will stop if the validation performance has not improved for \code{k} rounds. Default: \code{NULL} (early stopping is disabled).}

\item{print_every_n}{Print XGBoost evaluation information at every nth iteration if \code{xgboost_verbose > 0}. Default: 10}

\item{xgboost_verbose}{Verbose setting for XGBoost training: 0 (silent), 1 (print information) and 2 (print additional information). Default: 0}

\item{...}{Extra arguments to be passed to XGBoost}
}
\value{
If \code{save.models = FALSE}, this function will return a list of \code{m} imputed datasets. If \code{save.models = TRUE}, it will return an object with imputed datasets, saved models and parameters.
}
\description{
This function is used to generate multiply-imputed datasets using XGBoost, subsampling and predictive mean matching (PMM).
}
\examples{
# obtain m multiply imputed datasets without saving models
params <- list(max_depth = 3, subsample = 0.7, nthread = 2)
mixgb.data <- mixgb(data = nhanes3, m = 2, xgb.params = params, nrounds = 10)

# obtain m multiply imputed datasets and save models for imputing new data later on
mixgb.obj <- mixgb(
  data = nhanes3, m = 2, xgb.params = params, nrounds = 10,
  save.models = TRUE, save.models.folder = tempdir()
)
}
