% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/CPO_impute.R
\name{cpoImpute}
\alias{cpoImpute}
\alias{cpoImputeAll}
\title{Impute and Re-Impute Data}
\usage{
cpoImpute(
  target.cols = character(0),
  classes = list(),
  cols = list(),
  dummy.classes = character(0),
  dummy.cols = character(0),
  dummy.type = "factor",
  force.dummies = FALSE,
  impute.new.levels = TRUE,
  recode.factor.levels = TRUE,
  id,
  export = "export.default",
  affect.type = NULL,
  affect.index = integer(0),
  affect.names = character(0),
  affect.pattern = NULL,
  affect.invert = FALSE,
  affect.pattern.ignore.case = FALSE,
  affect.pattern.perl = FALSE,
  affect.pattern.fixed = FALSE
)

cpoImputeAll(
  target.cols = character(0),
  classes = list(),
  cols = list(),
  dummy.classes = character(0),
  dummy.cols = character(0),
  dummy.type = "factor",
  force.dummies = FALSE,
  impute.new.levels = TRUE,
  recode.factor.levels = TRUE,
  id,
  export = "export.default",
  affect.type = NULL,
  affect.index = integer(0),
  affect.names = character(0),
  affect.pattern = NULL,
  affect.invert = FALSE,
  affect.pattern.ignore.case = FALSE,
  affect.pattern.perl = FALSE,
  affect.pattern.fixed = FALSE
)
}
\arguments{
\item{target.cols}{[\code{character}]\cr
Name of the column(s) specifying the response.
Default is \code{character(0)}.}

\item{classes}{[\code{named list}]\cr
Named list containing imputation techniques for classes of columns.
E.g. \code{list(numeric = imputeMedian())}.}

\item{cols}{[\code{named list}]\cr
Named list containing names of imputation methods to impute missing values
in the data column referenced by the list element's name. Overrules imputation set via
\code{classes}.}

\item{dummy.classes}{[\code{character}]\cr
Classes of columns to create dummy columns for.
Default is \code{character(0)}.}

\item{dummy.cols}{[\code{character}]\cr
Column names to create dummy columns (containing binary missing indicator) for.
Default is \code{character(0)}.}

\item{dummy.type}{[\code{character(1)}]\cr
How dummy columns are encoded. Either as 0/1 with type \dQuote{numeric}
or as \dQuote{factor}.
Default is \dQuote{factor}.}

\item{force.dummies}{[\code{logical(1)}]\cr
Force dummy creation even if the respective data column does not
contain any NAs. Note that (a) most learners will complain about
constant columns created this way but (b) your feature set might
be stochastic if you turn this off.
Default is \code{FALSE}.}

\item{impute.new.levels}{[\code{logical(1)}]\cr
If new, unencountered factor levels occur during reimputation,
should these be handled as NAs and then be imputed the same way?
Default is \code{TRUE}.}

\item{recode.factor.levels}{[\code{logical(1)}]\cr
Recode factor levels after reimputation, so they match the respective element of
\code{lvls} (in the description object) and therefore match the levels of the
feature factor in the training data after imputation?
Default is \code{TRUE}.}

\item{id}{[\code{character(1)}]\cr
id to use as prefix for the CPO's hyperparameters. This
must be used to avoid name clashes when composing two
CPOs of the same type, or with learners or other CPOs
with hyperparameters with clashing names.}

\item{export}{[\code{character}]\cr
Either a character vector indicating the parameters to
export as hyperparameters, or one of the special values
\dQuote{export.all} (export all parameters),
\dQuote{export.default} (export all parameters that are exported by default),
\dQuote{export.set} (export all parameters that were set during construction),
\dQuote{export.default.set} (export the intersection of the \dQuote{default} and \dQuote{set} parameters),
\dQuote{export.unset} (export all parameters that were \emph{not} set during construction) or
\dQuote{export.default.unset} (export the intersection of the \dQuote{default} and \dQuote{unset} parameters).
Default is \dQuote{export.default}.}

\item{affect.type}{[\code{character} | \code{NULL}]\cr
Type of columns to affect. A subset of \dQuote{numeric}, \dQuote{factor}, \dQuote{ordered}, \dQuote{other}, or \code{NULL}
to not match by column type. Default is \code{NULL}.}

\item{affect.index}{[\code{numeric}]\cr
Indices of feature columns to affect. The order of indices given is respected. Target column indices are not counted
(since target columns are always included). Default is \code{integer(0)}.}

\item{affect.names}{[\code{character}]\cr
Feature names of feature columns to affect. The order of names given is respected. Default is \code{character(0)}.}

\item{affect.pattern}{[\code{character(1)} | \code{NULL}]\cr
\code{\link[base]{grep}} pattern to match feature names by. Default is \code{NULL} (no pattern matching).}

\item{affect.invert}{[\code{logical(1)}]\cr
Whether to affect all features \emph{not} matched by other \code{affect.*} parameters. Default is \code{FALSE}.}

\item{affect.pattern.ignore.case}{[\code{logical(1)}]\cr
Ignore case when matching features with \code{affect.pattern}; see \code{\link[base]{grep}}. Default is \code{FALSE}.}

\item{affect.pattern.perl}{[\code{logical(1)}]\cr
Use Perl-style regular expressions for \code{affect.pattern}; see \code{\link[base]{grep}}. Default is \code{FALSE}.}

\item{affect.pattern.fixed}{[\code{logical(1)}]\cr
Use fixed matching instead of regular expressions for \code{affect.pattern}; see \code{\link[base]{grep}}. Default is \code{FALSE}.}
}
\value{
[\code{\link{CPO}}].
}
\description{
This is a \code{\link{CPOConstructor}} to be used to create a
\code{\link{CPO}}. It is called like any R function and returns
the created \code{\link{CPO}}.

Allows imputation of missing feature values through various techniques.
Note that you have the possibility to re-impute a data set
in the same way as the imputation was performed during training.
This especially comes in handy during resampling when one wants to perform the
same imputation on the test set as on the training set.

The function \code{impute} performs the imputation on a data set and returns,
along with the imputed data set, an \dQuote{ImputationDesc} object
which can contain \dQuote{learned} coefficients and helpful data.
It can then be passed together with a new data set to \code{\link[mlr]{reimpute}}.

The imputation techniques can be specified for certain features or for feature classes,
see function arguments.

You can either provide an arbitrary object, use a built-in imputation method listed
under \code{\link[mlr]{imputations}} or create one yourself using \code{\link[mlr]{makeImputeMethod}}.

\code{cpoImpute} will impute some columns. \code{cpoImputeAll} behaves just like \code{cpoImpute},
except that it will throw an error if there are any missings remaining in its output. \code{cpoImputeAll}
should be used if one wants to prepend an imputer to a learner.
}
\details{
The description object contains these slots
\describe{
  \item{target [\code{character}]}{See argument.}
  \item{features [\code{character}]}{Feature names (column names of \code{data}).}
  \item{classes [\code{character}]}{Feature classes (storage type of \code{data}).}
  \item{lvls [\code{named list}]}{Mapping of column names of factor features to their levels,
    including newly created ones during imputation.}
  \item{impute [\code{named list}]}{Mapping of column names to imputation functions.}
  \item{dummies [\code{named list}]}{Mapping of column names to imputation functions.}
  \item{impute.new.levels [\code{logical(1)}]}{See argument.}
  \item{recode.factor.levels [\code{logical(1)}]}{See argument.}
}
}
\section{General CPO info}{

This function creates a CPO object, which can be applied to
\code{\link[mlr]{Task}}s, \code{data.frame}s, \code{\link[mlr]{Learner}}s
and other CPO objects using the \code{\link{\%>>\%}} operator.

The parameters of this object can be changed after creation
using the function \code{\link[mlr]{setHyperPars}}. The other
hyper-parameter manipulating functions, \code{\link[mlr]{getHyperPars}}
and \code{\link[ParamHelpers]{getParamSet}} similarly work as one expects.

If the \dQuote{id} parameter is given, the hyperparameters
will have this id as a prefix; this will, however, not change
the parameters of the creator function.
}

\section{Calling a \code{\link{CPOConstructor}}}{

CPO constructor functions are called with optional values of parameters, and additional \dQuote{special} optional values.
The special optional values are the \code{id} parameter, and the \code{affect.*} parameters. The \code{affect.*} parameters
enable the user to control which subset of a given dataset is affected. If no \code{affect.*} parameters are given, all
data features are affected by default.
}

\seealso{
Other CPOs: 
\code{\link{cpoApplyFun}()},
\code{\link{cpoApplyFunRegrTarget}()},
\code{\link{cpoAsNumeric}()},
\code{\link{cpoCache}()},
\code{\link{cpoCbind}()},
\code{\link{cpoCollapseFact}()},
\code{\link{cpoDropConstants}()},
\code{\link{cpoDropMostlyConstants}()},
\code{\link{cpoDummyEncode}()},
\code{\link{cpoFilterAnova}()},
\code{\link{cpoFilterCarscore}()},
\code{\link{cpoFilterChiSquared}()},
\code{\link{cpoFilterFeatures}()},
\code{\link{cpoFilterGainRatio}()},
\code{\link{cpoFilterInformationGain}()},
\code{\link{cpoFilterKruskal}()},
\code{\link{cpoFilterLinearCorrelation}()},
\code{\link{cpoFilterMrmr}()},
\code{\link{cpoFilterOneR}()},
\code{\link{cpoFilterPermutationImportance}()},
\code{\link{cpoFilterRankCorrelation}()},
\code{\link{cpoFilterRelief}()},
\code{\link{cpoFilterRfCImportance}()},
\code{\link{cpoFilterRfImportance}()},
\code{\link{cpoFilterRfSRCImportance}()},
\code{\link{cpoFilterSymmetricalUncertainty}()},
\code{\link{cpoFilterUnivariate}()},
\code{\link{cpoFilterVariance}()},
\code{\link{cpoFixFactors}()},
\code{\link{cpoIca}()},
\code{\link{cpoImpactEncodeClassif}()},
\code{\link{cpoImpactEncodeRegr}()},
\code{\link{cpoImputeConstant}()},
\code{\link{cpoImputeHist}()},
\code{\link{cpoImputeLearner}()},
\code{\link{cpoImputeMax}()},
\code{\link{cpoImputeMean}()},
\code{\link{cpoImputeMedian}()},
\code{\link{cpoImputeMin}()},
\code{\link{cpoImputeMode}()},
\code{\link{cpoImputeNormal}()},
\code{\link{cpoImputeUniform}()},
\code{\link{cpoLogTrafoRegr}()},
\code{\link{cpoMakeCols}()},
\code{\link{cpoMissingIndicators}()},
\code{\link{cpoModelMatrix}()},
\code{\link{cpoOversample}()},
\code{\link{cpoPca}()},
\code{\link{cpoProbEncode}()},
\code{\link{cpoQuantileBinNumerics}()},
\code{\link{cpoRegrResiduals}()},
\code{\link{cpoResponseFromSE}()},
\code{\link{cpoSample}()},
\code{\link{cpoScale}()},
\code{\link{cpoScaleMaxAbs}()},
\code{\link{cpoScaleRange}()},
\code{\link{cpoSelect}()},
\code{\link{cpoSmote}()},
\code{\link{cpoSpatialSign}()},
\code{\link{cpoTransformParams}()},
\code{\link{cpoWrap}()},
\code{\link{makeCPOCase}()},
\code{\link{makeCPOMultiplex}()}

Other imputation CPOs: 
\code{\link{cpoImputeConstant}()},
\code{\link{cpoImputeHist}()},
\code{\link{cpoImputeLearner}()},
\code{\link{cpoImputeMax}()},
\code{\link{cpoImputeMean}()},
\code{\link{cpoImputeMedian}()},
\code{\link{cpoImputeMin}()},
\code{\link{cpoImputeMode}()},
\code{\link{cpoImputeNormal}()},
\code{\link{cpoImputeUniform}()}
}
\concept{CPOs}
\concept{imputation CPOs}
