% Generated by roxygen2 (4.1.1): do not edit by hand
% Please edit documentation in R/data_preparation.R
\name{auto_grouping}
\alias{auto_grouping}
\title{Reduce cardinality in categorical variable by automatic grouping}
\usage{
auto_grouping(data, input, target, n_groups, seed = 999)
}
\arguments{
\item{data}{data frame source}

\item{input}{string with the name of the categorical variable whose cardinality will be reduced}

\item{target}{string with the name of the target variable used to optimize the re-grouping}

\item{n_groups}{number of groups for the new category based on \code{input}, normally between 3 and 10.}

\item{seed}{optional, random seed used internally for the k-means; changing this value will change the model}
}
\value{
A list containing 3 elements: \code{recateg_results}, which contains the description of the target variable with the new groups;
\code{df_equivalence}, a data frame containing the \code{input} category and the new category; and \code{fit_cluster}, which is the cluster model used to do the re-grouping.
}
\description{
Reduce the cardinality of an input variable based on a target variable (binary for now), using attributes of accuracy and representativity for both the input and target variables. It uses a cluster model to create the new groups. Full documentation can be found at: \url{http://livebook.datascienceheroes.com/data_preparation/high_cardinality_predictive_modeling.html}
}
\examples{
# Reducing quantity of countries based on has_flu variable
auto_grouping(data=data_country, input='country', target="has_flu", n_groups=8)
}

