% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/add_interest-associations.R, R/add_interest.R
\name{add_interest.associations}
\alias{add_interest.associations}
\alias{add_interest}
\title{Add additional interest measures for association rules}
\usage{
\method{add_interest}{associations}(x, measures = NULL, smooth_counts = 0, p = 0.5, ...)

add_interest(x, ...)
}
\arguments{
\item{x}{A nugget of flavour \code{associations}, typically created with
\code{\link[=dig_associations]{dig_associations()}}.}

\item{measures}{A character vector specifying which interest measures to
calculate. If \code{NULL} (the default), all supported measures are calculated.
See the Details section for the list of supported measures.}

\item{smooth_counts}{A non-negative numeric value specifying the amount of
Laplace smoothing to apply to the contingency table counts before
calculating the interest measures. Default is \code{0} (no smoothing).
Positive values add the specified amount to each of the counts
(\code{pp}, \code{pn}, \code{np}, \code{nn}), which can help avoid issues with undefined measures
due to zero counts. Use \code{smooth_counts = 1} for standard Laplace smoothing.
Use \code{smooth_counts = 0.5} for Haldane-Anscombe smoothing, which is
often used for odds ratio estimation and in chi-squared tests.}

\item{p}{A numeric value in the range \verb{[0, 1]} representing the conditional
probability of the consequent being true given that the antecedent is
true, as assumed under the null hypothesis of the binomial tests. This
parameter is used in the calculation of the GUHA quantifiers
\code{"lci"}, \code{"uci"}, \code{"dlci"}, \code{"duci"}, \code{"lce"}, and \code{"uce"}.
The default value is \code{0.5}.}

\item{...}{Currently unused.}
}
\value{
An S3 object that is an instance of the \code{associations} and \code{nugget}
classes. It is a tibble containing all the columns of the input
nugget \code{x}, plus additional columns for the requested interest
measures.
}
\description{
\ifelse{html}{\href{https://lifecycle.r-lib.org/articles/stages.html#experimental}{\figure{lifecycle-experimental.svg}{options: alt='[Experimental]'}}}{\strong{[Experimental]}}

This function calculates various additional interest measures for
association rules based on their contingency table counts.
}
\details{
The input nugget object must contain the columns
\code{pp} (positive antecedent & positive consequent),
\code{pn} (positive antecedent & negative consequent),
\code{np} (negative antecedent & positive consequent), and
\code{nn} (negative antecedent & negative consequent), representing the counts
from the contingency table. These columns are automatically produced by
\code{\link[=dig_associations]{dig_associations()}}.

The supported interest measures that can be calculated include:
\itemize{
\item Founded GUHA (General Unary Hypotheses Automaton) quantifiers:
\itemize{
\item \code{"fi"} - \emph{Founded Implication}, which is equal to the \code{"confidence"} measure
calculated automatically by \code{\link[=dig_associations]{dig_associations()}}.
\item \code{"dfi"} - \emph{Double Founded Implication} computed as \eqn{pp / (pp + pn + np)}
\item \code{"fe"} - \emph{Founded Equivalence} computed as \eqn{(pp + nn) / (pp + pn + np + nn)}
}
\item GUHA quantifiers based on binomial tests - these measures require the
additional parameter \code{p}, which represents the conditional probability of
the consequent being true given that the antecedent is true under the null
hypothesis. The measures are computed as one-sided p-values of the exact
binomial test (the test that underlies the Clopper-Pearson confidence
interval for the binomial proportion):
\itemize{
\item \code{"lci"} - \emph{Lower Critical Implication} computed as
\eqn{\sum_{i=pp}^{pp+pn} \frac{(pp+pn)!}{i!(pp+pn-i)!} p^i (1-p)^{pp+pn-i}}
\item \code{"uci"} - \emph{Upper Critical Implication} computed as
\eqn{\sum_{i=0}^{pp} \frac{(pp+pn)!}{i!(pp+pn-i)!} p^i (1-p)^{pp+pn-i}}
\item \code{"dlci"} - \emph{Double Lower Critical Implication} computed as
\eqn{\sum_{i=pp}^{pp+pn+np} \frac{(pp+pn+np)!}{i!(pp+pn+np-i)!} p^i (1-p)^{pp+pn+np-i}}
\item \code{"duci"} - \emph{Double Upper Critical Implication} computed as
\eqn{\sum_{i=0}^{pp} \frac{(pp+pn+np)!}{i!(pp+pn+np-i)!} p^i (1-p)^{pp+pn+np-i}}
\item \code{"lce"} - \emph{Lower Critical Equivalence} computed as
\eqn{\sum_{i=pp+nn}^{pp+pn+np+nn} \frac{(pp+pn+np+nn)!}{i!(pp+pn+np+nn-i)!} p^i (1-p)^{pp+pn+np+nn-i}}
\item \code{"uce"} - \emph{Upper Critical Equivalence} computed as
\eqn{\sum_{i=0}^{pp+nn} \frac{(pp+pn+np+nn)!}{i!(pp+pn+np+nn-i)!} p^i (1-p)^{pp+pn+np+nn-i}}
}
\item Measures adopted from the \code{arules} package:
\itemize{
\item \code{"added_value"} - \emph{Added Value}, see \url{https://mhahsler.github.io/arules/docs/measures#addedvalue} for details
\item \code{"casual_confidence"} - \emph{Casual Confidence}, see \url{https://mhahsler.github.io/arules/docs/measures#casualconfidence} for details
\item \code{"casual_support"} - \emph{Casual Support}, see \url{https://mhahsler.github.io/arules/docs/measures#casualsupport} for details
\item \code{"centered_confidence"} - \emph{Centered Confidence}, see \url{https://mhahsler.github.io/arules/docs/measures#centeredconfidence} for details
\item \code{"certainty"} - \emph{Certainty Factor}, see \url{https://mhahsler.github.io/arules/docs/measures#certainty} for details
\item \code{"collective_strength"} - \emph{Collective Strength}, see \url{https://mhahsler.github.io/arules/docs/measures#collectivestrength} for details
\item \code{"confirmed_confidence"} - \emph{Descriptive Confirmed Confidence}, see \url{https://mhahsler.github.io/arules/docs/measures#confirmedconfidence} for details
\item \code{"conviction"} - \emph{Conviction}, see \url{https://mhahsler.github.io/arules/docs/measures#conviction} for details
\item \code{"cosine"} - \emph{Cosine}, see \url{https://mhahsler.github.io/arules/docs/measures#cosine} for details
\item \code{"counterexample"} - \emph{Example and Counter-Example Rate}, see \url{https://mhahsler.github.io/arules/docs/measures#counterexample} for details
\item \code{"doc"} - \emph{Difference of Confidence}, see \url{https://mhahsler.github.io/arules/docs/measures#doc} for details
\item \code{"gini"} - \emph{Gini Index}, see \url{https://mhahsler.github.io/arules/docs/measures#gini} for details
\item \code{"imbalance"} - \emph{Imbalance Ratio}, see \url{https://mhahsler.github.io/arules/docs/measures#imbalance} for details
\item \code{"implication_index"} - \emph{Implication Index}, see \url{https://mhahsler.github.io/arules/docs/measures#implicationindex} for details
\item \code{"importance"} - \emph{Importance}, see \url{https://mhahsler.github.io/arules/docs/measures#importance} for details
\item \code{"j_measure"} - \emph{J-Measure}, see \url{https://mhahsler.github.io/arules/docs/measures#jmeasure} for details
\item \code{"jaccard"} - \emph{Jaccard Coefficient}, see \url{https://mhahsler.github.io/arules/docs/measures#jaccard} for details
\item \code{"kappa"} - \emph{Kappa}, see \url{https://mhahsler.github.io/arules/docs/measures#kappa} for details
\item \code{"kulczynski"} - \emph{Kulczynski}, see \url{https://mhahsler.github.io/arules/docs/measures#kulczynski} for details
\item \code{"lambda"} - \emph{Lambda}, see \url{https://mhahsler.github.io/arules/docs/measures#lambda} for details
\item \code{"least_contradiction"} - \emph{Least Contradiction}, see \url{https://mhahsler.github.io/arules/docs/measures#leastcontradiction} for details
\item \code{"lerman"} - \emph{Lerman Similarity}, see \url{https://mhahsler.github.io/arules/docs/measures#lerman} for details
\item \code{"leverage"} - \emph{Leverage}, see \url{https://mhahsler.github.io/arules/docs/measures#leverage} for details
\item \code{"maxconfidence"} - \emph{Max Confidence}, see \url{https://mhahsler.github.io/arules/docs/measures#maxconfidence} for details
\item \code{"mutual_information"} - \emph{Mutual Information}, see \url{https://mhahsler.github.io/arules/docs/measures#mutualinformation} for details
\item \code{"odds_ratio"} - \emph{Odds Ratio}, see \url{https://mhahsler.github.io/arules/docs/measures#oddsratio} for details
\item \code{"phi"} - \emph{Phi Correlation Coefficient}, see \url{https://mhahsler.github.io/arules/docs/measures#phi} for details
\item \code{"ralambondrainy"} - \emph{Ralambondrainy}, see \url{https://mhahsler.github.io/arules/docs/measures#ralambondrainy} for details
\item \code{"relative_risk"} - \emph{Relative Risk}, see \url{https://mhahsler.github.io/arules/docs/measures#relativerisk} for details
\item \code{"rule_power_factor"} - \emph{Rule Power Factor}, see \url{https://mhahsler.github.io/arules/docs/measures#rulepowerfactor} for details
\item \code{"sebag"} - \emph{Sebag-Schoenauer}, see \url{https://mhahsler.github.io/arules/docs/measures#sebag} for details
\item \code{"varying_liaison"} - \emph{Varying Rates Liaison}, see \url{https://mhahsler.github.io/arules/docs/measures#varyingliaison} for details
\item \code{"yule_q"} - \emph{Yule's Q}, see \url{https://mhahsler.github.io/arules/docs/measures#yuleq} for details
\item \code{"yule_y"} - \emph{Yule's Y}, see \url{https://mhahsler.github.io/arules/docs/measures#yuley} for details
}
}

All the above measures are primarily intended for use with binary (logical)
data. While they can be computed for numerical data as well, their
interpretations may not be meaningful in that context - users should exercise
caution when applying these measures to non-binary data.

Many measures are based on the contingency table counts, and some may be
undefined for certain combinations of counts (e.g., division by zero).
This issue can be mitigated by applying smoothing using the \code{smooth_counts}
argument.
}
\examples{
d <- partition(mtcars, .breaks = 2)
rules <- dig_associations(d,
                          antecedent = !starts_with("mpg"),
                          consequent = starts_with("mpg"),
                          min_support = 0.3,
                          min_confidence = 0.8)
rules <- add_interest(rules,
                   measures = c("conviction", "leverage", "jaccard"))
}
\seealso{
\code{\link[=dig_associations]{dig_associations()}}
}
\author{
Michal Burda
}
