% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/measures_clusterings.R
\name{variation_info}
\alias{variation_info}
\title{Variation of Information Between Clusterings}
\usage{
variation_info(true, pred, base = exp(1))
}
\arguments{
\item{true}{ground truth clustering represented as a membership
vector. Each entry corresponds to an element and the value identifies
the assigned cluster. The specific values of the cluster identifiers
are arbitrary.}

\item{pred}{predicted clustering represented as a membership
vector.}

\item{base}{base of the logarithm. Defaults to \code{exp(1)}.}
}
\description{
Computes the variation of information between two
clusterings, such as a predicted and ground truth clustering.
}
\details{
Variation of information is an entropy-based distance metric
on the space of clusterings. It is unnormalized and varies between
\eqn{0}, attained when the two clusterings are identical, and
\eqn{\log(N)}{log(N)}, where \eqn{N} is the number of clustered
elements and the logarithm is taken in base \code{base}. Larger values
of the distance metric correspond to greater dissimilarity between the
clusterings.
}
\examples{
true <- c(1,1,1,2,2)  # ground truth clustering
pred <- c(1,1,2,2,2)  # predicted clustering
variation_info(true, pred)

}
\references{
Arabie, P. and Boorman, S. A. "Multidimensional scaling of measures of
distance between partitions." \emph{Journal of Mathematical Psychology} \strong{10:2},
148--203 (1973). \doi{10.1016/0022-2496(73)90012-6}

Meilă, M. "Comparing Clusterings by the Variation of Information." In:
\emph{Learning Theory and Kernel Machines}, Lecture Notes in Computer Science
\strong{2777}, 173--187, Springer, Berlin, Heidelberg (2003).
\doi{10.1007/978-3-540-45167-9_14}
}
