% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dataset-lfw.R
\name{lfw_dataset}
\alias{lfw_dataset}
\alias{lfw_people_dataset}
\alias{lfw_pairs_dataset}
\title{LFW Datasets}
\usage{
lfw_people_dataset(
  root = tempdir(),
  transform = NULL,
  split = "original",
  target_transform = NULL,
  download = FALSE
)

lfw_pairs_dataset(
  root = tempdir(),
  train = TRUE,
  transform = NULL,
  split = "original",
  target_transform = NULL,
  download = FALSE
)
}
\arguments{
\item{root}{Root directory for dataset storage. The dataset will be stored under \code{root/lfw_people} or \code{root/lfw_pairs}.}

\item{transform}{Optional. A function that takes an image and returns a transformed version (e.g., normalization, cropping).}

\item{split}{Which version of the dataset to use. One of \code{"original"} or \code{"funneled"}. Defaults to \code{"original"}.}

\item{target_transform}{Optional. A function that transforms the label.}

\item{download}{Logical. If \code{TRUE}, downloads the dataset to \verb{root/}. If the dataset is already present, the download is skipped.}

\item{train}{Logical. For \code{lfw_pairs_dataset} only: if \code{TRUE} (the default), loads the training split (\code{pairsDevTrain.txt}); if \code{FALSE}, loads the test split (\code{pairsDevTest.txt}).}
}
\value{
A torch dataset object created by \code{lfw_people_dataset()} or \code{lfw_pairs_dataset()}.
Each element is a named list with:
\itemize{
\item \code{x}:
\itemize{
\item For \code{lfw_people_dataset}: an H x W x 3 numeric array representing a single RGB image.
\item For \code{lfw_pairs_dataset}: a list of two H x W x 3 numeric arrays representing a pair of RGB images.
}
\item \code{y}:
\itemize{
\item For \code{lfw_people_dataset}: an integer index from 1 to the number of identities in the dataset.
\item For \code{lfw_pairs_dataset}: \code{1} if both images show the same person, \code{2} if they show different people.
}
}
}
\description{
Labeled Faces in the Wild (LFW) datasets for face identification and face verification.
}
\details{
The LFW dataset collection provides facial images for evaluating face recognition systems.
It includes two variants:
\itemize{
\item \code{lfw_people_dataset}: A \strong{multi-class classification} dataset where each image is labelled by person identity.
\item \code{lfw_pairs_dataset}: A \strong{face verification} dataset containing image pairs with binary labels (same or different person).
}

This R implementation of the LFW dataset is based on the \code{fetch_lfw_people()} and \code{fetch_lfw_pairs()} functions from the \code{scikit-learn} library,
but deviates in a few key aspects due to dataset availability and R API conventions:
\itemize{
\item The \code{color} and \code{resize} arguments from Python are not directly exposed. Instead, all images are RGB with a fixed size of 250x250.
\item The \code{subset} argument of \code{fetch_lfw_pairs()} in Python (\code{"train"}, \code{"test"} or \code{"10_folds"}) is simplified to a \code{train} boolean flag in R.
The \code{"10_folds"} subset is not supported, as the original protocol files are unavailable or incompatible with clean separation of image-label pairs.
\item The \code{split} parameter in R controls which version of the dataset to use: \code{"original"} (unaligned) or \code{"funneled"} (aligned using funneling).
The funneled version contains geometrically normalized face images, offering better alignment and typically improved performance for face recognition models.
\item The dataset is downloaded from \href{https://figshare.com/authors/_/3118605}{Figshare},
which hosts the same files referenced in \code{scikit-learn}'s dataset utilities.
\item \code{lfw_people_dataset}: 13,233 images across multiple identities (with either \code{split = "original"} or \code{split = "funneled"})
\item \code{lfw_pairs_dataset}:
\itemize{
\item Training split (\code{train = TRUE}): 2,200 image pairs
\item Test split (\code{train = FALSE}): 1,000 image pairs
}
}
}
\examples{
\dontrun{
# Load data for LFW People Dataset
lfw <- lfw_people_dataset(download = TRUE)
first_item <- lfw[1]
first_item$x  # RGB image
first_item$y  # Label index
lfw$classes[first_item$y]  # person's name (e.g., "Aaron_Eckhart")

# Load training data for LFW Pairs Dataset
lfw <- lfw_pairs_dataset(download = TRUE, train = TRUE)
first_item <- lfw[1]
first_item$x  # List of 2 RGB Images
first_item$x[[1]]  # RGB Image
first_item$x[[2]]  # RGB Image
first_item$y  # Label index
lfw$classes[first_item$y]  # Class Name (e.g., "Same" or "Different")

# Load test data for LFW Pairs Dataset
lfw <- lfw_pairs_dataset(download = TRUE, train = FALSE)
first_item <- lfw[1]
first_item$x  # List of 2 RGB Images
first_item$x[[1]]  # RGB Image
first_item$x[[2]]  # RGB Image
first_item$y  # Label index
lfw$classes[first_item$y]  # Class Name (e.g., "Same" or "Different")
}

}
\seealso{
Other classification_dataset: 
\code{\link{caltech_dataset}},
\code{\link{cifar10_dataset}()},
\code{\link{eurosat_dataset}()},
\code{\link{fer_dataset}()},
\code{\link{fgvc_aircraft_dataset}()},
\code{\link{flowers102_dataset}()},
\code{\link{image_folder_dataset}()},
\code{\link{mnist_dataset}()},
\code{\link{oxfordiiitpet_dataset}()},
\code{\link{places365_dataset}()},
\code{\link{tiny_imagenet_dataset}()},
\code{\link{whoi_plankton_dataset}()},
\code{\link{whoi_small_coralnet_dataset}()}
}
\concept{classification_dataset}
