% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/vocab.R
\name{load_vocab}
\alias{load_vocab}
\title{Load a vocabulary file}
\usage{
load_vocab(vocab_file)
}
\arguments{
\item{vocab_file}{Path to the vocabulary file. The file is assumed to be a
text file with one token per line, where the order of the lines determines the
index of each token in the vocabulary.}
}
\value{
The vocabulary as a character vector of tokens. The casedness of the
vocabulary is inferred and attached as the \code{"is_cased"} attribute. The
vocabulary indices are taken to be the positions of the tokens,
\emph{starting at zero} for historical consistency.

Note that from the perspective of a neural net, the numeric indices \emph{are}
the tokens, and the mapping from token to index is fixed. If we changed the
indexing (the order of the tokens), it would break any pre-trained models.
}
\description{
Reads a vocabulary from a text file containing one token per line and returns
it as a character vector of tokens.
}
\examples{
# Get path to sample vocabulary included with package.
vocab_path <- system.file("extdata", "tiny_vocab.txt", package = "wordpiece")
vocab <- load_vocab(vocab_file = vocab_path)
}
