% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/agg_read.R
\name{read_ipums_agg}
\alias{read_ipums_agg}
\title{Read data from an IPUMS aggregate data extract}
\usage{
read_ipums_agg(
  data_file,
  file_select = NULL,
  vars = NULL,
  col_types = NULL,
  n_max = Inf,
  guess_max = min(n_max, 1000),
  var_attrs = c("val_labels", "var_label", "var_desc"),
  remove_extra_header = TRUE,
  file_encoding = NULL,
  verbose = TRUE
)
}
\arguments{
\item{data_file}{Path to a .zip archive containing an IPUMS NHGIS or
IPUMS IHGIS extract, or to a single .csv file from such an extract.}

\item{file_select}{If \code{data_file} is a .zip archive that
contains multiple files, an expression identifying the file to load.
Accepts a character vector specifying the
file name, a \link[=selection_language]{tidyselect selection}, or an index
position. This must uniquely identify a file.}

\item{vars}{Names of variables to include in the output. Accepts a
vector of names or a \link[=selection_language]{tidyselect selection}.
If \code{NULL}, includes all variables in the file.}

\item{col_types}{One of \code{NULL}, a \code{\link[readr:cols]{cols()}}
specification or a string. If \code{NULL}, all column types will be inferred
from the values in the first \code{guess_max} rows of each column.
Alternatively, you can use a compact string representation to specify
column types:
\itemize{
\item c = character
\item i = integer
\item n = number
\item d = double
\item l = logical
\item f = factor
\item D = date
\item T = date time
\item t = time
\item ? = guess
\item _ or - = skip
}

See \code{\link[readr:read_delim]{read_delim()}} for more details.}

\item{n_max}{Maximum number of lines to read.}

\item{guess_max}{For .csv files, maximum number of lines to use for guessing
column types. Will never use more than the number of lines read.}

\item{var_attrs}{Variable attributes to add from the codebook (.txt) file
included in the extract. Defaults to all available attributes.

See \code{\link[=set_ipums_var_attributes]{set_ipums_var_attributes()}} for more details.}

\item{remove_extra_header}{If \code{TRUE}, remove the additional descriptive
header row included in some NHGIS .csv files.

This header row is not
usually needed as it contains similar information to that
included in the \code{"label"} attribute of each data column (if \code{var_attrs}
includes \code{"var_label"}).}

\item{file_encoding}{Encoding for the file to be loaded. For NHGIS extracts,
defaults to ISO-8859-1. For IHGIS extracts, defaults to UTF-8. If the
default encoding produces unexpected characters, adjust the encoding here.}

\item{verbose}{Logical controlling whether to display output when loading
data. If \code{TRUE}, displays IPUMS conditions, a progress bar, and
column types. Otherwise, all are suppressed.

Will be overridden by \code{readr.show_progress} and \code{readr.show_col_types}
options, if they are set.}
}
\value{
A \code{\link[tibble:tbl_df-class]{tibble}} containing the data found in
\code{data_file}.
}
\description{
Read a .csv file from an extract downloaded from an IPUMS aggregate
data collection (IPUMS NHGIS or IPUMS IHGIS).

To read spatial data from an NHGIS extract, use \code{\link[=read_ipums_sf]{read_ipums_sf()}}.
}
\examples{
nhgis_file <- ipums_example("nhgis0972_csv.zip")
ihgis_file <- ipums_example("ihgis0014.zip")

# Provide the .zip archive directly to load the data inside:
read_ipums_agg(nhgis_file)

# For extracts that contain multiple files, use `file_select` to specify
# a single file to load. This accepts a tidyselect expression:
read_ipums_agg(ihgis_file, file_select = matches("AAA_g0"), verbose = FALSE)

# Or an index position:
read_ipums_agg(ihgis_file, file_select = 2, verbose = FALSE)

# Variable metadata is automatically attached to data, if available
ihgis_data <- read_ipums_agg(ihgis_file, file_select = 2, verbose = FALSE)
ipums_var_info(ihgis_data)

# Column types are inferred from the data. You can
# manually specify column types with `col_types`. This may be useful for
# geographic codes, which should typically be interpreted as character values:
read_ipums_agg(nhgis_file, col_types = list(MSA_CMSAA = "c"), verbose = FALSE)

# You can also read in a subset of the data file:
read_ipums_agg(
  nhgis_file,
  n_max = 15,
  vars = c(GISJOIN, YEAR, D6Z002),
  verbose = FALSE
)
}
\seealso{
\code{\link[=read_ipums_sf]{read_ipums_sf()}} to read spatial data from an IPUMS extract.

\code{\link[=read_nhgis_codebook]{read_nhgis_codebook()}} or \code{\link[=read_ihgis_codebook]{read_ihgis_codebook()}} to read metadata about
an IPUMS aggregate data extract.

\code{\link[=ipums_list_files]{ipums_list_files()}} to list files in an IPUMS extract.
}
