% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/variable_selection.R
\name{select_variables}
\alias{select_variables}
\title{Select Variables}
\usage{
select_variables(sample_a, sample_b, aux_variables)
}
\arguments{
\item{sample_a}{data.frame of the reference probability sample (e.g., NHIS)}

\item{sample_b}{data.frame of the All of Us sample}

\item{aux_variables}{character vector with names of auxiliary variables}
}
\value{
character vector with selected variable names
}
\description{
Select variables relevant to propensity for inclusion in All of Us
}
\details{
Chooses which variables are meaningful in modeling propensity for inclusion in All of Us (sample_b)
as compared to the general US population as represented by a reference probability sample (sample_a). This function
assumes that variable names in both sample_a and sample_b are harmonized (i.e., definitions and names are the same across the two sources).
}
\examples{
# Variable sets shared by the NHIS and All of Us preparation steps
cal_vars <- c(
  "SEX_A_R", "AGEP_A_R", "HISPALLP_A_R", "ORIENT_A_R", "HICOV_A_R",
  "EDUCP_A_R", "REGION_R", "EMPLASTWK_A_R", "HOUTENURE_A_R", "MARITAL_A_R"
)
stu_vars <- "DIBTYPE_A_R"
impute_vars <- c(cal_vars, stu_vars)

# Columns handed to dummies(), referenced by their "_I"-suffixed names
dummy_cols <- paste0(
  c("AGEP_A_R", "HISPALLP_A_R", "EDUCP_A_R", "REGION_R"),
  "_I"
)

# NHIS: run impute_data() and dummies(), then convert every column except
# the ones in nhis_passthrough (PPSU, PSTRAT, WTFA_A) to a factor
nhis_passthrough <- c("PPSU", "PSTRAT", "WTFA_A")
nhis_imputed <- impute_data(nhis_processed, impute_vars, nhis_passthrough)
nhis_dummied <- dummies(nhis_imputed, vars = dummy_cols)
nhis_factor_cols <- setdiff(names(nhis_dummied), nhis_passthrough)
nhis_dummied[nhis_factor_cols] <- lapply(
  nhis_dummied[nhis_factor_cols],
  as.factor
)

# Synthetic All of Us: same two steps, then convert every column to a factor
aou_imputed <- impute_data(aou_synthetic, impute_vars)
aou_dummied <- dummies(aou_imputed, vars = dummy_cols)
aou_dummied[] <- lapply(aou_dummied, as.factor)

# Base names of the auxiliary variables offered to the selection step
aux_variables <- c(
  "SEX_A_R_I", "AGEP_A_R_I", "HISPALLP_A_R_I", "EDUCP_A_R_I",
  "REGION_R_I", "ORIENT_A_R_I", "HICOV_A_R_I",
  "EMPLASTWK_A_R_I", "HOUTENURE_A_R_I", "MARITAL_A_R_I"
)

# Select variables: NHIS is sample_a, All of Us is sample_b
selected_base_vars <- select_variables(
  nhis_dummied,
  aou_dummied,
  aux_variables
)

}
