% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/TSQE.R
\name{TSQE}
\alias{TSQE}
\title{Two-step Q-matrix estimation method}
\usage{
TSQE(
  Y,
  K,
  input.cor = c("tetrachoric", "pearson"),
  ref.method = c("QR", "GDI"),
  GDI.model = c("GDINA", "DINA", "ACDM", "RRUM"),
  cutoff = 0.8
)
}
\arguments{
\item{Y}{A \eqn{N \times J} binary data matrix consisting of responses
from \eqn{N} examinees to \eqn{J} items}

\item{K}{The number of attributes in the Q-matrix}

\item{input.cor}{The type of correlation used as input for the
provisional attribute extraction (PAE) algorithm. It can be either the
\code{"tetrachoric"} or the \code{"pearson"} correlation.}

\item{ref.method}{The refinement method used to polish the provisional
Q-matrix obtained from the PAE. Currently available methods include
the Q-matrix refinement (\code{QR}) method and the G-DINA discrimination index (\code{GDI}).}

\item{GDI.model}{The CDM used in the GDI algorithm to fit the data. Currently
available models include the DINA model, the ACDM, the RRUM, and the
G-DINA model.}

\item{cutoff}{The cutoff used to dichotomize the entries in the provisional
Q-matrix. The default is 0.8.}
}
\value{
The function returns the estimated Q-matrix.
}
\description{
The function estimates the Q-matrix from the
    response data using the two-step Q-matrix estimation (TSQE) method.
}
\section{Details}{


The TSQE method estimates a Q-matrix by integrating the provisional attribute extraction (PAE) algorithm
with a Q-matrix refinement-and-validation method,
such as the Q-matrix refinement (QR) method or the G-DINA model
discrimination index (GDI). Specifically, the PAE algorithm relies on
classic exploratory factor analysis (EFA) combined with a unique stopping
rule for identifying a provisional Q-matrix, and the resulting provisional
Q-matrix is \dQuote{polished} by a refinement method to derive the final
estimate of the Q-matrix.


The PAE algorithm starts by computing the
inter-item tetrachoric correlation matrix. Tetrachoric correlations are
used because the examinee responses are binary, which makes them
more appropriate than the Pearson product-moment correlation coefficient.
See \enc{Köhn}{Koehn} et al. (2025) for details. The next step is to use factor analysis
on the item-correlation matrix, and treat the extracted factors as proxies
for the latent attributes. The third step concerns the identification of specific
attributes required for each item. The detailed algorithm is described below:

\describe{
  \item{(1)}{Initialize the item index as \eqn{j = 1}.}
  \item{(2)}{Let \eqn{l_{jk}} denote the loading of item \eqn{j} on factor \eqn{k}, where \eqn{k = 1,2,...,K}.}
  \item{(3)}{Arrange the loadings in descending order. Define a mapping
      function \eqn{f(k) = t}, where \eqn{t} is the order index.
      Hence, \eqn{l_{j(1)}} will indicate the maximum loading,
      while \eqn{l_{j(K)}} will indicate the minimum loading.}
  \item{(4)}{Define \deqn{p_j(t) = \frac{\sum_{h=1}^t l_{j(h)}^2}{\sum_{k=1}^K l_{jk}^2}}
      as the proportion of the communality of item \eqn{j} accounted for
      by the first \eqn{t} factors.}
  \item{(5)}{Define \deqn{K_j = \min \{ t \mid p_j(t) \geq \lambda \},}
      where \eqn{\lambda} is the cut-off value for the desired proportion
      of item variance-accounted-for. Then, the ordered entries of the
      provisional q-vector of item \eqn{j} are obtained as
      \deqn{q_{j(t)}^* = \begin{cases}
      1 & \text{if } t \leq K_j, \\
      0 & \text{if } t > K_j.
      \end{cases}}}
  \item{(6)}{Identify \eqn{q_j^* = (q_{j1}^*,q_{j2}^*,...,q_{jK}^*)}
      by rearranging the ordered entries of the q-vector using the inverse function \eqn{k = f^{-1}(t)}.}
  \item{(7)}{Set \eqn{j = j + 1} and repeat (2) to (6) until \eqn{j = J}.
      Then denote the provisional Q-matrix as \eqn{\mathbf{Q}^*}.}
}

The provisional Q-matrix \eqn{\mathbf{Q}^*}{Q*} is then refined by
using either the \code{QR} or \code{GDI} method.
}

\examples{
\dontrun{
library(GDINA)
N = 1000
Q = sim30GDINA$simQ
J = nrow(Q)
K= ncol(Q)
gs = data.frame(guess=rep(0.2,J),slip=rep(0.2,J))
sim = simGDINA(N,Q,gs.parm = gs,model = "DINA")
Y = extract(sim,what = "dat")

## Run TSQE method with QR
est.Q = TSQE(Y, K, input.cor = "tetrachoric", ref.method = "QR", cutoff = 0.8)

## If the recovery rate is to be computed, the columns of the estimated Q-matrix 
## should be permuted so that they align with those of the true Q-matrix. 
best.est.Q = bestQperm(est.Q, Q)

## Compute the recovery rate
RR(best.est.Q, Q)
}
}
\references{
Chiu, C.-Y. (2013). Statistical Refinement of the Q-matrix in Cognitive Diagnosis.
\emph{Applied Psychological Measurement, 37}(8), 598-618.
\doi{10.1177/0146621613488436}

de la Torre, J., & Chiu, C.-Y. (2016). A general method of empirical Q-matrix validation. 
\emph{Psychometrika, 81}, 253-273.
\doi{10.1007/s11336-015-9467-8}

\enc{Köhn}{Koehn}, H. F., Chiu, C.-Y., Oluwalana, O., Kim, H., & Wang, J. (2025). A two-step Q-matrix estimation
method. \emph{Applied Psychological Measurement, 49}(1-2), 3-28.
\doi{10.1177/01466216241284418}
}
\seealso{
\code{\link{QR}}
}
