% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/dfba_mann_whitney.R
\name{dfba_mann_whitney}
\alias{dfba_mann_whitney}
\title{Independent Samples Test (Mann Whitney U)}
\usage{
dfba_mann_whitney(
  E,
  C,
  a0 = 1,
  b0 = 1,
  prob_interval = 0.95,
  samples = 30000,
  method = NULL,
  hide_progress = FALSE
)
}
\arguments{
\item{E}{Data for independent sample 1 ("Experimental")}

\item{C}{Data for independent sample 2 ("Control")}

\item{a0}{The first shape parameter for the prior beta distribution for \code{omega_E} (default is 1). Must be positive and finite.}

\item{b0}{The second shape parameter for the prior beta distribution for \code{omega_E} (default is 1). Must be positive and finite.}

\item{prob_interval}{Desired probability value for the interval estimate for \code{omega_E} (default is 0.95)}

\item{samples}{The number of Monte Carlo samples for \code{omega_E} when \code{method = "small"} (default is 30000)}

\item{method}{(Optional) The method option is either "small" or "large". The "small" algorithm is based on a discrete Monte Carlo solution for cases where \eqn{n} is typically less than 20. The "large" algorithm is based on a beta approximation model for the posterior distribution for the \code{omega_E} parameter. This approximation is reasonable when \eqn{n > 19}. Regardless of \eqn{n}, the user can stipulate \code{method}. When the \code{method} argument is omitted, the program selects the appropriate procedure.}

\item{hide_progress}{(Optional) If \code{TRUE}, hide percent progress while Monte Carlo sampling is running when \code{method = "small"} (default is \code{FALSE}).}
}
\value{
A list containing the following components:

\item{Emean}{Mean of the independent sample 1 ("Experimental") data}

\item{Cmean}{Mean of the independent sample 2 ("Control") data}

\item{n_E}{Number of observations of the independent sample 1 ("Experimental") data}

\item{n_C}{Number of observations of the independent sample 2 ("Control") data}

\item{U_E}{Total number of comparisons for which observations from independent sample 1 ("Experimental") data exceed observations from independent sample 2 ("Control") data}

\item{U_C}{Total number of comparisons for which observations from independent sample 2 ("Control") data exceed observations from independent sample 1 ("Experimental") data}

\item{prob_interval}{User-defined width of \code{omega_E} interval estimate (default is 0.95)}

\item{a0}{First shape parameter for the prior beta distribution}

\item{b0}{Second shape parameter for the prior beta distribution}

\item{a_post}{First shape parameter for the posterior beta distribution}

\item{b_post}{Second shape parameter for the posterior beta distribution}

\item{samples}{The number of desired Monte Carlo samples (default is 30000)}

\item{method}{A character string indicating the calculation method used}

\item{omega_E}{A vector of values representing candidate values for \code{omega_E} when \code{method = "small"}}

\item{omegapost}{A vector of values representing discrete probabilities for candidate values of \code{omega_E}}

\item{priorvector}{A vector of values representing prior discrete probabilities of candidate values of \code{omega_E} when \code{method = "small"}}

\item{priorprH1}{Prior probability of the alternative model that \code{omega_E} exceeds 0.5}

\item{prH1}{Posterior probability of the alternative model that \code{omega_E} exceeds 0.5}

\item{BF10}{Bayes Factor describing the relative increase in the posterior odds for the alternative model that \code{omega_E} exceeds 0.5 over the null model of \code{omega_E} less than or equal to 0.5}

\item{omegabar}{Posterior mean estimate for \code{omega_E}}

\item{eti_lower}{Lower limit of the equal-tail probability interval for \code{omega_E} with probability width indicated by \code{prob_interval}}

\item{eti_upper}{Upper limit of the equal-tail probability interval for \code{omega_E} with probability width indicated by \code{prob_interval}}

\item{hdi_lower}{Lower limit of the highest-density probability interval for \code{omega_E} with probability width indicated by \code{prob_interval} when \code{method = "small"}}

\item{hdi_upper}{Upper limit of the highest-density probability interval for \code{omega_E} with probability width indicated by \code{prob_interval} when \code{method = "small"}}
}
\description{
Given two independent vectors \code{E} and \code{C}, the function computes
the sample Mann-Whitney \eqn{U} statistics \code{U_E} and \code{U_C} and
provides a Bayesian analysis for the population parameter \code{omega_E},
which is the population ratio of \eqn{U_E/(U_E+U_C)}.
}
\details{
The Mann-Whitney \emph{U} test is the frequentist nonparametric counterpart
to the independent-groups \eqn{t}-test. The sample \code{U_E} statistic is
the number of times that the \emph{E} variate is larger than the
\emph{C} variate, whereas \code{U_C} is the converse number.

This test uses only rank information, so it is robust with respect to
outliers, and it does not depend on the assumption of a normal model for the
variates. The Bayesian version for the Mann-Whitney is focused on the
population parameter \code{omega_E}, which is the population ratio
\code{U_E/(U_E+U_C)}.

While the frequentist test effectively assumes the sharp null hypothesis that
\code{omega_E} is .5, the Bayesian analysis has a prior and posterior
distribution for \code{omega_E} on the [0, 1] interval. The prior is a beta
distribution with shape parameters \code{a0} and \code{b0}. The default is
the flat prior (\eqn{a0 = b0 = 1}), but this prior can be altered by the
user.

The \code{prob_interval} input is the value for probability interval estimates for
\code{omega_E}. There are two cases depending on the sample size for the \emph{E}
and \emph{C} variates. When the sample sizes are small, a discrete
approximation method is used. In this case, the Bayesian analysis considers 200
discrete values for \code{omega_E} from .0025 to .9975 in steps of .005. For
each discrete value, a prior and a posterior probability are obtained. The
posterior probabilities are based on Monte Carlo sampling to approximate the
likelihood of obtaining the observed \code{U_E} and \code{U_C} values for each candidate
value for \code{omega_E}. For each candidate value for \code{omega_E}, the likelihood for
the observed sample U statistics does not depend on the true distributions of
the \emph{E} and \emph{C} variates in the population. For each candidate
\code{omega_E}, the software constructs two exponential variates that have
the same \code{omega_E} value. The argument \code{samples} specifies the number of
Monte Carlo samples used for each candidate value of \code{omega_E}.

For large sample sizes of the \emph{E} and \emph{C} variates,
the Bayesian posterior distribution is closely approximated by a beta
distribution where the shape parameters are a function of the sample
\code{U_E} and \code{U_C} statistics. The large-sample beta approximation was
developed from extensive previous empirical studies designed to approximate
the quantiles of the discrete approach with the corresponding quantiles for a
particular beta distribution. The large-\emph{n} solution also uses Lagrange
polynomials for interpolation. The large-\emph{n} approximation is reasonably
accurate when \eqn{n > 19} for each condition. When the \code{method} input
is omitted, the function selects the appropriate procedure (\emph{i.e.},
either the discrete case for a small sample size or the large-\emph{n}
approach). Nonetheless, the user can stipulate which method they desire
regardless of sample size by inputting either \code{method="small"} or
\code{method="large"}. The large-\emph{n} solution is rapid compared
to the small-sample solution, so care should be exercised when choosing
\code{method="small"}, particularly for large sample sizes.

Technical details of the analysis are explained in the Chechile (2020)
Communications in Statistics paper cited below.
}
\examples{

# Note: examples with method = "small" have long runtimes due to Monte Carlo
# sampling; please feel free to run them in the console.

# Examples with large n per group
# The data for each condition are presorted only for the user's convenience
# when checking the U statistics by hand

groupA <- c(43, 45, 47, 50, 54, 58, 60, 63, 69, 84, 85, 91, 99, 127, 130,
            147, 165, 175, 193, 228, 252, 276)
groupB <- c(0, 01, 02, 03, 05, 14, 15, 23, 23, 25, 27, 32, 57, 105, 115, 158,
            161, 181, 203, 290)

dfba_mann_whitney(E = groupA,
                  C = groupB)

# The following uses a Jeffreys prior instead of a default flat prior:
dfba_mann_whitney(E = groupA,
                  C = groupB,
                  a0 = .5,
                  b0 =.5)

# The following also uses a Jeffreys prior but the analysis reverses the
# variates:
dfba_mann_whitney(E = groupB,
                  C = groupA,
                  a0 = .5,
                  b0 = .5)

# Note that BF10 from the above analysis is 1/BF10 from the original order
# of the variates.

# The next analysis constructs 99\% interval estimates with the Jeffreys
# prior.

AB <- dfba_mann_whitney(E = groupA,
                        C = groupB,
                        a0 = .5,
                        b0 = .5,
                        prob_interval=.99)

AB

# Plot with prior and posterior curves
plot(AB)

# Plot with posterior curve only
plot(AB,
     plot.prior = FALSE)

# Example with small n per group

groupC <- c(96.49, 96.78, 97.26, 98.85, 99.75, 100.14, 101.15, 101.39,
            102.58, 107.22, 107.70, 113.26)
groupD <- c(101.16, 102.09, 103.14, 104.70, 105.27, 108.22, 108.32, 108.51,
            109.88, 110.32, 110.55, 113.42)


dfba_mann_whitney(E = groupC,
                  C = groupD,
                  samples = 250,
                  hide_progress = TRUE)



}
\references{
Chechile, R.A. (2020). Bayesian Statistics for Experimental
Scientists: A General Introduction Using Distribution-Free Methods.
Cambridge: MIT Press.

Chechile, R.A. (2020). A Bayesian analysis for the Mann-Whitney
statistic. Communications in Statistics -- Theory and Methods 49(3): 670-696.
\url{https://doi.org/10.1080/03610926.2018.1549247}.
}
