bpp-popgen  3.0.0
bpp::MultilocusGenotypeStatistics Class Reference

The MultilocusGenotypeStatistics class. More...

#include <Bpp/PopGen/MultilocusGenotypeStatistics.h>

Classes

struct  Fstats
 
struct  PermResults
 
struct  VarComp
 

Static Public Member Functions

static std::vector< size_t > getAllelesIdsForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Get the alleles' id at one locus for a set of groups. More...
 
static size_t countGametesForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Count the number of alleles (gametes) at a locus for a set of groups. More...
 
static std::map< size_t, size_t > getAllelesMapForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Get a map of allele count for a set of groups. More...
 
static std::map< size_t, double > getAllelesFrqForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Get the alleles frequencies at one locus for a set of groups. More...
 
static size_t countNonMissingForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Count the number of non-missing data at a given locus for a set of groups. More...
 
static size_t countBiAllelicForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Count the number of bi-allelic MonolocusGenotype at a given locus for a set of groups. More...
 
static std::map< size_t, size_t > countHeterozygousForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Count how many times each allele is found in a heterozygous MonolocusGenotype in a set of groups. More...
 
static std::map< size_t, double > getHeterozygousFrqForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Get the heterozygous frequencies for each allele at a locus in a set of groups. More...
 
static double getHobsForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Compute the observed heterozygosity for one locus. More...
 
static double getHexpForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Compute the expected heterozygosity for one locus. More...
 
static double getHnbForGroups (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Compute the expected non biased heterozygosity for one locus. More...
 
static double getDnei72 (const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, size_t grp1, size_t grp2)
 Compute the Nei distance between two groups at a given number of loci. More...
 
static double getDnei78 (const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, size_t grp1, size_t grp2)
 Compute the Nei unbiased distance between two groups at a given number of loci. More...
 
static std::map< size_t, Fstats > getAllelesFstats (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Compute the three F statistics of Weir and Cockerham for each allele of a given locus. More...
 
static std::map< size_t, double > getAllelesFit (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus. More...
 
static std::map< size_t, double > getAllelesFst (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Compute the Weir and Cockerham $\theta$ on a set of groups for each allele of a given locus. More...
 
static std::map< size_t, double > getAllelesFis (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus. More...
 
static std::map< size_t, VarComp > getVarianceComponents (const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
 Get the variance components a, b and c (Weir and Cockerham, 1983). More...
 
static double getWCMultilocusFst (const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups)
 Compute the Weir and Cockerham $\theta_{wc}$ on a set of groups for a given set of loci. The variance components for each allele are calculated and then combined over loci using Weir and Cockerham weighting. More...
 
static double getWCMultilocusFis (const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups)
 Compute the Weir and Cockerham Fis on a set of groups for a given set of loci. The variance components for each allele are calculated and then combined over loci using Weir and Cockerham weighting. More...
 
static PermResults getWCMultilocusFstAndPerm (const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, std::set< size_t > groups, int nb_perm)
 Compute the Weir and Cockerham $\theta_{wc}$ on a set of groups for a given set of loci and make a permutation test. Multilocus $\theta$ is calculated as in getWCMultilocusFst on the original data set and on nb_perm data sets obtained after a permutation of individuals between the different groups. Return values are theta, % of values > theta and % of values < theta. More...
 
static PermResults getWCMultilocusFisAndPerm (const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, std::set< size_t > groups, int nb_perm)
 Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation test. Multilocus Fis is calculated as in getWCMultilocusFis on the original data set and on nb_perm data sets obtained after a permutation of alleles between individuals of each group. Return values are Fis, % of values > Fis and % of values < Fis. More...
 
static double getRHMultilocusFst (const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups)
 Compute the $\theta_{RH}$ on a set of groups for a given set of loci. The variance components for each allele are calculated and then combined over loci using RH weighting with alleles frequency. More...
 
static std::unique_ptr< DistanceMatrix > getDistanceMatrix (const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups, std::string distance_method)
 Compute pairwise distances on a set of groups for a given set of loci. The distance is either Nei72, Nei78, Fst W&C or Fst Robertson & Hill, Nm, D=-ln(1-Fst) of Reynolds et al. 1983, Rousset 1997 Fst/(1-Fst). More...
 

Detailed Description

The MultilocusGenotypeStatistics class.

This class is a set of static methods for PolymorphismMultiGContainer.

Author
Sylvain Gaillard

Definition at line 70 of file MultilocusGenotypeStatistics.h.

Member Function Documentation

◆ countBiAllelicForGroups()

size_t MultilocusGenotypeStatistics::countBiAllelicForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

◆ countGametesForGroups()

size_t MultilocusGenotypeStatistics::countGametesForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Count the number of alleles (gametes) at a locus for a set of groups.

Exceptions
IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.

Definition at line 70 of file MultilocusGenotypeStatistics.cpp.

References bpp::IndexOutOfBoundsException::getBadIndex(), bpp::IndexOutOfBoundsException::getBounds(), and bpp::MapTools::getValues().

◆ countHeterozygousForGroups()

map< size_t, size_t > MultilocusGenotypeStatistics::countHeterozygousForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

◆ countNonMissingForGroups()

size_t MultilocusGenotypeStatistics::countNonMissingForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

◆ getAllelesFis()

map< size_t, double > MultilocusGenotypeStatistics::getAllelesFis ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus.

Definition at line 471 of file MultilocusGenotypeStatistics.cpp.

◆ getAllelesFit()

map< size_t, double > MultilocusGenotypeStatistics::getAllelesFit ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus.

Definition at line 441 of file MultilocusGenotypeStatistics.cpp.

◆ getAllelesFrqForGroups()

map< size_t, double > MultilocusGenotypeStatistics::getAllelesFrqForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Get the alleles frequencies at one locus for a set of groups.

Exceptions
IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
ZeroDivisionException if the number of considered alleles = 0.

Definition at line 115 of file MultilocusGenotypeStatistics.cpp.

References bpp::IndexOutOfBoundsException::getBadIndex(), bpp::IndexOutOfBoundsException::getBounds(), and bpp::MapTools::getValues().

Referenced by getRHMultilocusFst().

◆ getAllelesFst()

map< size_t, double > MultilocusGenotypeStatistics::getAllelesFst ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Compute the Weir and Cockerham $\theta$ on a set of groups for each allele of a given locus.

Definition at line 455 of file MultilocusGenotypeStatistics.cpp.

◆ getAllelesFstats()

map< size_t, MultilocusGenotypeStatistics::Fstats > MultilocusGenotypeStatistics::getAllelesFstats ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Compute the three F statistics of Weir and Cockerham for each allele of a given locus.

Definition at line 415 of file MultilocusGenotypeStatistics.cpp.

◆ getAllelesIdsForGroups()

vector< size_t > MultilocusGenotypeStatistics::getAllelesIdsForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Get the alleles' id at one locus for a set of groups.

Exceptions
IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.

Definition at line 56 of file MultilocusGenotypeStatistics.cpp.

References bpp::IndexOutOfBoundsException::getBadIndex(), bpp::IndexOutOfBoundsException::getBounds(), and bpp::MapTools::getKeys().

◆ getAllelesMapForGroups()

map< size_t, size_t > MultilocusGenotypeStatistics::getAllelesMapForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

◆ getDistanceMatrix()

std::unique_ptr< DistanceMatrix > MultilocusGenotypeStatistics::getDistanceMatrix ( const PolymorphismMultiGContainer &  pmgc,
std::vector< size_t >  locus_positions,
const std::set< size_t > &  groups,
std::string  distance_method 
)
static

Compute pairwise distances on a set of groups for a given set of loci. The distance is either Nei72, Nei78, Fst W&C or Fst Robertson & Hill, Nm, D=-ln(1-Fst) of Reynolds et al. 1983, Rousset 1997 Fst/(1-Fst).

Definition at line 735 of file MultilocusGenotypeStatistics.cpp.

References bpp::PolymorphismMultiGContainer::getAllGroupsNames(), getDnei72(), getDnei78(), getRHMultilocusFst(), and getWCMultilocusFst().

◆ getDnei72()

double MultilocusGenotypeStatistics::getDnei72 ( const PolymorphismMultiGContainer &  pmgc,
std::vector< size_t >  locus_positions,
size_t  grp1,
size_t  grp2 
)
static

Compute the Nei distance between two groups at a given number of loci.

Nei 1972

\[ \hat{D}_1=-\ln \left[\frac{\displaystyle\sum_{i=1}^{n}\left(x_i\times y_i\right)} {\sqrt{\displaystyle\sum_{i=1}^{n}x_i^2\times \displaystyle\sum_{i=1}^{n}y_i^2}}\right] \]

where $x_i$ and $y_i$ are respectively the ith allele's frequency of the first and second group and $n$ the total number of alleles of both groups.

Exceptions
IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
ZeroDivisionException if the number of considered alleles = 0.

Definition at line 317 of file MultilocusGenotypeStatistics.cpp.

Referenced by getDistanceMatrix().

◆ getDnei78()

double MultilocusGenotypeStatistics::getDnei78 ( const PolymorphismMultiGContainer &  pmgc,
std::vector< size_t >  locus_positions,
size_t  grp1,
size_t  grp2 
)
static

Compute the Nei unbiased distance between two groups at a given number of loci.

Nei 1978

\[ \hat{D}=-\ln \left[\frac{\displaystyle\sum_{i=1}^{n}\left(x_i\times y_i\right)} {\sqrt{\frac{2n_XJ_X-1}{2n_X-1}\times\frac{2n_YJ_Y-1}{2n_Y-1}}} \right] \]

where $x_i$ and $y_i$ are respectively the ith allele's frequency of the first and second group, $n$ the total number of alleles of both groups, $n_X$ and $n_Y$ the number of alleles in the first and second group and

\[ J_X=\sum_{i=1}^{n}x_i^2 \qquad\textrm{and}\qquad J_Y=\sum_{i=1}^{n}y_i^2 \]

Exceptions
IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
ZeroDivisionException if the number of considered alleles = 0.

Definition at line 362 of file MultilocusGenotypeStatistics.cpp.

Referenced by getDistanceMatrix().

◆ getHeterozygousFrqForGroups()

map< size_t, double > MultilocusGenotypeStatistics::getHeterozygousFrqForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

◆ getHexpForGroups()

double MultilocusGenotypeStatistics::getHexpForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Compute the expected heterozygosity for one locus.

Nei 1977

\[ H_{exp}=1-\sum_{i=1}^{n}x_i^2 \]

where $x_i$ is the frequency of the ith allele and $n$ the number of alleles.

Exceptions
IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
ZeroDivisionException if the number of considered alleles = 0.

Definition at line 274 of file MultilocusGenotypeStatistics.cpp.

References bpp::IndexOutOfBoundsException::getBadIndex(), and bpp::IndexOutOfBoundsException::getBounds().

◆ getHnbForGroups()

double MultilocusGenotypeStatistics::getHnbForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Compute the expected non biased heterozygosity for one locus.

Nei 1978

\[ H_{nb}=\frac{2n}{2n-1}\left(1-\sum_{i=1}^{n}x_i^2\right)=\frac{2n}{2n-1}H_{exp} \]

where $x_i$ is the frequency of the ith allele and $n$ the number of alleles.

Exceptions
IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
ZeroDivisionException if the number of considered alleles = 0.

Definition at line 297 of file MultilocusGenotypeStatistics.cpp.

References bpp::IndexOutOfBoundsException::getBadIndex(), and bpp::IndexOutOfBoundsException::getBounds().

◆ getHobsForGroups()

double MultilocusGenotypeStatistics::getHobsForGroups ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Compute the observed heterozygosity for one locus.

This is the mean value of the getHeterozygousFrqForGroups map.

Exceptions
IndexOutOfBoundsException if locus_position exceeds the number of loci of one MultilocusGenotype.
ZeroDivisionException if the number of considered alleles = 0.

Definition at line 251 of file MultilocusGenotypeStatistics.cpp.

References bpp::IndexOutOfBoundsException::getBadIndex(), and bpp::IndexOutOfBoundsException::getBounds().

◆ getRHMultilocusFst()

double MultilocusGenotypeStatistics::getRHMultilocusFst ( const PolymorphismMultiGContainer &  pmgc,
std::vector< size_t >  locus_positions,
const std::set< size_t > &  groups 
)
static

Compute the $\theta_{RH}$ on a set of groups for a given set of loci. The variance components for each allele are calculated and then combined over loci using RH weighting with alleles frequency.

Definition at line 697 of file MultilocusGenotypeStatistics.cpp.

References getAllelesFrqForGroups().

Referenced by getDistanceMatrix().

◆ getVarianceComponents()

map< size_t, MultilocusGenotypeStatistics::VarComp > MultilocusGenotypeStatistics::getVarianceComponents ( const PolymorphismMultiGContainer &  pmgc,
size_t  locus_position,
const std::set< size_t > &  groups 
)
static

Get the variance components a, b and c (Weir and Cockerham, 1983).

Definition at line 485 of file MultilocusGenotypeStatistics.cpp.

References bpp::PolymorphismMultiGContainer::getLocusGroupSize().

◆ getWCMultilocusFis()

double MultilocusGenotypeStatistics::getWCMultilocusFis ( const PolymorphismMultiGContainer &  pmgc,
std::vector< size_t >  locus_positions,
const std::set< size_t > &  groups 
)
static

Compute the Weir and Cockerham Fis on a set of groups for a given set of loci. The variance components for each allele are calculated and then combined over loci using Weir and Cockerham weighting.

Definition at line 607 of file MultilocusGenotypeStatistics.cpp.

References bpp::PolymorphismMultiGContainer::getLocusGroupSize().

◆ getWCMultilocusFisAndPerm()

MultilocusGenotypeStatistics::PermResults MultilocusGenotypeStatistics::getWCMultilocusFisAndPerm ( const PolymorphismMultiGContainer &  pmgc,
std::vector< size_t >  locus_positions,
std::set< size_t >  groups,
int  nb_perm 
)
static

Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation test. Multilocus Fis is calculated as in getWCMultilocusFis on the original data set and on nb_perm data sets obtained after a permutation of alleles between individuals of each group. Return values are Fis, % of values > Fis and % of values < Fis.

Definition at line 667 of file MultilocusGenotypeStatistics.cpp.

References bpp::PolymorphismMultiGContainerTools::extractGroups(), bpp::MultilocusGenotypeStatistics::PermResults::Percent_inf, bpp::MultilocusGenotypeStatistics::PermResults::Percent_sup, bpp::PolymorphismMultiGContainerTools::permutIntraGroupAlleles(), and bpp::MultilocusGenotypeStatistics::PermResults::Statistic.

◆ getWCMultilocusFst()

double MultilocusGenotypeStatistics::getWCMultilocusFst ( const PolymorphismMultiGContainer &  pmgc,
std::vector< size_t >  locus_positions,
const std::set< size_t > &  groups 
)
static

Compute the Weir and Cockerham $\theta_{wc}$ on a set of groups for a given set of loci. The variance components for each allele are calculated and then combined over loci using Weir and Cockerham weighting.

Definition at line 576 of file MultilocusGenotypeStatistics.cpp.

References bpp::PolymorphismMultiGContainer::getLocusGroupSize().

Referenced by getDistanceMatrix().

◆ getWCMultilocusFstAndPerm()

MultilocusGenotypeStatistics::PermResults MultilocusGenotypeStatistics::getWCMultilocusFstAndPerm ( const PolymorphismMultiGContainer &  pmgc,
std::vector< size_t >  locus_positions,
std::set< size_t >  groups,
int  nb_perm 
)
static

Compute the Weir and Cockerham $\theta_{wc}$ on a set of groups for a given set of loci and make a permutation test. Multilocus $\theta$ is calculated as in getWCMultilocusFst on the original data set and on nb_perm data sets obtained after a permutation of individuals between the different groups. Return values are theta, % of values > theta and % of values < theta.

Definition at line 637 of file MultilocusGenotypeStatistics.cpp.

References bpp::PolymorphismMultiGContainerTools::extractGroups(), bpp::MultilocusGenotypeStatistics::PermResults::Percent_inf, bpp::MultilocusGenotypeStatistics::PermResults::Percent_sup, bpp::PolymorphismMultiGContainerTools::permutMultiG(), and bpp::MultilocusGenotypeStatistics::PermResults::Statistic.


The documentation for this class was generated from the following files: