bpp-popgen3  3.0.0
MultilocusGenotypeStatistics.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef _MULTILOCUSGENOTYPESTATISTICS_H_
6 #define _MULTILOCUSGENOTYPESTATISTICS_H_
7 
8 // From STL
9 #include <string>
10 #include <vector>
11 #include <map>
12 #include <set>
13 #include <memory>
14 
15 #include <Bpp/Exceptions.h>
16 
17 // From bpp-seq
18 #include <Bpp/Seq/DistanceMatrix.h>
19 
20 // From bpp-popgen
22 #include "MultilocusGenotype.h"
23 #include "GeneralExceptions.h"
24 
25 namespace bpp
26 {
35 {
36 public:
/**
 * @brief The three variance components a, b and c of Weir and Cockerham.
 *
 * Value type of the per-allele map returned by getVarianceComponents().
 */
37  struct VarComp
38  {
39  double a;
40  double b;
41  double c;
42  };
43 
/**
 * @brief The three F statistics of Weir and Cockerham: Fit, Fst and Fis.
 *
 * Value type of the per-allele map returned by getAllelesFstats().
 */
44  struct Fstats
45  {
46  double Fit;
47  double Fst;
48  double Fis;
49  };
50 
/**
 * @brief Result of a statistic computed together with a permutation test.
 *
 * Returned by getWCMultilocusFstAndPerm() and getWCMultilocusFisAndPerm().
 *
 * NOTE(review): percentSup / percentInf presumably hold the share of permuted
 * values above / below the observed statistic; confirm against the
 * implementation, which is not visible from this header.
 */
51  struct PermResults
52  {
53  double statistic;
54  double percentSup;
55  double percentInf;
56  };
57 
/**
 * @brief Get the alleles' id at one locus for a set of groups.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A vector of allele ids.
 */
63  static std::vector<size_t> getAllelesIdsForGroups(
64  const PolymorphismMultiGContainer& pmgc,
65  size_t locusPosition,
66  const std::set<size_t>& groups);
67 
/**
 * @brief Count the number of alleles (gametes) at a locus for a set of groups.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return The number of alleles (gametes) counted.
 */
73  static size_t countGametesForGroups(
74  const PolymorphismMultiGContainer& pmgc,
75  size_t locusPosition,
76  const std::set<size_t>& groups);
77 
/**
 * @brief Get a map of allele count for a set of groups.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A map of allele counts (presumably keyed by allele id; confirm
 *         against the implementation).
 */
83  static std::map<size_t, size_t> getAllelesMapForGroups(
84  const PolymorphismMultiGContainer& pmgc,
85  size_t locusPosition,
86  const std::set<size_t>& groups);
87 
/**
 * @brief Get the alleles frequencies at one locus for a set of groups.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A map of allele frequencies (presumably keyed by allele id;
 *         confirm against the implementation).
 */
94  static std::map<size_t, double> getAllelesFrqForGroups(
95  const PolymorphismMultiGContainer& pmgc,
96  size_t locusPosition,
97  const std::set<size_t>& groups);
98 
/**
 * @brief Count the number of non-missing data at a given locus for a set of
 * groups.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return The number of non-missing data.
 */
104  static size_t countNonMissingForGroups(
105  const PolymorphismMultiGContainer& pmgc,
106  size_t locusPosition,
107  const std::set<size_t>& groups);
108 
/**
 * @brief Count the number of bi-allelic MonolocusGenotype at a given locus
 * for a set of groups.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return The number of bi-allelic MonolocusGenotype counted.
 */
114  static size_t countBiAllelicForGroups(
115  const PolymorphismMultiGContainer& pmgc,
116  size_t locusPosition,
117  const std::set<size_t>& groups);
118 
/**
 * @brief Count how many times each allele is found in a heterozygous
 * MonolocusGenotype in a set of groups.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A map giving a count for each allele.
 */
124  static std::map<size_t, size_t> countHeterozygousForGroups(
125  const PolymorphismMultiGContainer& pmgc,
126  size_t locusPosition,
127  const std::set<size_t>& groups);
128 
/**
 * @brief Get the heterozygous frequencies for each allele at a locus in a
 * set of groups.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A map giving a heterozygous frequency for each allele.
 */
135  static std::map<size_t, double> getHeterozygousFrqForGroups(
136  const PolymorphismMultiGContainer& pmgc,
137  size_t locusPosition,
138  const std::set<size_t>& groups);
139 
/**
 * @brief Compute the observed heterozygosity for one locus.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return The observed heterozygosity.
 */
147  static double getHobsForGroups(
148  const PolymorphismMultiGContainer& pmgc,
149  size_t locusPosition,
150  const std::set<size_t>& groups);
151 
/**
 * @brief Compute the expected heterozygosity for one locus.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return The expected heterozygosity.
 */
163  static double getHexpForGroups(
164  const PolymorphismMultiGContainer& pmgc,
165  size_t locusPosition,
166  const std::set<size_t>& groups);
167 
/**
 * @brief Compute the expected non biased heterozygosity for one locus.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return The expected non biased heterozygosity.
 */
179  static double getHnbForGroups(
180  const PolymorphismMultiGContainer& pmgc,
181  size_t locusPosition,
182  const std::set<size_t>& groups);
183 
/**
 * @brief Compute the Nei distance between two groups.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPositions The positions of the loci to use (taken by value,
 *        so the caller's vector is copied).
 * @param grp1 The first group.
 * @param grp2 The second group.
 * @return The Nei distance between grp1 and grp2.
 */
197  static double getDnei72(
198  const PolymorphismMultiGContainer& pmgc,
199  std::vector<size_t> locusPositions,
200  size_t grp1,
201  size_t grp2);
202 
/**
 * @brief Compute the Nei unbiased distance between two groups at a given
 * number of loci.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPositions The positions of the loci to use (taken by value,
 *        so the caller's vector is copied).
 * @param grp1 The first group.
 * @param grp2 The second group.
 * @return The Nei unbiased distance between grp1 and grp2.
 */
223  static double getDnei78(
224  const PolymorphismMultiGContainer& pmgc,
225  std::vector<size_t> locusPositions,
226  size_t grp1,
227  size_t grp2);
228 
/**
 * @brief Compute the three F statistics of Weir and Cockerham for each
 * allele of a given locus.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A map giving an Fstats (Fit, Fst, Fis) for each allele.
 */
232  static std::map<size_t, Fstats> getAllelesFstats(
233  const PolymorphismMultiGContainer& pmgc,
234  size_t locusPosition,
235  const std::set<size_t>& groups);
236 
/**
 * @brief Compute the Weir and Cockerham Fit on a set of groups for each
 * allele of a given locus.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A map giving the Fit value for each allele.
 */
240  static std::map<size_t, double> getAllelesFit(
241  const PolymorphismMultiGContainer& pmgc,
242  size_t locusPosition,
243  const std::set<size_t>& groups);
244 
/**
 * @brief Compute the Weir and Cockerham Fst on a set of groups for each
 * allele of a given locus.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A map giving the Fst value for each allele.
 */
248  static std::map<size_t, double> getAllelesFst(
249  const PolymorphismMultiGContainer& pmgc,
250  size_t locusPosition,
251  const std::set<size_t>& groups);
252 
/**
 * @brief Compute the Weir and Cockerham Fis on a set of groups for each
 * allele of a given locus.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A map giving the Fis value for each allele.
 */
256  static std::map<size_t, double> getAllelesFis(
257  const PolymorphismMultiGContainer& pmgc,
258  size_t locusPosition,
259  const std::set<size_t>& groups);
260 
/**
 * @brief Get the variance components a, b and c of Weir and Cockerham.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPosition The position of the locus.
 * @param groups The set of groups to consider.
 * @return A map of VarComp (a, b, c) values (presumably one entry per
 *         allele, like the other per-locus maps of this class; confirm
 *         against the implementation).
 */
264  static std::map<size_t, VarComp> getVarianceComponents(
265  const PolymorphismMultiGContainer& pmgc,
266  size_t locusPosition,
267  const std::set<size_t>& groups);
268 
/**
 * @brief Compute the Weir and Cockerham Fst on a set of groups for a given
 * set of loci.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPositions The positions of the loci to use (taken by value,
 *        so the caller's vector is copied).
 * @param groups The set of groups to consider.
 * @return The multilocus Fst.
 */
273  static double getWCMultilocusFst(
274  const PolymorphismMultiGContainer& pmgc,
275  std::vector<size_t> locusPositions,
276  const std::set<size_t>& groups);
277 
/**
 * @brief Compute the Weir and Cockerham Fis on a set of groups for a given
 * set of loci.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPositions The positions of the loci to use (taken by value,
 *        so the caller's vector is copied).
 * @param groups The set of groups to consider.
 * @return The multilocus Fis.
 */
282  static double getWCMultilocusFis(
283  const PolymorphismMultiGContainer& pmgc,
284  std::vector<size_t> locusPositions,
285  const std::set<size_t>& groups);
286 
/**
 * @brief Compute the Weir and Cockerham Fst on a set of groups for a given
 * set of loci and make a permutation test.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPositions The positions of the loci to use (taken by value).
 * @param groups The set of groups to consider (taken by value).
 * @param nb_perm The number of permutations to perform.
 * @return A PermResults holding the statistic and the permutation outcome.
 */
293  static PermResults getWCMultilocusFstAndPerm(
294  const PolymorphismMultiGContainer& pmgc,
295  std::vector<size_t> locusPositions,
296  std::set<size_t> groups,
297  unsigned int nb_perm);
298 
/**
 * @brief Compute the Weir and Cockerham Fis on a set of groups for a given
 * set of loci and make a permutation test.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPositions The positions of the loci to use (taken by value).
 * @param groups The set of groups to consider (taken by value).
 * @param nbPerm The number of permutations to perform.
 * @return A PermResults holding the statistic and the permutation outcome.
 */
305  static PermResults getWCMultilocusFisAndPerm(
306  const PolymorphismMultiGContainer& pmgc,
307  std::vector<size_t> locusPositions,
308  std::set<size_t> groups,
309  unsigned int nbPerm);
310 
311 
/**
 * @brief Compute the RH multilocus Fst on a set of groups for a given set of
 * loci.
 *
 * NOTE(review): the rendered description lost the name of the statistic;
 * "RH Fst" is taken from the function name. Confirm the exact estimator
 * against the implementation.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPositions The positions of the loci to use (taken by value,
 *        so the caller's vector is copied).
 * @param groups The set of groups to consider.
 * @return The multilocus Fst.
 */
316  static double getRHMultilocusFst(
317  const PolymorphismMultiGContainer& pmgc,
318  std::vector<size_t> locusPositions,
319  const std::set<size_t>& groups);
320 
/**
 * @brief Compute pairwise distances on a set of groups for a given set of
 * loci.
 *
 * @param pmgc The PolymorphismMultiGContainer to work on.
 * @param locusPositions The positions of the loci to use (taken by value,
 *        so the caller's vector is copied).
 * @param groups The set of groups to consider.
 * @param distance_method The name of the distance to compute. Nei72 is one
 *        accepted value; the full list is not visible from this header, so
 *        check the implementation.
 * @return A DistanceMatrix owned by the caller through the returned
 *         std::unique_ptr.
 */
326  static std::unique_ptr<DistanceMatrix> getDistanceMatrix(
327  const PolymorphismMultiGContainer& pmgc,
328  std::vector<size_t> locusPositions,
329  const std::set<size_t>& groups,
330  std::string distance_method);
331 };
332 } // end of namespace bpp;
333 
334 #endif // _MULTILOCUSGENOTYPESTATISTICS_H_
The MultilocusGenotypeStatistics class.
static size_t countNonMissingForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count the number of non-missing data at a given locus for a set of groups.
static double getWCMultilocusFis(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci. The variance component...
static std::map< size_t, double > getAllelesFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the alleles frequencies at one locus for a set of groups.
static std::map< size_t, double > getAllelesFit(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus.
static std::map< size_t, size_t > getAllelesMapForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get a map of allele count for a set of groups.
static double getHexpForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the expected heterozygosity for one locus.
static std::unique_ptr< DistanceMatrix > getDistanceMatrix(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups, std::string distance_method)
Compute pairwise distances on a set of groups for a given set of loci. distance is either Nei72,...
static double getHnbForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the expected non biased heterozygosity for one locus.
static std::map< size_t, size_t > countHeterozygousForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count how many times each allele is found in a heterozygous MonolocusGenotype in a set of groups.
static double getWCMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fst on a set of groups for a given set of loci. The variance components f...
static std::map< size_t, VarComp > getVarianceComponents(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the variance components a, b and c (Weir and Cockerham, 1983).
static std::map< size_t, double > getAllelesFst(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fst on a set of groups for each allele of a given locus.
static std::map< size_t, Fstats > getAllelesFstats(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the three F statistics of Weir and Cockerham for each allele of a given locus.
static double getRHMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups)
Compute the RH Fst on a set of groups for a given set of loci. The variance components for each allele are ...
static PermResults getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, std::set< size_t > groups, unsigned int nb_perm)
Compute the Weir and Cockerham Fst on a set of groups for a given set of loci and make a permutation tes...
static std::map< size_t, double > getHeterozygousFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the heterozygous frequencies for each allele at a locus in a set of groups.
static std::vector< size_t > getAllelesIdsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the alleles' id at one locus for a set of groups.
static std::map< size_t, double > getAllelesFis(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus.
static double getDnei78(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, size_t grp1, size_t grp2)
Compute the Nei unbiased distance between two groups at a given number of loci.
static size_t countBiAllelicForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count the number of bi-allelic MonolocusGenotype at a given locus for a set of groups.
static size_t countGametesForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count the number of alleles (gametes) at a locus for a set of groups.
static PermResults getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, std::set< size_t > groups, unsigned int nbPerm)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation ...
static double getHobsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the observed heterozygosity for one locus.
static double getDnei72(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, size_t grp1, size_t grp2)
Compute the Nei distance between two groups at a given number of loci.
The PolymorphismMultiGContainer class.