bpp-popgen  3.0.0
MultilocusGenotypeStatistics.h
Go to the documentation of this file.
1 //
2 // File MultilocusGenotypeStatistics.h
3 // Authors : Sylvain Gaillard
4 // Khalid Belkhir
5 // Last modification : Wednesday August 04 2004
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11  This software is a computer program whose purpose is to provide classes
12  for population genetics analysis.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39  */
40 
41 #ifndef _MULTILOCUSGENOTYPESTATISTICS_H_
42 #define _MULTILOCUSGENOTYPESTATISTICS_H_
43 
44 // From STL
45 #include <string>
46 #include <vector>
47 #include <map>
48 #include <set>
49 #include <memory>
50 
51 #include <Bpp/Exceptions.h>
52 
53 // From SeqLib
54 #include <Bpp/Seq/DistanceMatrix.h>
55 
56 // From popgenlib
58 #include "MultilocusGenotype.h"
59 #include "GeneralExceptions.h"
60 
61 namespace bpp
62 {
71 {
72 public:
/** @brief Holder for the three variance components a, b and c (Weir and Cockerham, 1983); mapped value type of getVarianceComponents(). */
73  struct VarComp
74  {
75  double a; ///< Variance component a.
76  double b; ///< Variance component b.
77  double c; ///< Variance component c.
78  };
79 
/** @brief Holder for the three F statistics of Weir and Cockerham; mapped value type of getAllelesFstats(). */
80  struct Fstats
81  {
82  double Fit; ///< Fit statistic.
83  double Fst; ///< Fst statistic.
84  double Fis; ///< Fis statistic.
85  };
86 
/** @brief Result type of the permutation-test functions getWCMultilocusFstAndPerm() and getWCMultilocusFisAndPerm(). */
87  struct PermResults
88  {
89  double Statistic; ///< NOTE(review): presumably the statistic computed on the non-permuted data -- confirm against the implementation.
90  double Percent_sup; ///< NOTE(review): presumably the percentage of permutations giving a value above Statistic -- confirm.
91  double Percent_inf; ///< NOTE(review): presumably the percentage of permutations giving a value below Statistic -- confirm.
92  };
93 
/** @brief Get the alleles' ids at one locus for a set of groups. */
99  static std::vector<size_t> getAllelesIdsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
100 
/** @brief Count the number of alleles (gametes) at a locus for a set of groups. */
106  static size_t countGametesForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
107 
/** @brief Get a map of allele counts at one locus for a set of groups (NOTE(review): keys are presumably allele ids -- confirm). */
113  static std::map<size_t, size_t> getAllelesMapForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
114 
/** @brief Get the allele frequencies at one locus for a set of groups. */
121  static std::map<size_t, double> getAllelesFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
122 
/** @brief Count the number of non-missing data at a given locus for a set of groups. */
128  static size_t countNonMissingForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
129 
/** @brief Count the number of bi-allelic MonolocusGenotype at a given locus for a set of groups. */
135  static size_t countBiAllelicForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
136 
/** @brief Count how many times each allele is found in a heterozygous MonolocusGenotype in a set of groups. */
142  static std::map<size_t, size_t> countHeterozygousForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
143 
/** @brief Get the heterozygous frequencies for each allele at a locus in a set of groups. */
150  static std::map<size_t, double> getHeterozygousFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
151 
/** @brief Compute the observed heterozygosity for one locus in a set of groups. */
159  static double getHobsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
160 
/** @brief Compute the expected heterozygosity for one locus in a set of groups. */
172  static double getHexpForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
173 
/** @brief Compute the expected non-biased heterozygosity for one locus in a set of groups. */
185  static double getHnbForGroups(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
186 
/** @brief Compute the Nei distance between the two groups grp1 and grp2; the loci to use are passed as a list of positions (locus_positions, taken by value). */
200  static double getDnei72(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, size_t grp1, size_t grp2);
201 
/** @brief Compute the Nei unbiased distance between the two groups grp1 and grp2 at the loci listed in locus_positions. */
222  static double getDnei78(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, size_t grp1, size_t grp2);
223 
/** @brief Compute the three F statistics of Weir and Cockerham (Fit, Fst, Fis) for each allele of a given locus. */
227  static std::map<size_t, Fstats> getAllelesFstats(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
228 
/** @brief Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus. */
232  static std::map<size_t, double> getAllelesFit(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
233 
/** @brief Compute the Weir and Cockerham Fst on a set of groups for each allele of a given locus. */
237  static std::map<size_t, double> getAllelesFst(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
238 
/** @brief Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus. */
242  static std::map<size_t, double> getAllelesFis(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
243 
/** @brief Get the variance components a, b and c (Weir and Cockerham, 1983) at a given locus for a set of groups. */
247  static std::map<size_t, VarComp> getVarianceComponents(const PolymorphismMultiGContainer& pmgc, size_t locus_position, const std::set<size_t>& groups);
248 
/** @brief Compute the Weir and Cockerham Fst on a set of groups for a given set of loci. */
253  static double getWCMultilocusFst(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, const std::set<size_t>& groups);
254 
/** @brief Compute the Weir and Cockerham Fis on a set of groups for a given set of loci. */
259  static double getWCMultilocusFis(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, const std::set<size_t>& groups);
260 
/** @brief Compute the Weir and Cockerham Fst on a set of groups for a given set of loci and make a permutation test (NOTE(review): nb_perm is presumably the number of permutations -- confirm). */
267  static PermResults getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, std::set<size_t> groups, int nb_perm);
268 
/** @brief Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation test (NOTE(review): nb_perm is presumably the number of permutations -- confirm). */
275  static PermResults getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, std::set<size_t> groups, int nb_perm);
276 
277 
/** @brief Compute the "RH" multilocus Fst on a set of groups for a given set of loci (NOTE(review): which estimator "RH" denotes is not stated in this header -- confirm). */
282  static double getRHMultilocusFst(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, const std::set<size_t>& groups);
283 
/** @brief Compute pairwise distances on a set of groups for a given set of loci; distance_method names the distance to use (accepted values are not listed in this header). Ownership of the matrix passes to the caller via std::unique_ptr. */
289  static std::unique_ptr<DistanceMatrix> getDistanceMatrix(const PolymorphismMultiGContainer& pmgc, std::vector<size_t> locus_positions, const std::set<size_t>& groups, std::string distance_method);
290 };
291 } // end of namespace bpp;
292 
293 #endif // _MULTILOCUSGENOTYPESTATISTICS_H_
294 
The MultilocusGenotypeStatistics class.
static double getDnei78(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, size_t grp1, size_t grp2)
Compute the Nei unbiased distance between two groups at a given number of loci.
static std::map< size_t, double > getAllelesFit(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus.
static std::map< size_t, double > getHeterozygousFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get the heterozygous frequencies for each allele at a locus in a set of groups.
static std::map< size_t, VarComp > getVarianceComponents(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get the variance components a, b and c (Weir and Cockerham, 1983).
static double getDnei72(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, size_t grp1, size_t grp2)
Compute the Nei distance between two groups at a given set of loci.
static std::map< size_t, size_t > countHeterozygousForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Count how many times each allele is found in a heterozygous MonolocusGenotype in a set of groups.
static size_t countBiAllelicForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Count the number of bi-allelic MonolocusGenotype at a given locus for a set of groups.
static double getHnbForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the expected non biased heterozygosity for one locus.
static std::map< size_t, double > getAllelesFst(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fst on a set of groups for each allele of a given locus.
static PermResults getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, std::set< size_t > groups, int nb_perm)
Compute the Weir and Cockerham Fst on a set of groups for a given set of loci and make a permutation test...
static std::vector< size_t > getAllelesIdsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get the alleles' id at one locus for a set of groups.
static size_t countGametesForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Count the number of alleles (gametes) at a locus for a set of groups.
static std::unique_ptr< DistanceMatrix > getDistanceMatrix(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups, std::string distance_method)
Compute pairwise distances on a set of groups for a given set of loci. The distance is either Nei72,...
static std::map< size_t, Fstats > getAllelesFstats(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the three F statistics of Weir and Cockerham for each allele of a given locus.
static double getHexpForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the expected heterozygosity for one locus.
static double getRHMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups)
Compute the RH multilocus Fst on a set of groups for a given set of loci. The variance components for each allele are...
static double getHobsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the observed heterozygosity for one locus.
static size_t countNonMissingForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Count the number of non-missing data at a given locus for a set of groups.
static std::map< size_t, double > getAllelesFis(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus.
static PermResults getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, std::set< size_t > groups, int nb_perm)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation ...
static std::map< size_t, size_t > getAllelesMapForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get a map of allele count for a set of groups.
static double getWCMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fst on a set of groups for a given set of loci. The variance components ...
static double getWCMultilocusFis(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci. The variance components...
static std::map< size_t, double > getAllelesFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get the alleles frequencies at one locus for a set of groups.
The PolymorphismMultiGContainer class.