bpp-popgen3  3.0.0
MultilocusGenotypeStatistics.cpp
Go to the documentation of this file.
1 /*
2  * File MultilocusGenotypeStatistics.cpp
3  * Author : Sylvain Gaillard <yragael2001@yahoo.fr>
4  * Last modification : Wednesday August 04 2004
5  *
6  */
7 
8 // Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 // SPDX-FileCopyrightText: The Bio++ Development Group
10 //
11 // SPDX-License-Identifier: CECILL-2.1
12 
13 #include <Bpp/Utils/MapTools.h>
14 
17 
18 using namespace bpp;
19 
20 // From STL
21 
22 #include <iostream>
23 #include <cmath>
24 #include <algorithm>
25 
26 using namespace std;
27 
28 vector<size_t> MultilocusGenotypeStatistics::getAllelesIdsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
29 {
30  map<size_t, size_t> tmp_alleles;
31  try
32  {
33  tmp_alleles = getAllelesMapForGroups(pmgc, locusPosition, groups);
34  }
35  catch (IndexOutOfBoundsException& ioobe)
36  {
37  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getAllelesIdsForGroups: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
38  }
39  return MapTools::getKeys(tmp_alleles);
40 }
41 
42 size_t MultilocusGenotypeStatistics::countGametesForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
43 {
44  map<size_t, size_t> allele_count;
45  size_t nb_tot_allele = 0;
46  try
47  {
48  allele_count = getAllelesMapForGroups(pmgc, locusPosition, groups);
49  }
50  catch (IndexOutOfBoundsException& ioobe)
51  {
52  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countGametesForGroups: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
53  }
54  vector<size_t> counter = MapTools::getValues(allele_count);
55  for (size_t i = 0; i < counter.size(); i++)
56  {
57  nb_tot_allele += counter[i];
58  }
59  return nb_tot_allele;
60 }
61 
62 map<size_t, size_t> MultilocusGenotypeStatistics::getAllelesMapForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
63 {
64  map<size_t, size_t> alleles_count;
65  for (size_t i = 0; i < pmgc.size(); i++)
66  {
67  try
68  {
69  if (!pmgc.multilocusGenotype(i).isMonolocusGenotypeMissing(locusPosition) && (groups.find(pmgc.getGroupId(i)) != groups.end()) )
70  {
71  // if (! pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locusPosition) && (find(groups.begin(), groups.end(), pmgc.getGroupId(i)) != groups.end()) ) {
72  vector<size_t> tmp_alleles = pmgc.multilocusGenotype(i).monolocusGenotype(locusPosition).getAlleleIndex();
73  for (size_t j = 0; j < tmp_alleles.size(); j++)
74  {
75  alleles_count[tmp_alleles[j]]++;
76  }
77  }
78  }
79  catch (IndexOutOfBoundsException& ioobe)
80  {
81  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getAllelesMapForGroups: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
82  }
83  }
84  return alleles_count;
85 }
86 
87 map<size_t, double> MultilocusGenotypeStatistics::getAllelesFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
88 {
89  map<size_t, double> alleles_frq;
90  size_t nb_tot_allele = 0;
91  map<size_t, size_t> tmp_alleles;
92  try
93  {
94  tmp_alleles = getAllelesMapForGroups(pmgc, locusPosition, groups);
95  }
96  catch (IndexOutOfBoundsException& ioobe)
97  {
98  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getAllelesFrqForGroups: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
99  }
100  vector<size_t> counter = MapTools::getValues(tmp_alleles);
101  for (size_t i = 0; i < counter.size(); i++)
102  {
103  nb_tot_allele += counter[i];
104  }
105  if (nb_tot_allele == 0)
106  throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFrqForGroups.");
107  for (map<size_t, size_t>::iterator it = tmp_alleles.begin(); it != tmp_alleles.end(); it++)
108  {
109  alleles_frq[it->first] = static_cast<double>(it->second) / static_cast<double>(nb_tot_allele);
110  }
111  return alleles_frq;
112 }
113 
114 size_t MultilocusGenotypeStatistics::countNonMissingForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
115 {
116  size_t counter = 0;
117  for (size_t i = 0; i < pmgc.size(); i++)
118  {
119  try
120  {
121  // if (! pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locusPosition) && (find(groups.begin(), groups.end(), pmgc.getGroupId(i)) != groups.end()) )
122  if (!pmgc.multilocusGenotype(i).isMonolocusGenotypeMissing(locusPosition) && (groups.find(pmgc.getGroupId(i) ) != groups.end()) )
123  counter++;
124  }
125  catch (IndexOutOfBoundsException& ioobe)
126  {
127  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countNonMissing: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
128  }
129  }
130  return counter;
131 }
132 
133 size_t MultilocusGenotypeStatistics::countBiAllelicForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
134 {
135  size_t counter = 0;
136  for (size_t i = 0; i < pmgc.size(); i++)
137  {
138  try
139  {
140  // if (! pmgc.getMultilocusGenotype(i)->isMonolocusGenotypeMissing(locusPosition) && (find(groups.begin(), groups.end(), pmgc.getGroupId(i)) != groups.end()) )
141  if (!pmgc.multilocusGenotype(i).isMonolocusGenotypeMissing(locusPosition) && (groups.find(pmgc.getGroupId(i)) != groups.end()) )
142  if ((pmgc.multilocusGenotype(i).monolocusGenotype(locusPosition).getAlleleIndex()).size() == 2)
143  counter++;
144  }
145  catch (IndexOutOfBoundsException& ioobe)
146  {
147  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countBiAllelic: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
148  }
149  }
150  return counter;
151 }
152 
153 map<size_t, size_t> MultilocusGenotypeStatistics::countHeterozygousForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
154 {
155  map<size_t, size_t> counter;
156  for (size_t i = 0; i < pmgc.size(); i++)
157  {
158  try
159  {
160  if (!pmgc.multilocusGenotype(i).isMonolocusGenotypeMissing(locusPosition) && (groups.find(pmgc.getGroupId(i)) != groups.end() ))
161  {
162  const auto& tmpMg = pmgc.multilocusGenotype(i).monolocusGenotype(locusPosition);
163  if ((tmpMg.getAlleleIndex()).size() == 2)
164  {
165  if (!dynamic_cast<const BiAlleleMonolocusGenotype&>(tmpMg).isHomozygous())
166  {
167  vector<size_t> tmpAlleles = tmpMg.getAlleleIndex();
168  for (size_t j = 0; j < tmpAlleles.size(); j++)
169  {
170  counter[tmpAlleles[j]]++;
171  }
172  }
173  }
174  }
175  }
176  catch (IndexOutOfBoundsException& ioobe)
177  {
178  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::countHeterozygous: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
179  }
180  }
181  return counter;
182 }
183 
184 map<size_t, double> MultilocusGenotypeStatistics::getHeterozygousFrqForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
185 {
186  map<size_t, double> freq;
187  size_t counter = 0;
188  for (size_t i = 0; i < pmgc.size(); i++)
189  {
190  try
191  {
192  if (!pmgc.multilocusGenotype(i).isMonolocusGenotypeMissing(locusPosition) && (groups.find(pmgc.getGroupId(i)) != groups.end()) )
193  {
194  const auto& tmpMg = pmgc.multilocusGenotype(i).monolocusGenotype(locusPosition);
195  if ((tmpMg.getAlleleIndex()).size() == 2)
196  {
197  counter++;
198  if (!dynamic_cast<const BiAlleleMonolocusGenotype&>(tmpMg).isHomozygous())
199  {
200  vector<size_t> tmpAlleles = tmpMg.getAlleleIndex();
201  for (size_t j = 0; j < tmpAlleles.size(); j++)
202  {
203  freq[tmpAlleles[j]]++;
204  }
205  }
206  }
207  }
208  }
209  catch (IndexOutOfBoundsException& ioobe)
210  {
211  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHeterozygousFrqForGroups: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
212  }
213  }
214  if (counter == 0)
215  throw ZeroDivisionException("MultilocusGenotypeStatistics::getHeterozygousFrqForGroups.");
216  for (map<size_t, double>::iterator i = freq.begin(); i != freq.end(); i++)
217  {
218  i->second = (double) i->second / (double) counter;
219  }
220  return freq;
221 }
222 
223 double MultilocusGenotypeStatistics::getHobsForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
224 {
225  map<size_t, double> heterozygous_frq;
226  double frq = 0.;
227  try
228  {
229  heterozygous_frq = getHeterozygousFrqForGroups(pmgc, locusPosition, groups);
230  }
231  catch (IndexOutOfBoundsException& ioobe)
232  {
233  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHobsForGroups: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
234  }
235  catch (ZeroDivisionException& zde)
236  {
237  throw ZeroDivisionException("MultilocusGenotypeStatistics::getHobsForGroups.");
238  }
239  for (map<size_t, double>::iterator it = heterozygous_frq.begin(); it != heterozygous_frq.end(); it++)
240  {
241  frq += it->second;
242  }
243  return frq / static_cast<double>(heterozygous_frq.size());
244 }
245 
246 double MultilocusGenotypeStatistics::getHexpForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
247 {
248  map<size_t, double> allele_frq;
249  double frqsqr = 0.;
250  try
251  {
252  allele_frq = getAllelesFrqForGroups(pmgc, locusPosition, groups);
253  }
254  catch (IndexOutOfBoundsException& ioobe)
255  {
256  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHexpForGroups: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
257  }
258  catch (ZeroDivisionException& zde)
259  {
260  throw ZeroDivisionException("MultilocusGenotypeStatistics::getHexpForGroups.");
261  }
262  for (map<size_t, double>::iterator it = allele_frq.begin(); it != allele_frq.end(); it++)
263  {
264  frqsqr += it->second * it->second;
265  }
266  return 1 - frqsqr;
267 }
268 
269 double MultilocusGenotypeStatistics::getHnbForGroups(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
270 {
271  size_t nb_alleles;
272  double Hexp;
273  try
274  {
275  nb_alleles = countGametesForGroups(pmgc, locusPosition, groups);
276  Hexp = getHexpForGroups(pmgc, locusPosition, groups);
277  }
278  catch (IndexOutOfBoundsException& ioobe)
279  {
280  throw IndexOutOfBoundsException("MultilocusGenotypeStatistics::getHnbForGroups: locusPosition out of bounds.", ioobe.getBadIndex(), ioobe.getBounds()[0], ioobe.getBounds()[1]);
281  }
282  catch (ZeroDivisionException& zde)
283  {
284  throw ZeroDivisionException("MultilocusGenotypeStatistics::getHnbForGroups.");
285  }
286  return 2 * static_cast<double>(nb_alleles) * Hexp / static_cast<double>((2 * nb_alleles) - 1);
287 }
288 
289 double MultilocusGenotypeStatistics::getDnei72(const PolymorphismMultiGContainer& pmgc, vector<size_t> locusPositions, size_t grp1, size_t grp2)
290 {
291  map<size_t, double> allele_frq1, allele_frq2;
292  vector<size_t> allele_ids;
293  set<size_t> group1_id;
294  set<size_t> group2_id;
295  set<size_t> groups_id;
296  double Jx = 0.;
297  double Jy = 0.;
298  double Jxy = 0.;
299  group1_id.insert(grp1);
300  group2_id.insert(grp2);
301  groups_id.insert(grp1);
302  groups_id.insert(grp2);
303  for (size_t i = 0; i < locusPositions.size(); i++)
304  {
305  allele_ids.clear();
306  allele_frq1.clear();
307  allele_frq2.clear();
308  try
309  {
310  allele_ids = getAllelesIdsForGroups(pmgc, locusPositions[i], groups_id);
311  allele_frq1 = getAllelesFrqForGroups(pmgc, locusPositions[i], group1_id);
312  allele_frq2 = getAllelesFrqForGroups(pmgc, locusPositions[i], group2_id);
313  }
314  catch (Exception& e)
315  {
316  throw e;
317  }
318  for (size_t j = 0; j < allele_ids.size(); j++)
319  {
320  map<size_t, double>::iterator it1 = allele_frq1.find(allele_ids[j]);
321  map<size_t, double>::iterator it2 = allele_frq2.find(allele_ids[j]);
322  double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.;
323  double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.;
324  Jx += tmp_frq1 * tmp_frq1;
325  Jy += tmp_frq2 * tmp_frq2;
326  Jxy += tmp_frq1 * tmp_frq2;
327  }
328  }
329  if (Jx * Jy == 0.)
330  throw ZeroDivisionException("MultilocusGenotypeStatistics::getDnei72.");
331  return -log(Jxy / sqrt(Jx * Jy));
332 }
333 
334 double MultilocusGenotypeStatistics::getDnei78(const PolymorphismMultiGContainer& pmgc, vector<size_t> locusPositions, size_t grp1, size_t grp2)
335 {
336  map<size_t, double> allele_frq1, allele_frq2;
337  vector<size_t> allele_ids;
338  set<size_t> group1_id;
339  set<size_t> group2_id;
340  set<size_t> groups_id;
341  double Jx = 0.;
342  double Jy = 0.;
343  double Jxy = 0.;
344  size_t nx = 0, ny = 0;
345  group1_id.insert(grp1);
346  group2_id.insert(grp2);
347  groups_id.insert(grp1);
348  groups_id.insert(grp2);
349  for (size_t i = 0; i < locusPositions.size(); i++)
350  {
351  allele_ids.clear();
352  allele_frq1.clear();
353  allele_frq2.clear();
354  try
355  {
356  allele_ids = getAllelesIdsForGroups(pmgc, locusPositions[i], groups_id);
357  allele_frq1 = getAllelesFrqForGroups(pmgc, locusPositions[i], group1_id);
358  allele_frq2 = getAllelesFrqForGroups(pmgc, locusPositions[i], group2_id);
359  nx = countBiAllelicForGroups(pmgc, locusPositions[i], group1_id);
360  ny = countBiAllelicForGroups(pmgc, locusPositions[i], group2_id);
361  }
362  catch (Exception& e)
363  {
364  throw e;
365  }
366  double tmp_Jx = 0.;
367  double tmp_Jy = 0.;
368  for (size_t j = 0; j < allele_ids.size(); j++)
369  {
370  map<size_t, double>::iterator it1 = allele_frq1.find(allele_ids[j]);
371  map<size_t, double>::iterator it2 = allele_frq2.find(allele_ids[j]);
372  double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.;
373  double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.;
374  tmp_Jx += tmp_frq1 * tmp_frq1;
375  tmp_Jy += tmp_frq2 * tmp_frq2;
376  Jxy += tmp_frq1 * tmp_frq2;
377  }
378  Jx += ((2. * (double) nx * tmp_Jx) - 1.) / ((2. * (double) nx) - 1.);
379  Jy += ((2. * (double) ny * tmp_Jy) - 1.) / ((2. * (double) ny) - 1.);
380  }
381  double denom = Jx * Jy;
382  if (denom == 0.)
383  throw ZeroDivisionException("MultilocusGenotypeStatistics::getDnei78.");
384  return -log(Jxy / sqrt(denom));
385 }
386 
387 map<size_t, MultilocusGenotypeStatistics::Fstats> MultilocusGenotypeStatistics::getAllelesFstats(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
388 {
389  map<size_t, MultilocusGenotypeStatistics::VarComp> vc = getVarianceComponents(pmgc, locusPosition, groups);
390  map<size_t, MultilocusGenotypeStatistics::Fstats> f_stats;
391  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = vc.begin(); it != vc.end(); it++)
392  {
393  double abc = it->second.a + it->second.b + it->second.c;
394  double bc = it->second.b + it->second.c;
395 
396  if (abc == 0)
397  {
398  f_stats[it->first].Fit = NAN;
399  f_stats[it->first].Fst = NAN;
400  }
401  {
402  f_stats[it->first].Fit = 1. - it->second.c / abc;
403  f_stats[it->first].Fst = it->second.a / abc;
404  }
405  if (bc == 0)
406  f_stats[it->first].Fis = NAN;
407  else
408  f_stats[it->first].Fis = 1. - it->second.c / bc;
409  }
410  return f_stats;
411 }
412 
413 map<size_t, double> MultilocusGenotypeStatistics::getAllelesFit(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
414 {
415  map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPosition, groups);
416  map<size_t, double> Fit;
417  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
418  {
419  Fit[it->first] = it->second.a + it->second.b + it->second.c;
420  if (Fit[it->first] == 0.)
421  throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFit.");
422  Fit[it->first] = 1. - it->second.c / Fit[it->first];
423  }
424  return Fit;
425 }
426 
427 map<size_t, double> MultilocusGenotypeStatistics::getAllelesFst(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
428 {
429  if (groups.size() <= 1)
430  throw BadIntegerException("MultilocusGenotypeStatistics::getAllelesFst: groups must be >= 2.", static_cast<int>(groups.size()));
431  map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPosition, groups);
432  map<size_t, double> Fst;
433  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
434  {
435  Fst[it->first] = it->second.a + it->second.b + it->second.c;
436  if (Fst[it->first] == 0.)
437  throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFst.");
438  Fst[it->first] = it->second.a / Fst[it->first];
439  }
440  return Fst;
441 }
442 
443 map<size_t, double> MultilocusGenotypeStatistics::getAllelesFis(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
444 {
445  map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPosition, groups);
446  map<size_t, double> Fis;
447  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
448  {
449  Fis[it->first] = it->second.b + it->second.c;
450  if (Fis[it->first] == 0.)
451  throw ZeroDivisionException("MultilocusGenotypeStatistics::getAllelesFis.");
452  Fis[it->first] = 1. - it->second.c / Fis[it->first];
453  }
454  return Fis;
455 }
456 
457 map<size_t, MultilocusGenotypeStatistics::VarComp> MultilocusGenotypeStatistics::getVarianceComponents(const PolymorphismMultiGContainer& pmgc, size_t locusPosition, const set<size_t>& groups)
458 {
459  map<size_t, MultilocusGenotypeStatistics::VarComp> values;
460  // Base values computation
461  double nbar = 0.;
462  double nc = 0.;
463  vector<size_t> ids = getAllelesIdsForGroups(pmgc, locusPosition, groups);
464  map<size_t, double> pbar;
465  map<size_t, double> s2;
466  map<size_t, double> hbar;
467  for (size_t i = 0; i < ids.size(); i++)
468  {
469  pbar[ids[i]] = 0.;
470  s2[ids[i]] = 0.;
471  hbar[ids[i]] = 0.;
472  }
473  double r = static_cast<double>(groups.size());
474  for (set<size_t>::iterator set_it = groups.begin(); set_it != groups.end(); set_it++)
475  {
476  size_t i = (*set_it);
477  double ni = static_cast<double>(pmgc.getLocusGroupSize(i, locusPosition));
478  set<size_t> group_id;
479  group_id.insert( i );
480  map<size_t, double> pi = getAllelesFrqForGroups(pmgc, locusPosition, group_id);
481  map<size_t, double> hi = getHeterozygousFrqForGroups(pmgc, locusPosition, group_id);
482  nbar += ni;
483  if (r > 1)
484  nc += ni * ni;
485 
486  for (map<size_t, double>::iterator it = pi.begin(); it != pi.end(); it++)
487  {
488  pbar[it->first] += ni * it->second;
489  }
490  for (map<size_t, double>::iterator it = hi.begin(); it != hi.end(); it++)
491  {
492  hbar[it->first] += ni * it->second;
493  }
494 
495  group_id.clear();
496  }
497  nbar = nbar / r;
498  if (nbar <= 1)
499  throw ZeroDivisionException("MultilocusGenotypeStatistics::getVarianceComponents.");
500  if (r > 1)
501  nc = (r * nbar) - (nc / (r * nbar)) / (r - 1.);
502  for (map<size_t, double>::iterator it = pbar.begin(); it != pbar.end(); it++)
503  {
504  it->second = it->second / (r * nbar);
505  }
506  for (map<size_t, double>::iterator it = hbar.begin(); it != hbar.end(); it++)
507  {
508  it->second = it->second / ( r * nbar);
509  }
510 
511  for (set<size_t>::iterator set_it = groups.begin(); set_it != groups.end(); set_it++)
512  {
513  size_t i = (*set_it);
514  double ni = static_cast<double>(pmgc.getLocusGroupSize( i, locusPosition));
515  set<size_t> group_id;
516  group_id.insert( i );
517  map<size_t, double> pi = getAllelesFrqForGroups(pmgc, locusPosition, group_id);
518  for (size_t j = 0; j < ids.size(); j++)
519  {
520  pi[ids[j]];
521  }
522  for (map<size_t, double>::iterator it = pi.begin(); it != pi.end(); it++)
523  {
524  s2[it->first] += ni * (it->second - pbar[it->first]) * (it->second - pbar[it->first]);
525  }
526  group_id.clear();
527  }
528  for (map<size_t, double>::iterator it = s2.begin(); it != s2.end(); it++)
529  {
530  it->second = it->second / ((r - 1.) * nbar);
531  }
532 
533  // a, b, c computation
534  for (size_t i = 0; i < ids.size(); i++)
535  {
536  values[ids[i]];
537  }
538 
539  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
540  {
541  it->second.a = (nbar / nc) * (s2[it->first] - ((1. / (nbar - 1.)) * ((pbar[it->first] * (1. - pbar[it->first])) - (s2[it->first] * ((double) r - 1.) / r) - ((1. / 4.) * hbar[it->first]))));
542  it->second.b = (nbar / (nbar - 1.)) * ((pbar[it->first] * (1. - pbar[it->first])) - (s2[it->first] * ((double) r - 1.) / (double) r) - ((((2. * nbar) - 1.) / (4. * nbar)) * hbar[it->first]));
543  it->second.c = hbar[it->first] / 2.;
544  }
545  return values;
546 }
547 
548 double MultilocusGenotypeStatistics::getWCMultilocusFst(const PolymorphismMultiGContainer& pmgc, vector<size_t> locusPositions, const set<size_t>& groups)
549 {
550  double A, B, C;
551  A = B = C = 0.0;
552  for (size_t i = 0; i < locusPositions.size(); i++)
553  {
554  // count total number of individuals without missing data
555  size_t ni = 0;
556  for (set<size_t>::iterator setIt = groups.begin(); setIt != groups.end(); setIt++)
557  {
558  ni += pmgc.getLocusGroupSize( (*setIt), i);
559  }
560 
561  // reduce computation for polymorphic loci for that groups
562  vector<size_t> ids = getAllelesIdsForGroups(pmgc, i, groups);
563  if (ids.size() >= 2 && ni >= 1)
564  {
565  map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPositions[i], groups);
566  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
567  {
568  A += it->second.a;
569  B += it->second.b;
570  C += it->second.c;
571  }
572  }
573  }
574  if ((A + B + C) == 0)
575  throw ZeroDivisionException("MultilocusGenotypeStatistics::getWCMultilocusFst.");
576  return A / (A + B + C);
577 }
578 
579 double MultilocusGenotypeStatistics::getWCMultilocusFis(const PolymorphismMultiGContainer& pmgc, vector<size_t> locusPositions, const set<size_t>& groups)
580 {
581  double B, C;
582  B = C = 0.0;
583  for (size_t i = 0; i < locusPositions.size(); i++)
584  {
585  // count total number of individuals without missing data
586  size_t ni = 0;
587  for (set<size_t>::iterator setIt = groups.begin(); setIt != groups.end(); setIt++)
588  {
589  ni += pmgc.getLocusGroupSize( (*setIt), i);
590  }
591 
592  // reduce computation for polymorphic loci for that groups
593  vector<size_t> ids = getAllelesIdsForGroups(pmgc, i, groups);
594  if (ids.size() >= 2 && ni >= 1)
595  {
596  map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPositions[i], groups);
597  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
598  {
599  B += it->second.b;
600  C += it->second.c;
601  }
602  }
603  }
604  if ((B + C) == 0)
605  throw ZeroDivisionException("MultilocusGenotypeStatistics::getWCMultilocusFis.");
606  return 1.0 - C / (B + C);
607 }
608 
610  const PolymorphismMultiGContainer& pmgc,
611  vector<size_t> locusPositions,
612  set<size_t> groups,
613  unsigned int nbPerm)
614 {
615  // extract a PolymorphismMultiGContainer with only those groups
616  auto subPmgc = PolymorphismMultiGContainerTools::extractGroups(pmgc, groups);
617  double nbSup = 0.0;
618  double nbInf = 0.0;
619  PermResults results;
620  results.statistic = getWCMultilocusFst(*subPmgc, locusPositions, groups);
621  if (nbPerm > 0)
622  {
623  for (size_t i = 0; i < nbPerm; ++i)
624  {
625  auto permutedPmgc = PolymorphismMultiGContainerTools::permuteMultiG(*subPmgc);
626  double FstPerm = getWCMultilocusFst(*permutedPmgc, locusPositions, groups);
627  // cout << Fst_perm << endl;
628  if (FstPerm > results.statistic)
629  nbSup++;
630  if (FstPerm < results.statistic)
631  nbInf++;
632  }
633 
634  nbSup /= static_cast<double>(nbPerm);
635  nbInf /= static_cast<double>(nbPerm);
636  }
637 
638  results.percentSup = nbSup;
639  results.percentInf = nbInf;
640  return results;
641 }
642 
644  const PolymorphismMultiGContainer& pmgc,
645  vector<size_t> locusPositions,
646  set<size_t> groups,
647  unsigned int nbPerm)
648 {
649  // extract a PolymorphismMultiGContainer with only those groups
650  auto subPmgc = PolymorphismMultiGContainerTools::extractGroups(pmgc, groups);
651  double nbSup = 0.0;
652  double nbInf = 0.0;
653  PermResults results;
654  results.statistic = getWCMultilocusFis(*subPmgc, locusPositions, groups);
655  if (nbPerm > 0)
656  {
657  for (unsigned int i = 0; i < nbPerm; ++i)
658  {
659  auto permutedPmgc = PolymorphismMultiGContainerTools::permuteIntraGroupAlleles(*subPmgc, groups);
660  double FisPerm = getWCMultilocusFis(*permutedPmgc, locusPositions, groups);
661 
662  if (FisPerm > results.statistic)
663  nbSup++;
664  if (FisPerm < results.statistic)
665  nbInf++;
666  }
667 
668  nbSup /= static_cast<double>(nbPerm);
669  nbInf /= static_cast<double>(nbPerm);
670  }
671 
672  results.percentSup = nbSup;
673  results.percentInf = nbInf;
674  return results;
675 }
676 
678  const PolymorphismMultiGContainer& pmgc,
679  vector<size_t> locusPositions,
680  const set<size_t>& groups)
681 {
682  double Au, Bu, Cu;
683  double RH = 0.0;
684  int nb_alleles = 0;
685  int total_alleles = 0;
686 
687  for (size_t i = 0; i < locusPositions.size(); i++)
688  {
689  // reduce computation for polymorphic loci for that groups
690  vector<size_t> ids = getAllelesIdsForGroups(pmgc, i, groups);
691  if (ids.size() >= 2)
692  {
693  nb_alleles = 0;
694  // mean allelic frequencies
695  map< size_t, double > P = MultilocusGenotypeStatistics::getAllelesFrqForGroups (pmgc, locusPositions[i], groups);
696  // variance components from W&C
697  map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPositions[i], groups);
698  for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
699  {
700  Au = it->second.a;
701  Bu = it->second.b;
702  Cu = it->second.c;
703  if ((Au + Bu + Cu) != 0)
704  {
705  double Pu = P[it->first]; // it->first is the allele number
706  RH += (1 - Pu) * Au / (Au + Bu + Cu);
707  nb_alleles++;
708  }
709  }
710  total_alleles += (nb_alleles - 1);
711  }
712  }
713  if (total_alleles == 0)
714  throw ZeroDivisionException("MultilocusGenotypeStatistics::getRHMultilocusFst.");
715  return RH / double(total_alleles);
716 }
717 
718 std::unique_ptr<DistanceMatrix> MultilocusGenotypeStatistics::getDistanceMatrix(const PolymorphismMultiGContainer& pmgc, vector<size_t> locusPositions, const set<size_t>& groups, string distance_methode)
719 {
720  vector<string> names = pmgc.getAllGroupsNames();
721  vector<size_t> grp_ids_vect;
722  for (set<size_t>::iterator i = groups.begin(); i != groups.end(); i++)
723  {
724  grp_ids_vect.push_back(*i);
725  }
726 
727  unique_ptr<DistanceMatrix> _dist(new DistanceMatrix(names));
728  for (size_t i = 0; i < groups.size(); i++)
729  {
730  (*_dist)(i, i) = 0;
731  }
732 
733  set<size_t> pairwise_grp;
734 
735  for (size_t j = 0; j < groups.size () - 1; j++)
736  {
737  for (size_t k = j + 1; k < groups.size (); k++)
738  {
739  double distance = 0;
740  if (distance_methode == "nei72")
741  distance = MultilocusGenotypeStatistics::getDnei72( pmgc, locusPositions, grp_ids_vect[j], grp_ids_vect[k] );
742  else if (distance_methode == "nei78")
743  distance = MultilocusGenotypeStatistics::getDnei78( pmgc, locusPositions, grp_ids_vect[j], grp_ids_vect[k] );
744  else if (distance_methode == "WC") // Fst multilocus selon W&C
745  {
746  pairwise_grp.insert(grp_ids_vect[j] );
747  pairwise_grp.insert(grp_ids_vect[k] );
748  distance = MultilocusGenotypeStatistics::getWCMultilocusFst( pmgc, locusPositions, pairwise_grp);
749  pairwise_grp.clear();
750  }
751  else if (distance_methode == "RH") // Fst multilocus selon ponderation Robertson & Hill
752  {
753  pairwise_grp.insert(grp_ids_vect[j] );
754  pairwise_grp.insert(grp_ids_vect[k] );
755  distance = MultilocusGenotypeStatistics::getRHMultilocusFst( pmgc, locusPositions, pairwise_grp);
756  pairwise_grp.clear();
757  }
758  else if (distance_methode == "Nm") // Nm déduit des Fst multilocus selon W&C modèle en îles Fst = 1/(1+4Nm)
759  {
760  pairwise_grp.insert(grp_ids_vect[j] );
761  pairwise_grp.insert(grp_ids_vect[k] );
762  distance = MultilocusGenotypeStatistics::getWCMultilocusFst( pmgc, locusPositions, pairwise_grp);
763  if (distance != 0)
764  distance = 0.25 * (1 - distance) / distance;
765  else
766  distance = NAN;
767  pairwise_grp.clear();
768  }
769  else if (distance_methode == "D") // D=-ln(1-Fst) of Reynolds, Weir and Cockerham, 1983
770  {
771  pairwise_grp.insert(grp_ids_vect[j] );
772  pairwise_grp.insert(grp_ids_vect[k] );
773  distance = MultilocusGenotypeStatistics::getWCMultilocusFst( pmgc, locusPositions, pairwise_grp);
774  if (distance != 1)
775  distance = -log(1 - distance);
776  else
777  distance = NAN;
778  pairwise_grp.clear();
779  }
780  else if (distance_methode == "Rousset") // Calcul de Fst/(1-Fst). Rousset F. 1997
781  {
782  pairwise_grp.insert(grp_ids_vect[j] );
783  pairwise_grp.insert(grp_ids_vect[k] );
784  distance = MultilocusGenotypeStatistics::getWCMultilocusFst( pmgc, locusPositions, pairwise_grp);
785  if (distance != 1)
786  distance = distance / (1 - distance);
787  else
788  distance = NAN;
789  pairwise_grp.clear();
790  }
791 
792  (*_dist)(k, j) = distance;
793  (*_dist)(j, k) = distance;
794  } // for k
795  } // for j
796 
797  return _dist;
798 }
The BiAlleleMonolocusGenotype class.
std::size_t getBadIndex() const
const std::array< std::size_t, 2 > & getBounds() const
static std::vector< T > getValues(const std::map< Key, T, Cmp > &myMap)
static std::vector< Key > getKeys(const std::map< Key, T, Cmp > &myMap)
virtual std::vector< size_t > getAlleleIndex() const =0
Get the alleles' index.
static size_t countNonMissingForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count the number of non-missing data at a given locus for a set of groups.
static double getWCMultilocusFis(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci. The variance component...
static std::map< size_t, double > getAllelesFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the alleles frequencies at one locus for a set of groups.
static std::map< size_t, double > getAllelesFit(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus.
static std::map< size_t, size_t > getAllelesMapForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get a map of allele count for a set of groups.
static double getHexpForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the expected heterozygosity for one locus.
static std::unique_ptr< DistanceMatrix > getDistanceMatrix(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups, std::string distance_method)
Compute pairwise distances on a set of groups for a given set of loci. distance is either Nei72,...
static double getHnbForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the expected non biased heterozygosity for one locus.
static std::map< size_t, size_t > countHeterozygousForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count how many times each allele is found in an heterozygous MonolocusGenotype in a set of groups.
static double getWCMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups)
Compute the Weir and Cockerham on a set of groups for a given set of loci. The variance components f...
static std::map< size_t, VarComp > getVarianceComponents(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the variance components a, b and c (Weir and Cockerham, 1983).
static std::map< size_t, double > getAllelesFst(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the Weir and Cockerham on a set of groups for each allele of a given locus.
static std::map< size_t, Fstats > getAllelesFstats(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the three F statistics of Weir and Cockerham for each allele of a given locus.
static double getRHMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups)
Compute the on a set of groups for a given set of loci. The variance components for each allele are ...
static PermResults getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, std::set< size_t > groups, unsigned int nb_perm)
Compute the Weir and Cockerham on a set of groups for a given set of loci and make a permutation tes...
static std::map< size_t, double > getHeterozygousFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the heterozygous frequencies for each allele at a locus in a set of groups.
static std::vector< size_t > getAllelesIdsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the alleles' id at one locus for a set of groups.
static std::map< size_t, double > getAllelesFis(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus.
static double getDnei78(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, size_t grp1, size_t grp2)
Compute the Nei unbiased distance between two groups at a given number of loci.
static size_t countBiAllelicForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Counr the number of bi-allelic MonolocusGenotype at a given locus for a set of groups.
static size_t countGametesForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count the number of allele (gametes) at a locus for a set of groups.
static PermResults getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, std::set< size_t > groups, unsigned int nbPerm)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation ...
static double getHobsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the observed heterozygosity for one locus.
static double getDnei72(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, size_t grp1, size_t grp2)
Compute the Nei distance between two groups at one locus.
const MonolocusGenotypeInterface & monolocusGenotype(size_t locusPosition) const
Get a MonolocusGenotype.
bool isMonolocusGenotypeMissing(size_t locusPosition) const
Tell if a MonolocusGenotype is a missing data.
static std::unique_ptr< PolymorphismMultiGContainer > permuteMultiG(const PolymorphismMultiGContainer &pmgc)
Permut the MultilocusGenotype in the whole PolymorphismMultiGContainer.
static std::unique_ptr< PolymorphismMultiGContainer > permuteIntraGroupAlleles(const PolymorphismMultiGContainer &pmgc, const std::set< size_t > &groups)
Permut the Alleles between individuals in the same group.
static std::unique_ptr< PolymorphismMultiGContainer > extractGroups(const PolymorphismMultiGContainer &pmgc, const std::set< size_t > &groups)
The PolymorphismMultiGContainer class.
size_t getGroupId(size_t position) const
Get the Group id of a MultilocusGenotype.
size_t getLocusGroupSize(size_t group, size_t locusPosition) const
Get the size of a group for a given locus.
const MultilocusGenotype & multilocusGenotype(size_t position) const
Get a MultilocusGenotype at a position.
std::vector< std::string > getAllGroupsNames() const
Get the groups names or ids if not available.
size_t size() const
Get the number of MultilocusGenotype.