30 map<size_t, size_t> tmp_alleles;
33 tmp_alleles = getAllelesMapForGroups(pmgc, locusPosition, groups);
44 map<size_t, size_t> allele_count;
45 size_t nb_tot_allele = 0;
48 allele_count = getAllelesMapForGroups(pmgc, locusPosition, groups);
55 for (
size_t i = 0; i < counter.size(); i++)
57 nb_tot_allele += counter[i];
64 map<size_t, size_t> alleles_count;
65 for (
size_t i = 0; i < pmgc.
size(); i++)
73 for (
size_t j = 0; j < tmp_alleles.size(); j++)
75 alleles_count[tmp_alleles[j]]++;
89 map<size_t, double> alleles_frq;
90 size_t nb_tot_allele = 0;
91 map<size_t, size_t> tmp_alleles;
94 tmp_alleles = getAllelesMapForGroups(pmgc, locusPosition, groups);
101 for (
size_t i = 0; i < counter.size(); i++)
103 nb_tot_allele += counter[i];
105 if (nb_tot_allele == 0)
107 for (map<size_t, size_t>::iterator it = tmp_alleles.begin(); it != tmp_alleles.end(); it++)
109 alleles_frq[it->first] =
static_cast<double>(it->second) /
static_cast<double>(nb_tot_allele);
117 for (
size_t i = 0; i < pmgc.
size(); i++)
136 for (
size_t i = 0; i < pmgc.
size(); i++)
155 map<size_t, size_t> counter;
156 for (
size_t i = 0; i < pmgc.
size(); i++)
163 if ((tmpMg.getAlleleIndex()).size() == 2)
168 for (
size_t j = 0; j < tmpAlleles.size(); j++)
170 counter[tmpAlleles[j]]++;
186 map<size_t, double> freq;
188 for (
size_t i = 0; i < pmgc.
size(); i++)
195 if ((tmpMg.getAlleleIndex()).size() == 2)
201 for (
size_t j = 0; j < tmpAlleles.size(); j++)
203 freq[tmpAlleles[j]]++;
216 for (map<size_t, double>::iterator i = freq.begin(); i != freq.end(); i++)
218 i->second = (double) i->second / (
double) counter;
225 map<size_t, double> heterozygous_frq;
229 heterozygous_frq = getHeterozygousFrqForGroups(pmgc, locusPosition, groups);
239 for (map<size_t, double>::iterator it = heterozygous_frq.begin(); it != heterozygous_frq.end(); it++)
243 return frq /
static_cast<double>(heterozygous_frq.size());
248 map<size_t, double> allele_frq;
252 allele_frq = getAllelesFrqForGroups(pmgc, locusPosition, groups);
262 for (map<size_t, double>::iterator it = allele_frq.begin(); it != allele_frq.end(); it++)
264 frqsqr += it->second * it->second;
275 nb_alleles = countGametesForGroups(pmgc, locusPosition, groups);
276 Hexp = getHexpForGroups(pmgc, locusPosition, groups);
286 return 2 *
static_cast<double>(nb_alleles) * Hexp /
static_cast<double>((2 * nb_alleles) - 1);
291 map<size_t, double> allele_frq1, allele_frq2;
292 vector<size_t> allele_ids;
293 set<size_t> group1_id;
294 set<size_t> group2_id;
295 set<size_t> groups_id;
299 group1_id.insert(grp1);
300 group2_id.insert(grp2);
301 groups_id.insert(grp1);
302 groups_id.insert(grp2);
303 for (
size_t i = 0; i < locusPositions.size(); i++)
310 allele_ids = getAllelesIdsForGroups(pmgc, locusPositions[i], groups_id);
311 allele_frq1 = getAllelesFrqForGroups(pmgc, locusPositions[i], group1_id);
312 allele_frq2 = getAllelesFrqForGroups(pmgc, locusPositions[i], group2_id);
318 for (
size_t j = 0; j < allele_ids.size(); j++)
320 map<size_t, double>::iterator it1 = allele_frq1.find(allele_ids[j]);
321 map<size_t, double>::iterator it2 = allele_frq2.find(allele_ids[j]);
322 double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.;
323 double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.;
324 Jx += tmp_frq1 * tmp_frq1;
325 Jy += tmp_frq2 * tmp_frq2;
326 Jxy += tmp_frq1 * tmp_frq2;
331 return -log(Jxy / sqrt(Jx * Jy));
336 map<size_t, double> allele_frq1, allele_frq2;
337 vector<size_t> allele_ids;
338 set<size_t> group1_id;
339 set<size_t> group2_id;
340 set<size_t> groups_id;
344 size_t nx = 0, ny = 0;
345 group1_id.insert(grp1);
346 group2_id.insert(grp2);
347 groups_id.insert(grp1);
348 groups_id.insert(grp2);
349 for (
size_t i = 0; i < locusPositions.size(); i++)
356 allele_ids = getAllelesIdsForGroups(pmgc, locusPositions[i], groups_id);
357 allele_frq1 = getAllelesFrqForGroups(pmgc, locusPositions[i], group1_id);
358 allele_frq2 = getAllelesFrqForGroups(pmgc, locusPositions[i], group2_id);
359 nx = countBiAllelicForGroups(pmgc, locusPositions[i], group1_id);
360 ny = countBiAllelicForGroups(pmgc, locusPositions[i], group2_id);
368 for (
size_t j = 0; j < allele_ids.size(); j++)
370 map<size_t, double>::iterator it1 = allele_frq1.find(allele_ids[j]);
371 map<size_t, double>::iterator it2 = allele_frq2.find(allele_ids[j]);
372 double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.;
373 double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.;
374 tmp_Jx += tmp_frq1 * tmp_frq1;
375 tmp_Jy += tmp_frq2 * tmp_frq2;
376 Jxy += tmp_frq1 * tmp_frq2;
378 Jx += ((2. * (double) nx * tmp_Jx) - 1.) / ((2. * (
double) nx) - 1.);
379 Jy += ((2. * (double) ny * tmp_Jy) - 1.) / ((2. * (
double) ny) - 1.);
381 double denom = Jx * Jy;
384 return -log(Jxy / sqrt(denom));
389 map<size_t, MultilocusGenotypeStatistics::VarComp> vc = getVarianceComponents(pmgc, locusPosition, groups);
390 map<size_t, MultilocusGenotypeStatistics::Fstats> f_stats;
391 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = vc.begin(); it != vc.end(); it++)
393 double abc = it->second.a + it->second.b + it->second.c;
394 double bc = it->second.b + it->second.c;
398 f_stats[it->first].Fit = NAN;
399 f_stats[it->first].Fst = NAN;
402 f_stats[it->first].Fit = 1. - it->second.c / abc;
403 f_stats[it->first].Fst = it->second.a / abc;
406 f_stats[it->first].Fis = NAN;
408 f_stats[it->first].Fis = 1. - it->second.c / bc;
415 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPosition, groups);
416 map<size_t, double> Fit;
417 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
419 Fit[it->first] = it->second.a + it->second.b + it->second.c;
420 if (Fit[it->first] == 0.)
422 Fit[it->first] = 1. - it->second.c / Fit[it->first];
429 if (groups.size() <= 1)
430 throw BadIntegerException(
"MultilocusGenotypeStatistics::getAllelesFst: groups must be >= 2.",
static_cast<int>(groups.size()));
431 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPosition, groups);
432 map<size_t, double> Fst;
433 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
435 Fst[it->first] = it->second.a + it->second.b + it->second.c;
436 if (Fst[it->first] == 0.)
438 Fst[it->first] = it->second.a / Fst[it->first];
445 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPosition, groups);
446 map<size_t, double> Fis;
447 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
449 Fis[it->first] = it->second.b + it->second.c;
450 if (Fis[it->first] == 0.)
452 Fis[it->first] = 1. - it->second.c / Fis[it->first];
459 map<size_t, MultilocusGenotypeStatistics::VarComp> values;
463 vector<size_t> ids = getAllelesIdsForGroups(pmgc, locusPosition, groups);
464 map<size_t, double> pbar;
465 map<size_t, double> s2;
466 map<size_t, double> hbar;
467 for (
size_t i = 0; i < ids.size(); i++)
473 double r =
static_cast<double>(groups.size());
474 for (set<size_t>::iterator set_it = groups.begin(); set_it != groups.end(); set_it++)
476 size_t i = (*set_it);
478 set<size_t> group_id;
479 group_id.insert( i );
480 map<size_t, double> pi = getAllelesFrqForGroups(pmgc, locusPosition, group_id);
481 map<size_t, double> hi = getHeterozygousFrqForGroups(pmgc, locusPosition, group_id);
486 for (map<size_t, double>::iterator it = pi.begin(); it != pi.end(); it++)
488 pbar[it->first] += ni * it->second;
490 for (map<size_t, double>::iterator it = hi.begin(); it != hi.end(); it++)
492 hbar[it->first] += ni * it->second;
501 nc = (r * nbar) - (nc / (r * nbar)) / (r - 1.);
502 for (map<size_t, double>::iterator it = pbar.begin(); it != pbar.end(); it++)
504 it->second = it->second / (r * nbar);
506 for (map<size_t, double>::iterator it = hbar.begin(); it != hbar.end(); it++)
508 it->second = it->second / ( r * nbar);
511 for (set<size_t>::iterator set_it = groups.begin(); set_it != groups.end(); set_it++)
513 size_t i = (*set_it);
515 set<size_t> group_id;
516 group_id.insert( i );
517 map<size_t, double> pi = getAllelesFrqForGroups(pmgc, locusPosition, group_id);
518 for (
size_t j = 0; j < ids.size(); j++)
522 for (map<size_t, double>::iterator it = pi.begin(); it != pi.end(); it++)
524 s2[it->first] += ni * (it->second - pbar[it->first]) * (it->second - pbar[it->first]);
528 for (map<size_t, double>::iterator it = s2.begin(); it != s2.end(); it++)
530 it->second = it->second / ((r - 1.) * nbar);
534 for (
size_t i = 0; i < ids.size(); i++)
539 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
541 it->second.a = (nbar / nc) * (s2[it->first] - ((1. / (nbar - 1.)) * ((pbar[it->first] * (1. - pbar[it->first])) - (s2[it->first] * ((double) r - 1.) / r) - ((1. / 4.) * hbar[it->first]))));
542 it->second.b = (nbar / (nbar - 1.)) * ((pbar[it->first] * (1. - pbar[it->first])) - (s2[it->first] * ((double) r - 1.) / (double) r) - ((((2. * nbar) - 1.) / (4. * nbar)) * hbar[it->first]));
543 it->second.c = hbar[it->first] / 2.;
552 for (
size_t i = 0; i < locusPositions.size(); i++)
556 for (set<size_t>::iterator setIt = groups.begin(); setIt != groups.end(); setIt++)
562 vector<size_t> ids = getAllelesIdsForGroups(pmgc, i, groups);
563 if (ids.size() >= 2 && ni >= 1)
565 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPositions[i], groups);
566 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
574 if ((A + B + C) == 0)
576 return A / (A + B + C);
583 for (
size_t i = 0; i < locusPositions.size(); i++)
587 for (set<size_t>::iterator setIt = groups.begin(); setIt != groups.end(); setIt++)
593 vector<size_t> ids = getAllelesIdsForGroups(pmgc, i, groups);
594 if (ids.size() >= 2 && ni >= 1)
596 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPositions[i], groups);
597 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
606 return 1.0 - C / (B + C);
611 vector<size_t> locusPositions,
620 results.
statistic = getWCMultilocusFst(*subPmgc, locusPositions, groups);
623 for (
size_t i = 0; i < nbPerm; ++i)
626 double FstPerm = getWCMultilocusFst(*permutedPmgc, locusPositions, groups);
634 nbSup /=
static_cast<double>(nbPerm);
635 nbInf /=
static_cast<double>(nbPerm);
645 vector<size_t> locusPositions,
654 results.
statistic = getWCMultilocusFis(*subPmgc, locusPositions, groups);
657 for (
unsigned int i = 0; i < nbPerm; ++i)
660 double FisPerm = getWCMultilocusFis(*permutedPmgc, locusPositions, groups);
668 nbSup /=
static_cast<double>(nbPerm);
669 nbInf /=
static_cast<double>(nbPerm);
679 vector<size_t> locusPositions,
680 const set<size_t>& groups)
685 int total_alleles = 0;
687 for (
size_t i = 0; i < locusPositions.size(); i++)
690 vector<size_t> ids = getAllelesIdsForGroups(pmgc, i, groups);
697 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locusPositions[i], groups);
698 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
703 if ((Au + Bu + Cu) != 0)
705 double Pu = P[it->first];
706 RH += (1 - Pu) * Au / (Au + Bu + Cu);
710 total_alleles += (nb_alleles - 1);
713 if (total_alleles == 0)
715 return RH / double(total_alleles);
721 vector<size_t> grp_ids_vect;
722 for (set<size_t>::iterator i = groups.begin(); i != groups.end(); i++)
724 grp_ids_vect.push_back(*i);
728 for (
size_t i = 0; i < groups.size(); i++)
733 set<size_t> pairwise_grp;
735 for (
size_t j = 0; j < groups.size () - 1; j++)
737 for (
size_t k = j + 1; k < groups.size (); k++)
740 if (distance_methode ==
"nei72")
742 else if (distance_methode ==
"nei78")
744 else if (distance_methode ==
"WC")
746 pairwise_grp.insert(grp_ids_vect[j] );
747 pairwise_grp.insert(grp_ids_vect[k] );
749 pairwise_grp.clear();
751 else if (distance_methode ==
"RH")
753 pairwise_grp.insert(grp_ids_vect[j] );
754 pairwise_grp.insert(grp_ids_vect[k] );
756 pairwise_grp.clear();
758 else if (distance_methode ==
"Nm")
760 pairwise_grp.insert(grp_ids_vect[j] );
761 pairwise_grp.insert(grp_ids_vect[k] );
764 distance = 0.25 * (1 - distance) / distance;
767 pairwise_grp.clear();
769 else if (distance_methode ==
"D")
771 pairwise_grp.insert(grp_ids_vect[j] );
772 pairwise_grp.insert(grp_ids_vect[k] );
775 distance = -log(1 - distance);
778 pairwise_grp.clear();
780 else if (distance_methode ==
"Rousset")
782 pairwise_grp.insert(grp_ids_vect[j] );
783 pairwise_grp.insert(grp_ids_vect[k] );
786 distance = distance / (1 - distance);
789 pairwise_grp.clear();
792 (*_dist)(k, j) = distance;
793 (*_dist)(j, k) = distance;
The BiAlleleMonolocusGenotype class.
std::size_t getBadIndex() const
const std::array< std::size_t, 2 > & getBounds() const
virtual std::vector< size_t > getAlleleIndex() const =0
Get the alleles' index.
static size_t countNonMissingForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count the number of non-missing data at a given locus for a set of groups.
static double getWCMultilocusFis(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci. The variance component...
static std::map< size_t, double > getAllelesFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the alleles frequencies at one locus for a set of groups.
static std::map< size_t, double > getAllelesFit(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus.
static std::map< size_t, size_t > getAllelesMapForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get a map of allele count for a set of groups.
static double getHexpForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the expected heterozygosity for one locus.
static std::unique_ptr< DistanceMatrix > getDistanceMatrix(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups, std::string distance_method)
Compute pairwise distances on a set of groups for a given set of loci. distance is either Nei72,...
static double getHnbForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the expected non biased heterozygosity for one locus.
static std::map< size_t, size_t > countHeterozygousForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count how many times each allele is found in an heterozygous MonolocusGenotype in a set of groups.
static double getWCMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups)
Compute the Weir and Cockerham on a set of groups for a given set of loci. The variance components f...
static std::map< size_t, VarComp > getVarianceComponents(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the variance components a, b and c (Weir and Cockerham, 1983).
static std::map< size_t, double > getAllelesFst(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the Weir and Cockerham on a set of groups for each allele of a given locus.
static std::map< size_t, Fstats > getAllelesFstats(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the three F statistics of Weir and Cockerham for each allele of a given locus.
static double getRHMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, const std::set< size_t > &groups)
Compute the on a set of groups for a given set of loci. The variance components for each allele are ...
static PermResults getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, std::set< size_t > groups, unsigned int nb_perm)
Compute the Weir and Cockerham on a set of groups for a given set of loci and make a permutation tes...
static std::map< size_t, double > getHeterozygousFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the heterozygous frequencies for each allele at a locus in a set of groups.
static std::vector< size_t > getAllelesIdsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Get the alleles' id at one locus for a set of groups.
static std::map< size_t, double > getAllelesFis(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus.
static double getDnei78(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, size_t grp1, size_t grp2)
Compute the Nei unbiased distance between two groups at a given number of loci.
static size_t countBiAllelicForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Counr the number of bi-allelic MonolocusGenotype at a given locus for a set of groups.
static size_t countGametesForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Count the number of allele (gametes) at a locus for a set of groups.
static PermResults getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, std::set< size_t > groups, unsigned int nbPerm)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation ...
static double getHobsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locusPosition, const std::set< size_t > &groups)
Compute the observed heterozygosity for one locus.
static double getDnei72(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locusPositions, size_t grp1, size_t grp2)
Compute the Nei distance between two groups at one locus.
const MonolocusGenotypeInterface & monolocusGenotype(size_t locusPosition) const
Get a MonolocusGenotype.
bool isMonolocusGenotypeMissing(size_t locusPosition) const
Tell if a MonolocusGenotype is a missing data.
The PolymorphismMultiGContainer class.
size_t getGroupId(size_t position) const
Get the Group id of a MultilocusGenotype.
size_t getLocusGroupSize(size_t group, size_t locusPosition) const
Get the size of a group for a given locus.
const MultilocusGenotype & multilocusGenotype(size_t position) const
Get a MultilocusGenotype at a position.
std::vector< std::string > getAllGroupsNames() const
Get the groups names or ids if not available.
size_t size() const
Get the number of MultilocusGenotype.