58 map<size_t, size_t> tmp_alleles;
61 tmp_alleles = getAllelesMapForGroups(pmgc, locus_position, groups);
72 map<size_t, size_t> allele_count;
73 size_t nb_tot_allele = 0;
76 allele_count = getAllelesMapForGroups(pmgc, locus_position, groups);
83 for (
size_t i = 0; i < counter.size(); i++)
85 nb_tot_allele += counter[i];
92 map<size_t, size_t> alleles_count;
93 for (
size_t i = 0; i < pmgc.
size(); i++)
101 for (
size_t j = 0; j < tmp_alleles.size(); j++)
103 alleles_count[tmp_alleles[j]]++;
112 return alleles_count;
117 map<size_t, double> alleles_frq;
118 size_t nb_tot_allele = 0;
119 map<size_t, size_t> tmp_alleles;
122 tmp_alleles = getAllelesMapForGroups(pmgc, locus_position, groups);
129 for (
size_t i = 0; i < counter.size(); i++)
131 nb_tot_allele += counter[i];
133 if (nb_tot_allele == 0)
135 for (map<size_t, size_t>::iterator it = tmp_alleles.begin(); it != tmp_alleles.end(); it++)
137 alleles_frq[it->first] =
static_cast<double>(it->second) /
static_cast<double>(nb_tot_allele);
145 for (
size_t i = 0; i < pmgc.
size(); i++)
164 for (
size_t i = 0; i < pmgc.
size(); i++)
183 map<size_t, size_t> counter;
184 for (
size_t i = 0; i < pmgc.
size(); i++)
196 for (
size_t j = 0; j < tmp_alleles.size(); j++)
198 counter[tmp_alleles[j]]++;
214 map<size_t, double> freq;
216 for (
size_t i = 0; i < pmgc.
size(); i++)
229 for (
size_t j = 0; j < tmp_alleles.size(); j++)
231 freq[tmp_alleles[j]]++;
244 for (map<size_t, double>::iterator i = freq.begin(); i != freq.end(); i++)
246 i->second = (double) i->second / (
double) counter;
253 map<size_t, double> heterozygous_frq;
257 heterozygous_frq = getHeterozygousFrqForGroups(pmgc, locus_position, groups);
267 for (map<size_t, double>::iterator it = heterozygous_frq.begin(); it != heterozygous_frq.end(); it++)
271 return frq /
static_cast<double>(heterozygous_frq.size());
276 map<size_t, double> allele_frq;
280 allele_frq = getAllelesFrqForGroups(pmgc, locus_position, groups);
290 for (map<size_t, double>::iterator it = allele_frq.begin(); it != allele_frq.end(); it++)
292 frqsqr += it->second * it->second;
303 nb_alleles = countGametesForGroups(pmgc, locus_position, groups);
304 Hexp = getHexpForGroups(pmgc, locus_position, groups);
314 return 2 *
static_cast<double>(nb_alleles) * Hexp /
static_cast<double>((2 * nb_alleles) - 1);
319 map<size_t, double> allele_frq1, allele_frq2;
320 vector<size_t> allele_ids;
321 set<size_t> group1_id;
322 set<size_t> group2_id;
323 set<size_t> groups_id;
327 group1_id.insert(grp1);
328 group2_id.insert(grp2);
329 groups_id.insert(grp1);
330 groups_id.insert(grp2);
331 for (
size_t i = 0; i < locus_positions.size(); i++)
338 allele_ids = getAllelesIdsForGroups(pmgc, locus_positions[i], groups_id);
339 allele_frq1 = getAllelesFrqForGroups(pmgc, locus_positions[i], group1_id);
340 allele_frq2 = getAllelesFrqForGroups(pmgc, locus_positions[i], group2_id);
346 for (
size_t j = 0; j < allele_ids.size(); j++)
348 map<size_t, double>::iterator it1 = allele_frq1.find(allele_ids[j]);
349 map<size_t, double>::iterator it2 = allele_frq2.find(allele_ids[j]);
350 double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.;
351 double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.;
352 Jx += tmp_frq1 * tmp_frq1;
353 Jy += tmp_frq2 * tmp_frq2;
354 Jxy += tmp_frq1 * tmp_frq2;
359 return -log(Jxy / sqrt(Jx * Jy));
364 map<size_t, double> allele_frq1, allele_frq2;
365 vector<size_t> allele_ids;
366 set<size_t> group1_id;
367 set<size_t> group2_id;
368 set<size_t> groups_id;
372 size_t nx = 0, ny = 0;
373 group1_id.insert(grp1);
374 group2_id.insert(grp2);
375 groups_id.insert(grp1);
376 groups_id.insert(grp2);
377 for (
size_t i = 0; i < locus_positions.size(); i++)
384 allele_ids = getAllelesIdsForGroups(pmgc, locus_positions[i], groups_id);
385 allele_frq1 = getAllelesFrqForGroups(pmgc, locus_positions[i], group1_id);
386 allele_frq2 = getAllelesFrqForGroups(pmgc, locus_positions[i], group2_id);
387 nx = countBiAllelicForGroups(pmgc, locus_positions[i], group1_id);
388 ny = countBiAllelicForGroups(pmgc, locus_positions[i], group2_id);
396 for (
size_t j = 0; j < allele_ids.size(); j++)
398 map<size_t, double>::iterator it1 = allele_frq1.find(allele_ids[j]);
399 map<size_t, double>::iterator it2 = allele_frq2.find(allele_ids[j]);
400 double tmp_frq1 = (it1 != allele_frq1.end()) ? it1->second : 0.;
401 double tmp_frq2 = (it2 != allele_frq2.end()) ? it2->second : 0.;
402 tmp_Jx += tmp_frq1 * tmp_frq1;
403 tmp_Jy += tmp_frq2 * tmp_frq2;
404 Jxy += tmp_frq1 * tmp_frq2;
406 Jx += ((2. * (double) nx * tmp_Jx) - 1.) / ((2. * (
double) nx) - 1.);
407 Jy += ((2. * (double) ny * tmp_Jy) - 1.) / ((2. * (
double) ny) - 1.);
409 double denom = Jx * Jy;
412 return -log(Jxy / sqrt(denom));
417 map<size_t, MultilocusGenotypeStatistics::VarComp> vc = getVarianceComponents(pmgc, locus_position, groups);
418 map<size_t, MultilocusGenotypeStatistics::Fstats> f_stats;
419 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = vc.begin(); it != vc.end(); it++)
421 double abc = it->second.a + it->second.b + it->second.c;
422 double bc = it->second.b + it->second.c;
426 f_stats[it->first].Fit = NAN;
427 f_stats[it->first].Fst = NAN;
430 f_stats[it->first].Fit = 1. - it->second.c / abc;
431 f_stats[it->first].Fst = it->second.a / abc;
434 f_stats[it->first].Fis = NAN;
436 f_stats[it->first].Fis = 1. - it->second.c / bc;
443 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locus_position, groups);
444 map<size_t, double> Fit;
445 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
447 Fit[it->first] = it->second.a + it->second.b + it->second.c;
448 if (Fit[it->first] == 0.)
450 Fit[it->first] = 1. - it->second.c / Fit[it->first];
457 if (groups.size() <= 1)
458 throw BadIntegerException(
"MultilocusGenotypeStatistics::getAllelesFst: groups must be >= 2.",
static_cast<int>(groups.size()));
459 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locus_position, groups);
460 map<size_t, double> Fst;
461 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
463 Fst[it->first] = it->second.a + it->second.b + it->second.c;
464 if (Fst[it->first] == 0.)
466 Fst[it->first] = it->second.a / Fst[it->first];
473 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locus_position, groups);
474 map<size_t, double> Fis;
475 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
477 Fis[it->first] = it->second.b + it->second.c;
478 if (Fis[it->first] == 0.)
480 Fis[it->first] = 1. - it->second.c / Fis[it->first];
487 map<size_t, MultilocusGenotypeStatistics::VarComp> values;
491 vector<size_t> ids = getAllelesIdsForGroups(pmgc, locus_position, groups);
492 map<size_t, double> pbar;
493 map<size_t, double> s2;
494 map<size_t, double> hbar;
495 for (
size_t i = 0; i < ids.size(); i++)
501 double r =
static_cast<double>(groups.size());
502 for (set<size_t>::iterator set_it = groups.begin(); set_it != groups.end(); set_it++)
504 size_t i = (*set_it);
506 set<size_t> group_id;
507 group_id.insert( i );
508 map<size_t, double> pi = getAllelesFrqForGroups(pmgc, locus_position, group_id);
509 map<size_t, double> hi = getHeterozygousFrqForGroups(pmgc, locus_position, group_id);
514 for (map<size_t, double>::iterator it = pi.begin(); it != pi.end(); it++)
516 pbar[it->first] += ni * it->second;
518 for (map<size_t, double>::iterator it = hi.begin(); it != hi.end(); it++)
520 hbar[it->first] += ni * it->second;
529 nc = (r * nbar) - (nc / (r * nbar)) / (r - 1.);
530 for (map<size_t, double>::iterator it = pbar.begin(); it != pbar.end(); it++)
532 it->second = it->second / (r * nbar);
534 for (map<size_t, double>::iterator it = hbar.begin(); it != hbar.end(); it++)
536 it->second = it->second / ( r * nbar);
539 for (set<size_t>::iterator set_it = groups.begin(); set_it != groups.end(); set_it++)
541 size_t i = (*set_it);
543 set<size_t> group_id;
544 group_id.insert( i );
545 map<size_t, double> pi = getAllelesFrqForGroups(pmgc, locus_position, group_id);
546 for (
size_t j = 0; j < ids.size(); j++)
550 for (map<size_t, double>::iterator it = pi.begin(); it != pi.end(); it++)
552 s2[it->first] += ni * (it->second - pbar[it->first]) * (it->second - pbar[it->first]);
556 for (map<size_t, double>::iterator it = s2.begin(); it != s2.end(); it++)
558 it->second = it->second / ((r - 1.) * nbar);
562 for (
size_t i = 0; i < ids.size(); i++)
567 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
569 it->second.a = (nbar / nc) * (s2[it->first] - ((1. / (nbar - 1.)) * ((pbar[it->first] * (1. - pbar[it->first])) - (s2[it->first] * ((double) r - 1.) / r) - ((1. / 4.) * hbar[it->first]))));
570 it->second.b = (nbar / (nbar - 1.)) * ((pbar[it->first] * (1. - pbar[it->first])) - (s2[it->first] * ((double) r - 1.) / (double) r) - ((((2. * nbar) - 1.) / (4. * nbar)) * hbar[it->first]));
571 it->second.c = hbar[it->first] / 2.;
580 for (
size_t i = 0; i < locus_positions.size(); i++)
584 for (set<size_t>::iterator setIt = groups.begin() ; setIt != groups.end() ; setIt++)
590 vector<size_t> ids = getAllelesIdsForGroups(pmgc, i, groups);
591 if (ids.size() >= 2 && ni >= 1)
593 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locus_positions[i], groups);
594 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
602 if ((A + B + C) == 0)
604 return A / (A + B + C);
611 for (
size_t i = 0; i < locus_positions.size(); i++)
615 for (set<size_t>::iterator setIt = groups.begin() ; setIt != groups.end() ; setIt++)
621 vector<size_t> ids = getAllelesIdsForGroups(pmgc, i, groups);
622 if (ids.size() >= 2 && ni >= 1)
624 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locus_positions[i], groups);
625 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
634 return 1.0 - C / (B + C);
644 results.
Statistic = getWCMultilocusFst(sub_pmgc, locus_positions, groups);
647 for (
int i = 0; i < nb_perm; i++)
650 double Fst_perm = getWCMultilocusFst(permuted_pmgc, locus_positions, groups);
658 nb_sup /= (double) nb_perm;
659 nb_inf /= (double) nb_perm;
674 results.
Statistic = getWCMultilocusFis(sub_pmgc, locus_positions, groups);
677 for (
int i = 0; i < nb_perm; i++)
680 double Fis_perm = getWCMultilocusFis(permuted_pmgc, locus_positions, groups);
688 nb_sup /= (double) nb_perm;
689 nb_inf /= (double) nb_perm;
702 int total_alleles = 0;
704 for (
size_t i = 0; i < locus_positions.size(); i++)
707 vector<size_t> ids = getAllelesIdsForGroups(pmgc, i, groups);
714 map<size_t, MultilocusGenotypeStatistics::VarComp> values = getVarianceComponents(pmgc, locus_positions[i], groups);
715 for (map<size_t, MultilocusGenotypeStatistics::VarComp>::iterator it = values.begin(); it != values.end(); it++)
720 if ((Au + Bu + Cu) != 0)
722 double Pu = P[it->first];
723 RH += (1 - Pu) * Au / (Au + Bu + Cu);
727 total_alleles += (nb_alleles - 1);
730 if (total_alleles == 0)
732 return RH / double(total_alleles);
738 vector<size_t> grp_ids_vect;
739 for (set<size_t>::iterator i = groups.begin(); i != groups.end(); i++)
741 grp_ids_vect.push_back(*i);
745 for (
size_t i = 0; i < groups.size(); i++)
750 set<size_t> pairwise_grp;
752 for (
size_t j = 0; j < groups.size () - 1; j++)
754 for (
size_t k = j + 1; k < groups.size (); k++)
757 if (distance_methode ==
"nei72")
759 else if (distance_methode ==
"nei78")
761 else if (distance_methode ==
"WC")
763 pairwise_grp.insert(grp_ids_vect[j] );
764 pairwise_grp.insert(grp_ids_vect[k] );
766 pairwise_grp.clear();
768 else if (distance_methode ==
"RH")
770 pairwise_grp.insert(grp_ids_vect[j] );
771 pairwise_grp.insert(grp_ids_vect[k] );
773 pairwise_grp.clear();
775 else if (distance_methode ==
"Nm")
777 pairwise_grp.insert(grp_ids_vect[j] );
778 pairwise_grp.insert(grp_ids_vect[k] );
781 distance = 0.25 * (1 - distance) / distance;
784 pairwise_grp.clear();
786 else if (distance_methode ==
"D")
788 pairwise_grp.insert(grp_ids_vect[j] );
789 pairwise_grp.insert(grp_ids_vect[k] );
792 distance = -log(1 - distance);
795 pairwise_grp.clear();
797 else if (distance_methode ==
"Rousset")
799 pairwise_grp.insert(grp_ids_vect[j] );
800 pairwise_grp.insert(grp_ids_vect[k] );
803 distance = distance / (1 - distance);
806 pairwise_grp.clear();
809 (*_dist)(k, j) = distance;
810 (*_dist)(j, k) = distance;
The BiAlleleMonolocusGenotype class.
std::size_t getBadIndex() const
const std::array< std::size_t, 2 > & getBounds() const
The MonolocusGenotype virtual class.
virtual std::vector< size_t > getAlleleIndex() const =0
Get the alleles' index.
static double getDnei78(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, size_t grp1, size_t grp2)
Compute the Nei unbiased distance between two groups at a given number of loci.
static std::map< size_t, double > getAllelesFit(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fit on a set of groups for each allele of a given locus.
static std::map< size_t, double > getHeterozygousFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get the heterozygous frequencies for each allele at a locus in a set of groups.
static std::map< size_t, VarComp > getVarianceComponents(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get the variance components a, b and c (Weir and Cockerham, 1983).
static double getDnei72(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, size_t grp1, size_t grp2)
Compute the Nei distance between two groups at one locus.
static std::map< size_t, size_t > countHeterozygousForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Count how many times each allele is found in an heterozygous MonolocusGenotype in a set of groups.
static size_t countBiAllelicForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Counr the number of bi-allelic MonolocusGenotype at a given locus for a set of groups.
static double getHnbForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the expected non biased heterozygosity for one locus.
static std::map< size_t, double > getAllelesFst(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the Weir and Cockerham on a set of groups for each allele of a given locus.
static PermResults getWCMultilocusFstAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, std::set< size_t > groups, int nb_perm)
Compute the Weir and Cockerham on a set of groups for a given set of loci and make a permutation tes...
static std::vector< size_t > getAllelesIdsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get the alleles' id at one locus for a set of groups.
static size_t countGametesForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Count the number of allele (gametes) at a locus for a set of groups.
static std::unique_ptr< DistanceMatrix > getDistanceMatrix(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups, std::string distance_method)
Compute pairwise distances on a set of groups for a given set of loci. distance is either Nei72,...
static std::map< size_t, Fstats > getAllelesFstats(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the three F statistics of Weir and Cockerham for each allele of a given locus.
static double getHexpForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the expected heterozygosity for one locus.
static double getRHMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups)
Compute the on a set of groups for a given set of loci. The variance componenets for each allele are...
static double getHobsForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the observed heterozygosity for one locus.
static size_t countNonMissingForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Count the number of non-missing data at a given locus for a set of groups.
static std::map< size_t, double > getAllelesFis(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for each allele of a given locus.
static PermResults getWCMultilocusFisAndPerm(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, std::set< size_t > groups, int nb_perm)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci and make a permutation ...
static std::map< size_t, size_t > getAllelesMapForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get a map of allele count for a set of groups.
static double getWCMultilocusFst(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups)
Compute the Weir and Cockerham on a set of groups for a given set of loci. The variance componenets ...
static double getWCMultilocusFis(const PolymorphismMultiGContainer &pmgc, std::vector< size_t > locus_positions, const std::set< size_t > &groups)
Compute the Weir and Cockerham Fis on a set of groups for a given set of loci. The variance componene...
static std::map< size_t, double > getAllelesFrqForGroups(const PolymorphismMultiGContainer &pmgc, size_t locus_position, const std::set< size_t > &groups)
Get the alleles frequencies at one locus for a set of groups.
const MonolocusGenotype & getMonolocusGenotype(size_t locus_position) const
Get a MonolocusGenotype.
bool isMonolocusGenotypeMissing(size_t locus_position) const
Tell if a MonolocusGenotype is a missing data.
The PolymorphismMultiGContainer class.
size_t getGroupId(size_t position) const
Get the Group id of a MultilocusGenotype.
const MultilocusGenotype * getMultilocusGenotype(size_t position) const
Get a MultilocusGenotype at a position.
std::vector< std::string > getAllGroupsNames() const
Get the groups names or ids if not available.
size_t getLocusGroupSize(size_t group, size_t locus_position) const
Get the size of a group for a given locus.
size_t size() const
Get the number of MultilocusGenotype.