6 #ifndef _SEQUENCESTATISTICS_H_
7 #define _SEQUENCESTATISTICS_H_
55 bool ignoreUnknown =
true);
76 bool ignoreUnknown =
true);
113 bool gapflag =
true);
140 bool gapflag =
true);
150 bool gapflag =
true);
161 bool gapflag =
true);
187 bool gapflag =
true);
210 bool ignoreUnknown =
true,
211 bool scaled =
false);
237 bool ignoreUnknown =
true,
238 bool scaled =
false);
264 static unsigned int dvk(
266 bool gapflag =
true);
282 bool gapflag =
true);
323 bool gapflag =
true);
339 bool stopflag =
true,
340 bool gapflag =
true);
410 bool minchange =
false);
430 bool minchange =
false);
487 double freqmin = 0.);
507 double freqmin = 0.);
543 static std::vector<unsigned int>
mkTable(
547 double freqmin = 0.);
566 double freqmin = 0.);
588 bool ignoreUnknown =
true);
608 bool ignoreUnknown =
true);
631 bool useNbSingletons =
true,
632 bool useNbSegregatingSites =
false);
645 bool useNbSegregatingSites =
false);
668 bool useNbSingletons =
true,
669 bool useNbSegregatingSites =
false);
682 bool useNbSegregatingSites);
736 bool keepsingleton =
true,
737 double freqmin = 0.);
754 bool keepsingleton =
true,
755 double freqmin = 0.);
773 bool keepsingleton =
true,
774 double freqmin = 0.);
790 bool keepsingleton =
true,
791 double freqmin = 0.);
807 bool keepsingleton =
true,
808 double freqmin = 0.);
824 bool keepsingleton =
true,
825 double freqmin = 0.);
841 bool keepsingleton =
true,
842 double freqmin = 0.);
858 bool keepsingleton =
true,
859 double freqmin = 0.);
875 bool keepsingleton =
true,
876 double freqmin = 0.);
891 bool keepsingleton =
true,
892 double freqmin = 0.);
907 bool keepsingleton =
true,
908 double freqmin = 0.);
928 bool distance1 =
false,
929 bool keepsingleton =
true,
930 double freqmin = 0.);
950 bool distance1 =
false,
951 bool keepsingleton =
true,
952 double freqmin = 0.);
972 bool distance1 =
false,
973 bool keepsingleton =
true,
974 double freqmin = 0.);
994 bool distance1 =
false,
995 bool keepsingleton =
true,
996 double freqmin = 0.);
1016 bool distance1 =
false,
1017 bool keepsingleton =
true,
1018 double freqmin = 0.);
1038 bool distance1 =
false,
1039 bool keepsingleton =
true,
1040 double freqmin = 0.);
1061 bool distance1 =
false,
1062 bool keepsingleton =
true,
1063 double freqmin = 0.);
1076 double precision = 0.000001,
1077 double cinf = 0.001,
1078 double csup = 10000.);
1109 const Site& site_in,
1110 const Site& site_out);
The PolymorphismSequenceContainer class.
Static class providing methods to compute statistics on sequences data.
static double fuLiDStar(const PolymorphismSequenceContainer &group, bool useNbSegregatingSites=false)
Return the Fu and Li D* test (Fu & Li 1993, Genetics, 133 pp693-709).
static unsigned int numberOfSitesWithStopCodon(const PolymorphismSequenceContainer &psc, const GeneticCode &gCode, bool gapflag=true)
Compute the number of codon sites with stop codon.
static double meanDistance1(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give mean pairwise distances between sites / method 1: differences between sequences are not taken in...
static double meanR2(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give mean R² over all pairwise comparisons
static unsigned int dvk(const PolymorphismSequenceContainer &psc, bool gapflag=true)
Return the number of haplotypes in the sample. Depaulis and Veuille (1998, Mol Biol Evol,...
static std::unique_ptr< PolymorphismSequenceContainer > generateLdContainer(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
generate a special PolymorphismSequenceContainer for linkage disequilibrium analysis
static unsigned getNumberOfDerivedSingletons_(const Site &site_in, const Site &site_out)
Count the number of singletons for a site.
static double hudson87(const PolymorphismSequenceContainer &psc, double precision=0.000001, double cinf=0.001, double csup=10000.)
give estimate of C=4Nr using Hudson method (Hudson 1987, Genet. Res., 50 pp245-250)
static double meanDistance2(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give mean pairwise distances between sites / method 2: differences between sequences are taken into a...
static unsigned int numberOfMonoSitePolymorphicCodons(const PolymorphismSequenceContainer &psc, bool stopflag=true, bool gapflag=true)
Compute the number of polymorphic codons with only one mutated site.
static double tajimaDtnm(const PolymorphismSequenceContainer &psc, bool gapflag=true, bool ignoreUnknown=true)
Return the Tajima's D test (Tajima 1989, Genetics 123 pp 585-595).
static unsigned int numberOfTriplets(const PolymorphismSequenceContainer &psc, bool gapflag=true)
Compute the number of triplets in an alignment.
static double watterson75Synonymous(const PolymorphismSequenceContainer &psc, const GeneticCode &gc)
Compute the Watterson(1975,Theor Popul Biol, 7 pp256-276) estimator for synonymous positions.
static unsigned int numberOfSynonymousSubstitutions(const PolymorphismSequenceContainer &psc, const GeneticCode &gc, double freqmin=0.)
compute the number of synonymous substitutions in an alignment
static double dvh(const PolymorphismSequenceContainer &psc, bool gapflag=true)
Return the haplotype diversity of a sample. Depaulis and Veuille (1998, Mol Biol Evol,...
static double fstHudson92(const PolymorphismSequenceContainer &psc, size_t id1, size_t id2)
static double fayWu2000(const PolymorphismSequenceContainer &psc, const Sequence &ancestralSites)
Compute diversity estimator Theta H (eq. 3) of Fay and Wu (2000, Genetics, 155: 1405-1413)
static double fuLiD(const PolymorphismSequenceContainer &ingroup, const PolymorphismSequenceContainer &outgroup, bool useNbSingletons=true, bool useNbSegregatingSites=false)
Return the Fu and Li D test (Fu & Li 1993, Genetics, 133 pp693-709).
static double tajima83(const PolymorphismSequenceContainer &psc, bool gapflag=true, bool ignoreUnknown=true, bool scaled=false)
Compute diversity estimator Theta of Tajima (1983, Genetics, 105 pp437-460)
static Vdouble pairwiseR2(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give the vector of all mean pairwise R² value between two sites (Hill & Robertson 1968,...
static Vdouble pairwiseD(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give the vector of all mean pairwise D value between two sites (Lewontin & Kojima 1964,...
static Vdouble linearRegressionD(const PolymorphismSequenceContainer &psc, bool distance1=false, bool keepsingleton=true, double freqmin=0.)
give the slope and the origin of the regression |D| = a*distance+b
static std::vector< unsigned int > fixedDifferences(const PolymorphismSequenceContainer &pscin, const PolymorphismSequenceContainer &pscout, PolymorphismSequenceContainer &psccons, const GeneticCode &gc)
compute the number of fixed differences between two alignments
static double getUDstar_(size_t n, double a1, double vDs)
Get the uD* value of the D* equation (Fu & Li 1993, Genetics, 133 pp693-709)
static double squaredHeterozygosity(const PolymorphismSequenceContainer &psc, bool gapflag=true)
Compute the sum of per site squared heterozygosity in an alignment.
static double originRegressionD(const PolymorphismSequenceContainer &psc, bool distance1=false, bool keepsingleton=true, double freqmin=0.)
give the slope of the regression |D| = 1+a*distance
static double leftHandHudson_(const PolymorphismSequenceContainer &psc)
give the left hand term of equation (4) in Hudson (Hudson 1987, Genet. Res., 50 pp245-250) This term ...
static unsigned int getNumberOfSingletons_(const Site &site)
Count the number of singletons for a site.
static unsigned int numberOfParsimonyInformativeSites(const PolymorphismSequenceContainer &psc, bool gapflag=true)
Compute the number of parsimony informative sites in an alignment.
static double meanNumberOfNonSynonymousSites(const PolymorphismSequenceContainer &psc, const GeneticCode &gc, double ratio=1.)
compute the mean number of non-synonymous sites in an alignment
static double watterson75(const PolymorphismSequenceContainer &psc, bool gapflag=true, bool ignoreUnknown=true, bool scaled=false)
Compute diversity estimator Theta of Watterson (1975, Theor Popul Biol, 7 pp256-276)
static std::map< std::string, double > getUsefulValues_(size_t n)
Get useful values for theta estimators.
static unsigned int getNumberOfMutations_(const Site &site)
Count the number of mutations for a site.
static unsigned int numberOfTransitions(const PolymorphismSequenceContainer &psc)
Return the number of transitions.
static double frequencyOfPolymorphicSites(const PolymorphismSequenceContainer &psc, bool gapflag=true, bool ignoreUnknown=true)
Compute the frequency of polymorphic sites in an alignment.
static double watterson75NonSynonymous(const PolymorphismSequenceContainer &psc, const GeneticCode &gc)
Compute the Watterson(1975, Theor Popul Biol, 7 pp256-276) estimator for non synonymous positions.
static unsigned int totalNumberOfMutationsOnExternalBranches(const PolymorphismSequenceContainer &ing, const PolymorphismSequenceContainer &outg)
Count the total number of mutations in external branches.
static std::vector< unsigned int > mkTable(const PolymorphismSequenceContainer &ingroup, const PolymorphismSequenceContainer &outgroup, const GeneticCode &gc, double freqmin=0.)
return a vector containing Pa, Ps, Da, Ds
static double heterozygosity(const PolymorphismSequenceContainer &psc, bool gapflag=true)
Compute the sum of per site heterozygosity in an alignment.
static void testUsefulValues(std::ostream &s, size_t n)
Test useful values.
static unsigned int numberOfSingletons(const PolymorphismSequenceContainer &psc, bool gapflag=true)
Count the number of singleton nucleotides in an alignment.
static double originRegressionR2(const PolymorphismSequenceContainer &psc, bool distance1=false, bool keepsingleton=true, double freqmin=0.)
give the slope of the regression R² = 1+a*distance
static double piSynonymous(const PolymorphismSequenceContainer &psc, const GeneticCode &gc, bool minchange=false)
Compute the synonymous nucleotide diversity, pi.
static double meanDprime(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give mean D' over all pairwise comparisons
static Vdouble linearRegressionR2(const PolymorphismSequenceContainer &psc, bool distance1=false, bool keepsingleton=true, double freqmin=0.)
give the slope and the origin of the regression R² = a*distance+b
static std::vector< unsigned int > gcPolymorphism(const PolymorphismSequenceContainer &psc, bool gapflag=true)
Return the number of GC alleles and the total number of alleles at polymorphic sites only.
static Vdouble linearRegressionDprime(const PolymorphismSequenceContainer &psc, bool distance1=false, bool keepsingleton=true, double freqmin=0.)
give the slope and the origin of the regression |D'| = a*distance+b
static unsigned int totalNumberOfMutations(const PolymorphismSequenceContainer &psc, bool gapflag=true)
Count the total number of mutations in an alignment.
static unsigned int numberOfTransversions(const PolymorphismSequenceContainer &psc)
Return the number of transversions.
static Vdouble pairwiseDistances2(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give the vector of all mean pairwise distance between two sites to a LD SequencePolymorphismContainer
static double getVDstar_(size_t n, double a1, double a2, double dn)
Get the vD* value of the D* equation (Fu & Li 1993, Genetics, 133 pp693-709)
static double tajimaDss(const PolymorphismSequenceContainer &psc, bool gapflag=true, bool ignoreUnknown=true)
Return the Tajima's D test (Tajima 1989, Genetics 123 pp 585-595).
static double rightHandHudson_(double c, size_t n)
give the right hand term of equation (4) in Hudson (Hudson 1987, Genet. Res., 50 pp245-250) This term...
static unsigned int numberOfNonSynonymousSubstitutions(const PolymorphismSequenceContainer &psc, const GeneticCode &gc, double freqmin=0.)
compute the number of non synonymous substitutions in an alignment
static double fuLiFStar(const PolymorphismSequenceContainer &group, bool useNbSegregatingSites)
Return the Fu and Li F* test (Fu & Li 1993, Genetics, 133 pp693-709).
static double piNonSynonymous(const PolymorphismSequenceContainer &psc, const GeneticCode &gc, bool minchange=false)
Compute the non-synonymous nucleotide diversity, pi.
static double gcContent(const PolymorphismSequenceContainer &psc)
Compute the mean GC content in an alignment.
static unsigned int numberOfPolymorphicSites(const PolymorphismSequenceContainer &psc, bool gapflag=true, bool ignoreUnknown=true)
Compute the number of polymorphic sites in an alignment.
static double inverseRegressionR2(const PolymorphismSequenceContainer &psc, bool distance1=false, bool keepsingleton=true, double freqmin=0.)
give the slope of the regression R² = 1/(1+a*distance)
static Vdouble pairwiseDistances1(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give the vector of the pairwise distances between site positions corresponding to a LD SequencePolymo...
static double getUD_(double a1, double vD)
Get the uD value of equation (32) in Fu & Li (1993, Genetics, 133 pp693-709)
static double ratioOfTransitionsTransversions(const PolymorphismSequenceContainer &psc)
Return the ratio of transitions/transversions.
static Vdouble pairwiseDprime(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give the vector of all mean pairwise D' values between two sites (Lewontin 1964, Genetics 49 pp49-67)
static double getVD_(size_t n, double a1, double a2, double cn)
Get the vD value of equation (32) in Fu & Li (1993, Genetics, 133 pp693-709)
static unsigned int numberOfSynonymousPolymorphicCodons(const PolymorphismSequenceContainer &psc, const GeneticCode &gc)
Compute the number of synonymous polymorphic codon sites.
static double meanNumberOfSynonymousSites(const PolymorphismSequenceContainer &psc, const GeneticCode &gc, double ratio=1.)
compute the mean number of synonymous sites in an alignment
static double originRegressionDprime(const PolymorphismSequenceContainer &psc, bool distance1=false, bool keepsingleton=true, double freqmin=0.)
give the slope of the regression |D'| = 1+a*distance
static double fuLiF(const PolymorphismSequenceContainer &ingroup, const PolymorphismSequenceContainer &outgroup, bool useNbSingletons=true, bool useNbSegregatingSites=false)
Return the Fu and Li F test (Fu & Li 1993, Genetics, 133 pp693-709).
static double meanD(const PolymorphismSequenceContainer &psc, bool keepsingleton=true, double freqmin=0.)
give mean D over all pairwise comparisons
static double neutralityIndex(const PolymorphismSequenceContainer &ingroup, const PolymorphismSequenceContainer &outgroup, const GeneticCode &gc, double freqmin=0.)
return the neutrality index NI = (Pa/Ps)/(Da/Ds) (Rand & Kann 1996, Mol. Biol. Evol....
std::vector< double > Vdouble