60 if (seqs1.size() > 1 || seqs2.size() > 1)
61 throw Exception(
"PairwiseDivergenceMafStatistics::compute. Duplicated sequence for species " + species1_ +
"or " + species2_ +
".");
62 if (seqs1.size() == 0 || seqs2.size() == 0)
70 if (noSpeciesMeansAllSpecies_ && species_.size() == 0) {
75 for (
size_t i = 0; i < species_.size(); ++i) {
78 for (
size_t j = 0; j < selection.size(); ++j) {
91 for (
size_t i = 0; i < species.size(); ++i)
94 throw Exception(
"AbstractSpeciesMultipleSelectionMafStatistics (constructor). Species selections must be fully distinct.");
99 vector<SiteContainer*> alignments;
100 for (
size_t k = 0; k <
species_.size(); ++k) {
102 for (
size_t i = 0; i <
species_[k].size(); ++i) {
105 for (
size_t j = 0; j < selection.size(); ++j) {
110 alignments.push_back(alignment);
121 tags.push_back(
"Gap");
122 tags.push_back(
"Unresolved");
129 std::map<int, int> counts;
136 double countUnres = 0;
137 for (map<int, int>::iterator it = counts.begin(); it != counts.end(); ++it) {
139 countUnres += it->second;
150 tags.push_back(
"Unresolved");
151 tags.push_back(
"Saturated");
152 tags.push_back(
"Ignored");
158 unsigned int nbUnresolved = 0;
159 unsigned int nbSaturated = 0;
160 unsigned int nbIgnored = 0;
165 unique_ptr<SiteContainer> alignment;
180 for (
size_t i = 0; i < alignment->getNumberOfSites(); ++i) {
182 const Site& site = alignment->getSite(i);
183 map<int, unsigned int> counts;
184 bool isUnresolved =
false;
185 bool isSaturated =
false;
186 for (
size_t j = 0; !isUnresolved && !isSaturated && j < site.
size(); ++j) {
192 if (counts.size() > 2) {
199 }
else if (isSaturated) {
201 }
else if (hasOutgroup && (
202 alignment->getAlphabet()->isGap((*outgroupSeq)[i]) ||
203 alignment->getAlphabet()->isUnresolved((*outgroupSeq)[i]))) {
208 if (counts.size() == 1) {
210 if (counts.begin()->first == (*outgroupSeq)[i])
213 count = counts.begin()->second;
218 map<int, unsigned int>::iterator it = counts.begin();
219 unsigned int count1 = it->second;
221 unsigned int count2 = it->second;
223 if (counts.begin()->first == (*outgroupSeq)[i])
224 count = counts.rbegin()->second;
225 else if (counts.rbegin()->first == (*outgroupSeq)[i])
226 count = counts.begin()->second;
232 count = min(count1, count2);
250 for (
size_t i = 0; i <
counts_.size(); ++i) {
258 tags.push_back(
"f1100");
259 tags.push_back(
"f0110");
260 tags.push_back(
"f1010");
261 tags.push_back(
"Ignored");
269 if (alignment->getNumberOfSequences() == 4) {
270 unsigned int nbIgnored = 0;
272 const Site& site = alignment->getSite(i);
274 if (site[0] == site[1] &&
275 site[2] != site[1] &&
278 else if (site[1] == site[2] &&
279 site[1] != site[0] &&
282 else if (site[0] == site[2] &&
283 site[1] != site[0] &&
305 tags.push_back(
"NbWithoutGap");
306 tags.push_back(
"NbComplete");
307 tags.push_back(
"NbConstant");
308 tags.push_back(
"NbBiallelic");
309 tags.push_back(
"NbTriallelic");
310 tags.push_back(
"NbQuadriallelic");
311 tags.push_back(
"NbParsimonyInformative");
318 unsigned int nbNg = 0;
319 unsigned int nbCo = 0;
320 unsigned int nbPi = 0;
321 unsigned int nbP1 = 0;
322 unsigned int nbP2 = 0;
323 unsigned int nbP3 = 0;
324 unsigned int nbP4 = 0;
325 if (alignment->getNumberOfSequences() > 0) {
326 for (
size_t i = 0; i < alignment->getNumberOfSites(); ++i) {
331 map<int, size_t> counts;
333 switch (counts.size()) {
334 case 1: nbP1++;
break;
335 case 2: nbP2++;
break;
336 case 3: nbP3++;
break;
337 case 4: nbP4++;
break;
338 default:
throw Exception(
"The impossible happened. Probably a distortion in the Minkowski space.");
359 tags.push_back(
"FP");
360 tags.push_back(
"PF");
361 tags.push_back(
"FF");
363 tags.push_back(
"FX");
364 tags.push_back(
"PX");
365 tags.push_back(
"XF");
366 tags.push_back(
"XP");
391 unsigned int nbF = 0;
392 unsigned int nbP = 0;
393 unsigned int nbFF = 0;
394 unsigned int nbFP = 0;
395 unsigned int nbPF = 0;
396 unsigned int nbX = 0;
397 unsigned int nbFX = 0;
398 unsigned int nbPX = 0;
399 unsigned int nbXF = 0;
400 unsigned int nbXP = 0;
404 if (alignments[0]->getNumberOfSequences() > 0) {
407 if (alignments[1]->getNumberOfSequences() > 0) {
412 int p1 = patterns1[i];
413 int p2 = patterns2[i];
474 tags.push_back(
"NbSeggregating");
475 tags.push_back(
"WattersonTheta");
476 tags.push_back(
"TajimaPi");
477 tags.push_back(
"TajimaD");
487 throw Exception(
"SequenceDiversityMafStatistics::compute : alignment2 not plain VectorSiteContainer.");
491 size_t n = alignment2->getNumberOfSequences();
493 for (
size_t i = 0; i < alignment2->getNumberOfSites(); ++i) {
494 const Site& site = alignment2->getSite(i);
502 nbTot = alignment2->getNumberOfSites();
507 double dn =
static_cast<double>(n);
508 for (
double i = 1; i < dn; ++i) {
512 double wt = S / (
static_cast<double>(nbTot) * a1);
513 double b1 = (dn + 1) / (3 * (dn - 1));
514 double b2 = 2 * (dn * dn + dn + 3) / (9 * dn * (dn - 1));
515 double c1 = b1 - 1. / a1;
516 double c2 = b2 - (dn + 2) / (a1 * dn) + a2 / (a1 * a1);
518 double e2 = c2 / (a1 * a1 + a2);
522 for (
size_t i = 0; i < n - 1; ++i) {
523 for (
size_t j = i + 1; j < n; ++j) {
525 alignment2->getSequence(i),
526 alignment2->getSequence(j),
532 pi /=
static_cast<double>((n - 1) * n / 2);
535 double tajd =
static_cast<double>(nbTot) * (pi - wt) / sqrt(e1 * S + e2 * S * (S - 1));
MafStatisticsResult result_
std::vector< SiteContainer * > getSiteContainers_(const MafBlock &block)
AbstractSpeciesMultipleSelectionMafStatistics(const std::vector< std::vector< std::string > > &species)
std::vector< std::vector< std::string > > species_
SiteContainer * getSiteContainer_(const MafBlock &block)
virtual size_t getNumberOfSites() const=0
virtual std::string intToChar(int state) const=0
virtual bool isUnresolved(int state) const=0
virtual bool isGap(int state) const=0
virtual int getGapCharacterCode() const=0
virtual unsigned int getSize() const=0
std::vector< std::string > getSupportedTags() const
void compute(const MafBlock &block)
const Alphabet * alphabet_
virtual size_t size() const=0
std::vector< unsigned int > counts_
void compute(const MafBlock &block)
std::vector< std::string > getSupportedTags() const
A synteny block data structure, the basic unit of a MAF alignement file.
size_t getNumberOfSequences() const
std::vector< const MafSequence * > getSequencesForSpecies(const std::string &species) const
size_t getNumberOfSites() const
AlignedSequenceContainer & getAlignment()
bool hasSequenceForSpecies(const std::string &species) const
const MafSequence & getSequenceForSpecies(const std::string &species) const
virtual void setValue(const std::string &tag, double value)
Associate a value to a certain tag. Any existing tag will be overwritten.
void compute(const MafBlock &block)
static std::vector< int > getPatterns_(const SiteContainer &sites)
void compute(const MafBlock &block)
std::vector< std::string > getSupportedTags() const
std::vector< std::string > getSupportedTags() const
void compute(const MafBlock &block)
virtual const Alphabet * getAlphabet() const=0
virtual const Site & getSite(size_t siteIndex) const=0
size_t getNumberOfCategories() const
size_t getCategory(double value) const
std::vector< std::string > getSupportedTags() const
std::vector< unsigned int > counts_
const Alphabet * alphabet_
void compute(const MafBlock &block)
void compute(const MafBlock &block)
std::vector< std::string > getSupportedTags() const
void addSequence(const Sequence &sequence, bool checkName=true)
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)