7 #include "../Alphabet/AlphabetTools.h"
9 #include "../CodonSiteTools.h"
10 #include "../SequenceTools.h"
11 #include "../SymbolListTools.h"
30 const std::string& name,
40 map<int, double> freq;
48 if (it.second > max && it.first != -1)
66 consensus.push_back(cons);
69 auto seqConsensus = make_unique<Sequence>(name, consensus, alphaPtr);
77 int unknownCode = sites.
getAlphabet()->getUnknownCharacterCode();
80 unique_ptr<Site> site(sites.
site(i).
clone());
87 (*site)[j] = unknownCode;
101 unique_ptr<ProbabilisticSite> site(sites.
site(i).clone());
106 vector<double>& element = (*site)[j];
122 int gapCode = sites.
getAlphabet()->getGapCharacterCode();
125 unique_ptr<Site> site(sites.
site(i).
clone());
131 (*site)[j] = gapCode;
142 std::shared_ptr<const Alphabet>& resolvedAlphabet)
145 throw AlphabetException(
"SiteContainerTools::resolveDottedAlignment. Alignment alphabet should of class 'DefaultAlphabet'.", dottedAln.
getAlphabet());
150 throw Exception(
"SiteContainerTools::resolveDottedAlignment. Input alignment contains no sequence.");
153 for (
size_t i = 0; i < n; ++i)
157 for (
size_t j = 0; isRef && j < seq.
size(); ++j)
168 throw Exception(
"SiteContainerTools::resolveDottedAlignment. No reference sequence was found in the input alignment.");
171 auto sites = make_unique<VectorSiteContainer>(dottedAln.
getSequenceKeys(), resolvedAlphabet);
177 for (
size_t i = 0; i < m; ++i)
179 string resolved = refSeq->
getChar(i);
180 const Site& site = dottedAln.
site(i);
181 auto resolvedSite = make_unique<Site>(resolvedAlphabet, site.
getCoordinate());
182 for (
size_t j = 0; j < n; ++j)
189 resolvedSite->addElement(state);
192 sites->addSite(resolvedSite,
false);
203 int gapCode = seq.
getAlphabet()->getGapCharacterCode();
204 map<size_t, size_t> tln;
208 for (
size_t i = 0; i < seq.
size(); ++i)
210 if (seq[i] != gapCode)
223 int gapCode = seq.
getAlphabet()->getGapCharacterCode();
224 map<size_t, size_t> tln;
228 for (
size_t i = 0; i < seq.
size(); ++i)
230 if (seq[i] != gapCode)
245 map<size_t, size_t> tln;
246 if (seq1.
size() == 0)
248 unsigned int count1 = 0;
249 unsigned int count2 = 0;
250 if (seq2.
size() == 0)
252 int state1 = seq1[count1];
253 int state2 = seq2[count2];
260 if (count1 < seq1.
size())
261 state1 = seq1[count1];
268 if (count2 < seq2.
size())
269 state2 = seq2[count2];
273 if (state1 != state2)
275 tln[count1 + 1] = count2 + 1;
276 if (count1 == seq1.
size() - 1)
280 if (count2 == seq2.
size() - 1)
282 state1 = seq1[++count1];
286 if (count1 < seq1.
size())
287 state1 = seq1[count1];
298 state1 = seq1[++count1];
299 state2 = seq2[++count2];
312 map<size_t, size_t> tln;
327 tln[count1] = (state2 == -1 ? 0 : count2);
346 unique_ptr<Sequence> s1(seq1.
clone());
348 unique_ptr<Sequence> s2(seq2.
clone());
354 double choice1, choice2, choice3, mx;
356 for (
size_t i = 0; i <= s1->size(); i++)
358 m(i, 0) =
static_cast<double>(i) * gap;
360 for (
size_t j = 0; j <= s2->size(); j++)
362 m(0, j) =
static_cast<double>(j) * gap;
364 for (
size_t i = 1; i <= s1->size(); i++)
366 for (
size_t j = 1; j <= s2->size(); j++)
368 choice1 = m(i - 1, j - 1) +
static_cast<double>(s.
getIndex((*s1)[i - 1], (*s2)[j - 1]));
369 choice2 = m(i - 1, j) + gap;
370 choice3 = m(i, j - 1) + gap;
371 mx = choice1; px =
'd';
374 mx = choice2; px =
'u';
378 mx = choice3; px =
'l';
381 p(i - 1, j - 1) = px;
387 size_t i = s1->size(), j = s2->size();
389 while (i > 0 && j > 0)
394 a1.push_front((*s1)[i - 1]);
395 a2.push_front((*s2)[j - 1]);
401 a1.push_front((*s1)[i - 1]);
408 a2.push_front((*s2)[j - 1]);
414 a1.push_front((*s1)[i - 1]);
421 a2.push_front((*s2)[j - 1]);
424 s1->setContent(vector<int>(a1.begin(), a1.end()));
425 s2->setContent(vector<int>(a2.begin(), a2.end()));
426 auto alphaPtr = s1->getAlphabet();
427 auto asc = make_unique<AlignedSequenceContainer>(alphaPtr);
428 asc->addSequence(s1->getName(), s1);
429 asc->addSequence(s2->getName(), s2);
447 unique_ptr<Sequence> s1(seq1.
clone());
449 unique_ptr<Sequence> s2(seq2.
clone());
458 double choice1, choice2, choice3, mx;
461 for (
size_t i = 0; i <= s1->size(); i++)
465 for (
size_t j = 0; j <= s2->size(); j++)
469 for (
size_t i = 1; i <= s1->size(); i++)
471 m(i, 0) = h(i, 0) = opening +
static_cast<double>(i) * extending;
473 for (
size_t j = 1; j <= s2->size(); j++)
475 m(0, j) = v(0, j) = opening +
static_cast<double>(j) * extending;
478 for (
size_t i = 1; i <= s1->size(); i++)
480 for (
size_t j = 1; j <= s2->size(); j++)
482 choice1 = m(i - 1, j - 1) + s.
getIndex((*s1)[i - 1], (*s2)[j - 1]);
483 choice2 = h(i - 1, j - 1) + opening + extending;
484 choice3 = v(i - 1, j - 1) + opening + extending;
496 choice1 = m(i, j - 1) + opening + extending;
497 choice2 = h(i, j - 1) + extending;
505 choice1 = m(i - 1, j) + opening + extending;
506 choice2 = v(i - 1, j) + extending;
515 if (v(i, j) > m(i, j))
517 if (h(i, j) > m(i, j))
519 p(i - 1, j - 1) = px;
525 size_t i = s1->size(), j = s2->size();
527 while (i > 0 && j > 0)
532 a1.push_front((*s1)[i - 1]);
533 a2.push_front((*s2)[j - 1]);
539 a1.push_front((*s1)[i - 1]);
546 a2.push_front((*s2)[j - 1]);
552 a1.push_front((*s1)[i - 1]);
559 a2.push_front((*s2)[j - 1]);
562 s1->setContent(vector<int>(a1.begin(), a1.end()));
563 s2->setContent(vector<int>(a2.begin(), a2.end()));
564 auto alphaPtr = s1->getAlphabet();
565 auto asc = make_unique<AlignedSequenceContainer>(alphaPtr);
566 asc->addSequence(s1->getName(), s1);
567 asc->addSequence(s2->getName(), s2);
584 const std::string& gapOption,
585 bool unresolvedAsGap)
592 std::shared_ptr<const Alphabet> alpha = seq1.
getAlphabet();
595 for (
size_t i = 0; i < seq1.
size(); i++)
599 int gapCode = alpha->getGapCharacterCode();
602 if (alpha->isUnresolved(x))
604 if (alpha->isUnresolved(y))
607 if (gapOption == SIMILARITY_ALL)
610 if (x == y && !alpha->isGap(x) && !alpha->isGap(y))
613 else if (gapOption == SIMILARITY_NODOUBLEGAP)
615 if (!alpha->isGap(x) || !alpha->isGap(y))
622 else if (gapOption == SIMILARITY_NOGAP)
624 if (!alpha->isGap(x) && !alpha->isGap(y))
632 throw Exception(
"SiteContainerTools::computeSimilarity. Invalid gap option: " + gapOption);
634 double r = (t == 0 ? 0. :
static_cast<double>(s) /
static_cast<double>(t));
635 return dist ? 1 - r : r;
643 const std::string& gapOption,
644 bool unresolvedAsGap)
648 string pairwiseGapOption = gapOption;
649 unique_ptr<SiteContainerInterface> sites2;
650 if (gapOption == SIMILARITY_NOFULLGAP)
654 auto tmp = removeGapOrUnresolvedOnlySites(sites);
655 sites2 = make_unique<AlignedSequenceContainer>(*tmp);
659 auto tmp = removeGapOnlySites(sites);
660 sites2 = make_unique<AlignedSequenceContainer>(*tmp);
662 pairwiseGapOption = SIMILARITY_ALL;
666 sites2 = make_unique<AlignedSequenceContainer>(sites);
669 for (
size_t i = 0; i < n; ++i)
671 (*mat)(i, i) = dist ? 0. : 1.;
672 const Sequence& seq1 = sites2->sequence(i);
673 for (
size_t j = i + 1; j < n; ++j)
675 const Sequence& seq2 = sites2->sequence(j);
676 (*mat)(i, j) = (*mat)(j, i) = computeSimilarity(seq1, seq2, dist, pairwiseGapOption, unresolvedAsGap);
689 int gapCode = sites.
getAlphabet()->getGapCharacterCode();
696 if (seq[j] != gapCode)
699 positions(i, j) = pos;
714 throw Exception(
"SiteContainerTools::getColumnScores. The two input alignments must have the same number of sequences!");
723 if (positions1(j, i) > 0)
726 whichPos = positions1(j, i);
741 if (positions2(whichSeq, j) == whichPos)
749 throw Exception(
"SiteContainerTools::getColumnScores(). Position " +
TextTools::toString(whichPos) +
" of sequence " +
TextTools::toString(whichSeq) +
" not found in reference alignment. Please make sure the two indexes are built from the same data!");
755 test = (positions1(j, i) == positions2(j, i2));
757 scores[i] = test ? 1 : 0;
767 throw Exception(
"SiteContainerTools::getColumnScores. The two input alignments must have the same number of sequences!");
772 size_t countAlignable = 0;
773 size_t countAligned = 0;
777 size_t whichPos = positions1(j, i);
788 if (positions2(j, k) == whichPos)
796 throw Exception(
"SiteContainerTools::getColumnScores(). Position " +
TextTools::toString(whichPos) +
" of sequence " +
TextTools::toString(j) +
" not found in reference alignment. Please make sure the two indexes are built from the same data!");
802 size_t whichPos2 = positions1(k, i);
810 if (positions2(k, i2) == whichPos2)
814 scores[i] = countAlignable == 0 ? na :
static_cast<double>(countAligned) /
static_cast<double>(countAlignable);
int getCoordinate() const override
Get the coordinate associated to this site.
std::shared_ptr< const Alphabet > getAlphabet() const override
Get the alphabet associated to the list.
size_t size() const override
Get the number of elements in the list.
The alphabet exception base class.
Two dimensionnal alphabet index interface.
virtual double getIndex(int state1, int state2) const =0
Get the index associated to a pair of states.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get the alphabet associated to this index.
Exception thrown when two alphabets do not match.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get the alphabet associated to the list.
virtual size_t size() const =0
Get the number of elements in the list.
std::string getChar(size_t pos) const override
Get the element at position 'pos' as a character.
virtual size_t getNumberOfColumns() const=0
virtual void resize(size_t nRows, size_t nCols)=0
virtual size_t getNumberOfRows() const=0
Exception thrown when a sequence is not align with others.
A basic implementation of the Sequence interface.
Sequence * clone() const override
std::string getChar(size_t pos) const override
Get the element at position 'pos' as a character.
Loop over all sites in a SiteContainer.
bool hasMoreSites() const override
const SiteType & nextSite() override
virtual const SequenceType & sequence(const HashType &sequenceKey) const override=0
Retrieve a sequence object from the container.
virtual std::vector< std::string > getSequenceNames() const =0
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
virtual std::vector< HashType > getSequenceKeys() const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get a pointer toward the container's alphabet.
virtual const Alphabet & alphabet() const =0
Get the container's alphabet.
virtual void setSite(size_t sitePosition, std::unique_ptr< SiteType > &site, bool checkCoordinate=true)=0
Set a site in the container.
virtual const SiteType & site(size_t sitePosition) const override=0
Get a site from the container.
virtual size_t getNumberOfSites() const override=0
Get the number of aligned positions in the container.
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
This alphabet is used to deal NumericAlphabet.