7 #include <Bpp/Seq/CodonSiteTools.h> 17 const std::string& path,
18 shared_ptr<const Alphabet> alpha)
22 auto seqc = ms.readSequences(path, alpha);
23 auto psc = make_unique<PolymorphismSequenceContainer>(*seqc);
24 Comments maseFileHeader = seqc->getComments();
25 auto groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader);
26 for (
auto& mi : groupMap)
29 if (key.compare(0, 8,
"OUTGROUP") == 0)
31 auto ss = MaseTools::getSequenceSet(maseFileHeader, key);
32 for (
size_t i = 0; i != ss.size(); ++i)
34 psc->setAsOutgroupMember(ss[i]);
47 auto psci = make_unique<PolymorphismSequenceContainer>(psc);
48 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
53 if (ss.size() == psc.getNumberOfSequences())
55 throw Exception(
"PolymorphismSequenceContainerTools::extractIngroup: no Ingroup sequences found.");
57 for (
size_t i = ss.size(); i > 0; --i)
59 psci->deleteSequence(ss[i - 1]);
70 auto psci = make_unique<PolymorphismSequenceContainer>(psc);
71 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
76 if (ss.size() == psc.getNumberOfSequences())
78 throw Exception(
"PolymorphismSequenceContainerTools::extractOutgroup: no Outgroup sequences found.");
80 for (
size_t i = ss.size(); i > 0; i--)
82 psci->deleteSequence(ss[i - 1]);
94 auto psci = make_unique<PolymorphismSequenceContainer>(psc);
95 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
100 if (ss.size() == psc.getNumberOfSequences())
102 throw GroupNotFoundException(
"PolymorphismSequenceContainerTools::extractGroup: group_id not found.", groupId);
104 for (
size_t i = ss.size(); i > 0; i--)
106 psci->deleteSequence(ss[i - 1]);
115 const SequenceSelection& ss)
117 auto newpsc = make_unique<PolymorphismSequenceContainer>(psc.getAlphabet());
118 for (
size_t i = 0; i < ss.size(); ++i)
120 auto tmpSeq = make_unique<Sequence>(psc.sequence(ss[i]));
121 newpsc->addSequenceWithFrequency(tmpSeq->getName(), tmpSeq, psc.
getSequenceCount(i));
123 newpsc->setAsIngroupMember(i);
126 newpsc->setAsOutgroupMember(i);
130 newpsc->setComments(psc.getComments());
141 size_t nbSeq = psc.getNumberOfSequences();
143 for (
size_t i = 0; i < nbSeq; ++i)
147 vector<size_t> vv(n);
148 RandomTools::getSample(v, vv, replace);
158 auto seqNames = psc.getSequenceNames();
159 auto noGapCont = make_unique<PolymorphismSequenceContainer>(psc.getNumberOfSequences(), psc.getAlphabet());
160 noGapCont->setSequenceNames(seqNames,
false);
161 size_t nbSeq = psc.getNumberOfSequences();
162 for (
size_t i = 0; i < nbSeq; ++i)
166 noGapCont->setAsIngroupMember(i);
169 noGapCont->setAsOutgroupMember(i);
174 while (ngsi.hasMoreSites())
176 auto tmpSite = make_unique<Site>(ngsi.nextSite());
177 noGapCont->addSite(tmpSite);
188 size_t count = psc.getNumberOfSites();
189 unique_ptr<PolymorphismSequenceContainer> npsc =
nullptr;
190 unique_ptr<SimpleSiteContainerIterator> ssi =
nullptr;
193 npsc = extractIngroup(psc);
197 ssi.reset(
new SimpleTemplateSiteContainerIterator<Site, Sequence, string>(psc));
198 while (ssi->hasMoreSites())
199 if (SiteTools::hasGap(ssi->nextSite()))
210 size_t count = psc.getNumberOfSites();
211 unique_ptr<PolymorphismSequenceContainer> npsc =
nullptr;
212 unique_ptr<SimpleSiteContainerIterator> ssi =
nullptr;
215 npsc = extractIngroup(psc);
220 while (ssi->hasMoreSites())
221 if (!SiteTools::isComplete(ssi->nextSite()))
231 auto seqNames = psc.getSequenceNames();
232 auto complete = make_unique<PolymorphismSequenceContainer>(psc.getNumberOfSequences(), psc.getAlphabet());
233 complete->setSequenceNames(seqNames,
false);
234 size_t nbSeq = psc.getNumberOfSequences();
235 for (
size_t i = 0; i < nbSeq; ++i)
239 complete->setAsIngroupMember(i);
242 complete->setAsOutgroupMember(i);
247 while (csi.hasMoreSites())
249 auto tmpSite = make_unique<Site>(csi.nextSite());
250 complete->addSite(tmpSite);
260 auto psci = make_unique<PolymorphismSequenceContainer>(psc);
261 while (SiteTools::hasGap(psci->site(0)))
264 size_t n = psci->getNumberOfSites();
265 while (SiteTools::hasGap(psci->site(n - i - 1)))
267 psci->deleteSite(n - i - 1);
277 const string& setName,
280 auto pscc = MaseTools::getSelectedSites(psc, setName);
281 auto maseFileHeader = psc.getComments();
284 for (
size_t i = 1; i < MaseTools::getPhase(maseFileHeader, setName); ++i)
289 auto psci = make_unique<PolymorphismSequenceContainer>(*pscc);
290 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
296 psci->setAsOutgroupMember(i);
300 psci->clearComments();
308 const string& setName)
311 auto maseFileHeader = psc.getComments();
312 auto codss = MaseTools::getSiteSet(maseFileHeader, setName);
313 for (
size_t i = 0; i < psc.getNumberOfSites(); ++i)
315 if (find(codss.begin(), codss.end(), i) == codss.end())
318 auto sc = SiteContainerTools::getSelectedSites(psc, ss);
319 auto psci = make_unique<PolymorphismSequenceContainer>(*sc);
320 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
323 psci->setAsIngroupMember(i);
326 psci->setAsOutgroupMember(i);
337 const string& setName,
340 auto maseFileHeader = psc.getComments();
344 start = MaseTools::getPhase(maseFileHeader, setName);
352 if (static_cast<int>(pos) - static_cast<int>(start) >= 0)
356 while (i < psc.getNumberOfSites())
361 auto sc = SiteContainerTools::getSelectedSites(psc, ss);
362 auto newpsc = make_unique<PolymorphismSequenceContainer>(*sc);
363 for (
size_t j = 0; j < psc.getNumberOfSequences(); ++j)
366 newpsc->setAsIngroupMember(j);
369 newpsc->setAsOutgroupMember(j);
380 const string& setName,
381 const GeneticCode& gCode)
383 auto maseFileHeader = psc.getComments();
385 auto codss = MaseTools::getSiteSet(maseFileHeader, setName);
386 size_t start = MaseTools::getPhase(maseFileHeader, setName);
387 size_t first = 0, last = psc.getNumberOfSites();
390 psc.site(codss[0]).getValue(0) == 0 &&
391 psc.site(codss[1]).getValue(0) == 3 &&
392 psc.site(codss[2]).getValue(0) == 2)
395 int c1 = psc.site(codss[codss.size() - 3]).getValue(0);
396 int c2 = psc.site(codss[codss.size() - 2]).getValue(0);
397 int c3 = psc.site(codss[codss.size() - 1]).getValue(0);
398 if (gCode.isStop(gCode.codonAlphabet().getCodon(c1, c2, c3)))
399 last = codss[codss.size() - 1];
401 for (
size_t i = first; i < last; i++)
403 if (find(codss.begin(), codss.end(), i) == codss.end())
408 auto sc = SiteContainerTools::getSelectedSites(psc, ss);
409 auto psci = make_unique<PolymorphismSequenceContainer>(*sc);
410 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
413 psci->setAsIngroupMember(i);
416 psci->setAsOutgroupMember(i);
427 const string& setName)
429 auto maseFileHeader = psc.getComments();
431 auto codss = MaseTools::getSiteSet(maseFileHeader, setName);
432 size_t start = MaseTools::getPhase(maseFileHeader, setName);
436 psc.site(codss[0]).getValue(0) == 0 &&
437 psc.site(codss[1]).getValue(0) == 3 &&
438 psc.site(codss[2]).getValue(0) == 2)
440 for (
size_t i = 0; i < last; ++i)
442 if (find(codss.begin(), codss.end(), i) == codss.end())
447 auto sc = SiteContainerTools::getSelectedSites(psc, ss);
448 auto psci = make_unique<PolymorphismSequenceContainer>(*sc);
449 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
452 psci->setAsIngroupMember(i);
455 psci->setAsOutgroupMember(i);
466 const string& setName,
467 const GeneticCode& gCode)
469 auto maseFileHeader = psc.getComments();
471 auto codss = MaseTools::getSiteSet(maseFileHeader, setName);
472 size_t first = psc.getNumberOfSites() - 1;
474 int c1 = psc.site(codss[codss.size() - 3]).getValue(0);
475 int c2 = psc.site(codss[codss.size() - 2]).getValue(0);
476 int c3 = psc.site(codss[codss.size() - 1]).getValue(0);
477 if (gCode.isStop(gCode.codonAlphabet().getCodon(c1, c2, c3)))
478 first = codss[codss.size() - 1];
479 for (
size_t i = first; i < psc.getNumberOfSites(); ++i)
481 if (find(codss.begin(), codss.end(), i) == codss.end())
486 auto sc = SiteContainerTools::getSelectedSites(psc, ss);
487 auto psci = make_unique<PolymorphismSequenceContainer>(*sc);
488 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
491 psci->setAsIngroupMember(i);
494 psci->setAsOutgroupMember(i);
507 auto maseFileHeader = psc.getComments();
508 if (!maseFileHeader.size())
510 auto groupMap = MaseTools::getAvailableSequenceSelections(maseFileHeader);
511 for (
auto& mi : groupMap)
514 if (key.compare(0, 7,
"INGROUP") == 0)
516 StringTokenizer sptk(key,
"_");
517 speciesName = sptk.getToken(1) +
" " + sptk.getToken(2);
527 const GeneticCode& gCode)
529 auto psco = make_unique<PolymorphismSequenceContainer>(psc.getSequenceNames(), psc.getAlphabet());
530 for (
size_t i = 0; i < psc.getNumberOfSites(); ++i)
532 const Site& site = psc.site(i);
533 if (CodonSiteTools::isSynonymousPolymorphic(site, gCode))
535 auto tmpSite = make_unique<Site>(site);
536 psco->addSite(tmpSite);
539 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
543 psco->setAsIngroupMember(i);
546 psco->setAsOutgroupMember(i);
557 const GeneticCode& gCode)
559 auto psco = make_unique<PolymorphismSequenceContainer>(psc.getSequenceNames(), psc.getAlphabet());
560 for (
size_t i = 0; i < psc.getNumberOfSites(); ++i)
562 const Site& site = psc.site(i);
563 if (!CodonSiteTools::isSynonymousPolymorphic(site, gCode))
565 auto tmpSite = make_unique<Site>(site);
566 psco->addSite(tmpSite);
569 for (
size_t i = 0; i < psc.getNumberOfSequences(); ++i)
573 psco->setAsIngroupMember(i);
576 psco->setAsOutgroupMember(i);
The GroupNotFoundException class.
SimpleTemplateSiteContainerIterator< Site, Sequence, std::string > SimpleSiteContainerIterator
size_t getGroupId(size_t index) const
Get the group identifier of the sequence.
NoGapTemplateSiteContainerIterator< Site, Sequence, std::string > NoGapSiteContainerIterator
unsigned int getSequenceCount(size_t index) const
Get the count of a sequence by index.
CompleteTemplateSiteContainerIterator< Site, Sequence, std::string > CompleteSiteContainerIterator
void setAsIngroupMember(size_t index)
Set a sequence as an ingroup member by index.
void setSequenceCount(size_t index, unsigned int count)
Set the count of a sequence by index.
The PolymorphismSequenceContainer class.
bool isIngroupMember(size_t index) const
Tell whether the sequence at the given index is an ingroup member.