1 #ifndef BPP_SEQ_CONTAINER_SITECONTAINERTOOLS_H
2 #define BPP_SEQ_CONTAINER_SITECONTAINERTOOLS_H
14 #include "../AlphabetIndex/AlphabetIndex2.h"
15 #include "../DistanceMatrix.h"
16 #include "../GeneticCode/GeneticCode.h"
17 #include "../SiteTools.h"
18 #include "../CodonSiteTools.h"
51 template<
class SiteType,
class SequenceType>
52 static std::unique_ptr<TemplateVectorSiteContainer<SiteType, SequenceType>>
56 std::shared_ptr<const Alphabet> alphaPtr = sites.
getAlphabet();
57 auto selectedSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, alphaPtr);
62 std::unique_ptr<SiteType> sitePtr(sites.
site(i).clone());
63 selectedSites->addSite(sitePtr,
false);
81 template<
class SiteType,
class SequenceType>
82 static std::unique_ptr<TemplateVectorSiteContainer<SiteType, SequenceType>>
86 std::shared_ptr<const Alphabet> alphaPtr = sites.
getAlphabet();
87 auto selectedSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, alphaPtr);
92 std::unique_ptr<SiteType> sitePtr(sites.
site(i).clone());
93 selectedSites->addSite(sitePtr,
false);
110 template<
class SiteType,
class SequenceType>
111 static std::unique_ptr<TemplateSiteContainerInterface<SiteType, SequenceType, std::string>>
115 throw Exception(
"SiteContainerTools::removeGapOnlySites. Container is empty.");
118 auto newContainer = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, alphaPtr);
124 auto site2 = std::unique_ptr<SiteType>(site.
clone());
125 newContainer->addSite(site2,
false);
137 template<
class SiteType,
class SequenceType,
class HashType>
141 throw Exception(
"SiteContainerTools::removeGapOnlySites. Container is empty.");
148 const SiteType* site = &sites.
site(i - 1);
155 site = &sites.
site(i - 1);
180 template<
class SiteType,
class SequenceType>
181 static std::unique_ptr<TemplateVectorSiteContainer<SiteType, SequenceType>>
185 throw Exception(
"SiteContainerTools::removeGapOrUnresolvedOnlySites. Container is empty.");
189 auto newContainer = std::make_unique<TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, alphaPtr);
195 auto site2 = std::unique_ptr<SiteType>(site.
clone());
196 newContainer->addSite(site2,
false);
208 template<
class SiteType,
class SequenceType,
class HashType>
212 throw Exception(
"SiteContainerTools::removeGapOrUnresolvedOnlySites. Container is empty.");
219 const SiteType& site = sites.
site(i - 1);
226 site = &sites.
site(i - 1);
236 const SiteType& site = sites.
site(0);
248 template<
class SiteType,
class SequenceType>
249 static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
255 throw Exception(
"SiteContainerTools::removeGapSites. Container is empty.");
258 auto newContainer = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, sites.
getAlphabet());
261 std::map<int, double> freq;
264 if (freq[-1] <= maxFreqGaps)
266 auto site2 = std::make_unique<SiteType>(site.
clone());
267 newContainer->addSite(site2,
false);
281 template<
class SiteType,
class SequenceType,
class HashType>
287 throw Exception(
"SiteContainerTools::removeGapSites. Container is empty.");
291 std::map<int, double> freq;
293 if (freq[-1] > maxFreqGaps)
315 std::shared_ptr<const CodonAlphabet> pca = std::dynamic_pointer_cast<const CodonAlphabet>(sites.
getAlphabet());
319 throw Exception(
"SiteContainerTools::getSitesWithoutStopCodon. Container is empty.");
323 auto newContainer = std::make_unique<VectorSiteContainer>(sequenceKeys, alphaP);
329 std::unique_ptr<Site> site2(site.
clone());
330 newContainer->addSite(site2,
false);
347 std::shared_ptr<const CodonAlphabet> pca = std::dynamic_pointer_cast<const CodonAlphabet>(sites.
getAlphabet());
351 throw Exception(
"SiteContainerTools::removeSitesWithStopCodon. Container is empty.");
355 const Site& site = sites.
site(i - 1);
370 throw Exception(
"SiteContainerTools::removeSitesWithStopCodon. Method not supported for probabilistic sequences.");
384 template<
class SiteType,
class SequenceType,
class HashType>
390 for (
auto pos : selection)
392 auto sitePtr = std::unique_ptr<SiteType>(sites.
site(pos).clone());
393 outputSites.
addSite(sitePtr,
false);
408 template<
class SiteType,
class SequenceType>
409 static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
415 auto outputSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sites.
getSequenceKeys(), alphaPtr);
417 getSelectedSites<SiteType, SequenceType, std::string>(sites, selection, *outputSites);
432 static std::unique_ptr<AlignmentDataInterface>
440 auto sel = getSelectedSites<Site, Sequence>(sc, selection);
441 return std::move(sel);
443 catch (std::bad_cast& e) {}
448 auto sel = getSelectedSites<ProbabilisticSite, ProbabilisticSequence>(psc, selection);
449 return std::move(sel);
451 catch (std::bad_cast& e) {}
453 throw Exception(
"SiteContainerTools::getSelectedSites : unsupported container type.");
469 template<
class SiteType,
class SequenceType,
class HashType>
475 size_t wsize = sites.
getAlphabet()->getStateCodingSize();
478 if (selection.size() % wsize != 0)
479 throw IOException(
"SiteContainerTools::getSelectedPositions: Positions selection is not compatible with the alphabet in use in the container.");
481 for (
size_t i = 0; i < selection.size(); i += wsize)
483 if (selection[i] % wsize != 0)
484 throw IOException(
"SiteContainerTools::getSelectedPositions: Positions selection is not compatible with the alphabet in use in the container.");
486 for (
size_t j = 1; j < wsize; ++j)
488 if (selection[i + j] != (selection[i + j - 1] + 1))
489 throw IOException(
"SiteContainerTools::getSelectedPositions: Positions selection is not compatible with the alphabet in use in the container.");
491 selection2.push_back(selection[i] / wsize);
513 template<
class SiteType,
class SequenceType>
514 static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
520 auto outputSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sites.
getSequenceKeys(), alphaPtr);
522 getSelectedPositions<SiteType, SequenceType, std::string>(sites, selection, *outputSites);
541 const std::string& name =
"consensus",
542 bool ignoreGap =
true,
543 bool resolveUnknown =
false);
602 std::shared_ptr<const Alphabet>& resolvedAlphabet);
694 static std::unique_ptr<AlignedSequenceContainer>
alignNW(
715 static std::unique_ptr<AlignedSequenceContainer>
alignNW(
735 template<
class SiteType,
class SequenceType,
class HashType>
740 std::shared_ptr< std::vector<size_t>> index =
nullptr)
742 for (
size_t i = 0; i < nbSites; ++i)
745 auto s = std::unique_ptr<SiteType>(sites.
site(pos).clone());
749 index->push_back(pos);
768 template<
class SiteType,
class SequenceType>
769 static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
773 std::shared_ptr< std::vector<size_t>> index =
nullptr)
775 auto sampledSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sites.
getAlphabet());
776 sampleSites<SiteType, SequenceType, std::string>(sites, nbSites, *sampledSites, index);
792 template<
class SiteType,
class SequenceType,
class HashType>
812 template<
class SiteType,
class SequenceType>
813 static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
816 auto outputSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sites.
getAlphabet());
817 bootstrapSites<SiteType, SequenceType, std::string>(sites, *outputSites);
848 bool unresolvedAsGap =
true);
876 bool unresolvedAsGap =
true);
903 template<
class SiteType,
class SequenceType,
class HashType>
907 bool leavePositionAsIs =
false)
916 if (seqKeys1 == seqKeys2)
918 seqCont2bis = &seqCont2;
925 seqCont2bis = seqCont2ter;
929 if (leavePositionAsIs)
933 std::unique_ptr<Site> site(seqCont2bis->
site(i).clone());
942 std::unique_ptr<Site> site(seqCont2bis->
site(i).clone());
943 site->setCoordinate(offset + site->getCoordinate());
The alphabet exception base class.
Two-dimensional alphabet index interface.
Exception thrown when two alphabets do not match.
Partial implementation of the Transliterator interface for genetic code object.
A basic implementation of the Sequence interface.
The Container of Aligned Values interface.
virtual void setSequenceNames(const std::vector< std::string > &names, bool updateKeys)=0
Batch-set all sequence names.
virtual std::vector< std::string > getSequenceNames() const =0
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
virtual std::vector< HashType > getSequenceKeys() const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get a pointer toward the container's alphabet.
virtual void deleteSite(size_t sitePosition)=0
Delete a site from the container.
virtual void addSite(std::unique_ptr< SiteType > &site, bool checkCoordinate)=0
Add a site in the container.
TemplateSiteContainerInterface< SiteType, SequenceType, HashType > * createEmptyContainer() const override=0
Return a copy of this container, but with no data inside.
virtual const SiteType & site(size_t sitePosition) const override=0
Get a site from the container.
virtual void deleteSites(size_t sitePosition, size_t length) override=0
Remove a continuous range of sites in the container.
virtual size_t getNumberOfSites() const override=0
Get the number of aligned positions in the container.
This alphabet is used to deal with NumericAlphabet.
std::vector< size_t > SiteSelection