bpp-phyl3 3.0.0
PatternTools.h
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#ifndef BPP_PHYL_PATTERNTOOLS_H
6#define BPP_PHYL_PATTERNTOOLS_H
7
9
10#include "Tree/PhyloTree.h"
12
13// From SeqLib:
15#include <Bpp/Seq/Site.h>
19
20// From the STL:
21#include <map>
22
23namespace bpp
24{
32{
33public:
43 template<class N, class E, class I>
44 static std::unique_ptr<AlignmentDataInterface> getSequenceSubset(
45 const AlignmentDataInterface& sequenceSet,
46 const std::shared_ptr<N> node,
48 {
49 try
50 {
51 const auto& siteContainer = dynamic_cast<const SiteContainerInterface&>(sequenceSet);
52 return getSequenceSubset(siteContainer, node, tree);
53 }
54 catch (std::bad_cast& e) {}
55
56 try
57 {
58 const auto& siteContainer = dynamic_cast<const ProbabilisticSiteContainerInterface&>(sequenceSet);
59 return getSequenceSubset(siteContainer, node, tree);
60 }
61 catch (std::bad_cast& e) {}
62
63 throw Exception("PatternTools::getSequenceSubset : unsupported sequence type.");
64 }
65
75 template<class N, class E, class I>
76 static std::unique_ptr<SiteContainerInterface> getSequenceSubset(
77 const SiteContainerInterface& sequenceSet,
78 const std::shared_ptr<N> node,
80 {
81 size_t nbSites = sequenceSet.getNumberOfSites();
82 auto alphabet = sequenceSet.getAlphabet();
83 auto sequenceSubset = std::make_unique<VectorSiteContainer>(alphabet);
84
85 std::vector<std::shared_ptr<N>> leaves = tree.getLeavesUnderNode(node);
86
87 for (auto i : leaves)
88 {
89 if (i->hasName())
90 {
91 // Use sequence name as key.
92 try
93 {
94 auto newSeq = std::make_unique<Sequence>(sequenceSet.sequence(i->getName()));
95 sequenceSubset->addSequence(i->getName(), newSeq);
96 }
97 catch (std::exception& e)
98 {
99 ApplicationTools::displayWarning("PatternTools::getSequenceSubset : Leaf name not found in sequence file: " + i->getName() + " : Replaced with unknown sequence");
100
101 auto seq = std::make_unique<Sequence>(i->getName(), "", alphabet);
102 seq->setToSizeR(nbSites);
104 sequenceSubset->addSequence(i->getName(), seq);
105 }
106 }
107 }
108 sequenceSubset->setSiteCoordinates(sequenceSet.getSiteCoordinates());
109 return sequenceSubset;
110 }
111
121 template<class N, class E, class I>
122 static std::unique_ptr<ProbabilisticSiteContainerInterface> getSequenceSubset(
123 const ProbabilisticSiteContainerInterface& sequenceSet,
124 const std::shared_ptr<N> node,
126 {
127 size_t nbSites = sequenceSet.getNumberOfSites();
128 auto alphabet = sequenceSet.getAlphabet();
129 auto sequenceSubset = std::make_unique<ProbabilisticVectorSiteContainer>(alphabet);
130
131 std::vector<std::shared_ptr<N>> leaves = tree.getLeavesUnderNode(node);
132
133 for (auto i : leaves)
134 {
135 if (i->hasName())
136 {
137 // Use sequence name as key.
138 try
139 {
140 auto newSeq = std::make_unique<ProbabilisticSequence>(sequenceSet.sequence(i->getName()));
141 sequenceSubset->addSequence(newSeq->getName(), newSeq);
142 }
143 catch (std::exception const& e)
144 {
145 ApplicationTools::displayWarning("PatternTools::getSequenceSubset : Leaf name not found in sequence file: " + i->getName() + " : Replaced with unknown sequence");
146
147 auto newSeq = std::make_unique<ProbabilisticSequence>(i->getName(), Table<double>(alphabet->getSize(), 0), alphabet);
148 newSeq->setToSizeR(nbSites);
150 sequenceSubset->addSequence(i->getName(), newSeq);
151 }
152 }
153 }
154 sequenceSubset->setSiteCoordinates(sequenceSet.getSiteCoordinates());
155 return sequenceSubset;
156 }
157
// getSequenceSubset overloads taking a Node. Only the declarations are
// visible here; the definitions live elsewhere.
// NOTE(review): by analogy with the templated overloads, `node` presumably
// designates the subtree whose leaves select the sequences - confirm against
// the implementation.

// Generic alignment-data version; returns a newly owned AlignmentDataInterface.
166 static std::unique_ptr<AlignmentDataInterface> getSequenceSubset(
167 const AlignmentDataInterface& sequenceSet,
168 const Node& node);
169
// Site-container version; returns a newly owned SiteContainerInterface.
178 static std::unique_ptr<SiteContainerInterface> getSequenceSubset(
179 const SiteContainerInterface& sequenceSet,
180 const Node& node);
181
// Probabilistic site-container version; returns a newly owned
// ProbabilisticSiteContainerInterface.
190 static std::unique_ptr<ProbabilisticSiteContainerInterface> getSequenceSubset(
191 const ProbabilisticSiteContainerInterface& sequenceSet,
192 const Node& node);
193
// getSequenceSubset overloads taking a list of names. Only the declarations
// are visible here; the definitions live elsewhere.
// NOTE(review): `names` presumably lists the sequences to keep - confirm
// against the implementation, including what happens for a name that is absent
// from sequenceSet.

// Generic alignment-data version; returns a newly owned AlignmentDataInterface.
202 static std::unique_ptr<AlignmentDataInterface> getSequenceSubset(
203 const AlignmentDataInterface& sequenceSet,
204 const std::vector<std::string>& names);
205
// Site-container version; returns a newly owned SiteContainerInterface.
214 static std::unique_ptr<SiteContainerInterface> getSequenceSubset(
215 const SiteContainerInterface& sequenceSet,
216 const std::vector<std::string>& names);
217
// Probabilistic site-container version; returns a newly owned
// ProbabilisticSiteContainerInterface.
226 static std::unique_ptr<ProbabilisticSiteContainerInterface> getSequenceSubset(
227 const ProbabilisticSiteContainerInterface& sequenceSet,
228 const std::vector<std::string>& names);
229
// Compress a site container by removing duplicated sites.
// Generic alignment-data version; the result is returned as a newly owned
// AlignmentDataInterface.
237 static std::unique_ptr<AlignmentDataInterface> shrinkSiteSet(
238 const AlignmentDataInterface& siteSet);
239
// Site-container overload of shrinkSiteSet; the result is returned as a newly
// owned SiteContainerInterface.
// NOTE(review): presumably the same duplicate-site compression as the generic
// version - confirm against the implementation.
247 static std::unique_ptr<SiteContainerInterface> shrinkSiteSet(
248 const SiteContainerInterface& siteSet);
249
257 static std::unique_ptr<ProbabilisticSiteContainerInterface> shrinkSiteSet(
259
// Look for the occurrence of each site of sequences1 in sequences2 and report
// the position of the first one found. The result is a Vint, i.e. a
// std::vector<int>.
// NOTE(review): the value reported for a site with no match in sequences2 is
// not visible from this declaration - confirm against the implementation.
268 static Vint getIndexes(
269 const AlignmentDataInterface& sequences1,
270 const AlignmentDataInterface& sequences2);
271};
272} // end of namespace bpp.
273#endif // BPP_PHYL_PATTERNTOOLS_H
static void displayWarning(const std::string &text)
std::vector< std::shared_ptr< N > > getLeavesUnderNode(std::shared_ptr< N > node) const
The phylogenetic node class.
Definition: Node.h:59
Utility methods to compute site patterns.
Definition: PatternTools.h:32
static std::unique_ptr< AlignmentDataInterface > shrinkSiteSet(const AlignmentDataInterface &siteSet)
Compress a site container by removing duplicated sites.
static std::unique_ptr< ProbabilisticSiteContainerInterface > getSequenceSubset(const ProbabilisticSiteContainerInterface &sequenceSet, const std::shared_ptr< N > node, const AssociationTreeGraphImplObserver< N, E, I > &tree)
Extract the sequences corresponding to a given subtree.
Definition: PatternTools.h:122
static std::unique_ptr< AlignmentDataInterface > getSequenceSubset(const AlignmentDataInterface &sequenceSet, const std::shared_ptr< N > node, const AssociationTreeGraphImplObserver< N, E, I > &tree)
Extract the sequences corresponding to a given subtree.
Definition: PatternTools.h:44
static std::unique_ptr< SiteContainerInterface > getSequenceSubset(const SiteContainerInterface &sequenceSet, const std::shared_ptr< N > node, const AssociationTreeGraphImplObserver< N, E, I > &tree)
Extract the sequences corresponding to a given subtree.
Definition: PatternTools.h:76
static Vint getIndexes(const AlignmentDataInterface &sequences1, const AlignmentDataInterface &sequences2)
Look for the occurrence of each site in sequences1 in sequences2 and return the position of the first occurrence.
static void changeGapsToUnknownCharacters(IntSymbolListInterface &l)
virtual const CoreSequenceInterface & sequence(const std::string &sequenceKey) const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Defines the basic types of data flow nodes.
std::vector< int > Vint