bpp-phyl3  3.0.0
PatternTools.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_PHYL_PATTERNTOOLS_H
6 #define BPP_PHYL_PATTERNTOOLS_H
7 
9 
10 #include "Tree/PhyloTree.h"
11 #include "Tree/TreeTemplateTools.h"
12 
13 // From SeqLib:
15 #include <Bpp/Seq/Site.h>
19 
20 // From the STL:
21 #include <map>
22 
23 namespace bpp
24 {
32 {
33 public:
/**
 * @brief Extract the sequences corresponding to a given subtree.
 *
 * Generic entry point: sequenceSet is tried as a SiteContainerInterface first,
 * then as a ProbabilisticSiteContainerInterface, and the call is forwarded to
 * the typed overload that matches.
 *
 * @param sequenceSet The alignment data to extract sequences from.
 * @param node        Node forwarded, together with tree, to the typed overload.
 * @param tree        Tree observer forwarded to the typed overload.
 * @return The container built by the typed overload that matched.
 * @throw Exception if sequenceSet is neither of the two supported container types.
 */
43  template<class N, class E, class I>
44  static std::unique_ptr<AlignmentDataInterface> getSequenceSubset(
45  const AlignmentDataInterface& sequenceSet,
46  const std::shared_ptr<N> node,
47  const AssociationTreeGraphImplObserver<N, E, I>& tree)
48  {
49  try
50  {
51  const auto& siteContainer = dynamic_cast<const SiteContainerInterface&>(sequenceSet);
52  return getSequenceSubset(siteContainer, node, tree);
53  }
54  catch (const std::bad_cast&) {} // Not a SiteContainerInterface: deliberately fall through to the next candidate type.
55 
56  try
57  {
58  const auto& siteContainer = dynamic_cast<const ProbabilisticSiteContainerInterface&>(sequenceSet);
59  return getSequenceSubset(siteContainer, node, tree);
60  }
61  catch (const std::bad_cast&) {} // Not a probabilistic container either: reported by the throw below.
62 
63  throw Exception("PatternTools::getSequenceSubset : unsupported sequence type.");
64  }
65 
/**
 * @brief Extract the sequences corresponding to a given subtree.
 *
 * Builds a new VectorSiteContainer on the alphabet of sequenceSet and adds one
 * sequence per named leaf returned by tree.getLeavesUnderNode(node). Leaves
 * without a name are skipped. When fetching a leaf's sequence from sequenceSet
 * throws, a warning is displayed and a placeholder sequence resized to the
 * number of sites of sequenceSet is added instead. The site coordinates of
 * sequenceSet are copied to the result.
 *
 * @param sequenceSet The site container to extract sequences from.
 * @param node        The node whose leaves select the sequences.
 * @param tree        The tree observer queried for the leaves under node.
 * @return A newly built container holding one sequence per named leaf.
 */
75  template<class N, class E, class I>
76  static std::unique_ptr<SiteContainerInterface> getSequenceSubset(
77  const SiteContainerInterface& sequenceSet,
78  const std::shared_ptr<N> node,
79  const AssociationTreeGraphImplObserver<N, E, I>& tree)
80  {
81  size_t nbSites = sequenceSet.getNumberOfSites();
82  auto alphabet = sequenceSet.getAlphabet();
83  auto sequenceSubset = std::make_unique<VectorSiteContainer>(alphabet);
84 
85  std::vector<std::shared_ptr<N>> leaves = tree.getLeavesUnderNode(node);
86 
87  for (const auto& i : leaves) // by reference: no shared_ptr copy per leaf
88  {
89  if (i->hasName())
90  {
91  // Use sequence name as key.
92  try
93  {
94  auto newSeq = std::make_unique<Sequence>(sequenceSet.sequence(i->getName()));
95  sequenceSubset->addSequence(i->getName(), newSeq);
96  }
97  catch (const std::exception&)
98  {
99  ApplicationTools::displayWarning("PatternTools::getSequenceSubset : Leaf name not found in sequence file: " + i->getName() + " : Replaced with unknown sequence");
100 
101  auto seq = std::make_unique<Sequence>(i->getName(), "", alphabet);
102  seq->setToSizeR(nbSites);
// NOTE(review): this listing skips line 103 here. The page index mentions
// changeGapsToUnknownCharacters(IntSymbolListInterface&); presumably it is
// applied to *seq at this point - confirm against the upstream header.
104  sequenceSubset->addSequence(i->getName(), seq);
105  }
106  }
107  }
108  sequenceSubset->setSiteCoordinates(sequenceSet.getSiteCoordinates());
109  return sequenceSubset;
110  }
111 
/**
 * @brief Extract the sequences corresponding to a given subtree.
 *
 * Probabilistic counterpart of the SiteContainerInterface overload: builds a
 * new ProbabilisticVectorSiteContainer on the alphabet of sequenceSet and adds
 * one sequence per named leaf returned by tree.getLeavesUnderNode(node).
 * Leaves without a name are skipped. When fetching a leaf's sequence throws, a
 * warning is displayed and a placeholder ProbabilisticSequence resized to the
 * number of sites of sequenceSet is added instead. The site coordinates of
 * sequenceSet are copied to the result.
 *
 * @param sequenceSet The probabilistic site container to extract sequences from.
 * @param node        The node whose leaves select the sequences.
 * @param tree        The tree observer queried for the leaves under node.
 * @return A newly built container holding one sequence per named leaf.
 */
121  template<class N, class E, class I>
122  static std::unique_ptr<ProbabilisticSiteContainerInterface> getSequenceSubset(
123  const ProbabilisticSiteContainerInterface& sequenceSet,
124  const std::shared_ptr<N> node,
125  const AssociationTreeGraphImplObserver<N, E, I>& tree)
126  {
127  size_t nbSites = sequenceSet.getNumberOfSites();
128  auto alphabet = sequenceSet.getAlphabet();
129  auto sequenceSubset = std::make_unique<ProbabilisticVectorSiteContainer>(alphabet);
130 
131  std::vector<std::shared_ptr<N>> leaves = tree.getLeavesUnderNode(node);
132 
133  for (const auto& i : leaves) // by reference: no shared_ptr copy per leaf
134  {
135  if (i->hasName())
136  {
137  // Use sequence name as key.
138  try
139  {
140  auto newSeq = std::make_unique<ProbabilisticSequence>(sequenceSet.sequence(i->getName()));
// The copy is stored under its own name here, whereas the fallback below uses the leaf name.
141  sequenceSubset->addSequence(newSeq->getName(), newSeq);
142  }
143  catch (std::exception const&)
144  {
145  ApplicationTools::displayWarning("PatternTools::getSequenceSubset : Leaf name not found in sequence file: " + i->getName() + " : Replaced with unknown sequence");
146 
// Placeholder starts from a Table<double>(alphabet size, 0) and is then resized to nbSites.
147  auto newSeq = std::make_unique<ProbabilisticSequence>(i->getName(), Table<double>(alphabet->getSize(), 0), alphabet);
148  newSeq->setToSizeR(nbSites);
// NOTE(review): this listing skips line 149 here; check the upstream header
// for a statement applied to *newSeq before it is added.
150  sequenceSubset->addSequence(i->getName(), newSeq);
151  }
152  }
153  }
154  sequenceSubset->setSiteCoordinates(sequenceSet.getSiteCoordinates());
155  return sequenceSubset;
156  }
157 
/**
 * @brief Node-based overload of getSequenceSubset for generic alignment data.
 *
 * Declaration only; the definition is not part of this header. Presumably it
 * extracts the sequences of the subtree rooted at node, like the templated
 * overloads - confirm against the implementation.
 *
 * @param sequenceSet The alignment data to extract sequences from.
 * @param node        The node selecting the sequences.
 * @return A container owned by the caller.
 */
166  static std::unique_ptr<AlignmentDataInterface> getSequenceSubset(
167  const AlignmentDataInterface& sequenceSet,
168  const Node& node);
169 
/**
 * @brief Node-based overload of getSequenceSubset for site containers.
 *
 * Declaration only; the definition is not part of this header. Presumably it
 * extracts the sequences of the subtree rooted at node - confirm against the
 * implementation.
 *
 * @param sequenceSet The site container to extract sequences from.
 * @param node        The node selecting the sequences.
 * @return A site container owned by the caller.
 */
178  static std::unique_ptr<SiteContainerInterface> getSequenceSubset(
179  const SiteContainerInterface& sequenceSet,
180  const Node& node);
181 
/**
 * @brief Node-based overload of getSequenceSubset for probabilistic site containers.
 *
 * Declaration only; the definition is not part of this header. Presumably it
 * extracts the sequences of the subtree rooted at node - confirm against the
 * implementation.
 *
 * @param sequenceSet The probabilistic site container to extract sequences from.
 * @param node        The node selecting the sequences.
 * @return A probabilistic site container owned by the caller.
 */
190  static std::unique_ptr<ProbabilisticSiteContainerInterface> getSequenceSubset(
191  const ProbabilisticSiteContainerInterface& sequenceSet,
192  const Node& node);
193 
/**
 * @brief Name-based overload of getSequenceSubset for generic alignment data.
 *
 * Declaration only; the definition is not part of this header. Presumably it
 * keeps the sequences whose names are listed in names - confirm against the
 * implementation.
 *
 * @param sequenceSet The alignment data to extract sequences from.
 * @param names       The sequence names selecting the subset.
 * @return A container owned by the caller.
 */
202  static std::unique_ptr<AlignmentDataInterface> getSequenceSubset(
203  const AlignmentDataInterface& sequenceSet,
204  const std::vector<std::string>& names);
205 
/**
 * @brief Name-based overload of getSequenceSubset for site containers.
 *
 * Declaration only; the definition is not part of this header. Presumably it
 * keeps the sequences whose names are listed in names - confirm against the
 * implementation.
 *
 * @param sequenceSet The site container to extract sequences from.
 * @param names       The sequence names selecting the subset.
 * @return A site container owned by the caller.
 */
214  static std::unique_ptr<SiteContainerInterface> getSequenceSubset(
215  const SiteContainerInterface& sequenceSet,
216  const std::vector<std::string>& names);
217 
/**
 * @brief Name-based overload of getSequenceSubset for probabilistic site containers.
 *
 * Declaration only; the definition is not part of this header. Presumably it
 * keeps the sequences whose names are listed in names - confirm against the
 * implementation.
 *
 * @param sequenceSet The probabilistic site container to extract sequences from.
 * @param names       The sequence names selecting the subset.
 * @return A probabilistic site container owned by the caller.
 */
226  static std::unique_ptr<ProbabilisticSiteContainerInterface> getSequenceSubset(
227  const ProbabilisticSiteContainerInterface& sequenceSet,
228  const std::vector<std::string>& names);
229 
/**
 * @brief Compress a site container by removing duplicated sites.
 *
 * Declaration only; the definition is not part of this header.
 *
 * @param siteSet The alignment data to compress.
 * @return A container owned by the caller.
 */
237  static std::unique_ptr<AlignmentDataInterface> shrinkSiteSet(
238  const AlignmentDataInterface& siteSet);
239 
/**
 * @brief shrinkSiteSet overload for site containers.
 *
 * Declaration only; the definition is not part of this header. Presumably it
 * removes duplicated sites like the AlignmentDataInterface overload - confirm
 * against the implementation.
 *
 * @param siteSet The site container to compress.
 * @return A site container owned by the caller.
 */
247  static std::unique_ptr<SiteContainerInterface> shrinkSiteSet(
248  const SiteContainerInterface& siteSet);
249 
/**
 * @brief shrinkSiteSet overload for probabilistic site containers.
 *
 * Declaration only; the definition is not part of this header. Presumably it
 * removes duplicated sites like the AlignmentDataInterface overload - confirm
 * against the implementation.
 *
 * @param siteSet The probabilistic site container to compress.
 * @return A probabilistic site container owned by the caller.
 */
257  static std::unique_ptr<ProbabilisticSiteContainerInterface> shrinkSiteSet(
258  const ProbabilisticSiteContainerInterface& siteSet);
259 
/**
 * @brief Look for the occurrence of each site in sequences1 in sequences2.
 *
 * Declaration only; the definition is not part of this header. The summary
 * available for this function is cut off after "send the position of the
 * first o" (presumably "first occurrence") - confirm the exact contract,
 * including the value used for sites that are not found, against the
 * implementation.
 *
 * @param sequences1 The alignment data whose sites are looked up.
 * @param sequences2 The alignment data searched for those sites.
 * @return A Vint (std::vector<int>) of positions.
 */
268  static Vint getIndexes(
269  const AlignmentDataInterface& sequences1,
270  const AlignmentDataInterface& sequences2);
271 };
272 } // end of namespace bpp.
273 #endif // BPP_PHYL_PATTERNTOOLS_H
static void displayWarning(const std::string &text)
std::vector< std::shared_ptr< N > > getLeavesUnderNode(std::shared_ptr< N > node) const
The phylogenetic node class.
Definition: Node.h:59
Utility methods to compute site patterns.
Definition: PatternTools.h:32
static std::unique_ptr< AlignmentDataInterface > shrinkSiteSet(const AlignmentDataInterface &siteSet)
Compress a site container by removing duplicated sites.
static std::unique_ptr< ProbabilisticSiteContainerInterface > getSequenceSubset(const ProbabilisticSiteContainerInterface &sequenceSet, const std::shared_ptr< N > node, const AssociationTreeGraphImplObserver< N, E, I > &tree)
Extract the sequences corresponding to a given subtree.
Definition: PatternTools.h:122
static std::unique_ptr< AlignmentDataInterface > getSequenceSubset(const AlignmentDataInterface &sequenceSet, const std::shared_ptr< N > node, const AssociationTreeGraphImplObserver< N, E, I > &tree)
Extract the sequences corresponding to a given subtree.
Definition: PatternTools.h:44
static std::unique_ptr< SiteContainerInterface > getSequenceSubset(const SiteContainerInterface &sequenceSet, const std::shared_ptr< N > node, const AssociationTreeGraphImplObserver< N, E, I > &tree)
Extract the sequences corresponding to a given subtree.
Definition: PatternTools.h:76
static Vint getIndexes(const AlignmentDataInterface &sequences1, const AlignmentDataInterface &sequences2)
Look for the occurrence of each site in sequences1 in sequences2 and send the position of the first o...
static void changeGapsToUnknownCharacters(IntSymbolListInterface &l)
virtual const CoreSequenceInterface & sequence(const std::string &sequenceKey) const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Defines the basic types of data flow nodes.
std::vector< int > Vint