bpp-phyl3  3.0.0
MarginalAncestralReconstruction.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
7 
9 
10 using namespace bpp;
11 using namespace std;
12 
13 vector<size_t> MarginalAncestralReconstruction::getAncestralStatesForNode(uint nodeId, VVdouble& probs, bool sample) const
14 {
15  vector<size_t> ancestors(nbSites_);
16 
17  auto vv = likelihood_->getLikelihoodsAtNode(nodeId)->targetValue();
18 
19  probs.resize(nbSites_);
20 
21  for (uint i = 0; i < static_cast<uint>(nbSites_); ++i)
22  {
23  copyEigenToBpp(vv.col(i) / vv.col(i).sum(), probs[static_cast<size_t>(i)]);
24  }
25 
26  if (sample)
27  {
28  for (size_t i = 0; i < nbSites_; ++i)
29  {
30  const auto& coli = probs[i];
32  for (size_t j = 0; j < nbStates_; ++j)
33  {
34  r -= coli[j];
35  if (r < 0)
36  {
37  ancestors[i] = j;
38  break;
39  }
40  }
41  }
42  }
43  else
44  {
45  size_t pos;
46  for (size_t i = 0; i < nbSites_; ++i)
47  {
48  vv.col(Eigen::Index(i)).maxCoeff(&pos);
49  ancestors[i] = pos;
50  }
51  }
52  return ancestors;
53 }
54 
56 {
57  map<uint, vector<size_t>> ancestors;
58  // Clone the data into a AlignedSequenceContainer for more efficiency:
59  shared_ptr<AlignmentDataInterface> data = make_shared<AlignedSequenceContainer>(dynamic_cast<const SiteContainerInterface&>(likelihood_->shrunkData()));
60  recursiveMarginalAncestralStates(tree_->getRoot(), ancestors, *data);
61  return ancestors;
62 }
63 
64 unique_ptr<Sequence> MarginalAncestralReconstruction::getAncestralSequenceForNode(uint nodeId, VVdouble* probs, bool sample) const
65 {
66  string name = tree_->getNode(nodeId)->hasName() ? tree_->getNode(nodeId)->getName() : ("" + TextTools::toString(nodeId));
67  vector<int> allStates(nbSites_);
68 
69  const auto& stateMap = likelihood_->stateMap();
70 
71  VVdouble patternedProbs;
72 
73  if (probs)
74  {
75  auto states = getAncestralStatesForNode(nodeId, *probs, sample);
76  for (size_t i = 0; i < nbSites_; ++i)
77  {
78  allStates[i] = stateMap.getAlphabetStateAsInt(states[i]);
79  }
80  }
81  else
82  {
83  auto states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
84  for (size_t i = 0; i < nbSites_; ++i)
85  {
86  allStates[i] = stateMap.getAlphabetStateAsInt(states[i]);
87  }
88  }
89 
90  return make_unique<Sequence>(name, allStates, alphabet_);
91 }
92 
94  const std::shared_ptr<PhyloNode> node,
95  map<uint, vector<size_t>>& ancestors,
96  AlignmentDataInterface& data) const
97 {
98  if (tree_->isLeaf(node))
99  {
100  try
101  {
102  auto& sc = dynamic_cast<const SiteContainerInterface&>(data);
103  const Sequence& seq = sc.sequence(node->getName());
104  vector<size_t>* v = &ancestors[tree_->getNodeIndex(node)];
105  v->resize(seq.size());
106  // This is a tricky way to store the real sequence as an ancestral one...
107  // In case of Markov Modulated models, we consider that the real sequences
108  // are all in the first category.
109  const auto& stateMap = likelihood_->stateMap();
110  for (size_t i = 0; i < seq.size(); ++i)
111  {
112  (*v)[i] = stateMap.getModelStates(seq[i])[0];
113  }
114  }
115  catch (bad_cast&)
116  {
117  ancestors[tree_->getNodeIndex(node)] = getAncestralStatesForNode(tree_->getNodeIndex(node));
118  }
119  }
120  else
121  {
122  ancestors[tree_->getNodeIndex(node)] = getAncestralStatesForNode(tree_->getNodeIndex(node));
123  vector<shared_ptr<PhyloNode>> vsons = tree_->getSons(node);
124 
125  for (size_t i = 0; i < vsons.size(); i++)
126  {
127  recursiveMarginalAncestralStates(vsons[i], ancestors, data);
128  }
129  }
130 }
131 
132 unique_ptr<AlignedSequenceContainer> MarginalAncestralReconstruction::getAncestralSequences(bool sample) const
133 {
134  auto asc = make_unique<AlignedSequenceContainer>(alphabet_);
135  vector<shared_ptr<PhyloNode>> inNodes = tree_->getAllInnerNodes();
136  for (size_t i = 0; i < inNodes.size(); ++i)
137  {
138  auto seq = getAncestralSequenceForNode(tree_->getNodeIndex(inNodes[i]), nullptr, sample);
139  asc->addSequence(seq->getName(), seq);
140  }
141  return asc;
142 }
virtual size_t size() const=0
std::unique_ptr< AlignedSequenceContainer > getAncestralSequences() const override
Get all the ancestral sequences for all nodes.
void recursiveMarginalAncestralStates(const std::shared_ptr< PhyloNode > node, std::map< uint, std::vector< size_t >> &ancestors, AlignmentDataInterface &data) const
std::unique_ptr< Sequence > getAncestralSequenceForNode(uint nodeId, VVdouble *probs, bool sample) const
Get an ancestral sequence for a given node.
std::map< uint, std::vector< size_t > > getAllAncestralStates() const override
Get all ancestral states for all nodes.
std::vector< size_t > getAncestralStatesForNode(uint nodeId, VVdouble &probs, bool sample) const
Get ancestral states for a given node as a vector of int.
static double giveRandomNumberBetweenZeroAndEntry(double entry)
std::string toString(T t)
Defines the basic types of data flow nodes.
template void copyEigenToBpp(const ExtendedFloatMatrixXd &eigenMatrix, std::vector< std::vector< double >> &bppMatrix)
std::vector< Vdouble > VVdouble