bpp-phyl3  3.0.0
MarginalAncestralStateReconstruction.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
8 
9 using namespace bpp;
10 using namespace std;
11 
12 vector<size_t> LegacyMarginalAncestralStateReconstruction::getAncestralStatesForNode(int nodeId, VVdouble& probs, bool sample) const
13 {
14  vector<size_t> ancestors(nbDistinctSites_);
15  probs.resize(nbDistinctSites_);
16  double cumProb = 0;
17  double r;
18  if (likelihood_->tree().isLeaf(nodeId))
19  {
20  VVdouble larray = likelihood_->likelihoodData().getLeafLikelihoods(nodeId);
21  for (size_t i = 0; i < nbDistinctSites_; ++i)
22  {
23  Vdouble* probs_i = &probs[i];
24  probs_i->resize(nbStates_);
25  size_t j = VectorTools::whichMax(larray[i]);
26  ancestors[i] = j;
27  (*probs_i)[j] = 1.;
28  }
29  }
30  else
31  {
32  VVVdouble larray;
33 
34  likelihood_->computeLikelihoodAtNode(nodeId, larray);
35  for (size_t i = 0; i < nbDistinctSites_; i++)
36  {
37  VVdouble* larray_i = &larray[i];
38  Vdouble* probs_i = &probs[i];
39  probs_i->resize(nbStates_);
40  for (size_t c = 0; c < nbClasses_; c++)
41  {
42  Vdouble* larray_i_c = &(*larray_i)[c];
43  for (size_t x = 0; x < nbStates_; x++)
44  {
45  (*probs_i)[x] += (*larray_i_c)[x] * r_[c] / l_[i];
46  }
47  }
48  if (sample)
49  {
50  cumProb = 0;
52  for (size_t j = 0; j < nbStates_; j++)
53  {
54  cumProb += (*probs_i)[j];
55  if (r <= cumProb)
56  {
57  ancestors[i] = j;
58  break;
59  }
60  }
61  }
62  else
63  ancestors[i] = VectorTools::whichMax(*probs_i);
64  }
65  }
66  return ancestors;
67 }
68 
70 {
71  map<int, vector<size_t>> ancestors;
72  // Clone the data into a AlignedSequenceContainer for more efficiency:
73  auto data = make_unique<AlignedSequenceContainer>(dynamic_cast<const SiteContainerInterface&>(likelihood_->likelihoodData().shrunkData()));
74  recursiveMarginalAncestralStates(tree_.getRootNode(), ancestors, *data);
75  return ancestors;
76 }
77 
78 std::unique_ptr<Sequence> LegacyMarginalAncestralStateReconstruction::getAncestralSequenceForNode(int nodeId, VVdouble* probs, bool sample) const
79 {
80  string name = tree_.hasNodeName(nodeId) ? tree_.getNodeName(nodeId) : ("" + TextTools::toString(nodeId));
81  const vector<size_t>& rootPatternLinks = likelihood_->likelihoodData().getRootArrayPositions();
82  auto model = likelihood_->getModelForSite(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states.
83  vector<size_t> states;
84  vector<int> allStates(nbSites_);
85  VVdouble patternedProbs;
86  if (probs)
87  {
88  states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
89  probs->resize(nbSites_);
90  for (size_t i = 0; i < nbSites_; i++)
91  {
92  allStates[i] = model->getAlphabetStateAsInt(states[rootPatternLinks[i]]);
93  (*probs)[i] = patternedProbs[rootPatternLinks[i]];
94  }
95  }
96  else
97  {
98  states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
99  for (size_t i = 0; i < nbSites_; i++)
100  {
101  allStates[i] = model->getAlphabetStateAsInt(states[rootPatternLinks[i]]);
102  }
103  }
104  std::shared_ptr<const Alphabet> alpha = alphabet_; // Copy needed because of const
105  return make_unique<Sequence>(name, allStates, alpha);
106 }
107 
109  const Node* node,
110  map<int, vector<size_t>>& ancestors,
111  AlignedSequenceContainer& data) const
112 {
113  if (node->isLeaf())
114  {
115  const Sequence& seq = data.sequence(node->getName());
116  vector<size_t>* v = &ancestors[node->getId()];
117  v->resize(seq.size());
118  // This is a tricky way to store the real sequence as an ancestral one...
119  // In case of Markov Modulated models, we consider that the real sequences
120  // Are all in the first category.
121  auto model = likelihood_->getModelForSite(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states.
122  for (size_t i = 0; i < seq.size(); i++)
123  {
124  (*v)[i] = model->getModelStates(seq[i])[0];
125  }
126  }
127  else
128  {
129  ancestors[node->getId()] = getAncestralStatesForNode(node->getId());
130  for (size_t i = 0; i < node->getNumberOfSons(); i++)
131  {
132  recursiveMarginalAncestralStates(node->getSon(i), ancestors, data);
133  }
134  }
135 }
136 
137 unique_ptr<SiteContainerInterface> LegacyMarginalAncestralStateReconstruction::getAncestralSequences(bool sample) const
138 {
139  unique_ptr<SiteContainerInterface> asc = make_unique<AlignedSequenceContainer>(alphabet_);
140  vector<int> ids = tree_.getInnerNodesId();
141  for (size_t i = 0; i < ids.size(); i++)
142  {
143  auto seq = getAncestralSequenceForNode(ids[i], nullptr, sample);
144  asc->addSequence(seq->getName(), seq);
145  }
146  return asc;
147 }
virtual size_t size() const=0
std::map< int, std::vector< size_t > > getAllAncestralStates() const override
Get all ancestral states for all nodes.
std::unique_ptr< Sequence > getAncestralSequenceForNode(int nodeId, VVdouble *probs, bool sample) const
Get the ancestral sequence for a given node.
std::vector< size_t > getAncestralStatesForNode(int nodeId, VVdouble &probs, bool sample) const
Get ancestral states for a given node as a vector of int.
void recursiveMarginalAncestralStates(const Node *node, std::map< int, std::vector< size_t >> &ancestors, AlignedSequenceContainer &data) const
std::unique_ptr< SiteContainerInterface > getAncestralSequences() const override
Get all the ancestral sequences for all nodes.
The phylogenetic node class.
Definition: Node.h:59
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
Definition: Node.h:203
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual bool isLeaf() const
Definition: Node.h:667
virtual size_t getNumberOfSons() const
Definition: Node.h:355
static double giveRandomNumberBetweenZeroAndEntry(double entry)
const SequenceType & sequence(const std::string &sequenceKey) const override
static size_t whichMax(const std::vector< T > &v)
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble