bpp-phyl3 3.0.0
MarginalAncestralStateReconstruction.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
8
9using namespace bpp;
10using namespace std;
11
12vector<size_t> LegacyMarginalAncestralStateReconstruction::getAncestralStatesForNode(int nodeId, VVdouble& probs, bool sample) const
13{
14 vector<size_t> ancestors(nbDistinctSites_);
15 probs.resize(nbDistinctSites_);
16 double cumProb = 0;
17 double r;
18 if (likelihood_->tree().isLeaf(nodeId))
19 {
20 VVdouble larray = likelihood_->likelihoodData().getLeafLikelihoods(nodeId);
21 for (size_t i = 0; i < nbDistinctSites_; ++i)
22 {
23 Vdouble* probs_i = &probs[i];
24 probs_i->resize(nbStates_);
25 size_t j = VectorTools::whichMax(larray[i]);
26 ancestors[i] = j;
27 (*probs_i)[j] = 1.;
28 }
29 }
30 else
31 {
32 VVVdouble larray;
33
34 likelihood_->computeLikelihoodAtNode(nodeId, larray);
35 for (size_t i = 0; i < nbDistinctSites_; i++)
36 {
37 VVdouble* larray_i = &larray[i];
38 Vdouble* probs_i = &probs[i];
39 probs_i->resize(nbStates_);
40 for (size_t c = 0; c < nbClasses_; c++)
41 {
42 Vdouble* larray_i_c = &(*larray_i)[c];
43 for (size_t x = 0; x < nbStates_; x++)
44 {
45 (*probs_i)[x] += (*larray_i_c)[x] * r_[c] / l_[i];
46 }
47 }
48 if (sample)
49 {
50 cumProb = 0;
52 for (size_t j = 0; j < nbStates_; j++)
53 {
54 cumProb += (*probs_i)[j];
55 if (r <= cumProb)
56 {
57 ancestors[i] = j;
58 break;
59 }
60 }
61 }
62 else
63 ancestors[i] = VectorTools::whichMax(*probs_i);
64 }
65 }
66 return ancestors;
67}
68
70{
71 map<int, vector<size_t>> ancestors;
72 // Clone the data into a AlignedSequenceContainer for more efficiency:
73 auto data = make_unique<AlignedSequenceContainer>(dynamic_cast<const SiteContainerInterface&>(likelihood_->likelihoodData().shrunkData()));
74 recursiveMarginalAncestralStates(tree_.getRootNode(), ancestors, *data);
75 return ancestors;
76}
77
78std::unique_ptr<Sequence> LegacyMarginalAncestralStateReconstruction::getAncestralSequenceForNode(int nodeId, VVdouble* probs, bool sample) const
79{
80 string name = tree_.hasNodeName(nodeId) ? tree_.getNodeName(nodeId) : ("" + TextTools::toString(nodeId));
81 const vector<size_t>& rootPatternLinks = likelihood_->likelihoodData().getRootArrayPositions();
82 auto model = likelihood_->getModelForSite(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states.
83 vector<size_t> states;
84 vector<int> allStates(nbSites_);
85 VVdouble patternedProbs;
86 if (probs)
87 {
88 states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
89 probs->resize(nbSites_);
90 for (size_t i = 0; i < nbSites_; i++)
91 {
92 allStates[i] = model->getAlphabetStateAsInt(states[rootPatternLinks[i]]);
93 (*probs)[i] = patternedProbs[rootPatternLinks[i]];
94 }
95 }
96 else
97 {
98 states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
99 for (size_t i = 0; i < nbSites_; i++)
100 {
101 allStates[i] = model->getAlphabetStateAsInt(states[rootPatternLinks[i]]);
102 }
103 }
104 std::shared_ptr<const Alphabet> alpha = alphabet_; // Copy needed because of const
105 return make_unique<Sequence>(name, allStates, alpha);
106}
107
109 const Node* node,
110 map<int, vector<size_t>>& ancestors,
111 AlignedSequenceContainer& data) const
112{
113 if (node->isLeaf())
114 {
115 const Sequence& seq = data.sequence(node->getName());
116 vector<size_t>* v = &ancestors[node->getId()];
117 v->resize(seq.size());
118 // This is a tricky way to store the real sequence as an ancestral one...
119 // In case of Markov Modulated models, we consider that the real sequences
120 // Are all in the first category.
121 auto model = likelihood_->getModelForSite(tree_.getNodesId()[0], 0); // We assume all nodes have a model with the same number of states.
122 for (size_t i = 0; i < seq.size(); i++)
123 {
124 (*v)[i] = model->getModelStates(seq[i])[0];
125 }
126 }
127 else
128 {
129 ancestors[node->getId()] = getAncestralStatesForNode(node->getId());
130 for (size_t i = 0; i < node->getNumberOfSons(); i++)
131 {
132 recursiveMarginalAncestralStates(node->getSon(i), ancestors, data);
133 }
134 }
135}
136
137unique_ptr<SiteContainerInterface> LegacyMarginalAncestralStateReconstruction::getAncestralSequences(bool sample) const
138{
139 unique_ptr<SiteContainerInterface> asc = make_unique<AlignedSequenceContainer>(alphabet_);
140 vector<int> ids = tree_.getInnerNodesId();
141 for (size_t i = 0; i < ids.size(); i++)
142 {
143 auto seq = getAncestralSequenceForNode(ids[i], nullptr, sample);
144 asc->addSequence(seq->getName(), seq);
145 }
146 return asc;
147}
virtual size_t size() const=0
std::shared_ptr< const DRTreeLikelihoodInterface > likelihood_
std::map< int, std::vector< size_t > > getAllAncestralStates() const override
Get all ancestral states for all nodes.
std::unique_ptr< SiteContainerInterface > getAncestralSequences() const override
Get all the ancestral sequences for all nodes.
std::unique_ptr< Sequence > getAncestralSequenceForNode(int nodeId, VVdouble *probs, bool sample) const
Get the ancestral sequence for a given node.
std::vector< size_t > getAncestralStatesForNode(int nodeId, VVdouble &probs, bool sample) const
Get ancestral states for a given node as a vector of int.
void recursiveMarginalAncestralStates(const Node *node, std::map< int, std::vector< size_t > > &ancestors, AlignedSequenceContainer &data) const
The phylogenetic node class.
Definition: Node.h:59
virtual std::string getName() const
Get the name associated to this node, if there is one, otherwise throw a NodeException.
Definition: Node.h:203
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual bool isLeaf() const
Definition: Node.h:679
virtual size_t getNumberOfSons() const
Definition: Node.h:355
static double giveRandomNumberBetweenZeroAndEntry(double entry)
const SequenceType & sequence(const std::string &sequenceKey) const override
static size_t whichMax(const std::vector< T > &v)
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble