bpp-phyl3 3.0.0
MarginalAncestralReconstruction.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
7
9
10using namespace bpp;
11using namespace std;
12
13vector<size_t> MarginalAncestralReconstruction::getAncestralStatesForNode(uint nodeId, VVdouble& probs, bool sample) const
14{
15 vector<size_t> ancestors(nbSites_);
16
17 auto vv = likelihood_->getLikelihoodsAtNode(nodeId)->targetValue();
18
19 probs.resize(nbSites_);
20
21 for (uint i = 0; i < static_cast<uint>(nbSites_); ++i)
22 {
23 copyEigenToBpp(vv.col(i) / vv.col(i).sum(), probs[static_cast<size_t>(i)]);
24 }
25
26 if (sample)
27 {
28 for (size_t i = 0; i < nbSites_; ++i)
29 {
30 const auto& coli = probs[i];
32 for (size_t j = 0; j < nbStates_; ++j)
33 {
34 r -= coli[j];
35 if (r < 0)
36 {
37 ancestors[i] = j;
38 break;
39 }
40 }
41 }
42 }
43 else
44 {
45 size_t pos;
46 for (size_t i = 0; i < nbSites_; ++i)
47 {
48 vv.col(Eigen::Index(i)).maxCoeff(&pos);
49 ancestors[i] = pos;
50 }
51 }
52 return ancestors;
53}
54
56{
57 map<uint, vector<size_t>> ancestors;
58 // Clone the data into a AlignedSequenceContainer for more efficiency:
59 shared_ptr<AlignmentDataInterface> data = make_shared<AlignedSequenceContainer>(dynamic_cast<const SiteContainerInterface&>(likelihood_->shrunkData()));
60 recursiveMarginalAncestralStates(tree_->getRoot(), ancestors, *data);
61 return ancestors;
62}
63
64unique_ptr<Sequence> MarginalAncestralReconstruction::getAncestralSequenceForNode(uint nodeId, VVdouble* probs, bool sample) const
65{
66 string name = tree_->getNode(nodeId)->hasName() ? tree_->getNode(nodeId)->getName() : ("" + TextTools::toString(nodeId));
67 vector<int> allStates(nbSites_);
68
69 const auto& stateMap = likelihood_->stateMap();
70
71 VVdouble patternedProbs;
72
73 if (probs)
74 {
75 auto states = getAncestralStatesForNode(nodeId, *probs, sample);
76 for (size_t i = 0; i < nbSites_; ++i)
77 {
78 allStates[i] = stateMap.getAlphabetStateAsInt(states[i]);
79 }
80 }
81 else
82 {
83 auto states = getAncestralStatesForNode(nodeId, patternedProbs, sample);
84 for (size_t i = 0; i < nbSites_; ++i)
85 {
86 allStates[i] = stateMap.getAlphabetStateAsInt(states[i]);
87 }
88 }
89
90 return make_unique<Sequence>(name, allStates, alphabet_);
91}
92
94 const std::shared_ptr<PhyloNode> node,
95 map<uint, vector<size_t>>& ancestors,
96 AlignmentDataInterface& data) const
97{
98 if (tree_->isLeaf(node))
99 {
100 try
101 {
102 auto& sc = dynamic_cast<const SiteContainerInterface&>(data);
103 const Sequence& seq = sc.sequence(node->getName());
104 vector<size_t>* v = &ancestors[tree_->getNodeIndex(node)];
105 v->resize(seq.size());
106 // This is a tricky way to store the real sequence as an ancestral one...
107 // In case of Markov Modulated models, we consider that the real sequences
108 // are all in the first category.
109 const auto& stateMap = likelihood_->stateMap();
110 for (size_t i = 0; i < seq.size(); ++i)
111 {
112 (*v)[i] = stateMap.getModelStates(seq[i])[0];
113 }
114 }
115 catch (bad_cast&)
116 {
117 ancestors[tree_->getNodeIndex(node)] = getAncestralStatesForNode(tree_->getNodeIndex(node));
118 }
119 }
120 else
121 {
122 ancestors[tree_->getNodeIndex(node)] = getAncestralStatesForNode(tree_->getNodeIndex(node));
123 vector<shared_ptr<PhyloNode>> vsons = tree_->getSons(node);
124
125 for (size_t i = 0; i < vsons.size(); i++)
126 {
127 recursiveMarginalAncestralStates(vsons[i], ancestors, data);
128 }
129 }
130}
131
132unique_ptr<AlignedSequenceContainer> MarginalAncestralReconstruction::getAncestralSequences(bool sample) const
133{
134 auto asc = make_unique<AlignedSequenceContainer>(alphabet_);
135 vector<shared_ptr<PhyloNode>> inNodes = tree_->getAllInnerNodes();
136 for (size_t i = 0; i < inNodes.size(); ++i)
137 {
138 auto seq = getAncestralSequenceForNode(tree_->getNodeIndex(inNodes[i]), nullptr, sample);
139 asc->addSequence(seq->getName(), seq);
140 }
141 return asc;
142}
virtual size_t size() const=0
void recursiveMarginalAncestralStates(const std::shared_ptr< PhyloNode > node, std::map< uint, std::vector< size_t > > &ancestors, AlignmentDataInterface &data) const
std::shared_ptr< const ParametrizablePhyloTree > tree_
std::unique_ptr< Sequence > getAncestralSequenceForNode(uint nodeId, VVdouble *probs, bool sample) const
Get an ancestral sequence for a given node.
std::shared_ptr< LikelihoodCalculationSingleProcess > likelihood_
std::map< uint, std::vector< size_t > > getAllAncestralStates() const override
Get all ancestral states for all nodes.
std::unique_ptr< AlignedSequenceContainer > getAncestralSequences() const override
Get all the ancestral sequences for all nodes.
std::vector< size_t > getAncestralStatesForNode(uint nodeId, VVdouble &probs, bool sample) const
Get ancestral states for a given node as a vector of int.
static double giveRandomNumberBetweenZeroAndEntry(double entry)
std::string toString(T t)
Defines the basic types of data flow nodes.
template void copyEigenToBpp(const ExtendedFloatMatrixXd &eigenMatrix, std::vector< std::vector< double > > &bppMatrix)
std::vector< Vdouble > VVdouble