bpp-phyl3 3.0.0
NNIHomogeneousTreeLikelihood.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
8
10
11using namespace bpp;
12
13// From the STL:
14#include <iostream>
15
16using namespace std;
17
18/*******************************************************************************/
19void BranchLikelihood::initModel(
20 std::shared_ptr<const TransitionModelInterface> model,
21 std::shared_ptr<const DiscreteDistributionInterface> rDist)
22{
23 model_ = model;
24 rDist_ = rDist;
25 nbStates_ = model->getNumberOfStates();
26 nbClasses_ = rDist->getNumberOfCategories();
27 pxy_.resize(nbClasses_);
28 for (size_t i = 0; i < nbClasses_; i++)
29 {
30 pxy_[i].resize(nbStates_);
31 for (size_t j = 0; j < nbStates_; j++)
32 {
33 pxy_[i][j].resize(nbStates_);
34 }
35 }
36}
37
38/*******************************************************************************/
40{
41 double l = getParameterValue("BrLen");
42
43 // Computes all pxy once for all:
44 for (size_t c = 0; c < nbClasses_; c++)
45 {
46 VVdouble* pxy__c = &pxy_[c];
47 RowMatrix<double> Q = model_->getPij_t(l * rDist_->getCategory(c));
48 for (size_t x = 0; x < nbStates_; x++)
49 {
50 Vdouble* pxy__c_x = &(*pxy__c)[x];
51 for (size_t y = 0; y < nbStates_; y++)
52 {
53 (*pxy__c_x)[y] = Q(x, y);
54 }
55 }
56 }
57}
58
59/*******************************************************************************/
61{
62 lnL_ = 0;
63
64 vector<double> la(array1_->size());
65 for (size_t i = 0; i < array1_->size(); i++)
66 {
67 double Li = 0;
68 for (size_t c = 0; c < nbClasses_; c++)
69 {
70 double rc = rDist_->getProbability(c);
71 for (size_t x = 0; x < nbStates_; x++)
72 {
73 for (size_t y = 0; y < nbStates_; y++)
74 {
75 Li += rc * (*array1_)[i][c][x] * pxy_[c][x][y] * (*array2_)[i][c][y];
76 }
77 }
78 }
79 la[i] = weights_[i] * log(Li);
80 }
81
82 sort(la.begin(), la.end());
83 for (size_t i = array1_->size(); i > 0; i--)
84 {
85 lnL_ -= la[i - 1];
86 }
87}
88
89/******************************************************************************/
90
92 const Tree& tree,
93 std::shared_ptr<TransitionModelInterface> model,
94 std::shared_ptr<DiscreteDistributionInterface> rDist,
95 bool checkRooted,
96 bool verbose) :
97 DRHomogeneousTreeLikelihood(tree, model, rDist, checkRooted, verbose),
98 brLikFunction_(),
99 brentOptimizer_(),
100 brLenNNIValues_(),
101 brLenNNIParams_()
102{
103 brentOptimizer_ = make_unique<BrentOneDimension>();
105 brentOptimizer_->setProfiler(0);
106 brentOptimizer_->setMessageHandler(0);
107 brentOptimizer_->setVerbose(0);
108}
109
110/******************************************************************************/
111
113 const Tree& tree,
114 const AlignmentDataInterface& data,
115 std::shared_ptr<TransitionModelInterface> model,
116 std::shared_ptr<DiscreteDistributionInterface> rDist,
117 bool checkRooted,
118 bool verbose) :
119 DRHomogeneousTreeLikelihood(tree, data, model, rDist, checkRooted, verbose),
120 brLikFunction_(),
121 brentOptimizer_(),
122 brLenNNIValues_(),
123 brLenNNIParams_()
124{
125 brentOptimizer_ = make_unique<BrentOneDimension>();
127 brentOptimizer_->setProfiler(0);
128 brentOptimizer_->setMessageHandler(0);
129 brentOptimizer_->setVerbose(0);
130 // We have to do this since the DRHomogeneousTreeLikelihood constructor will not call the overloaded setData method:
131 brLikFunction_ = make_shared<BranchLikelihood>(likelihoodData().getWeights());
132}
133
134/******************************************************************************/
135
138 brLikFunction_(),
139 brentOptimizer_(),
140 brLenNNIValues_(),
141 brLenNNIParams_()
142{
143 brLikFunction_ = shared_ptr<BranchLikelihood>(lik.brLikFunction_->clone());
144 brentOptimizer_ = unique_ptr<BrentOneDimension>(lik.brentOptimizer_->clone());
147}
148
149/******************************************************************************/
150
152{
154 brLikFunction_ = shared_ptr<BranchLikelihood>(lik.brLikFunction_->clone());
155 brentOptimizer_ = unique_ptr<BrentOneDimension>(lik.brentOptimizer_->clone());
158 return *this;
159}
160
161/******************************************************************************/
162
164
165/******************************************************************************/
166
168{
169 const Node* son = tree_->getNode(nodeId);
170 if (!son->hasFather())
171 throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'son' must not be the root node.", son);
172 const Node* parent = son->getFather();
173 if (!parent->hasFather())
174 throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'parent' must not be the root node.", parent);
175 const Node* grandFather = parent->getFather();
176 // From here: Bifurcation assumed.
177 // In case of multifurcation, an arbitrary uncle is chosen.
178 // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
179 size_t parentPosition = grandFather->getSonPosition(parent);
180 // const Node * uncle = grandFather->getSon(parentPosition > 1 ? parentPosition - 1 : 1 - parentPosition);
181 const Node* uncle = grandFather->getSon(parentPosition > 1 ? 0 : 1 - parentPosition);
182
183 // Retrieving arrays of interest:
184 const DRASDRTreeLikelihoodNodeData* parentData = &likelihoodData().getNodeData(parent->getId());
185 const VVVdouble* sonArray = &parentData->getLikelihoodArrayForNeighbor(son->getId());
186 vector<const Node*> parentNeighbors = TreeTemplateTools::getRemainingNeighbors(parent, grandFather, son);
187 size_t nbParentNeighbors = parentNeighbors.size();
188 vector<const VVVdouble*> parentArrays(nbParentNeighbors);
189 vector<const VVVdouble*> parentTProbs(nbParentNeighbors);
190 for (size_t k = 0; k < nbParentNeighbors; k++)
191 {
192 const Node* n = parentNeighbors[k]; // This neighbor
193 parentArrays[k] = &parentData->getLikelihoodArrayForNeighbor(n->getId());
194 // if(n != grandFather) parentTProbs[k] = & pxy_[n->getId()];
195 // else parentTProbs[k] = & pxy_[parent->getId()];
196 parentTProbs[k] = &pxy_[n->getId()];
197 }
198
199 const DRASDRTreeLikelihoodNodeData* grandFatherData = &likelihoodData().getNodeData(grandFather->getId());
200 const VVVdouble* uncleArray = &grandFatherData->getLikelihoodArrayForNeighbor(uncle->getId());
201 vector<const Node*> grandFatherNeighbors = TreeTemplateTools::getRemainingNeighbors(grandFather, parent, uncle);
202 size_t nbGrandFatherNeighbors = grandFatherNeighbors.size();
203 vector<const VVVdouble*> grandFatherArrays;
204 vector<const VVVdouble*> grandFatherTProbs;
205 for (size_t k = 0; k < nbGrandFatherNeighbors; k++)
206 {
207 const Node* n = grandFatherNeighbors[k]; // This neighbor
208 if (grandFather->getFather() == NULL || n != grandFather->getFather())
209 {
210 grandFatherArrays.push_back(&grandFatherData->getLikelihoodArrayForNeighbor(n->getId()));
211 grandFatherTProbs.push_back(&pxy_[n->getId()]);
212 }
213 }
214
215 // Compute array 1: grand father array
216 VVVdouble array1 = *sonArray;
217 resetLikelihoodArray(array1);
218 grandFatherArrays.push_back(sonArray);
219 grandFatherTProbs.push_back(&pxy_[son->getId()]);
220 if (grandFather->hasFather())
221 {
222 computeLikelihoodFromArrays(grandFatherArrays, grandFatherTProbs, &grandFatherData->getLikelihoodArrayForNeighbor(grandFather->getFather()->getId()), &pxy_[grandFather->getId()], array1, nbGrandFatherNeighbors, nbDistinctSites_, nbClasses_, nbStates_, false);
223 }
224 else
225 {
226 computeLikelihoodFromArrays(grandFatherArrays, grandFatherTProbs, array1, nbGrandFatherNeighbors + 1, nbDistinctSites_, nbClasses_, nbStates_, false);
227
228 // This is the root node, we have to account for the ancestral frequencies:
229 for (size_t i = 0; i < nbDistinctSites_; i++)
230 {
231 for (size_t j = 0; j < nbClasses_; j++)
232 {
233 for (size_t x = 0; x < nbStates_; x++)
234 {
235 array1[i][j][x] *= rootFreqs_[x];
236 }
237 }
238 }
239 }
240
241 // Compute array 2: parent array
242 VVVdouble array2 = *uncleArray;
243 resetLikelihoodArray(array2);
244 parentArrays.push_back(uncleArray);
245 parentTProbs.push_back(&pxy_[uncle->getId()]);
246 computeLikelihoodFromArrays(parentArrays, parentTProbs, array2, nbParentNeighbors + 1, nbDistinctSites_, nbClasses_, nbStates_, false);
247
248 // Initialize BranchLikelihood:
250 brLikFunction_->initLikelihoods(&array1, &array2);
251 ParameterList parameters;
252 size_t pos = 0;
253 while (pos < nodes_.size() && nodes_[pos]->getId() != parent->getId())
254 pos++;
255 if (pos == nodes_.size())
256 throw Exception("NNIHomogeneousTreeLikelihood::testNNI. Invalid node id.");
257 Parameter brLen = parameter("BrLen" + TextTools::toString(pos));
258 brLen.setName("BrLen");
259 parameters.addParameter(brLen);
260 brLikFunction_->setParameters(parameters);
261
262 // Re-estimate branch length:
263 brentOptimizer_->setFunction(brLikFunction_);
264 brentOptimizer_->getStopCondition()->setTolerance(0.1);
265 brentOptimizer_->setInitialInterval(brLen.getValue(), brLen.getValue() + 0.01);
266 brentOptimizer_->init(parameters);
267 brentOptimizer_->optimize();
268 // brLenNNIValues_[nodeId] = brLikFunction_->getParameterValue("BrLen");
269 brLenNNIValues_[nodeId] = brentOptimizer_->getParameters().parameter("BrLen").getValue();
270 brLikFunction_->resetLikelihoods(); // Array1 and Array2 will be destroyed after this function call.
271 // We should not keep pointers towards them...
272
273 // Return the resulting likelihood:
274 return brLikFunction_->getValue() - getValue();
275}
276
277/*******************************************************************************/
279{
280 // Perform the topological move, the likelihood array will have to be recomputed...
281 Node* son = tree_->getNode(nodeId);
282 if (!son->hasFather())
283 throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'son' must not be the root node.", son);
284 Node* parent = son->getFather();
285 if (!parent->hasFather())
286 throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'parent' must not be the root node.", parent);
287 Node* grandFather = parent->getFather();
288 // From here: Bifurcation assumed.
289 // In case of multifurcation, an arbitrary uncle is chosen.
290 // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
291 size_t parentPosition = grandFather->getSonPosition(parent);
292 Node* uncle = grandFather->getSon(parentPosition > 1 ? 0 : 1 - parentPosition);
293 // Swap nodes:
294 parent->removeSon(son);
295 grandFather->removeSon(uncle);
296 parent->addSon(uncle);
297 grandFather->addSon(son);
298 size_t pos = 0;
299 while (pos < nodes_.size() && nodes_[pos]->getId() != parent->getId())
300 pos++;
301 if (pos == nodes_.size())
302 throw Exception("NNIHomogeneousTreeLikelihood::doNNI. Invalid node id.");
303
304 string name = "BrLen" + TextTools::toString(pos);
305 if (brLenNNIValues_.find(nodeId) != brLenNNIValues_.end())
306 {
307 double length = brLenNNIValues_[nodeId];
309 getParameter_(name).setValue(length);
310 parent->setDistanceToFather(length);
311 }
312 else
313 cerr << "ERROR, branch not found: " << nodeId << endl;
314 try
315 {
317 }
318 catch (ParameterException& ex)
319 {
320 StdErr errout;
321 cerr << "DEBUG:" << endl;
323 cerr << "DEBUG:" << name << endl;
324 }
325 // In case of copy of this object, we must remove the constraint associated to this stored parameter:
326 // (It should be also possible to update the pointer in the copy constructor,
327 // but we do not need the constraint info here...).
328 brLenNNIParams_[brLenNNIParams_.size() - 1].removeConstraint();
329}
330
331/*******************************************************************************/
static void resetLikelihoodArray(VVVdouble &likelihoodArray)
Set all conditional likelihoods to 1.
std::vector< Node * > nodes_
Pointer toward all nodes in the tree.
std::shared_ptr< TransitionModelInterface > model_
const Parameter & parameter(const std::string &name) const override
double getParameterValue(const std::string &name) const override
Parameter & getParameter_(const std::string &name)
std::shared_ptr< TreeTemplate< Node > > tree_
static std::string CONSTRAINTS_AUTO
std::vector< unsigned int > weights_
std::shared_ptr< const DiscreteDistributionInterface > rDist_
std::shared_ptr< const TransitionModelInterface > model_
DRASDRTreeLikelihoodNodeData & getNodeData(int nodeId)
Likelihood data structure for a node.
VVVdouble & getLikelihoodArrayForNeighbor(int neighborId)
This class implements the likelihood computation for a tree using the double-recursive algorithm.
static void computeLikelihoodFromArrays(const std::vector< const VVVdouble * > &iLik, const std::vector< const VVVdouble * > &tProb, VVVdouble &oLik, size_t nbNodes, size_t nbDistinctSites, size_t nbClasses, size_t nbStates, bool reset=true)
Compute conditional likelihoods.
double getValue() const override
Function and NNISearchable interface.
DRHomogeneousTreeLikelihood & operator=(const DRHomogeneousTreeLikelihood &lik)
DRASDRTreeLikelihoodData & likelihoodData() override
This class adds support for NNI topology estimation to the DRHomogeneousTreeLikelihood class.
NNIHomogeneousTreeLikelihood(const Tree &tree, std::shared_ptr< TransitionModelInterface > model, std::shared_ptr< DiscreteDistributionInterface > rDist, bool checkRooted=true, bool verbose=true)
Build a new NNIHomogeneousTreeLikelihood object.
NNIHomogeneousTreeLikelihood & operator=(const NNIHomogeneousTreeLikelihood &lik)
std::shared_ptr< BranchLikelihood > brLikFunction_
std::map< int, double > brLenNNIValues_
Hash used for backing up branch lengths when testing NNIs.
std::unique_ptr< BrentOneDimension > brentOptimizer_
Optimizer used for testing NNI.
void doNNI(int nodeId) override
Perform a NNI movement.
double testNNI(int nodeId) const override
Send the score of a NNI movement, without performing it.
General exception thrown when something is wrong with a particular node.
The phylogenetic node class.
Definition: Node.h:59
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:266
virtual Node * removeSon(size_t pos)
Definition: Node.h:423
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:386
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:306
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual size_t getSonPosition(const Node *son) const
Definition: Node.cpp:153
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:346
size_t size() const
virtual void setParameterValue(const std::string &name, double value)
virtual void addParameter(const Parameter &param)
virtual const Parameter & parameter(const std::string &name) const
virtual void printParameters(OutputStream &out) const
virtual void setValue(double value)
virtual double getValue() const
virtual void setName(const std::string &name)
static std::vector< const Node * > getRemainingNeighbors(const Node *node1, const Node *node2, const Node *node3)
Get a subset of node neighbors.
Interface for phylogenetic tree objects.
Definition: Tree.h:115
std::string toString(T t)
Defines the basic types of data flow nodes.
double log(const ExtendedFloat &ef)
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble