bpp-phyl3  3.0.0
NNIHomogeneousTreeLikelihood.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
7 #include <Bpp/Text/TextTools.h>
8 
10 
11 using namespace bpp;
12 
13 // From the STL:
14 #include <iostream>
15 
16 using namespace std;
17 
18 /*******************************************************************************/
20  std::shared_ptr<const TransitionModelInterface> model,
21  std::shared_ptr<const DiscreteDistributionInterface> rDist)
22 {
23  model_ = model;
24  rDist_ = rDist;
25  nbStates_ = model->getNumberOfStates();
26  nbClasses_ = rDist->getNumberOfCategories();
27  pxy_.resize(nbClasses_);
28  for (size_t i = 0; i < nbClasses_; i++)
29  {
30  pxy_[i].resize(nbStates_);
31  for (size_t j = 0; j < nbStates_; j++)
32  {
33  pxy_[i][j].resize(nbStates_);
34  }
35  }
36 }
37 
38 /*******************************************************************************/
40 {
41  double l = getParameterValue("BrLen");
42 
43  // Computes all pxy once for all:
44  for (size_t c = 0; c < nbClasses_; c++)
45  {
46  VVdouble* pxy__c = &pxy_[c];
47  RowMatrix<double> Q = model_->getPij_t(l * rDist_->getCategory(c));
48  for (size_t x = 0; x < nbStates_; x++)
49  {
50  Vdouble* pxy__c_x = &(*pxy__c)[x];
51  for (size_t y = 0; y < nbStates_; y++)
52  {
53  (*pxy__c_x)[y] = Q(x, y);
54  }
55  }
56  }
57 }
58 
59 /*******************************************************************************/
61 {
62  lnL_ = 0;
63 
64  vector<double> la(array1_->size());
65  for (size_t i = 0; i < array1_->size(); i++)
66  {
67  double Li = 0;
68  for (size_t c = 0; c < nbClasses_; c++)
69  {
70  double rc = rDist_->getProbability(c);
71  for (size_t x = 0; x < nbStates_; x++)
72  {
73  for (size_t y = 0; y < nbStates_; y++)
74  {
75  Li += rc * (*array1_)[i][c][x] * pxy_[c][x][y] * (*array2_)[i][c][y];
76  }
77  }
78  }
79  la[i] = weights_[i] * log(Li);
80  }
81 
82  sort(la.begin(), la.end());
83  for (size_t i = array1_->size(); i > 0; i--)
84  {
85  lnL_ -= la[i - 1];
86  }
87 }
88 
89 /******************************************************************************/
90 
92  const Tree& tree,
93  std::shared_ptr<TransitionModelInterface> model,
94  std::shared_ptr<DiscreteDistributionInterface> rDist,
95  bool checkRooted,
96  bool verbose) :
97  DRHomogeneousTreeLikelihood(tree, model, rDist, checkRooted, verbose),
98  brLikFunction_(),
99  brentOptimizer_(),
100  brLenNNIValues_(),
101  brLenNNIParams_()
102 {
103  brentOptimizer_ = make_unique<BrentOneDimension>();
104  brentOptimizer_->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
105  brentOptimizer_->setProfiler(0);
106  brentOptimizer_->setMessageHandler(0);
107  brentOptimizer_->setVerbose(0);
108 }
109 
110 /******************************************************************************/
111 
113  const Tree& tree,
114  const AlignmentDataInterface& data,
115  std::shared_ptr<TransitionModelInterface> model,
116  std::shared_ptr<DiscreteDistributionInterface> rDist,
117  bool checkRooted,
118  bool verbose) :
119  DRHomogeneousTreeLikelihood(tree, data, model, rDist, checkRooted, verbose),
120  brLikFunction_(),
121  brentOptimizer_(),
122  brLenNNIValues_(),
123  brLenNNIParams_()
124 {
125  brentOptimizer_ = make_unique<BrentOneDimension>();
126  brentOptimizer_->setConstraintPolicy(AutoParameter::CONSTRAINTS_AUTO);
127  brentOptimizer_->setProfiler(0);
128  brentOptimizer_->setMessageHandler(0);
129  brentOptimizer_->setVerbose(0);
130  // We have to do this since the DRHomogeneousTreeLikelihood constructor will not call the overloaded setData method:
131  brLikFunction_ = make_shared<BranchLikelihood>(likelihoodData().getWeights());
132 }
133 
134 /******************************************************************************/
135 
138  brLikFunction_(),
139  brentOptimizer_(),
140  brLenNNIValues_(),
141  brLenNNIParams_()
142 {
143  brLikFunction_ = shared_ptr<BranchLikelihood>(lik.brLikFunction_->clone());
144  brentOptimizer_ = unique_ptr<BrentOneDimension>(lik.brentOptimizer_->clone());
147 }
148 
149 /******************************************************************************/
150 
152 {
154  brLikFunction_ = shared_ptr<BranchLikelihood>(lik.brLikFunction_->clone());
155  brentOptimizer_ = unique_ptr<BrentOneDimension>(lik.brentOptimizer_->clone());
158  return *this;
159 }
160 
161 /******************************************************************************/
162 
164 
165 /******************************************************************************/
166 
168 {
169  const Node* son = tree_->getNode(nodeId);
170  if (!son->hasFather())
171  throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'son' must not be the root node.", son);
172  const Node* parent = son->getFather();
173  if (!parent->hasFather())
174  throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'parent' must not be the root node.", parent);
175  const Node* grandFather = parent->getFather();
176  // From here: Bifurcation assumed.
177  // In case of multifurcation, an arbitrary uncle is chosen.
178  // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
179  size_t parentPosition = grandFather->getSonPosition(parent);
180  // const Node * uncle = grandFather->getSon(parentPosition > 1 ? parentPosition - 1 : 1 - parentPosition);
181  const Node* uncle = grandFather->getSon(parentPosition > 1 ? 0 : 1 - parentPosition);
182 
183  // Retrieving arrays of interest:
184  const DRASDRTreeLikelihoodNodeData* parentData = &likelihoodData().getNodeData(parent->getId());
185  const VVVdouble* sonArray = &parentData->getLikelihoodArrayForNeighbor(son->getId());
186  vector<const Node*> parentNeighbors = TreeTemplateTools::getRemainingNeighbors(parent, grandFather, son);
187  size_t nbParentNeighbors = parentNeighbors.size();
188  vector<const VVVdouble*> parentArrays(nbParentNeighbors);
189  vector<const VVVdouble*> parentTProbs(nbParentNeighbors);
190  for (size_t k = 0; k < nbParentNeighbors; k++)
191  {
192  const Node* n = parentNeighbors[k]; // This neighbor
193  parentArrays[k] = &parentData->getLikelihoodArrayForNeighbor(n->getId());
194  // if(n != grandFather) parentTProbs[k] = & pxy_[n->getId()];
195  // else parentTProbs[k] = & pxy_[parent->getId()];
196  parentTProbs[k] = &pxy_[n->getId()];
197  }
198 
199  const DRASDRTreeLikelihoodNodeData* grandFatherData = &likelihoodData().getNodeData(grandFather->getId());
200  const VVVdouble* uncleArray = &grandFatherData->getLikelihoodArrayForNeighbor(uncle->getId());
201  vector<const Node*> grandFatherNeighbors = TreeTemplateTools::getRemainingNeighbors(grandFather, parent, uncle);
202  size_t nbGrandFatherNeighbors = grandFatherNeighbors.size();
203  vector<const VVVdouble*> grandFatherArrays;
204  vector<const VVVdouble*> grandFatherTProbs;
205  for (size_t k = 0; k < nbGrandFatherNeighbors; k++)
206  {
207  const Node* n = grandFatherNeighbors[k]; // This neighbor
208  if (grandFather->getFather() == NULL || n != grandFather->getFather())
209  {
210  grandFatherArrays.push_back(&grandFatherData->getLikelihoodArrayForNeighbor(n->getId()));
211  grandFatherTProbs.push_back(&pxy_[n->getId()]);
212  }
213  }
214 
215  // Compute array 1: grand father array
216  VVVdouble array1 = *sonArray;
217  resetLikelihoodArray(array1);
218  grandFatherArrays.push_back(sonArray);
219  grandFatherTProbs.push_back(&pxy_[son->getId()]);
220  if (grandFather->hasFather())
221  {
222  computeLikelihoodFromArrays(grandFatherArrays, grandFatherTProbs, &grandFatherData->getLikelihoodArrayForNeighbor(grandFather->getFather()->getId()), &pxy_[grandFather->getId()], array1, nbGrandFatherNeighbors, nbDistinctSites_, nbClasses_, nbStates_, false);
223  }
224  else
225  {
226  computeLikelihoodFromArrays(grandFatherArrays, grandFatherTProbs, array1, nbGrandFatherNeighbors + 1, nbDistinctSites_, nbClasses_, nbStates_, false);
227 
228  // This is the root node, we have to account for the ancestral frequencies:
229  for (size_t i = 0; i < nbDistinctSites_; i++)
230  {
231  for (size_t j = 0; j < nbClasses_; j++)
232  {
233  for (size_t x = 0; x < nbStates_; x++)
234  {
235  array1[i][j][x] *= rootFreqs_[x];
236  }
237  }
238  }
239  }
240 
241  // Compute array 2: parent array
242  VVVdouble array2 = *uncleArray;
243  resetLikelihoodArray(array2);
244  parentArrays.push_back(uncleArray);
245  parentTProbs.push_back(&pxy_[uncle->getId()]);
246  computeLikelihoodFromArrays(parentArrays, parentTProbs, array2, nbParentNeighbors + 1, nbDistinctSites_, nbClasses_, nbStates_, false);
247 
248  // Initialize BranchLikelihood:
250  brLikFunction_->initLikelihoods(&array1, &array2);
251  ParameterList parameters;
252  size_t pos = 0;
253  while (pos < nodes_.size() && nodes_[pos]->getId() != parent->getId())
254  pos++;
255  if (pos == nodes_.size())
256  throw Exception("NNIHomogeneousTreeLikelihood::testNNI. Invalid node id.");
257  Parameter brLen = parameter("BrLen" + TextTools::toString(pos));
258  brLen.setName("BrLen");
259  parameters.addParameter(brLen);
260  brLikFunction_->setParameters(parameters);
261 
262  // Re-estimate branch length:
263  brentOptimizer_->setFunction(brLikFunction_);
264  brentOptimizer_->getStopCondition()->setTolerance(0.1);
265  brentOptimizer_->setInitialInterval(brLen.getValue(), brLen.getValue() + 0.01);
266  brentOptimizer_->init(parameters);
267  brentOptimizer_->optimize();
268  // brLenNNIValues_[nodeId] = brLikFunction_->getParameterValue("BrLen");
269  brLenNNIValues_[nodeId] = brentOptimizer_->getParameters().parameter("BrLen").getValue();
270  brLikFunction_->resetLikelihoods(); // Array1 and Array2 will be destroyed after this function call.
271  // We should not keep pointers towards them...
272 
273  // Return the resulting likelihood:
274  return brLikFunction_->getValue() - getValue();
275 }
276 
277 /*******************************************************************************/
279 {
280  // Perform the topological move, the likelihood array will have to be recomputed...
281  Node* son = tree_->getNode(nodeId);
282  if (!son->hasFather())
283  throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'son' must not be the root node.", son);
284  Node* parent = son->getFather();
285  if (!parent->hasFather())
286  throw NodePException("DRHomogeneousTreeLikelihood::testNNI(). Node 'parent' must not be the root node.", parent);
287  Node* grandFather = parent->getFather();
288  // From here: Bifurcation assumed.
289  // In case of multifurcation, an arbitrary uncle is chosen.
290  // If we are at root node with a trifurcation, this does not matter, since 2 NNI are possible (see doc of the NNISearchable interface).
291  size_t parentPosition = grandFather->getSonPosition(parent);
292  Node* uncle = grandFather->getSon(parentPosition > 1 ? 0 : 1 - parentPosition);
293  // Swap nodes:
294  parent->removeSon(son);
295  grandFather->removeSon(uncle);
296  parent->addSon(uncle);
297  grandFather->addSon(son);
298  size_t pos = 0;
299  while (pos < nodes_.size() && nodes_[pos]->getId() != parent->getId())
300  pos++;
301  if (pos == nodes_.size())
302  throw Exception("NNIHomogeneousTreeLikelihood::doNNI. Invalid node id.");
303 
304  string name = "BrLen" + TextTools::toString(pos);
305  if (brLenNNIValues_.find(nodeId) != brLenNNIValues_.end())
306  {
307  double length = brLenNNIValues_[nodeId];
308  brLenParameters_.setParameterValue(name, length);
309  getParameter_(name).setValue(length);
310  parent->setDistanceToFather(length);
311  }
312  else
313  cerr << "ERROR, branch not found: " << nodeId << endl;
314  try
315  {
317  }
318  catch (ParameterException& ex)
319  {
320  StdErr errout;
321  cerr << "DEBUG:" << endl;
323  cerr << "DEBUG:" << name << endl;
324  }
325  // In case of copy of this object, we must remove the constraint associated to this stored parameter:
326  // (It should be also possible to update the pointer in the copy constructor,
327  // but we do not need the constraint info here...).
328  brLenNNIParams_[brLenNNIParams_.size() - 1].removeConstraint();
329 }
330 
331 /*******************************************************************************/
static void resetLikelihoodArray(VVVdouble &likelihoodArray)
Set all conditional likelihoods to 1.
std::vector< Node * > nodes_
Pointer toward all nodes in the tree.
std::shared_ptr< TransitionModelInterface > model_
const Parameter & parameter(const std::string &name) const override
Parameter & getParameter_(const std::string &name)
std::shared_ptr< TreeTemplate< Node > > tree_
static std::string CONSTRAINTS_AUTO
void initModel(std::shared_ptr< const TransitionModelInterface > model, std::shared_ptr< const DiscreteDistributionInterface > rDist)
DRASDRTreeLikelihoodNodeData & getNodeData(int nodeId)
Likelihood data structure for a node.
VVVdouble & getLikelihoodArrayForNeighbor(int neighborId)
This class implements the likelihood computation for a tree using the double-recursive algorithm.
static void computeLikelihoodFromArrays(const std::vector< const VVVdouble * > &iLik, const std::vector< const VVVdouble * > &tProb, VVVdouble &oLik, size_t nbNodes, size_t nbDistinctSites, size_t nbClasses, size_t nbStates, bool reset=true)
Compute conditional likelihoods.
double getValue() const override
Function and NNISearchable interface.
DRHomogeneousTreeLikelihood & operator=(const DRHomogeneousTreeLikelihood &lik)
DRASDRTreeLikelihoodData & likelihoodData() override
This class adds support for NNI topology estimation to the DRHomogeneousTreeLikelihood class.
NNIHomogeneousTreeLikelihood(const Tree &tree, std::shared_ptr< TransitionModelInterface > model, std::shared_ptr< DiscreteDistributionInterface > rDist, bool checkRooted=true, bool verbose=true)
Build a new NNIHomogeneousTreeLikelihood object.
NNIHomogeneousTreeLikelihood & operator=(const NNIHomogeneousTreeLikelihood &lik)
std::shared_ptr< BranchLikelihood > brLikFunction_
std::map< int, double > brLenNNIValues_
Hash used for backing up branch lengths when testing NNIs.
std::unique_ptr< BrentOneDimension > brentOptimizer_
Optimizer used for testing NNI.
void doNNI(int nodeId) override
Perform a NNI movement.
double testNNI(int nodeId) const override
Send the score of a NNI movement, without performing it.
General exception thrown when something is wrong with a particular node.
The phylogenetic node class.
Definition: Node.h:59
virtual void setDistanceToFather(double distance)
Set or update the distance toward the father node.
Definition: Node.h:266
virtual Node * removeSon(size_t pos)
Definition: Node.h:411
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual void addSon(size_t pos, Node *node)
Definition: Node.h:374
virtual const Node * getSon(size_t pos) const
Definition: Node.h:362
virtual const Node * getFather() const
Get the father of this node is there is one.
Definition: Node.h:306
virtual size_t getSonPosition(const Node *son) const
Definition: Node.cpp:151
virtual bool hasFather() const
Tell if this node has a father node.
Definition: Node.h:346
size_t size() const
virtual void setParameterValue(const std::string &name, double value)
virtual void addParameter(const Parameter &param)
virtual const Parameter & parameter(const std::string &name) const
virtual void printParameters(OutputStream &out) const
virtual void setValue(double value)
virtual double getValue() const
virtual void setName(const std::string &name)
static std::vector< const Node * > getRemainingNeighbors(const Node *node1, const Node *node2, const Node *node3)
Get a subset of node neighbors.
Interface for phylogenetic tree objects.
Definition: Tree.h:115
std::string toString(T t)
Defines the basic types of data flow nodes.
double log(const ExtendedFloat &ef)
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble