bpp-phyl3  3.0.0
DRHomogeneousMixedTreeLikelihood.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 
7 // From the STL:
8 #include <iostream>
9 
10 #include <cmath>
11 #include "../../PatternTools.h"
12 
15 
16 using namespace bpp;
17 using namespace std;
18 
20  const Tree& tree,
21  std::shared_ptr<TransitionModelInterface> model,
22  std::shared_ptr<DiscreteDistributionInterface> rDist,
23  bool checkRooted,
24  bool verbose,
25  bool rootArray) :
26  DRHomogeneousTreeLikelihood(tree, model, rDist, checkRooted, verbose),
27  treeLikelihoodsContainer_(),
28  probas_(),
29  rootArray_(rootArray)
30 {
31  shared_ptr<MixedTransitionModelInterface> mixedmodel;
32 
33  if ((mixedmodel = dynamic_pointer_cast<MixedTransitionModelInterface>(model_)) == nullptr)
34  throw Exception("Bad model: DRHomogeneousMixedTreeLikelihood needs a MixedTransitionModel.");
35 
36  size_t s = mixedmodel->getNumberOfModels();
37  for (size_t i = 0; i < s; ++i)
38  {
39  treeLikelihoodsContainer_.push_back(
40  new DRHomogeneousTreeLikelihood(tree, unique_ptr<TransitionModelInterface>(mixedmodel->nModel(i).clone()), rDist, checkRooted, false));
41  probas_.push_back(mixedmodel->getNProbability(i));
42  }
43 }
44 
46  const Tree& tree,
47  const AlignmentDataInterface& data,
48  std::shared_ptr<TransitionModelInterface> model,
49  std::shared_ptr<DiscreteDistributionInterface> rDist,
50  bool checkRooted,
51  bool verbose,
52  bool rootArray) :
53  DRHomogeneousTreeLikelihood(tree, model, rDist, checkRooted, verbose),
54  treeLikelihoodsContainer_(),
55  probas_(),
56  rootArray_(rootArray)
57 {
58  shared_ptr<MixedTransitionModelInterface> mixedmodel;
59 
60  if ((mixedmodel = dynamic_pointer_cast<MixedTransitionModelInterface>(model_)) == nullptr)
61  throw Exception("Bad model: DRHomogeneousMixedTreeLikelihood needs a MixedTransitionModel.");
62 
63  size_t s = mixedmodel->getNumberOfModels();
64 
65  for (size_t i = 0; i < s; i++)
66  {
67  treeLikelihoodsContainer_.push_back(
68  new DRHomogeneousTreeLikelihood(tree, unique_ptr<TransitionModelInterface>(mixedmodel->nModel(i).clone()), rDist, checkRooted, false));
69  probas_.push_back(mixedmodel->getNProbability(i));
70  }
71  setData(data);
72 }
73 
74 
76 {
78 
80  probas_.clear();
81 
82  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
83  {
84  treeLikelihoodsContainer_.push_back(lik.treeLikelihoodsContainer_[i]->clone());
85  probas_.push_back(lik.probas_[i]);
86  }
87 
88  rootArray_ = lik.rootArray_;
89 
90  return *this;
91 }
92 
95  treeLikelihoodsContainer_(lik.treeLikelihoodsContainer_.size()),
96  probas_(lik.probas_.size()),
97  rootArray_(lik.rootArray_)
98 {
99  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
100  {
101  treeLikelihoodsContainer_.push_back(lik.treeLikelihoodsContainer_[i]->clone());
102  probas_.push_back(lik.probas_[i]);
103  }
104 }
105 
107 {
108  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
109  {
110  delete treeLikelihoodsContainer_[i];
111  }
112 }
113 
114 
116 {
117  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
118  {
119  treeLikelihoodsContainer_[i]->initialize();
120  }
122  if (rootArray_)
124 }
125 
127 {
129 
130  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); ++i)
131  {
132  treeLikelihoodsContainer_[i]->setData(sites);
133  }
134 }
135 
136 
138 {
139  applyParameters();
140 
141  auto mixedmodel = dynamic_pointer_cast<MixedTransitionModelInterface>(model_);
142 
143  size_t s = mixedmodel->getNumberOfModels();
144 
145  for (size_t i = 0; i < s; ++i)
146  {
147  ParameterList pl;
148  const TransitionModelInterface& pm = mixedmodel->nModel(i);
149  pl.addParameters(pm.getParameters());
151  treeLikelihoodsContainer_[i]->matchParametersValues(pl);
152  }
153  probas_ = mixedmodel->getProbabilities();
154 
156 }
157 
159 {
160  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
161  {
162  treeLikelihoodsContainer_[i]->resetLikelihoodArrays(node);
163  }
164 }
165 
167 {
168  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
169  {
170  treeLikelihoodsContainer_[i]->computeTreeLikelihood();
171  }
172  if (rootArray_)
174 }
175 
176 /******************************************************************************
177 * Likelihoods *
178 ******************************************************************************/
180 {
181  double l = 1.;
182  vector<Vdouble*> llik;
183  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
184  {
185  llik.push_back(&treeLikelihoodsContainer_[i]->likelihoodData_->getRootRateSiteLikelihoodArray());
186  }
187 
188  double x;
189  const vector<unsigned int>* w = &likelihoodData_->getWeights();
190  for (unsigned int i = 0; i < nbDistinctSites_; i++)
191  {
192  x = 0;
193  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
194  {
195  x += (*llik[j])[i] * probas_[j];
196  }
197  l *= std::pow(x, (int)(*w)[i]);
198  }
199  return l;
200 }
201 
203 {
204  double ll = 0;
205 
206  vector<Vdouble*> llik;
207  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
208  {
209  llik.push_back(&treeLikelihoodsContainer_[i]->likelihoodData_->getRootRateSiteLikelihoodArray());
210  }
211 
212  double x;
213  const vector<unsigned int>* w = &likelihoodData_->getWeights();
214  vector<double> la(nbDistinctSites_);
215  for (unsigned int i = 0; i < nbDistinctSites_; i++)
216  {
217  x = 0;
218  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
219  {
220  x += (*llik[j])[i] * probas_[j];
221  }
222  la[i] = (*w)[i] * log(x);
223  }
224  sort(la.begin(), la.end());
225  for (size_t i = nbDistinctSites_; i > 0; i--)
226  {
227  ll += la[i - 1];
228  }
229 
230  return ll;
231 }
232 
233 
235 {
236  double res = 0;
237  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
238  {
239  res += treeLikelihoodsContainer_[i]->getLikelihoodForASite(site) * probas_[i];
240  }
241 
242  return res;
243 }
244 
246 {
247  double x = getLikelihoodForASite(site);
248  if (x < 0)
249  x = 0;
250  return log(x);
251 }
252 
254 {
255  double res = 0;
256  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
257  {
258  res += treeLikelihoodsContainer_[i]->getLikelihoodForASiteForARateClass(site, rateClass) * probas_[i];
259  }
260 
261  return res;
262 }
263 
265 {
266  double x = getLikelihoodForASiteForARateClass(site, rateClass);
267  if (x < 0)
268  x = 0;
269  return log(x);
270 }
271 
272 double DRHomogeneousMixedTreeLikelihood::getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
273 {
274  double res = 0;
275 
276  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
277  {
278  res += treeLikelihoodsContainer_[i]->getLikelihoodForASiteForARateClassForAState(site, rateClass, state) * probas_[i];
279  }
280 
281  return res;
282 }
283 
284 double DRHomogeneousMixedTreeLikelihood::getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
285 {
286  double x = getLikelihoodForASiteForARateClassForAState(site, rateClass, state);
287  if (x < 0)
288  x = 0;
289  return log(x);
290 }
291 
292 
294 {
295  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
296  {
297  treeLikelihoodsContainer_[i]->computeSubtreeLikelihoodPostfix(node);
298  }
299 }
300 
302 {
303  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
304  {
305  treeLikelihoodsContainer_[i]->computeSubtreeLikelihoodPostfix(node);
306  }
307 }
308 
310 {
311  for (size_t i = 0; i < treeLikelihoodsContainer_.size(); i++)
312  {
313  treeLikelihoodsContainer_[i]->computeRootLikelihood();
314  }
315 }
316 
317 void DRHomogeneousMixedTreeLikelihood::computeLikelihoodAtNode_(const Node* node, VVVdouble& likelihoodArray, const Node* sonNode) const
318 {
319  likelihoodArray.resize(nbDistinctSites_);
320  for (size_t i = 0; i < nbDistinctSites_; i++)
321  {
322  VVdouble* likelihoodArray_i = &likelihoodArray[i];
323  likelihoodArray_i->resize(nbClasses_);
324  for (size_t c = 0; c < nbClasses_; c++)
325  {
326  Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
327  likelihoodArray_i_c->resize(nbStates_);
328  for (size_t x = 0; x < nbStates_; x++)
329  {
330  (*likelihoodArray_i_c)[x] = 0;
331  }
332  }
333  }
334 
335  VVVdouble lArray;
336  for (size_t nm = 0; nm < treeLikelihoodsContainer_.size(); nm++)
337  {
338  treeLikelihoodsContainer_[nm]->computeLikelihoodAtNode_(node, lArray, sonNode);
339 
340  for (size_t i = 0; i < nbDistinctSites_; i++)
341  {
342  VVdouble* likelihoodArray_i = &likelihoodArray[i];
343  VVdouble* lArray_i = &lArray[i];
344 
345  for (size_t c = 0; c < nbClasses_; c++)
346  {
347  Vdouble* likelihoodArray_i_c = &(*likelihoodArray_i)[c];
348  Vdouble* lArray_i_c = &(*lArray_i)[c];
349  for (size_t x = 0; x < nbStates_; x++)
350  {
351  (*likelihoodArray_i_c)[x] += (*lArray_i_c)[x] * probas_[nm];
352  }
353  }
354  }
355  }
356 }
357 
358 /******************************************************************************
359 * First Order Derivatives *
360 ******************************************************************************/
362 {
363  for (size_t i = 0; i < treeLikelihoodsContainer_.size(); i++)
364  {
365  treeLikelihoodsContainer_[i]->computeTreeDLikelihoods();
366  }
367 }
368 
369 double DRHomogeneousMixedTreeLikelihood::getFirstOrderDerivative(const std::string& variable) const
370 {
371  if (!hasParameter(variable))
372  throw ParameterNotFoundException("DRHomogeneousTreeLikelihood::getFirstOrderDerivative().", variable);
374  {
375  throw Exception("Derivatives respective to rate distribution parameters are not implemented.");
376  }
378  {
379  throw Exception("Derivatives respective to substitution model parameters are not implemented.");
380  }
381 
382  //
383  // Computation for branch lengths:
384  //
385 
386  // Get the node with the branch whose length must be derivated:
387  unsigned int brI = TextTools::to<unsigned int>(variable.substr(5));
388  const Node* branch = nodes_[brI];
389  vector< Vdouble*> _vdLikelihoods_branch;
390  for (size_t i = 0; i < treeLikelihoodsContainer_.size(); i++)
391  {
392  _vdLikelihoods_branch.push_back(&treeLikelihoodsContainer_[i]->likelihoodData_->getDLikelihoodArray(branch->getId()));
393  }
394 
395  double d = 0;
396  double x;
397  const vector<unsigned int>* w = &likelihoodData_->getWeights();
398  for (unsigned int i = 0; i < nbDistinctSites_; i++)
399  {
400  x = 0;
401  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
402  {
403  x += (*_vdLikelihoods_branch[j])[i] * probas_[j];
404  }
405  d += (*w)[i] * x;
406  }
407 
408  return -d;
409 }
410 
411 
412 /******************************************************************************
413 * Second Order Derivatives *
414 ******************************************************************************/
416 {
417  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
418  {
419  treeLikelihoodsContainer_[i]->computeTreeD2LikelihoodAtNode(node);
420  }
421 }
422 
424 {
425  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
426  {
427  treeLikelihoodsContainer_[i]->computeTreeD2Likelihoods();
428  }
429 }
430 
431 double DRHomogeneousMixedTreeLikelihood::getSecondOrderDerivative(const std::string& variable) const
432 {
433  if (!hasParameter(variable))
434  throw ParameterNotFoundException("DRHomogeneousTreeLikelihood::getFirstOrderDerivative().", variable);
436  {
437  throw Exception("Derivatives respective to rate distribution parameters are not implemented.");
438  }
440  {
441  throw Exception("Derivatives respective to substitution model parameters are not implemented.");
442  }
443 
444  //
445  // Computation for branch lengths:
446  //
447 
448  // Get the node with the branch whose length must be derivated:
449  unsigned int brI = TextTools::to<unsigned int>(variable.substr(5));
450  const Node* branch = nodes_[brI];
451  vector< Vdouble*> _vdLikelihoods_branch, _vd2Likelihoods_branch;
452  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
453  {
454  _vdLikelihoods_branch.push_back(&treeLikelihoodsContainer_[i]->likelihoodData_->getDLikelihoodArray(branch->getId()));
455  _vd2Likelihoods_branch.push_back(&treeLikelihoodsContainer_[i]->likelihoodData_->getD2LikelihoodArray(branch->getId()));
456  }
457 
458  double d = 0;
459  double x, x2;
460  const vector<unsigned int>* w = &likelihoodData_->getWeights();
461  for (unsigned int i = 0; i < nbDistinctSites_; i++)
462  {
463  x = 0;
464  x2 = 0;
465  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
466  {
467  x += (*_vdLikelihoods_branch[j])[i] * probas_[j];
468  }
469  for (unsigned int j = 0; j < treeLikelihoodsContainer_.size(); j++)
470  {
471  x2 += (*_vd2Likelihoods_branch[j])[i] * probas_[j];
472  }
473 
474  d += (*w)[i] * (x2 - pow(x, 2));
475  }
476 
477  return -d;
478 }
479 
480 
482 {
483  for (unsigned int i = 0; i < treeLikelihoodsContainer_.size(); i++)
484  {
485  treeLikelihoodsContainer_[i]->displayLikelihood(node);
486  }
487 }
std::vector< Node * > nodes_
Pointer toward all nodes in the tree.
virtual void applyParameters()
All parameters are stored in a parameter list. This function apply these parameters to the substituti...
std::shared_ptr< TransitionModelInterface > model_
ParameterList getRateDistributionParameters() const
Get the parameters associated to the rate distribution.
ParameterList getSubstitutionModelParameters() const
Get the parameters associated to substitution model(s).
bool hasParameter(const std::string &name) const override
const ParameterList & getParameters() const override
const AlignmentDataInterface & data() const
Get the dataset for which the likelihood must be evaluated.
const Tree & tree() const
Get the tree (topology and branch lengths).
A class to compute the average of several DRHomogeneousTreeLikelihood defined from a Mixed Substituti...
virtual void displayLikelihood(const Node *node)
This method is mainly for debugging purpose.
double getSecondOrderDerivative(const std::string &variable) const
double getLogLikelihoodForASite(size_t site) const
Get the logarithm of the likelihood for a site.
double getLogLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the logarithm of the likelihood for a site knowing its rate class.
double getFirstOrderDerivative(const std::string &variable) const
double getLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the likelihood for a site knowing its rate class and its ancestral state.
std::vector< DRHomogeneousTreeLikelihood * > treeLikelihoodsContainer_
double getLikelihood() const
Get the likelihood for the whole dataset.
double getLogLikelihood() const
Get the logarithm of the likelihood for the whole dataset.
virtual void computeLikelihoodAtNode_(const Node *node, VVVdouble &likelihoodArray, const Node *sonNode=0) const
double getLogLikelihoodForASiteForARateClassForAState(size_t site, size_t rateClass, int state) const
Get the logarithm of the likelihood for a site knowing its rate class and its ancestral state.
void fireParameterChanged(const ParameterList &params)
DRHomogeneousMixedTreeLikelihood & operator=(const DRHomogeneousMixedTreeLikelihood &lik)
void setData(const AlignmentDataInterface &sites)
Set the dataset for which the likelihood must be evaluated.
virtual void computeSubtreeLikelihoodPrefix(const Node *node)
double getLikelihoodForASiteForARateClass(size_t site, size_t rateClass) const
Get the likelihood for a site knowing its rate class.
virtual void computeSubtreeLikelihoodPostfix(const Node *node)
Compute the likelihood for a subtree defined by the Tree::Node node.
double getLikelihoodForASite(size_t site) const
Get the likelihood for a site.
This class implements the likelihood computation for a tree using the double-recursive algorithm.
DRHomogeneousTreeLikelihood(const Tree &tree, std::shared_ptr< TransitionModelInterface > model, std::shared_ptr< DiscreteDistributionInterface > rDist, bool checkRooted=true, bool verbose=true)
Build a new DRHomogeneousTreeLikelihood object without data.
void setData(const AlignmentDataInterface &sites) override
Set the dataset for which the likelihood must be evaluated.
std::unique_ptr< DRASDRTreeLikelihoodData > likelihoodData_
DRHomogeneousTreeLikelihood & operator=(const DRHomogeneousTreeLikelihood &lik)
The phylogenetic node class.
Definition: Node.h:59
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual void addParameters(const ParameterList &params)
virtual void includeParameters(const ParameterList &params)
virtual const ParameterList & getParameters() const=0
Interface for all transition models.
Interface for phylogenetic tree objects.
Definition: Tree.h:115
Defines the basic types of data flow nodes.
double log(const ExtendedFloat &ef)
std::vector< double > Vdouble
ExtendedFloat pow(const ExtendedFloat &ef, double exp)
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble