bpp-phyl3  3.0.0
AbstractNonHomogeneousTreeLikelihood.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../../PatternTools.h"
7 
8 // From SeqLib:
9 #include <Bpp/Seq/SiteTools.h>
11 
12 #include <Bpp/Text/TextTools.h>
14 
15 using namespace bpp;
16 
17 // From the STL:
18 #include <iostream>
19 
20 using namespace std;
21 
22 /******************************************************************************/
23 
25  const Tree& tree,
26  std::shared_ptr<SubstitutionModelSet> modelSet,
27  std::shared_ptr<DiscreteDistributionInterface> rDist,
28  bool verbose,
29  bool reparametrizeRoot) :
31  modelSet_(0),
32  brLenParameters_(),
33  pxy_(),
34  dpxy_(),
35  d2pxy_(),
36  rootFreqs_(),
37  nodes_(),
38  idToNode_(),
39  nbSites_(),
40  nbDistinctSites_(),
41  nbClasses_(),
42  nbStates_(),
43  nbNodes_(),
44  verbose_(),
45  minimumBrLen_(),
46  maximumBrLen_(),
47  brLenConstraint_(),
48  reparametrizeRoot_(reparametrizeRoot),
49  root1_(),
50  root2_()
51 {
52  init_(tree, modelSet, rDist, verbose);
53 }
54 
55 /******************************************************************************/
56 
60  modelSet_(lik.modelSet_),
61  brLenParameters_(lik.brLenParameters_),
62  pxy_(lik.pxy_),
63  dpxy_(lik.dpxy_),
64  d2pxy_(lik.d2pxy_),
65  rootFreqs_(lik.rootFreqs_),
66  nodes_(),
67  idToNode_(),
68  nbSites_(lik.nbSites_),
69  nbDistinctSites_(lik.nbDistinctSites_),
70  nbClasses_(lik.nbClasses_),
71  nbStates_(lik.nbStates_),
72  nbNodes_(lik.nbNodes_),
73  verbose_(lik.verbose_),
74  minimumBrLen_(lik.minimumBrLen_),
75  maximumBrLen_(lik.maximumBrLen_),
76  brLenConstraint_(lik.brLenConstraint_->clone()),
77  reparametrizeRoot_(lik.reparametrizeRoot_),
78  root1_(lik.root1_),
79  root2_(lik.root2_)
80 {
81  nodes_ = tree_->getNodes();
82  nodes_.pop_back(); // Remove the root node (the last added!).
83  // Rebuild nodes index:
84  for (unsigned int i = 0; i < nodes_.size(); i++)
85  {
86  const Node* node = nodes_[i];
87  idToNode_[node->getId()] = node;
88  }
89 }
90 
91 /******************************************************************************/
92 
95 {
97  modelSet_ = lik.modelSet_;
99  pxy_ = lik.pxy_;
100  dpxy_ = lik.dpxy_;
101  d2pxy_ = lik.d2pxy_;
102  rootFreqs_ = lik.rootFreqs_;
103  nodes_ = tree_->getNodes();
104  nodes_.pop_back(); // Remove the root node (the last added!).
105  nbSites_ = lik.nbSites_;
107  nbClasses_ = lik.nbClasses_;
108  nbStates_ = lik.nbStates_;
109  nbNodes_ = lik.nbNodes_;
110  verbose_ = lik.verbose_;
113  brLenConstraint_ = std::shared_ptr<ConstraintInterface>(lik.brLenConstraint_->clone());
115  root1_ = lik.root1_;
116  root2_ = lik.root2_;
117  // Rebuild nodes index:
118  for (unsigned int i = 0; i < nodes_.size(); i++)
119  {
120  const Node* node = nodes_[i];
121  idToNode_[node->getId()] = node;
122  }
123  return *this;
124 }
125 
126 /******************************************************************************/
127 
129  const Tree& tree,
130  std::shared_ptr<SubstitutionModelSet> modelSet,
131  std::shared_ptr<DiscreteDistributionInterface> rDist,
132  bool verbose)
133 {
134  TreeTools::checkIds(tree, true);
135  tree_ = make_unique< TreeTemplate<Node>>(tree);
136  root1_ = tree_->getRootNode()->getSon(0)->getId();
137  root2_ = tree_->getRootNode()->getSon(1)->getId();
138  nodes_ = tree_->getNodes();
139  nodes_.pop_back(); // Remove the root node (the last added!).
140  nbNodes_ = nodes_.size();
141  // Build nodes index:
142  for (size_t i = 0; i < nodes_.size(); ++i)
143  {
144  const Node* node = nodes_[i];
145  idToNode_[node->getId()] = node;
146  }
147  nbClasses_ = rateDistribution_->getNumberOfCategories();
148 
149  verbose_ = verbose;
150 
151  minimumBrLen_ = 0.000001;
152  maximumBrLen_ = 10000;
153  brLenConstraint_ = std::make_shared<IntervalConstraint>(minimumBrLen_, maximumBrLen_, true, true);
154  setSubstitutionModelSet(modelSet);
155 }
156 
157 /******************************************************************************/
158 
160  std::shared_ptr<SubstitutionModelSet> modelSet
161  )
162 {
163  // Check:
164  if (data_)
165  {
166  if (modelSet->getAlphabet()->getAlphabetType() != data_->getAlphabet()->getAlphabetType())
167  throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::setSubstitutionModelSet(). Model alphabet do not match existing data.");
168  }
169 
170  modelSet_ = modelSet;
171 
172  if (data_)
173  {
174  if (modelSet->getNumberOfStates() != modelSet_->getNumberOfStates())
175  setData(*data_); // Have to reinitialize the whole data structure.
176  }
177 
178  nbStates_ = modelSet->getNumberOfStates();
179 
180  // Allocate transition probabilities arrays:
181  for (unsigned int l = 0; l < nbNodes_; l++)
182  {
183  // For each son node,i
184  Node* son = nodes_[l];
185 
186  VVVdouble* pxy__son = &pxy_[son->getId()];
187  pxy__son->resize(nbClasses_);
188  for (unsigned int c = 0; c < nbClasses_; c++)
189  {
190  VVdouble* pxy__son_c = &(*pxy__son)[c];
191  pxy__son_c->resize(nbStates_);
192  for (unsigned int x = 0; x < nbStates_; x++)
193  {
194  (*pxy__son_c)[x].resize(nbStates_);
195  }
196  }
197 
198  VVVdouble* dpxy__son = &dpxy_[son->getId()];
199  dpxy__son->resize(nbClasses_);
200  for (unsigned int c = 0; c < nbClasses_; c++)
201  {
202  VVdouble* dpxy__son_c = &(*dpxy__son)[c];
203  dpxy__son_c->resize(nbStates_);
204  for (unsigned int x = 0; x < nbStates_; x++)
205  {
206  (*dpxy__son_c)[x].resize(nbStates_);
207  }
208  }
209 
210  VVVdouble* d2pxy__son = &d2pxy_[son->getId()];
211  d2pxy__son->resize(nbClasses_);
212  for (unsigned int c = 0; c < nbClasses_; c++)
213  {
214  VVdouble* d2pxy__son_c = &(*d2pxy__son)[c];
215  d2pxy__son_c->resize(nbStates_);
216  for (unsigned int x = 0; x < nbStates_; x++)
217  {
218  (*d2pxy__son_c)[x].resize(nbStates_);
219  }
220  }
221  }
222 
223  // We have to reset parameters. If the instance is not initialized, this will be done by the initialize method.
224  if (initialized_)
225  {
226  initParameters();
229  }
230 }
231 
232 /******************************************************************************/
233 
235 {
236  if (initialized_)
237  throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Object is already initialized.");
238  if (!data_)
239  throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::initialize(). Data are no set.");
240  initParameters();
241  initialized_ = true;
242 
245 }
246 
247 /******************************************************************************/
248 
250 {
251  if (!initialized_)
252  throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getBranchLengthsParameters(). Object is not initialized.");
254 }
255 
256 /******************************************************************************/
257 
259 {
260  if (!initialized_)
261  throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::getSubstitutionModelParameters(). Object is not initialized.");
262  return modelSet_->getParameters().getCommonParametersWith(getParameters());
263 }
264 
265 /******************************************************************************/
266 
268 {
269  // Reset parameters:
271 
272  // Branch lengths:
275 
276  // Substitution model:
277  addParameters_(modelSet_->getIndependentParameters());
278 
279  // Rate distribution:
280  addParameters_(rateDistribution_->getIndependentParameters());
281 }
282 
283 /******************************************************************************/
284 
286 {
287  if (!initialized_)
288  throw Exception("AbstractBranchNonHomogeneousTreeLikelihood::applyParameters(). Object not initialized.");
289  // Apply branch lengths:
290  for (unsigned int i = 0; i < nbNodes_; i++)
291  {
292  int id = nodes_[i]->getId();
293  if (reparametrizeRoot_ && id == root1_)
294  {
295  const Parameter* rootBrLen = &parameter("BrLenRoot");
296  const Parameter* rootPos = &parameter("RootPosition");
297  nodes_[i]->setDistanceToFather(rootBrLen->getValue() * rootPos->getValue());
298  }
299  else if (reparametrizeRoot_ && id == root2_)
300  {
301  const Parameter* rootBrLen = &parameter("BrLenRoot");
302  const Parameter* rootPos = &parameter("RootPosition");
303  nodes_[i]->setDistanceToFather(rootBrLen->getValue() * (1. - rootPos->getValue()));
304  }
305  else
306  {
307  const Parameter* brLen = &parameter(string("BrLen") + TextTools::toString(i));
308  if (brLen)
309  nodes_[i]->setDistanceToFather(brLen->getValue());
310  }
311  }
312  // Apply substitution model parameters:
313 
314  modelSet_->matchParametersValues(getParameters());
315  // Apply rate distribution parameters:
316  rateDistribution_->matchParametersValues(getParameters());
317 }
318 
319 /******************************************************************************/
320 
322 {
324  double l1 = 0, l2 = 0;
325  for (unsigned int i = 0; i < nbNodes_; i++)
326  {
327  double d = minimumBrLen_;
328  if (!nodes_[i]->hasDistanceToFather())
329  {
330  if (verbose)
331  ApplicationTools::displayWarning("Missing branch length " + TextTools::toString(i) + ". Value is set to " + TextTools::toString(minimumBrLen_));
332  nodes_[i]->setDistanceToFather(minimumBrLen_);
333  }
334  else
335  {
336  d = nodes_[i]->getDistanceToFather();
337  if (d < minimumBrLen_)
338  {
339  if (verbose)
340  ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too small: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(minimumBrLen_));
341  nodes_[i]->setDistanceToFather(minimumBrLen_);
342  d = minimumBrLen_;
343  }
344  if (d > maximumBrLen_)
345  {
346  if (verbose)
347  ApplicationTools::displayWarning("Branch length " + TextTools::toString(i) + " is too big: " + TextTools::toString(d) + ". Value is set to " + TextTools::toString(maximumBrLen_));
348  nodes_[i]->setDistanceToFather(maximumBrLen_);
349  d = maximumBrLen_;
350  }
351  }
352  if (reparametrizeRoot_ && nodes_[i]->getId() == root1_)
353  l1 = d;
354  else if (reparametrizeRoot_ && nodes_[i]->getId() == root2_)
355  l2 = d;
356  else
357  {
359  }
360  }
361  if (reparametrizeRoot_)
362  {
364  brLenParameters_.addParameter(Parameter("RootPosition", l1 / (l1 + l2), Parameter::PROP_CONSTRAINT_EX));
365  }
366 }
367 
368 /*******************************************************************************/
369 
371 {
372  for (unsigned int l = 0; l < nbNodes_; l++)
373  {
374  // For each node,
375  Node* node = nodes_[l];
377  }
378  rootFreqs_ = modelSet_->getRootFrequencies();
379 }
380 
381 /*******************************************************************************/
382 
384 {
385  auto model = modelSet_->getModelForNode(node->getId());
386  double l = node->getDistanceToFather();
387 
388  // Computes all pxy and pyx once for all:
389  VVVdouble* pxy__node = &pxy_[node->getId()];
390  for (unsigned int c = 0; c < nbClasses_; c++)
391  {
392  VVdouble* pxy__node_c = &(*pxy__node)[c];
393  RowMatrix<double> Q = model->getPij_t(l * rateDistribution_->getCategory(c));
394  for (unsigned int x = 0; x < nbStates_; x++)
395  {
396  Vdouble* pxy__node_c_x = &(*pxy__node_c)[x];
397  for (unsigned int y = 0; y < nbStates_; y++)
398  {
399  (*pxy__node_c_x)[y] = Q(x, y);
400  }
401  }
402  }
403 
405  {
406  // Computes all dpxy/dt once for all:
407  VVVdouble* dpxy__node = &dpxy_[node->getId()];
408 
409  for (unsigned int c = 0; c < nbClasses_; c++)
410  {
411  VVdouble* dpxy__node_c = &(*dpxy__node)[c];
412  double rc = rateDistribution_->getCategory(c);
413 
414  RowMatrix<double> dQ = model->getdPij_dt(l * rc);
415 
416  for (unsigned int x = 0; x < nbStates_; x++)
417  {
418  Vdouble* dpxy__node_c_x = &(*dpxy__node_c)[x];
419  for (unsigned int y = 0; y < nbStates_; y++)
420  {
421  (*dpxy__node_c_x)[y] = rc * dQ(x, y);
422  }
423  }
424  }
425  }
426 
428  {
429  // Computes all d2pxy/dt2 once for all:
430  VVVdouble* d2pxy__node = &d2pxy_[node->getId()];
431  for (unsigned int c = 0; c < nbClasses_; c++)
432  {
433  VVdouble* d2pxy__node_c = &(*d2pxy__node)[c];
434  double rc = rateDistribution_->getCategory(c);
435  RowMatrix<double> d2Q = model->getd2Pij_dt2(l * rc);
436  for (unsigned int x = 0; x < nbStates_; x++)
437  {
438  Vdouble* d2pxy__node_c_x = &(*d2pxy__node_c)[x];
439  for (unsigned int y = 0; y < nbStates_; y++)
440  {
441  (*d2pxy__node_c_x)[y] = rc * rc * d2Q(x, y);
442  }
443  }
444  }
445  }
446 }
447 
448 /*******************************************************************************/
Partial implementation of the DiscreteRatesAcrossSitesTreeLikelihood interface.
AbstractDiscreteRatesAcrossSitesTreeLikelihood & operator=(const AbstractDiscreteRatesAcrossSitesTreeLikelihood &tl)
Partial implementation for branch non-homogeneous models of the TreeLikelihood interface.
virtual void initParameters()
This builds the parameters list from all parametrizable objects, i.e. substitution model,...
ParameterList getBranchLengthsParameters() const override
Get the branch lengths parameters.
AbstractNonHomogeneousTreeLikelihood & operator=(const AbstractNonHomogeneousTreeLikelihood &lik)
Assignation operator.
void initialize() override
Init the likelihood object.
virtual void computeAllTransitionProbabilities()
Fill the pxy_, dpxy_ and d2pxy_ arrays for all nodes.
void setSubstitutionModelSet(std::shared_ptr< SubstitutionModelSet > modelSet) override
std::shared_ptr< ConstraintInterface > brLenConstraint_
void init_(const Tree &tree, std::shared_ptr< SubstitutionModelSet > modelSet, std::shared_ptr< DiscreteDistributionInterface > rDist, bool verbose)
Method called by constructor.
ParameterList getSubstitutionModelParameters() const override
Get the parameters associated to substitution model(s).
virtual void computeTransitionProbabilitiesForNode(const Node *node)
Fill the pxy_, dpxy_ and d2pxy_ arrays for one node.
std::map< int, const Node * > idToNode_
An index linking nodes to their id, for faster access than the getNode() method.
AbstractNonHomogeneousTreeLikelihood(const Tree &tree, std::shared_ptr< SubstitutionModelSet > modelSet, std::shared_ptr< DiscreteDistributionInterface > rDist, bool verbose=true, bool reparametrizeRoot=true)
std::vector< Node * > nodes_
Pointer toward all nodes in the tree.
virtual void applyParameters()
All parameters are stored in a parameter list. This function apply these parameters to the substituti...
const Parameter & parameter(const std::string &name) const override
virtual void fireParameterChanged(const ParameterList &parameters)
virtual void addParameters_(const ParameterList &parameters)
const ParameterList & getParameters() const override
const Tree & tree() const
Get the tree (topology and branch lengths).
std::unique_ptr< const AlignmentDataInterface > data_
std::shared_ptr< TreeTemplate< Node > > tree_
static void displayWarning(const std::string &text)
The phylogenetic node class.
Definition: Node.h:59
virtual int getId() const
Get the node's id.
Definition: Node.h:170
virtual double getDistanceToFather() const
Get the distance to the father node is there is one, otherwise throw a NodeException.
Definition: Node.h:250
virtual ParameterList getCommonParametersWith(const ParameterList &params) const
virtual void addParameter(const Parameter &param)
virtual void reset()
virtual double getValue() const
static const std::shared_ptr< IntervalConstraint > PROP_CONSTRAINT_EX
virtual void setData(const AlignmentDataInterface &sites)=0
Set the dataset for which the likelihood must be evaluated.
static bool checkIds(const Tree &tree, bool throwException)
Check if the ids are uniques.
Definition: TreeTools.cpp:747
Interface for phylogenetic tree objects.
Definition: Tree.h:115
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::vector< VVdouble > VVVdouble
std::vector< Vdouble > VVdouble