bpp-phyl3  3.0.0
LikelihoodCalculationSingleProcess.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_PHYL_LIKELIHOOD_DATAFLOW_LIKELIHOODCALCULATIONSINGLEPROCESS_H
6 #define BPP_PHYL_LIKELIHOOD_DATAFLOW_LIKELIHOODCALCULATIONSINGLEPROCESS_H
7 
10 
13 #include "Bpp/Phyl/SitePatterns.h"
20 
21 namespace bpp
22 {
47 class ProcessTree;
48 class ForwardLikelihoodTree;
49 class BackwardLikelihoodTree;
50 
51 // using RowLik = Eigen::Matrix<double, 1, Eigen::Dynamic>;
52 // using MatrixLik = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
53 
65 
68 
75 
84 
87 
90 
92 
94 
95 /*
96  * @brief DAG of the conditional likelihoods (product of above and
97  * below likelihoods), with same topology as forward & backward
98  * likelihood DAGs.
99  *
100  */
101 
103 
105 
107 
109 
110 using DAGindexes = std::vector<uint>;
111 
114 {
115 private:
117  {
118 public:
119  std::shared_ptr<ProcessTree> phyloTree;
120  std::shared_ptr<ForwardLikelihoodTree> flt;
121 
125  std::shared_ptr<BackwardLikelihoodTree> blt;
126 
130  std::shared_ptr<ConditionalLikelihoodDAG> clt;
131 
135  std::shared_ptr<SiteLikelihoodsDAG> lt;
136 
146  std::shared_ptr<SiteLikelihoodsTree> speciesLt;
147 
149  };
150 
156  {
157 public:
158  std::shared_ptr<ProcessTree> treeNode_;
159  std::shared_ptr<ConfiguredModel> modelNode_; // Used for
160  // StateMap and root frequencies if stationarity
161 
162  std::shared_ptr<ConfiguredFrequencySet> rootFreqsNode_;
163  std::shared_ptr<ConfiguredDistribution> ratesNode_;
164  };
165 
166  /************************************/
167  /* Dependencies */
168 
169  std::shared_ptr<const SubstitutionProcessInterface> process_;
170  std::shared_ptr<const AlignmentDataInterface> psites_;
171 
172  /*****************************
173  ****** Patterns
174  *
175  * @brief Links between sites and patterns.
176  *
177  * The size of this vector is equal to the number of sites in the container,
178  * each element corresponds to a site in the container and points to the
179  * corresponding column in the likelihood array of the root node.
180  * If the container contains no repeated site, there will be a strict
181  * equivalence between each site and the likelihood array of the root node.
182  * However, if this is not the case, some pointers may point toward the same
183  * element in the likelihood array.
184  */
185 
187 
192  std::shared_ptr<SiteWeights> rootWeights_;
193  std::shared_ptr<AlignmentDataInterface> shrunkData_;
194 
195  /************************************/
196  /* DataFlow objects */
197 
199 
201 
202  /* Likelihood Trees with for all rate categories */
203  std::vector<RateCategoryTrees> vRateCatTrees_;
204 
205  /*
206  * Node for the probabilities of the rate classes
207  *
208  */
209 
211 
212  /* Likelihood tree on mean likelihoods on rate categories */
213  std::shared_ptr<ConditionalLikelihoodTree> condLikelihoodTree_;
214  /**************************************/
215 
216 public:
218  std::shared_ptr<const AlignmentDataInterface> sites,
219  std::shared_ptr<const SubstitutionProcessInterface> process);
220 
222  std::shared_ptr<const SubstitutionProcessInterface> process);
223 
224  /*
225  * @brief Build using Nodes of CollectionNodes.
226  *
227  * @param collection The CollectionNodes
228  * @param nProcess the process Number in the collection
229  * @param nData the data Number in the collection
230  */
231  LikelihoodCalculationSingleProcess(std::shared_ptr<CollectionNodes> collection,
232  std::shared_ptr<const AlignmentDataInterface> sites,
233  size_t nProcess);
234 
235 
236  LikelihoodCalculationSingleProcess(std::shared_ptr<CollectionNodes> collection,
237  size_t nProcess);
238 
239 
240  /*
241  * @brief Copy the likelihood calculation IN THE SAME CONTEXT.
242  *
243  */
244 
246 
248  {
249  throw bpp::Exception("LikelihoodCalculationSingleProcess clone should not happen.");
250  }
251 
252  void setData(std::shared_ptr<const AlignmentDataInterface> sites)
253  {
254  if (psites_)
256 
257  psites_ = sites;
258  setPatterns_();
259  if (isInitialized())
260  {
261  vRateCatTrees_.clear();
263  }
264  }
265 
269  void setNumericalDerivateConfiguration(double delta, const NumericalDerivativeType& config);
270 
276  void setClockLike(double rate = 1);
277 
278  /**************************************************/
279 
280  /*
281  * @brief Return the loglikehood (see as a function, ie
282  * computation node).
283  *
284  */
286  {
287  if (!psites_)
288  throw Exception("LikelihoodCalculationSingleProcess::makeLikelihoods : data not set.");
289 
291  }
292 
293 
300  const DAGindexes& getNodesIds(uint speciesId) const;
301 
309  const DAGindexes& getEdgesIds(uint speciesId, size_t nCat) const;
310 
311  size_t getNumberOfSites() const
312  {
313  return psites_ ? psites_->getNumberOfSites() : 0;
314  }
315 
317  {
318  return shrunkData_ ? shrunkData_->getNumberOfSites() : getNumberOfSites();
319  }
320 
328  {
329  return *process_;
330  }
331 
332  std::shared_ptr<const SubstitutionProcessInterface> getSubstitutionProcess() const
333  {
334  return process_;
335  }
336 
338  {
339  return *psites_;
340  }
341 
342  std::shared_ptr<const AlignmentDataInterface> getData() const
343  {
344  return psites_;
345  }
346 
348  {
349  return processNodes_.modelNode_->targetValue()->stateMap();
350  }
351 
352  std::shared_ptr<const StateMapInterface> getStateMap() const
353  {
354  return processNodes_.modelNode_->targetValue()->getStateMap();
355  }
356 
357  /************************************************
358  *** Patterns
359  ****************************/
360 
361  /*
362  * @brief the relations between real position and shrunked data
363  * positions.
364  *
365  * @param currentPosition : position in real data
366  *
367  * @return matching position in shrunked data
368  *
369  */
370  size_t getRootArrayPosition(size_t currentPosition) const
371  {
372  return rootPatternLinks_ ? rootPatternLinks_->targetValue()(Eigen::Index(currentPosition)) : currentPosition;
373  }
374 
375  const PatternType& getRootArrayPositions() const { return rootPatternLinks_->targetValue(); }
376 
378  {
379  return rootPatternLinks_;
380  }
381 
383  {
384  return *shrunkData_;
385  }
386 
387  std::shared_ptr<const AlignmentDataInterface> getShrunkData() const
388  {
389  return shrunkData_;
390  }
391 
392  /*
393  * @brief Expands (ie reverse of shrunkage) a vector computed of
394  * shrunked data (ie from one value per distinct site to one
395  * value per site).
396  *
397  */
399  {
400  if (!rootPatternLinks_)
401  return vector;
402  else
404  }
405 
406  /*
407  * @brief Expands (ie reverse of shrunkage) a matrix computed of
408  * shrunked data (ie from one value per distinct site to one
409  * value per site). Columns are sites.
410  *
411  */
413  {
414  if (!rootPatternLinks_)
415  return matrix;
416  else
417  return CWisePattern<MatrixLik>::create(getContext_(), {matrix, rootPatternLinks_}, MatrixDimension (matrix->targetValue().rows(), Eigen::Index (getData()->getNumberOfSites())));
418  }
419 
420  /*
421  * @brief: Get the weight of a position in the shrunked data (ie
422  * the number of sites corresponding to this site)
423  *
424  */
425  unsigned int getWeight(size_t pos) const
426  {
427  return (uint)(rootWeights_->targetValue()(Eigen::Index(pos)));
428  }
429 
430  std::shared_ptr<SiteWeights> getRootWeights()
431  {
432  return rootWeights_;
433  }
434 
436  {
437  return rFreqs_;
438  }
439 
440  /********************************************************
441  * @Likelihoods
442  *
443  *****************************************************/
444 
445  /*
446  * @brief Get Matrix of Conditional Likelihoods at Node *
447  *
448  * @param nodeId Id of the node in PhyloTree, ie species id
449  * @param shrunk if matrix is on shrunked data (default: false)
450  *
451  */
452  ConditionalLikelihoodRef getLikelihoodsAtNode(uint nodeId, bool shrunk = false)
453  {
454  if (!(condLikelihoodTree_ && condLikelihoodTree_->hasNode(nodeId)))
455  makeLikelihoodsAtNode_(nodeId);
456 
457  auto vv = condLikelihoodTree_->getNode(nodeId);
458 
459  return shrunk ? vv : expandMatrix(vv);
460  }
461 
462  /*
463  * @brief Get the number of rate classes
464  *
465  */
466  size_t getNumberOfClasses() const
467  {
468  return vRateCatTrees_.size();
469  }
470 
471  /*
472  * @brief Get forward shrunked likelihood matrix at Node (ie just
473  * above the node), for a given rate class.
474  *
475  * @param nodeId Node Index in the forward tree (! ie in the
476  * computation tree, not the species tree).
477  *
478  * @param nCat Rate class category
479  *
480  */
481 
483 
484  /*
485  * @brief Get backward shrunked likelihood matrix at Node (ie at the
486  * top of the node), for a given rate class.
487  *
488  * @param nodeId Node Index in the backward tree (! ie in the
489  * computation tree, not the species tree).
490  *
491  * @param nCat Rate class category
492  *
493  */
494 
496 
497  /*
498  * @brief Get backward shrunked likelihood matrix at Edge (ie at
499  * the top of the edge), for a given rate class.
500  *
501  * These likelihoods are multiplied by the probability of the edge
502  *
503  * @param edgeId Edge Index in the backward tree (! ie in the
504  * computation tree, not the species tree).
505  *
506  * @param nCat Rate class category
507  *
508  */
509 
511 
512  /*
513  * @brief Get shrunked conditional likelihood matrix at Node (ie
514  * just above the node), for a given rate class.
515  *
516  * These likelihoods are multiplied by the probability of the node.
517  *
518  * @param nodeId Node Index in the forward tree (! ie in the
519  * computation tree, not the species tree).
520  *
521  * @param nCat Rate class category
522  *
523  */
524 
526 
539  SiteLikelihoodsRef getLikelihoodsAtNodeForClass(uint nodeId, size_t nCat);
540 
545  {
546  auto allIndex = process_->parametrizablePhyloTree().getAllNodesIndexes();
547 
548  for (auto id: allIndex)
549  {
551  }
552  }
553 
554  /*********************************/
555  /* @brief Methods for external usage (after lik computation) */
556 
563  RowLik getSiteLikelihoodsForAClass(size_t nCat, bool shrunk = false);
564 
572 
573 
579  std::shared_ptr<ProcessTree> getTreeNode(size_t nCat)
580  {
581  if (nCat >= vRateCatTrees_.size())
582  throw Exception("LikelihoodCalculationSingleProcess::getTreeNode : bad class number " + TextTools::toString(nCat));
583 
584  return vRateCatTrees_[nCat].phyloTree;
585  }
586 
587  std::shared_ptr<ForwardLikelihoodTree> getForwardLikelihoodTree(size_t nCat);
588 
589  std::shared_ptr<BackwardLikelihoodTree> getBackwardLikelihoodTree(size_t nCat);
590 
591 private:
592  void setPatterns_();
593 
595 
596  void makeProcessNodes_();
597 
598  void makeRootFreqs_();
599 
600  void makeLikelihoodsAtRoot_();
601 
606  void makeProcessNodes_(CollectionNodes& pl, size_t nProc);
607 
617  void makeLikelihoodsAtNode_(uint nodeId);
618 
627  void makeLikelihoodsAtDAGNode_(uint nodeId);
628 
629  std::shared_ptr<SiteLikelihoodsTree> getSiteLikelihoodsTree_(size_t nCat);
630 
631 public:
632  void cleanAllLikelihoods();
633 
635 };
636 } // namespace bpp
637 #endif // BPP_PHYL_LIKELIHOOD_DATAFLOW_LIKELIHOODCALCULATIONSINGLEPROCESS_H
static ValueRef< R > create(Context &c, NodeRefVec &&deps, const Dimension< R > &dim)
Build a new CWisePattern node.
Context for dataflow node construction.
Definition: DataFlow.h:527
DF Nodes used in the process. ProcessTree is used without any rate multiplier.
std::shared_ptr< ConditionalLikelihoodDAG > clt
for each node n: clt[n] = flt[n] * dlt[n]
std::shared_ptr< SiteLikelihoodsDAG > lt
for each node n: clt[n] = sum_states(clt[n][s])
std::shared_ptr< SiteLikelihoodsTree > speciesLt
for each node n: lt[n] = sum_{state s} flt[n][s]
std::shared_ptr< BackwardLikelihoodTree > blt
backward likelihood tree (only computed when needed)
SiteLikelihoodsRef getLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
Get shrunked conditional likelihood matrix at Node (ie just above the node), for a given rate class.
std::shared_ptr< SiteLikelihoodsTree > getSiteLikelihoodsTree_(size_t nCat)
LikelihoodCalculationSingleProcess * clone() const
std::shared_ptr< const SubstitutionProcessInterface > getSubstitutionProcess() const
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
ValueRef< MatrixLik > expandMatrix(ValueRef< MatrixLik > matrix)
ConditionalLikelihoodRef getLikelihoodsAtNode(uint nodeId, bool shrunk=false)
std::shared_ptr< ConditionalLikelihoodTree > condLikelihoodTree_
size_t getRootArrayPosition(size_t currentPosition) const
std::shared_ptr< ForwardLikelihoodTree > getForwardLikelihoodTree(size_t nCat)
std::shared_ptr< const SubstitutionProcessInterface > process_
AllRatesSiteLikelihoods getSiteLikelihoodsForAllClasses(bool shrunk=false)
Output array (Classes X Sites) of likelihoods for all sites & classes.
std::shared_ptr< const AlignmentDataInterface > psites_
LikelihoodCalculationSingleProcess(Context &context, std::shared_ptr< const AlignmentDataInterface > sites, std::shared_ptr< const SubstitutionProcessInterface > process)
std::shared_ptr< const StateMapInterface > getStateMap() const
std::shared_ptr< SiteWeights > rootWeights_
The frequency of each site.
ConditionalLikelihoodRef getBackwardLikelihoodsAtEdgeForClass(uint edgeId, size_t nCat)
ConditionalLikelihoodRef getForwardLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
std::shared_ptr< BackwardLikelihoodTree > getBackwardLikelihoodTree(size_t nCat)
void makeLikelihoodsAtNode_(uint nodeId)
Compute the likelihood at a given node in the tree, which number may not be the same number in the DA...
void setData(std::shared_ptr< const AlignmentDataInterface > sites)
const AlignmentDataInterface & shrunkData() const
void makeLikelihoodsAtDAGNode_(uint nodeId)
Compute the likelihood at a given node in the DAG,.
void makeLikelihoodsTree()
make backward likelihood tree (only computed when needed)
std::shared_ptr< const AlignmentDataInterface > getShrunkData() const
ValueRef< RowLik > expandVector(ValueRef< RowLik > vector)
std::shared_ptr< AlignmentDataInterface > shrunkData_
const DAGindexes & getEdgesIds(uint speciesId, size_t nCat) const
Get indexes of the non-empty edges in the Likelihood DAG that have a given species index for a given ...
ConditionalLikelihoodRef getBackwardLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
const DAGindexes & getNodesIds(uint speciesId) const
Get indexes of the nodes in the Likelihood DAG that have a given species index.
ConditionalLikelihoodRef getConditionalLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
RowLik getSiteLikelihoodsForAClass(size_t nCat, bool shrunk=false)
Get site likelihoods for a rate category.
std::shared_ptr< ProcessTree > getTreeNode(size_t nCat)
Get process tree for a rate category.
void setNumericalDerivateConfiguration(double delta, const NumericalDerivativeType &config)
Set derivation procedure (see DataFlowNumeric.h)
std::shared_ptr< const AlignmentDataInterface > getData() const
virtual bool isInitialized() const
r = constant_value.
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:25
This interface describes the substitution process along the tree and sites of the alignment.
std::string toString(T t)
Defines the basic types of data flow nodes.
std::shared_ptr< Value< T > > ValueRef
Shared pointer alias for Value<T>.
Definition: DataFlow.h:84
std::vector< uint > DAGindexes
Helper: create a map with mutable dataflow nodes for each branch of the tree. The map is indexed by b...
ValueRef< MatrixLik > ConditionalLikelihoodRef
ExtendedFloatMatrixXd MatrixLik
Definition: Definitions.h:13
Eigen::Matrix< size_t, -1, 1 > PatternType
ValueRef< RowLik > SiteLikelihoodsRef
Basic matrix dimension type.