bpp-phyl3 3.0.0
LikelihoodCalculationSingleProcess.h
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#ifndef BPP_PHYL_LIKELIHOOD_DATAFLOW_LIKELIHOODCALCULATIONSINGLEPROCESS_H
6#define BPP_PHYL_LIKELIHOOD_DATAFLOW_LIKELIHOODCALCULATIONSINGLEPROCESS_H
7
10
20
21namespace bpp
22{
47class ProcessTree;
48class ForwardLikelihoodTree;
49class BackwardLikelihoodTree;
50
51// using RowLik = Eigen::Matrix<double, 1, Eigen::Dynamic>;
52// using MatrixLik = Eigen::Matrix<double, Eigen::Dynamic, Eigen::Dynamic>;
53
65
68
75
84
87
90
92
94
95/*
96 * @brief DAG of the conditional likelihoods (product of above and
97 * below likelihoods), with same topology as forward & backward
98 * likelihood DAGs.
99 *
100 */
101
103
105
107
109
110using DAGindexes = std::vector<uint>;
111
114{
115private:
117 {
118public:
119 std::shared_ptr<ProcessTree> phyloTree;
120 std::shared_ptr<ForwardLikelihoodTree> flt;
121
125 std::shared_ptr<BackwardLikelihoodTree> blt;
126
130 std::shared_ptr<ConditionalLikelihoodDAG> clt;
131
135 std::shared_ptr<SiteLikelihoodsDAG> lt;
136
146 std::shared_ptr<SiteLikelihoodsTree> speciesLt;
147
149 };
150
156 {
157public:
158 std::shared_ptr<ProcessTree> treeNode_;
159 std::shared_ptr<ConfiguredModel> modelNode_; // Used for
160 // StateMap and root frequencies if stationarity
161
162 std::shared_ptr<ConfiguredFrequencySet> rootFreqsNode_;
163 std::shared_ptr<ConfiguredDistribution> ratesNode_;
164 };
165
166 /************************************/
167 /* Dependencies */
168
169 std::shared_ptr<const SubstitutionProcessInterface> process_;
170 std::shared_ptr<const AlignmentDataInterface> psites_;
171
172 /*****************************
173 ****** Patterns
174 *
175 * @brief Links between sites and patterns.
176 *
177 * The size of this vector is equal to the number of sites in the container,
178 * each element corresponds to a site in the container and points to the
179 * corresponding column in the likelihood array of the root node.
180 * If the container contains no repeated site, there will be a strict
181 * equivalence between each site and the likelihood array of the root node.
182 * However, if this is not the case, some pointers may point toward the same
183 * element in the likelihood array.
184 */
185
187
192 std::shared_ptr<SiteWeights> rootWeights_;
193 std::shared_ptr<AlignmentDataInterface> shrunkData_;
194
195 /************************************/
196 /* DataFlow objects */
197
199
201
202 /* Likelihood Trees with for all rate categories */
203 std::vector<RateCategoryTrees> vRateCatTrees_;
204
205 /*
206 * Node for the probabilities of the rate classes
207 *
208 */
209
211
212 /* Likelihood tree on mean likelihoods on rate categories */
213 std::shared_ptr<ConditionalLikelihoodTree> condLikelihoodTree_;
214 /**************************************/
215
216public:
218 std::shared_ptr<const AlignmentDataInterface> sites,
219 std::shared_ptr<const SubstitutionProcessInterface> process);
220
222 std::shared_ptr<const SubstitutionProcessInterface> process);
223
224 /*
225 * @brief Build using Nodes of CollectionNodes.
226 *
227 * @param collection The CollectionNodes
228 * @param nProcess the process Number in the collection
229 * @param nData the data Number in the collection
230 */
231 LikelihoodCalculationSingleProcess(std::shared_ptr<CollectionNodes> collection,
232 std::shared_ptr<const AlignmentDataInterface> sites,
233 size_t nProcess);
234
235
236 LikelihoodCalculationSingleProcess(std::shared_ptr<CollectionNodes> collection,
237 size_t nProcess);
238
239
240 /*
241 * @brief Copy the likelihood calculation IN THE SAME CONTEXT.
242 *
243 */
244
246
248 {
249 throw bpp::Exception("LikelihoodCalculationSingleProcess clone should not happen.");
250 }
251
252 void setData(std::shared_ptr<const AlignmentDataInterface> sites)
253 {
254 if (psites_)
256
257 psites_ = sites;
258 setPatterns_();
259 if (isInitialized())
260 {
261 vRateCatTrees_.clear();
263 }
264 }
265
269 void setNumericalDerivateConfiguration(double delta, const NumericalDerivativeType& config);
270
276 void setClockLike(double rate = 1);
277
278 /**************************************************/
279
280 /*
281 * @brief Return the loglikehood (see as a function, ie
282 * computation node).
283 *
284 */
286 {
287 if (!psites_)
288 throw Exception("LikelihoodCalculationSingleProcess::makeLikelihoods : data not set.");
289
291 }
292
293
300 const DAGindexes& getNodesIds(uint speciesId) const;
301
309 const DAGindexes& getEdgesIds(uint speciesId, size_t nCat) const;
310
311 size_t getNumberOfSites() const
312 {
313 return psites_ ? psites_->getNumberOfSites() : 0;
314 }
315
317 {
318 return shrunkData_ ? shrunkData_->getNumberOfSites() : getNumberOfSites();
319 }
320
328 {
329 return *process_;
330 }
331
332 std::shared_ptr<const SubstitutionProcessInterface> getSubstitutionProcess() const
333 {
334 return process_;
335 }
336
338 {
339 return *psites_;
340 }
341
342 std::shared_ptr<const AlignmentDataInterface> getData() const
343 {
344 return psites_;
345 }
346
348 {
349 return processNodes_.modelNode_->targetValue()->stateMap();
350 }
351
352 std::shared_ptr<const StateMapInterface> getStateMap() const
353 {
354 return processNodes_.modelNode_->targetValue()->getStateMap();
355 }
356
357 /************************************************
358 *** Patterns
359 ****************************/
360
361 /*
362 * @brief the relations between real position and shrunked data
363 * positions.
364 *
365 * @param currentPosition : position in real data
366 *
367 * @return matching position in shrunked data
368 *
369 */
370 size_t getRootArrayPosition(size_t currentPosition) const
371 {
373 {
374 auto pos = Eigen::Index(currentPosition);
375 if (pos>=rootPatternLinks_->targetValue().rows())
376 throw BadSizeException("Forbidden access in getRootArrayPosition.",pos,rootPatternLinks_->targetValue().rows());
377 else
378 return rootPatternLinks_->targetValue()(pos);
379 }
380 else
381 return currentPosition;
382 }
383
384 const PatternType& getRootArrayPositions() const { return rootPatternLinks_->targetValue(); }
385
387 {
388 return rootPatternLinks_;
389 }
390
392 {
393 return *shrunkData_;
394 }
395
396 std::shared_ptr<const AlignmentDataInterface> getShrunkData() const
397 {
398 return shrunkData_;
399 }
400
401 /*
402 * @brief Expands (ie reverse of shrunkage) a vector computed of
403 * shrunked data (ie from one value per distinct site to one
404 * value per site).
405 *
406 */
408 {
410 return vector;
411 else
413 }
414
415 /*
416 * @brief Expands (ie reverse of shrunkage) a matrix computed of
417 * shrunked data (ie from one value per distinct site to one
418 * value per site). Columns are sites.
419 *
420 */
422 {
424 return matrix;
425 else
426 return CWisePattern<MatrixLik>::create(getContext_(), {matrix, rootPatternLinks_}, MatrixDimension (matrix->targetValue().rows(), Eigen::Index (getData()->getNumberOfSites())));
427 }
428
429 /*
430 * @brief: Get the weight of a position in the shrunked data (ie
431 * the number of sites corresponding to this site)
432 *
433 */
434 unsigned int getWeight(size_t pos) const
435 {
436 return (uint)(rootWeights_->targetValue()(Eigen::Index(pos)));
437 }
438
439 std::shared_ptr<SiteWeights> getRootWeights()
440 {
441 return rootWeights_;
442 }
443
445 {
446 return rFreqs_;
447 }
448
449 /********************************************************
450 * @Likelihoods
451 *
452 *****************************************************/
453
454 /*
455 * @brief Get Matrix of Conditional Likelihoods at Node *
456 *
457 * @param nodeId Id of the node in PhyloTree, ie species id
458 * @param shrunk if matrix is on shrunked data (default: false)
459 *
460 */
461 ConditionalLikelihoodRef getLikelihoodsAtNode(uint nodeId, bool shrunk = false)
462 {
463 if (!(condLikelihoodTree_ && condLikelihoodTree_->hasNode(nodeId)))
465
466 auto vv = condLikelihoodTree_->getNode(nodeId);
467
468 return shrunk ? vv : expandMatrix(vv);
469 }
470
471 /*
472 * @brief Get the number of rate classes
473 *
474 */
475 size_t getNumberOfClasses() const
476 {
477 return vRateCatTrees_.size();
478 }
479
480 /*
481 * @brief Get forward shrunked likelihood matrix at Node (ie just
482 * above the node), for a given rate class.
483 *
484 * @param nodeId Node Index in the forward tree (! ie in the
485 * computation tree, not the species tree).
486 *
487 * @param nCat Rate class category
488 *
489 */
490
492
493 /*
494 * @brief Get backward shrunked likelihood matrix at Node (ie at the
495 * top of the node), for a given rate class.
496 *
497 * @param nodeId Node Index in the backward tree (! ie in the
498 * computation tree, not the species tree).
499 *
500 * @param nCat Rate class category
501 *
502 */
503
505
506 /*
507 * @brief Get backward shrunked likelihood matrix at Edge (ie at
508 * the top of the edge), for a given rate class.
509 *
510 * These likelihoods are multiplied by the probability of the edge
511 *
512 * @param edgeId Edge Index in the backward tree (! ie in the
513 * computation tree, not the species tree).
514 *
515 * @param nCat Rate class category
516 *
517 */
518
520
521 /*
522 * @brief Get shrunked conditional likelihood matrix at Node (ie
523 * just above the node), for a given rate class.
524 *
525 * These likelihoods are multiplied by the probability of the node.
526 *
527 * @param nodeId Node Index in the forward tree (! ie in the
528 * computation tree, not the species tree).
529 *
530 * @param nCat Rate class category
531 *
532 */
533
535
548 SiteLikelihoodsRef getLikelihoodsAtNodeForClass(uint nodeId, size_t nCat);
549
554 {
555 auto allIndex = process_->parametrizablePhyloTree().getAllNodesIndexes();
556
557 for (auto id: allIndex)
558 {
560 }
561 }
562
563 /*********************************/
564 /* @brief Methods for external usage (after lik computation) */
565
572 RowLik getSiteLikelihoodsForAClass(size_t nCat, bool shrunk = false);
573
581
582
588 std::shared_ptr<ProcessTree> getTreeNode(size_t nCat)
589 {
590 if (nCat >= vRateCatTrees_.size())
591 throw Exception("LikelihoodCalculationSingleProcess::getTreeNode : bad class number " + TextTools::toString(nCat));
592
593 return vRateCatTrees_[nCat].phyloTree;
594 }
595
596 std::shared_ptr<ForwardLikelihoodTree> getForwardLikelihoodTree(size_t nCat);
597
598 std::shared_ptr<BackwardLikelihoodTree> getBackwardLikelihoodTree(size_t nCat);
599
600private:
601 void setPatterns_();
602
604
605 void makeProcessNodes_();
606
607 void makeRootFreqs_();
608
610
615 void makeProcessNodes_(CollectionNodes& pl, size_t nProc);
616
626 void makeLikelihoodsAtNode_(uint nodeId);
627
636 void makeLikelihoodsAtDAGNode_(uint nodeId);
637
638 std::shared_ptr<SiteLikelihoodsTree> getSiteLikelihoodsTree_(size_t nCat);
639
640public:
641 void cleanAllLikelihoods();
642
644};
645} // namespace bpp
646#endif // BPP_PHYL_LIKELIHOOD_DATAFLOW_LIKELIHOODCALCULATIONSINGLEPROCESS_H
static ValueRef< R > create(Context &c, NodeRefVec &&deps, const Dimension< R > &dim)
Build a new CWisePattern node.
Context for dataflow node construction.
Definition: DataFlow.h:527
DF Nodes used in the process. ProcessTree is used without any rate multiplier.
std::shared_ptr< ConditionalLikelihoodDAG > clt
for each node n: clt[n] = flt[n] * dlt[n]
std::shared_ptr< SiteLikelihoodsDAG > lt
for each node n: clt[n] = sum_states(clt[n][s])
std::shared_ptr< SiteLikelihoodsTree > speciesLt
for each node n: lt[n] = sum_{state s} flt[n][s]
std::shared_ptr< BackwardLikelihoodTree > blt
backward likelihood tree (only computed when needed)
SiteLikelihoodsRef getLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
Get shrunked conditional likelihood matrix at Node (ie just above the node), for a given rate class.
std::shared_ptr< ProcessTree > getTreeNode(size_t nCat)
Get process tree for a rate category.
std::shared_ptr< SiteLikelihoodsTree > getSiteLikelihoodsTree_(size_t nCat)
LikelihoodCalculationSingleProcess * clone() const
ConditionalLikelihoodRef getLikelihoodsAtNode(uint nodeId, bool shrunk=false)
std::shared_ptr< ConditionalLikelihoodTree > condLikelihoodTree_
const AlignmentDataInterface & shrunkData() const
size_t getRootArrayPosition(size_t currentPosition) const
ValueRef< MatrixLik > expandMatrix(ValueRef< MatrixLik > matrix)
std::shared_ptr< const SubstitutionProcessInterface > getSubstitutionProcess() const
std::shared_ptr< ForwardLikelihoodTree > getForwardLikelihoodTree(size_t nCat)
std::shared_ptr< const SubstitutionProcessInterface > process_
AllRatesSiteLikelihoods getSiteLikelihoodsForAllClasses(bool shrunk=false)
Output array (Classes X Sites) of likelihoods for all sites & classes.
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
std::shared_ptr< const AlignmentDataInterface > psites_
LikelihoodCalculationSingleProcess(Context &context, std::shared_ptr< const AlignmentDataInterface > sites, std::shared_ptr< const SubstitutionProcessInterface > process)
std::shared_ptr< SiteWeights > rootWeights_
The frequency of each site.
ConditionalLikelihoodRef getBackwardLikelihoodsAtEdgeForClass(uint edgeId, size_t nCat)
ConditionalLikelihoodRef getForwardLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
std::shared_ptr< BackwardLikelihoodTree > getBackwardLikelihoodTree(size_t nCat)
void makeLikelihoodsAtNode_(uint nodeId)
Compute the likelihood at a given node in the tree, which number may not be the same number in the DA...
void setData(std::shared_ptr< const AlignmentDataInterface > sites)
std::shared_ptr< const AlignmentDataInterface > getData() const
ValueRef< RowLik > expandVector(ValueRef< RowLik > vector)
void makeLikelihoodsAtDAGNode_(uint nodeId)
Compute the likelihood at a given node in the DAG,.
void makeLikelihoodsTree()
make backward likelihood tree (only computed when needed)
std::shared_ptr< const AlignmentDataInterface > getShrunkData() const
std::shared_ptr< AlignmentDataInterface > shrunkData_
const DAGindexes & getEdgesIds(uint speciesId, size_t nCat) const
Get indexes of the non-empty edges in the Likelihood DAG that have a given species index for a given ...
std::shared_ptr< const StateMapInterface > getStateMap() const
ConditionalLikelihoodRef getBackwardLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
const DAGindexes & getNodesIds(uint speciesId) const
Get indexes of the nodes in the Likelihood DAG that have a given species index.
ConditionalLikelihoodRef getConditionalLikelihoodsAtNodeForClass(uint nodeId, size_t nCat)
RowLik getSiteLikelihoodsForAClass(size_t nCat, bool shrunk=false)
Get site likelihoods for a rate category.
void setNumericalDerivateConfiguration(double delta, const NumericalDerivativeType &config)
Set derivation procedure (see DataFlowNumeric.h)
virtual bool isInitialized() const
r = constant_value.
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:25
This interface describes the substitution process along the tree and sites of the alignment.
std::string toString(T t)
Defines the basic types of data flow nodes.
std::shared_ptr< Value< T > > ValueRef
Shared pointer alias for Value<T>.
Definition: DataFlow.h:84
std::vector< uint > DAGindexes
Helper: create a map with mutable dataflow nodes for each branch of the tree. The map is indexed by b...
ValueRef< MatrixLik > ConditionalLikelihoodRef
ExtendedFloatMatrixXd MatrixLik
Definition: Definitions.h:13
Eigen::Matrix< size_t, -1, 1 > PatternType
ValueRef< RowLik > SiteLikelihoodsRef
Basic matrix dimension type.