bpp-phyl3  3.0.0
SingleProcessPhyloLikelihood.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_PHYL_LIKELIHOOD_PHYLOLIKELIHOODS_SINGLEPROCESSPHYLOLIKELIHOOD_H
6 #define BPP_PHYL_LIKELIHOOD_PHYLOLIKELIHOODS_SINGLEPROCESSPHYLOLIKELIHOOD_H
7 
8 #include <Bpp/Exceptions.h>
10 #include <Bpp/Numeric/Parameter.h>
12 #include <unordered_map>
13 
14 #include "../DataFlow/DataFlowNumeric.h"
15 #include "../DataFlow/LikelihoodCalculationSingleProcess.h"
16 #include "../DataFlow/Parameter.h"
18 
19 /* This file contains wrappers.
20  * They are used to bridge the gap between bpp::dataflow stuff and the rest of bpp.
21  *
22  */
23 
24 namespace bpp
25 {
32 {
33 protected:
34  // Store nodes
35  mutable std::shared_ptr<LikelihoodCalculationSingleProcess> likCal_;
36 
40  size_t nProc_;
41 
45  mutable std::unordered_map<std::string, ValueRef<RowLik>> firstOrderDerivativeVectors_;
46 
47  mutable std::unordered_map<std::pair<std::string, std::string>, ValueRef<RowLik>,
50 
51 public:
53  std::shared_ptr<LikelihoodCalculationSingleProcess> likCal,
54  const ParameterList& variableNodes,
55  size_t nProc = 0, size_t nData = 0) :
58  AbstractSingleDataPhyloLikelihood(context, likCal->getNumberOfSites(), likCal->stateMap().getNumberOfModelStates(), nData),
60  likCal_(likCal), nProc_(nProc)
61  {
62  shareParameters_(variableNodes);
63  }
64 
69  std::shared_ptr<LikelihoodCalculationSingleProcess> likCal,
70  size_t nProc = 0, size_t nData = 0) :
73  AbstractSingleDataPhyloLikelihood(context, likCal->getNumberOfSites(), likCal->stateMap().getNumberOfModelStates(), nData),
75  likCal_(likCal), nProc_(nProc)
76  {
77 #ifdef DEBUG
78  std::cerr << "SingleProcessPhyloLikelihood(context, LikelihoodCalculationSingleProcess)" << std::endl;
79 #endif
80  shareParameters_(likCal_->getIndependentParameters());
81 #ifdef DEBUG
82  std::cerr << "singleprocessphylolikelihood(context, likelihoodcalculationsingleprocess)" << std::endl;
83 #endif
84  }
85 
86  // Legacy boilerplate
88  {
89  throw Exception("SingleProcessPhyloLikelihood::clone should not be called.");
90  return new SingleProcessPhyloLikelihood (*this);
91  }
92 
93  void setData(std::shared_ptr<const AlignmentDataInterface> sites, size_t nData = 0) override
94  {
97  }
98 
103  std::shared_ptr<const AlignmentDataInterface> getShrunkData() const
104  {
106  }
107 
112  std::shared_ptr<const AlignmentDataInterface> getData() const override
113  {
115  }
116 
117  size_t getNumberOfSites() const override
118  {
120  }
121 
123  {
125  }
126 
132 
133 
134  std::shared_ptr<const Alphabet> getAlphabet() const override
135  {
137  }
138 
139  /*
140  * @brief Get the ParametrizablePhyloTree.
141  *
142  * Warning: the branch lengths may not be up to date with those of
143  * the LikelihoodCalculationSingleProcess.
144  *
145  */
146  std::shared_ptr<const ParametrizablePhyloTree> tree() const
147  {
149  }
150 
159  {
161  }
162 
170  std::shared_ptr<const SubstitutionProcessInterface> getSubstitutionProcess() const
171  {
173  }
174 
175  size_t getSubstitutionProcessNumber() const { return nProc_; }
176 
180  bool isInitialized() const override
181  {
183  }
184 
191  {
193  }
194 
196  {
198  }
199 
206  {
208  }
209 
216  {
218  }
219 
226  {
228  }
229 
237  {
239  }
240 
246  {
247  return *likCal_;
248  }
249 
250  std::shared_ptr<LikelihoodCalculation> getLikelihoodCalculation() const override
251  {
252  return likCal_;
253  }
254 
256  {
257  return *likCal_;
258  }
259 
260  std::shared_ptr<AlignedLikelihoodCalculation> getAlignedLikelihoodCalculation() const override
261  {
262  return likCal_;
263  }
264 
266  {
267  return *likCal_;
268  }
269 
270  std::shared_ptr<LikelihoodCalculationSingleProcess> getLikelihoodCalculationSingleProcess() const
271  {
272  return likCal_;
273  }
274 
275  // Get nodes of derivatives directly
276 
277  ValueRef<RowLik> getFirstOrderDerivativeVector (const std::string& variable) const
278  {
279  return firstOrderDerivativeVector(variable);
280  }
281 
282  ValueRef<RowLik> firstOrderDerivativeVector (const std::string& variable) const
283  {
284  const auto it = firstOrderDerivativeVectors_.find (variable);
285  if (it != firstOrderDerivativeVectors_.end ())
286  {
287  return it->second;
288  }
289  else
290  {
291  auto vector = getLikelihoodCalculationSingleProcess()->getSiteLikelihoods(true)->deriveAsValue (context_, accessVariableNode (variable));
292  firstOrderDerivativeVectors_.emplace (variable, vector);
293  return vector;
294  }
295  }
296 
297  ValueRef<RowLik> getSecondOrderDerivativeVector (const std::string& variable) const
298  {
299  return getSecondOrderDerivativeVector (variable, variable);
300  }
301 
302  ValueRef<RowLik> getSecondOrderDerivativeVector (const std::string& variable1,
303  const std::string& variable2) const
304  {
305  return secondOrderDerivativeVector (variable1, variable2);
306  }
307 
308  ValueRef<RowLik> secondOrderDerivativeVector (const std::string& variable1,
309  const std::string& variable2) const
310  {
311  const auto key = std::make_pair (variable1, variable2);
312  const auto it = secondOrderDerivativeVectors_.find (key);
313  if (it != secondOrderDerivativeVectors_.end ())
314  {
315  return it->second;
316  }
317  else
318  {
319  // Reuse firstOrderDerivative() to generate the first derivative with caching
320  auto vector =
321  firstOrderDerivativeVector (variable1)->deriveAsValue (context_, accessVariableNode (variable2));
322  secondOrderDerivativeVectors_.emplace (key, vector);
323  return vector;
324  }
325  }
326 
336 
338 
339  /*
340  *@brief return the likelihood of rate classes on each site.
341  *
342  *@return 2D-vector sites x classes
343  */
344 
346 
347  std::vector<size_t> getClassWithMaxPostProbPerSite() const;
348 
350 
352 };
353 } // namespace bpp
354 #endif // BPP_PHYL_LIKELIHOOD_PHYLOLIKELIHOODS_SINGLEPROCESSPHYLOLIKELIHOOD_H
virtual void shareParameters_(const ParameterList &parameters)
const Context & context() const override
static Node_DF & accessVariableNode(const Parameter &param)
virtual void setData(std::shared_ptr< const AlignmentDataInterface > sites, size_t nData=0)
Set the dataset for which the likelihood must be evaluated.
Context for dataflow node construction.
Definition: DataFlow.h:527
std::shared_ptr< const SubstitutionProcessInterface > getSubstitutionProcess() const
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
void setData(std::shared_ptr< const AlignmentDataInterface > sites)
std::shared_ptr< const AlignmentDataInterface > getShrunkData() const
std::shared_ptr< const AlignmentDataInterface > getData() const
virtual bool isInitialized() const
Wraps a dataflow graph as a function: resultNode = f(variableNodes).
ParameterList getBranchLengthParameters() const override
Get the independent branch lengths parameters.
std::shared_ptr< const AlignmentDataInterface > getData() const override
return a pointer to the original data.
size_t getNumberOfSites() const override
Get the number of sites in the dataset.
std::shared_ptr< const ParametrizablePhyloTree > tree() const
ValueRef< RowLik > firstOrderDerivativeVector(const std::string &variable) const
ParameterList getRootFrequenciesParameters() const override
Get the independent parameters associated with the root frequencies.
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess used to build the phylolikelihood.
size_t getNumberOfClasses() const
Get the number of model classes.
std::shared_ptr< const AlignmentDataInterface > getShrunkData() const
return a pointer to the compressed data.
std::shared_ptr< AlignedLikelihoodCalculation > getAlignedLikelihoodCalculation() const override
std::shared_ptr< const SubstitutionProcessInterface > getSubstitutionProcess() const
Return a smart pointer to the SubstitutionProcess used to build the phylolikelihood.
ParameterList getRateDistributionParameters() const override
Get the independent parameters associated to the rate distribution(s).
SingleProcessPhyloLikelihood(Context &context, std::shared_ptr< LikelihoodCalculationSingleProcess > likCal, const ParameterList &variableNodes, size_t nProc=0, size_t nData=0)
ParameterList getDerivableParameters() const override
VVdouble getPosteriorProbabilitiesPerSitePerClass() const
Get the posterior probabilities of each class, for each site.
ValueRef< RowLik > getFirstOrderDerivativeVector(const std::string &variable) const
LikelihoodCalculation & likelihoodCalculation() const override
SingleProcessPhyloLikelihood * clone() const override
std::shared_ptr< LikelihoodCalculation > getLikelihoodCalculation() const override
LikelihoodCalculationSingleProcess & likelihoodCalculationSingleProcess() const
Vdouble getPosteriorProbabilitiesForSitePerClass(size_t pos) const
AlignedLikelihoodCalculation & alignedLikelihoodCalculation() const override
std::shared_ptr< LikelihoodCalculationSingleProcess > getLikelihoodCalculationSingleProcess() const
ValueRef< RowLik > secondOrderDerivativeVector(const std::string &variable1, const std::string &variable2) const
ParameterList getNonDerivableParameters() const override
std::unordered_map< std::string, ValueRef< RowLik > > firstOrderDerivativeVectors_
For Dataflow computing.
std::shared_ptr< const Alphabet > getAlphabet() const override
Get the alphabet associated to the dataset.
std::vector< size_t > getClassWithMaxPostProbPerSite() const
void setData(std::shared_ptr< const AlignmentDataInterface > sites, size_t nData=0) override
Set the dataset for which the likelihood must be evaluated.
ValueRef< RowLik > getSecondOrderDerivativeVector(const std::string &variable) const
ParameterList getSubstitutionModelParameters() const override
Get the independent parameters associated to substitution model(s).
ValueRef< RowLik > getSecondOrderDerivativeVector(const std::string &variable1, const std::string &variable2) const
size_t nProc_
the Substitution Process number
std::unordered_map< std::pair< std::string, std::string >, ValueRef< RowLik >, StringPairHash > secondOrderDerivativeVectors_
std::shared_ptr< LikelihoodCalculationSingleProcess > likCal_
SingleProcessPhyloLikelihood(Context &context, std::shared_ptr< LikelihoodCalculationSingleProcess > likCal, size_t nProc=0, size_t nData=0)
Build the phylolikelihood using the independent parameters of the LikelihoodCalculation as its parameters.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
This interface describes the substitution process along the tree and sites of the alignment.
virtual ParameterList getSubstitutionModelParameters(bool independent) const =0
Methods to retrieve the parameters of specific objects.
virtual size_t getNumberOfClasses() const =0
virtual ParameterList getRootFrequenciesParameters(bool independent) const =0
virtual ParameterList getNonDerivableParameters() const =0
virtual ParameterList getRateDistributionParameters(bool independent) const =0
virtual ParameterList getBranchLengthParameters(bool independent) const =0
virtual std::shared_ptr< const ParametrizablePhyloTree > getParametrizablePhyloTree() const =0
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::shared_ptr< Value< T > > ValueRef
Shared pointer alias for Value<T>.
Definition: DataFlow.h:84
std::vector< VDataLik > VVDataLik
Definition: Definitions.h:24
std::vector< Vdouble > VVdouble