bpp-phyl3  3.0.0
OneProcessSequencePhyloLikelihood.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 
7 using namespace std;
8 using namespace bpp;
9 using namespace numeric;
10 
11 /******************************************************************************/
12 
13 OneProcessSequencePhyloLikelihood::OneProcessSequencePhyloLikelihood(
14  Context& context,
15  std::shared_ptr<OneProcessSequenceEvolution> evol,
16  size_t nSeqEvol) :
17  AbstractPhyloLikelihood(context),
19  AbstractSingleDataPhyloLikelihood(context, 0, (evol->getSubstitutionProcessNumbers().size() != 0) ? evol->substitutionProcess(evol->getSubstitutionProcessNumbers()[0]).getNumberOfStates() : 0, 0),
20  AbstractSequencePhyloLikelihood(context, evol, nSeqEvol),
22  AbstractParametrizableSequencePhyloLikelihood(context, evol, nSeqEvol),
23  mSeqEvol_(evol),
24  likCal_()
25 {
27 
28  auto sp = evol->getSubstitutionProcess();
29  likCal_ = std::make_shared<LikelihoodCalculationSingleProcess>(context, sp);
30 
31  shareParameters_(likCal_->getParameters());
32 }
33 
34 /******************************************************************************/
35 
37  Context& context,
38  std::shared_ptr<const AlignmentDataInterface> data,
39  std::shared_ptr<OneProcessSequenceEvolution> evol,
40  size_t nSeqEvol,
41  size_t nData) :
42  AbstractPhyloLikelihood(context),
43  AbstractAlignedPhyloLikelihood(context, data->getNumberOfSites()),
44  AbstractSingleDataPhyloLikelihood(context, data->getNumberOfSites(), (evol->getSubstitutionProcessNumbers().size() != 0) ? evol->substitutionProcess(evol->getSubstitutionProcessNumbers()[0]).getNumberOfStates() : 0, nData),
45  AbstractSequencePhyloLikelihood(context, evol, nData),
47  AbstractParametrizableSequencePhyloLikelihood(context, evol, nSeqEvol),
48  mSeqEvol_(evol),
49  likCal_()
50 {
52 
53  const auto& sp = evol->getSubstitutionProcess();
54  likCal_ = std::make_shared<LikelihoodCalculationSingleProcess>(context, data, sp);
55  shareParameters_(likCal_->getParameters());
56 }
57 
58 /******************************************************************************/
59 
61  std::shared_ptr<const AlignmentDataInterface> data,
62  std::shared_ptr<OneProcessSequenceEvolution> evol,
63  std::shared_ptr<CollectionNodes> collNodes,
64  size_t nSeqEvol,
65  size_t nData) :
66  AbstractPhyloLikelihood(collNodes->context()),
67  AbstractAlignedPhyloLikelihood(collNodes->context(), data->getNumberOfSites()),
68  AbstractSingleDataPhyloLikelihood(collNodes->context(), data->getNumberOfSites(), (evol->getSubstitutionProcessNumbers().size() != 0) ? evol->substitutionProcess(evol->getSubstitutionProcessNumbers()[0]).getNumberOfStates() : 0, nData),
69  AbstractSequencePhyloLikelihood(collNodes->context(), evol, nData),
71  AbstractParametrizableSequencePhyloLikelihood(collNodes->context(), evol, nSeqEvol),
72  mSeqEvol_(evol),
73  likCal_()
74 {
76 
77  likCal_ = std::make_shared<LikelihoodCalculationSingleProcess>(collNodes, data, nSeqEvol);
78 
79  shareParameters_(likCal_->getParameters());
80 }
81 
82 
83 /******************************************************************************/
84 
86 {
87  VVdouble vd;
88  copyEigenToBpp(getLikelihoodCalculationSingleProcess()->getSiteLikelihoodsForAllClasses(), vd);
89  return vd;
90 }
91 
92 
93 /******************************************************************************/
94 
96 {
97  auto rates = getLikelihoodCalculationSingleProcess()->substitutionProcess().getRateDistribution();
98 
99  if (!rates || rates->getNumberOfCategories() == 1)
100  return Vdouble(1, 1);
101  else
102  {
103  auto probas = rates->getProbabilities();
104  std::vector<DataLik> vv(rates->getNumberOfCategories());
105  for (size_t i = 0; i < vv.size(); i++)
106  {
107  vv[i] = probas[i] * (getLikelihoodCalculationSingleProcess()->getSiteLikelihoodsForAClass(i))(Eigen::Index(pos));
108  }
109 
110  auto sv = VectorTools::sum(vv);
111  Vdouble res(rates->getNumberOfCategories());
112 
113  for (size_t i = 0; i < vv.size(); i++)
114  {
115  res[i] = convert(vv[i] / sv);
116  }
117  return res;
118  }
119 }
120 
121 /******************************************************************************/
122 
124 {
125  auto rates = getLikelihoodCalculationSingleProcess()->substitutionProcess().getRateDistribution();
126 
127  auto nbS = getLikelihoodCalculationSingleProcess()->getNumberOfSites();
128  VVdouble vv(nbS);
129 
130  if (!rates || rates->getNumberOfCategories() == 1)
131  {
132  for (auto& v:vv)
133  {
134  v.resize(1, 1);
135  }
136  }
137  else
138  {
139  Eigen::VectorXd probas;
140  copyBppToEigen(rates->getProbabilities(), probas);
141 
142  auto vvLik = getLikelihoodCalculationSingleProcess()->getSiteLikelihoodsForAllClasses();
143  for (size_t i = 0; i < nbS; i++)
144  {
145  vv[i].resize(size_t(vvLik.rows()));
146  VectorLik sv(numeric::cwise(vvLik.col(Eigen::Index(i))) * probas.array());
147  sv /= sv.sum();
148  copyEigenToBpp(sv, vv[i]);
149  }
150  }
151  return vv;
152 }
153 
154 /******************************************************************************/
155 
157 {
158  size_t nbSites = getNumberOfSites();
160  vector<size_t> classes(nbSites);
161  for (size_t i = 0; i < nbSites; ++i)
162  {
163  classes[i] = VectorTools::whichMax<double>(l[i]);
164  }
165  return classes;
166 }
167 
168 
169 /******************************************************************************/
170 
172 {
175 
176  size_t nbSites = getNumberOfSites();
177  size_t nbClasses = getNumberOfClasses();
178  auto pb = getLikelihoodPerSitePerClass();
179  auto l = getLikelihoodPerSite();
180  Vdouble prates(nbSites, 0.);
181  for (size_t i = 0; i < nbSites; i++)
182  {
183  for (size_t j = 0; j < nbClasses; j++)
184  {
185  prates[i] += convert((pb[i][j] / l[i]) * probas[j] * rates[j]);
186  }
187  }
188  return prates;
189 }
190 
191 /******************************************************************************/
192 
193 
195 {
196  auto vv = getLikelihoodCalculationSingleProcess()->getLikelihoodsAtNode(nodeId)->targetValue();
197 
198  size_t nbSites = getNumberOfSites();
199  VVdouble pp;
200  pp.resize(nbSites);
201 
202  for (uint i = 0; i < (uint)nbSites; i++)
203  {
204  copyEigenToBpp(vv.col(i) / vv.col(i).sum(), pp[size_t(i)]);
205  }
206 
207  Vdouble v(nbStates_);
208  for (uint st = 0; st < (uint)nbStates_; st++)
209  {
210  auto s = 0.0;
211  for (uint i = 0; i < (uint)nbSites; i++)
212  {
213  s += pp[(size_t)i][size_t(st)];
214  }
215 
216  v[size_t(st)] = s / (double)nbSites;
217  }
218  return v;
219 }
VDataLik getLikelihoodPerSite() const
Get the likelihood for each site.
size_t getNumberOfSites() const
Get the number of sites in the dataset.
virtual void shareParameters_(const ParameterList &parameters)
const Context & context() const override
Context for dataflow node construction.
Definition: DataFlow.h:527
virtual Vdouble getCategories() const=0
virtual Vdouble getProbabilities() const=0
const ExtendedFloat & sum() const
const SubstitutionProcessInterface & substitutionProcess() const
Return the ref to the SubstitutionProcess.
std::shared_ptr< LikelihoodCalculationSingleProcess > getLikelihoodCalculationSingleProcess() const
std::shared_ptr< LikelihoodCalculationSingleProcess > likCal_
LikelihoodCalculationSingleProcess & likelihoodCalculationSingleProcess() const
OneProcessSequencePhyloLikelihood(Context &context, std::shared_ptr< OneProcessSequenceEvolution > evol, size_t nSeqEvol=0)
size_t getNumberOfClasses() const
Get the number of model classes.
virtual const DiscreteDistributionInterface & rateDistribution() const =0
Get the rate distribution.
static T sum(const std::vector< T > &v1)
T & cwise(T &t)
Definition: DataFlowCWise.h:29
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
template void copyEigenToBpp(const ExtendedFloatMatrixXd &eigenMatrix, std::vector< std::vector< double >> &bppMatrix)
template void copyBppToEigen(const std::vector< ExtendedFloat > &bppVector, Eigen::RowVectorXd &eigenVector)
double convert(const bpp::ExtendedFloat &ef)
std::vector< Vdouble > VVdouble