bpp-phyl3  3.0.0
PairedSiteLikelihoods.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 // From the STL
6 #include <vector>
7 #include <string>
8 #include <numeric>
9 #include <cmath>
10 
11 #include "PairedSiteLikelihoods.h"
12 #include "TreeLikelihood.h"
13 
14 using namespace std;
15 using namespace bpp;
16 
17 /***
18  * Constructors:
19  ***/
20 
21 PairedSiteLikelihoods::PairedSiteLikelihoods() :
22  logLikelihoods_(),
23  modelNames_()
24 {}
25 
27  const vector<vector<double>>& siteLogLikelihoods,
28  const vector<string>& modelNames) :
29  logLikelihoods_(siteLogLikelihoods),
30  modelNames_(modelNames)
31 {
32  if (modelNames_.size() != getNumberOfModels())
33  {
34  if (modelNames_.size() == 0)
35  modelNames_.assign(getNumberOfModels(), string());
36  else
37  throw Exception("PairedSiteLikelihoods: There should be as many model names as model site-loglikelihoods records.");
38  }
39 
40  if (this->getNumberOfModels() > 0)
41  {
42  for (vector<vector<double>>::const_iterator siteLLiks = siteLogLikelihoods.begin();
43  siteLLiks != siteLogLikelihoods.end();
44  ++siteLLiks)
45  {
46  if (siteLLiks->size() != getNumberOfSites())
47  throw Exception("PairedSiteLikelihoods: Models site-loglikelihoods records do not have the same number of elements.");
48  }
49  }
50 }
51 
53  const vector<double>& siteLogLikelihoods,
54  const string& modelName
55  )
56 {
57  if (getNumberOfModels() > 0 && siteLogLikelihoods.size() != getNumberOfSites())
58  throw Exception("PairedSiteLikelihoods::appendModel: Model site-loglikelihoods record does not have the correct number of elements");
59 
60  logLikelihoods_.push_back(siteLogLikelihoods);
61  modelNames_.push_back(modelName);
62 }
63 
65 {
66  const vector<double>& siteLogLikelihoods = treeLikelihood.getLogLikelihoodPerSite();
67  const string& modelName = treeLikelihood.tree().getName();
68 
69  PairedSiteLikelihoods::appendModel(siteLogLikelihoods, modelName);
70 }
71 
73 {
74  if (getNumberOfModels() > 0 && psl.getNumberOfModels() > 0 && psl.getNumberOfSites() != getNumberOfSites())
75  throw Exception("PairedSiteLikelihoods::appendModels: The two PairedSiteLikelihood objects have different number of sites.");
76 
77  logLikelihoods_.insert(logLikelihoods_.end(),
78  psl.logLikelihoods_.begin(),
79  psl.logLikelihoods_.end()
80  );
81 
82  modelNames_.insert(modelNames_.end(),
83  psl.modelNames_.begin(),
84  psl.modelNames_.end()
85  );
86 }
87 
88 
89 pair<vector<string>, vector<double>> PairedSiteLikelihoods::computeExpectedLikelihoodWeights (int replicates) const
90 {
91  // Initialize the weights
92  vector<double> weights(getNumberOfModels(), 0);
93 
94  // Sum the model weights over replicates
95  for (int r = 0; r < replicates; ++r)
96  {
97  // Draw the pseudoreplicate
98  vector<int> siteCounts = bootstrap(getNumberOfSites());
99 
100  // Compute the loglikelihood of each model for this replicate
101  vector<double> models_logliks (getNumberOfModels(), 0);
102  for (size_t m = 0; m < getNumberOfModels(); ++m)
103  {
104  const vector<double>& modelSiteLLiks = logLikelihoods_.at(m);
105  double Y = 0;
106  for (size_t s = 0; s < getNumberOfSites(); ++s)
107  {
108  Y += modelSiteLLiks.at(s) * siteCounts.at(s);
109  }
110  models_logliks.at(m) = Y;
111  }
112 
113  // Get the best loglikelihood
114  double Ymax = *max_element(models_logliks.begin(), models_logliks.end());
115 
116  // Get the exponentials of the loglikelihood differences
117  // and the sum of these values
118  vector<double> exp_logliks_diffs (getNumberOfModels(), 0);
119  for (size_t m = 0; m < getNumberOfModels(); ++m)
120  {
121  exp_logliks_diffs.at(m) = exp(models_logliks.at(m) - Ymax);
122  }
123 
124  double sumELLD = accumulate(exp_logliks_diffs.begin(), exp_logliks_diffs.end(), 0.0);
125 
126  // Get the models weights for this replicate
127  for (size_t m = 0; m < getNumberOfModels(); ++m)
128  {
129  double w = exp_logliks_diffs.at(m) / sumELLD;
130  weights.at(m) += w;
131  }
132  } // for replicates
133 
134  // Divide all weights by the number of replicates.
135  for (vector<double>::iterator w = weights.begin(); w != weights.end(); ++w)
136  {
137  *w /= replicates;
138  }
139 
140  return make_pair(modelNames_, weights);
141 }
142 
143 std::vector<int> PairedSiteLikelihoods::bootstrap(std::size_t length, double scaling)
144 {
145  vector<int> v(length, 0);
146 
147  for (size_t i = 0; i < static_cast<size_t>(static_cast < double > (length) * scaling + 0.5); ++i)
148  {
149  ++v[RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(length)];
150  }
151 
152  return v;
153 }
A container for paired-site likelihoods (likelihoods over the same sites for different models,...
size_t getNumberOfModels() const
Get the number of models in the container.
std::vector< std::vector< double > > logLikelihoods_
std::size_t getNumberOfSites() const
void appendModels(const PairedSiteLikelihoods &psl)
Append models by concatenation.
std::vector< std::string > modelNames_
std::pair< std::vector< std::string >, std::vector< double > > computeExpectedLikelihoodWeights(int replicates=10000) const
Compute the Expected Likelihood Weights of the models.
void appendModel(const std::vector< double > &siteLogLikelihoods, const std::string &modelName="")
Append a model.
static std::vector< int > bootstrap(std::size_t length, double scaling=1)
Draw a nonparametric pseudoreplicate.
The TreeLikelihood interface.
virtual const Tree & tree() const =0
Get the tree (topology and branch lengths).
virtual Vdouble getLogLikelihoodPerSite() const =0
Get the logarithm of the likelihood for each site.
virtual std::string getName() const =0
Tree name.
Defines the basic types of data flow nodes.
ExtendedFloat exp(const ExtendedFloat &ef)