bpp-phyl3 3.0.0
PairedSiteLikelihoods.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5// From the STL
6#include <vector>
7#include <string>
8#include <numeric>
9#include <cmath>
10
12#include "TreeLikelihood.h"
13
14using namespace std;
15using namespace bpp;
16
17/***
18 * Constructors:
19 ***/
20
21PairedSiteLikelihoods::PairedSiteLikelihoods() :
22 logLikelihoods_(),
23 modelNames_()
24{}
25
27 const vector<vector<double>>& siteLogLikelihoods,
28 const vector<string>& modelNames) :
29 logLikelihoods_(siteLogLikelihoods),
30 modelNames_(modelNames)
31{
32 if (modelNames_.size() != getNumberOfModels())
33 {
34 if (modelNames_.size() == 0)
35 modelNames_.assign(getNumberOfModels(), string());
36 else
37 throw Exception("PairedSiteLikelihoods: There should be as many model names as model site-loglikelihoods records.");
38 }
39
40 if (this->getNumberOfModels() > 0)
41 {
42 for (vector<vector<double>>::const_iterator siteLLiks = siteLogLikelihoods.begin();
43 siteLLiks != siteLogLikelihoods.end();
44 ++siteLLiks)
45 {
46 if (siteLLiks->size() != getNumberOfSites())
47 throw Exception("PairedSiteLikelihoods: Models site-loglikelihoods records do not have the same number of elements.");
48 }
49 }
50}
51
53 const vector<double>& siteLogLikelihoods,
54 const string& modelName
55 )
56{
57 if (getNumberOfModels() > 0 && siteLogLikelihoods.size() != getNumberOfSites())
58 throw Exception("PairedSiteLikelihoods::appendModel: Model site-loglikelihoods record does not have the correct number of elements");
59
60 logLikelihoods_.push_back(siteLogLikelihoods);
61 modelNames_.push_back(modelName);
62}
63
65{
66 const vector<double>& siteLogLikelihoods = treeLikelihood.getLogLikelihoodPerSite();
67 const string& modelName = treeLikelihood.tree().getName();
68
69 PairedSiteLikelihoods::appendModel(siteLogLikelihoods, modelName);
70}
71
73{
74 if (getNumberOfModels() > 0 && psl.getNumberOfModels() > 0 && psl.getNumberOfSites() != getNumberOfSites())
75 throw Exception("PairedSiteLikelihoods::appendModels: The two PairedSiteLikelihood objects have different number of sites.");
76
78 psl.logLikelihoods_.begin(),
79 psl.logLikelihoods_.end()
80 );
81
82 modelNames_.insert(modelNames_.end(),
83 psl.modelNames_.begin(),
84 psl.modelNames_.end()
85 );
86}
87
88
89pair<vector<string>, vector<double>> PairedSiteLikelihoods::computeExpectedLikelihoodWeights (int replicates) const
90{
91 // Initialize the weights
92 vector<double> weights(getNumberOfModels(), 0);
93
94 // Sum the model weights over replicates
95 for (int r = 0; r < replicates; ++r)
96 {
97 // Draw the pseudoreplicate
98 vector<int> siteCounts = bootstrap(getNumberOfSites());
99
100 // Compute the loglikelihood of each model for this replicate
101 vector<double> models_logliks (getNumberOfModels(), 0);
102 for (size_t m = 0; m < getNumberOfModels(); ++m)
103 {
104 const vector<double>& modelSiteLLiks = logLikelihoods_.at(m);
105 double Y = 0;
106 for (size_t s = 0; s < getNumberOfSites(); ++s)
107 {
108 Y += modelSiteLLiks.at(s) * siteCounts.at(s);
109 }
110 models_logliks.at(m) = Y;
111 }
112
113 // Get the best loglikelihood
114 double Ymax = *max_element(models_logliks.begin(), models_logliks.end());
115
116 // Get the exponentials of the loglikelihood differences
117 // and the sum of these values
118 vector<double> exp_logliks_diffs (getNumberOfModels(), 0);
119 for (size_t m = 0; m < getNumberOfModels(); ++m)
120 {
121 exp_logliks_diffs.at(m) = exp(models_logliks.at(m) - Ymax);
122 }
123
124 double sumELLD = accumulate(exp_logliks_diffs.begin(), exp_logliks_diffs.end(), 0.0);
125
126 // Get the models weights for this replicate
127 for (size_t m = 0; m < getNumberOfModels(); ++m)
128 {
129 double w = exp_logliks_diffs.at(m) / sumELLD;
130 weights.at(m) += w;
131 }
132 } // for replicates
133
134 // Divide all weights by the number of replicates.
135 for (vector<double>::iterator w = weights.begin(); w != weights.end(); ++w)
136 {
137 *w /= replicates;
138 }
139
140 return make_pair(modelNames_, weights);
141}
142
143std::vector<int> PairedSiteLikelihoods::bootstrap(std::size_t length, double scaling)
144{
145 vector<int> v(length, 0);
146
147 for (size_t i = 0; i < static_cast<size_t>(static_cast < double > (length) * scaling + 0.5); ++i)
148 {
149 ++v[RandomTools::giveIntRandomNumberBetweenZeroAndEntry<size_t>(length)];
150 }
151
152 return v;
153}
A container for paired-site likelihoods (likelihoods over the same sites for different models,...
size_t getNumberOfModels() const
Get the number of models in the container.
std::vector< std::vector< double > > logLikelihoods_
std::size_t getNumberOfSites() const
void appendModels(const PairedSiteLikelihoods &psl)
Append models by concatenation.
std::vector< std::string > modelNames_
std::pair< std::vector< std::string >, std::vector< double > > computeExpectedLikelihoodWeights(int replicates=10000) const
Compute the Expected Likelihood Weights of the models.
void appendModel(const std::vector< double > &siteLogLikelihoods, const std::string &modelName="")
Append a model.
static std::vector< int > bootstrap(std::size_t length, double scaling=1)
Draw a nonparametric pseudoreplicate.
The TreeLikelihood interface.
virtual const Tree & tree() const =0
Get the tree (topology and branch lengths).
virtual Vdouble getLogLikelihoodPerSite() const =0
Get the logarithm of the likelihood for each site.
virtual std::string getName() const =0
Tree name.
Defines the basic types of data flow nodes.
ExtendedFloat exp(const ExtendedFloat &ef)