bpp-phyl3 3.0.0
UniformizationSubstitutionCount.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#include <vector>
6
10
11using namespace bpp;
12using namespace std;
13
14/******************************************************************************/
15
16UniformizationSubstitutionCount::UniformizationSubstitutionCount(
17 std::shared_ptr<const SubstitutionModelInterface> model,
18 std::shared_ptr<const SubstitutionRegisterInterface> reg,
19 std::shared_ptr<const AlphabetIndex2> weights,
20 std::shared_ptr<const AlphabetIndex2> distances) :
24 model_(model),
25 nbStates_(model->getNumberOfStates()),
26 bMatrices_(reg->getNumberOfSubstitutionTypes()),
27 power_(),
28 s_(reg->getNumberOfSubstitutionTypes()),
29 miu_(0),
30 counts_(reg->getNumberOfSubstitutionTypes()),
31 currentLength_(0)
32{
33 // Check compatibility between model and substitution register:
34 if (model->getAlphabet()->getAlphabetType() != reg->getAlphabet()->getAlphabetType())
35 throw Exception("UniformizationSubstitutionCount (constructor): alphabets do not match between register and model.");
36
37 // Initialize all B matrices according to substitution register. This is done once for all,
38 // unless the number of states changes:
41
42 for (unsigned int i = 0; i < nbStates_; ++i)
43 {
44 double diagQ = abs(model_->Qij(i, i));
45 if (diagQ > miu_)
46 miu_ = diagQ;
47 }
48
49 if (miu_ > 10000)
50 throw Exception("UniformizationSubstitutionCount::UniformizationSubstitutionCount The maximum diagonal values of generator is above 10000. Abort, chose another mapping method");
51}
52
54 const StateMapInterface& stateMap,
55 std::shared_ptr<const SubstitutionRegisterInterface> reg,
56 std::shared_ptr<const AlphabetIndex2> weights,
57 std::shared_ptr<const AlphabetIndex2> distances) :
61 model_(0),
62 nbStates_(stateMap.getNumberOfModelStates()),
63 bMatrices_(reg->getNumberOfSubstitutionTypes()),
64 power_(),
65 s_(reg->getNumberOfSubstitutionTypes()),
66 miu_(0),
67 counts_(reg->getNumberOfSubstitutionTypes()),
68 currentLength_(0)
69{}
70
71/******************************************************************************/
72
74{
75 size_t nbTypes = register_->getNumberOfSubstitutionTypes();
76 bMatrices_.resize(nbTypes);
77 counts_.resize(nbTypes);
78 s_.resize(nbTypes);
79}
80
81
83{
84 // Re-initialize all B matrices according to substitution register.
85 for (size_t i = 0; i < register_->getNumberOfSubstitutionTypes(); ++i)
86 {
87 bMatrices_[i].resize(nbStates_, nbStates_);
88 counts_[i].resize(nbStates_, nbStates_);
89 }
90}
91
93{
94 vector<int> supportedStates = model_->getAlphabetStates();
95 for (size_t j = 0; j < nbStates_; ++j)
96 {
97 for (size_t k = 0; k < nbStates_; ++k)
98 {
99 size_t i = register_->getType(j, k);
100 if (i > 0 && k != j)
101 {
102 bMatrices_[i - 1](j, k) = model_->Qij(j, k);
103 }
104 }
105 }
106
107 if (distances_)
109}
110
112{
113 vector<int> supportedStates = model_->getAlphabetStates();
114 for (size_t j = 0; j < nbStates_; ++j)
115 {
116 for (size_t k = 0; k < nbStates_; ++k)
117 {
118 size_t i = register_->getType(j, k);
119 if (i > 0 && k != j)
120 {
121 bMatrices_[i - 1](j, k) *= distances_->getIndex(supportedStates[j], supportedStates[k]);
122 }
123 }
124 }
125}
126
127
128/******************************************************************************/
129
131{
132 double lam = miu_ * length;
135 RowMatrix<double> R(model_->generator());
136 MatrixTools::scale(R, 1. / miu_);
137 MatrixTools::add(R, I);
138
139 // compute the stopping point
140 // use the tail of Poisson distribution
141 // can be approximated by 4 + 6 * sqrt(lam) + lam
142 size_t nMax = static_cast<size_t>(ceil(4 + 6 * sqrt(lam) + lam));
143
144 // compute the powers of R
145 power_.resize(nMax + 1);
146 power_[0] = I;
147 for (size_t i = 1; i < nMax + 1; ++i)
148 {
149 MatrixTools::mult(power_[i - 1], R, power_[i]);
150 }
151
152 for (size_t i = 0; i < register_->getNumberOfSubstitutionTypes(); ++i)
153 {
154 s_[i].resize(nMax + 1);
155 MatrixTools::mult(bMatrices_[i], power_[0], s_[i][0]);
157 for (size_t l = 1; l < nMax + 1; ++l)
158 {
159 MatrixTools::mult(R, s_[i][l - 1], s_[i][l]);
161 MatrixTools::add(s_[i][l], tmp);
162 }
164 for (size_t l = 0; l < nMax + 1; ++l)
165 {
166 tmp = s_[i][l];
167 // double f = (pow(lam, static_cast<double>(l + 1)) * exp(-lam) / static_cast<double>(NumTools::fact(l + 1))) / miu_;
168 double logF = static_cast<double>(l + 1) * log(lam) - lam - log(miu_) - NumTools::logFact(static_cast<double>(l + 1));
169 MatrixTools::scale(tmp, exp(logF));
170 MatrixTools::add(counts_[i], tmp);
171 }
172 }
173
174 // Now we must divide by pijt and account for putative weights:
175 vector<int> supportedStates = model_->getAlphabetStates();
176 RowMatrix<double> P = model_->getPij_t(length);
177 for (size_t i = 0; i < register_->getNumberOfSubstitutionTypes(); i++)
178 {
179 for (size_t j = 0; j < nbStates_; j++)
180 {
181 for (size_t k = 0; k < nbStates_; k++)
182 {
183 counts_[i](j, k) /= P(j, k);
184 if (std::isinf(counts_[i](j, k)) || std::isnan(counts_[i](j, k)) || (!weights_ && counts_[i](j, k) < 0.))
185 counts_[i](j, k) = 0;
186 if (weights_)
187 counts_[i](j, k) *= weights_->getIndex(supportedStates[j], supportedStates[k]);
188 }
189 }
190 }
191}
192
193/******************************************************************************/
194
195unique_ptr<Matrix<double>> UniformizationSubstitutionCount::getAllNumbersOfSubstitutions(double length, size_t type) const
196{
197 if (!model_)
198 throw Exception("UniformizationSubstitutionCount::getAllNumbersOfSubstitutions: model not defined.");
199
200 if (length < 0)
201 throw Exception("UniformizationSubstitutionCount::getAllNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
202 if (length != currentLength_)
203 {
204 computeCounts_(length);
205 currentLength_ = length;
206 }
207 return make_unique<RowMatrix<double>>(counts_[type - 1]);
208}
209
210/******************************************************************************/
211
212void UniformizationSubstitutionCount::storeAllNumbersOfSubstitutions(double length, size_t type, Eigen::MatrixXd& mat) const
213{
214 if (!model_)
215 throw Exception("UniformizationSubstitutionCount::storeAllNumbersOfSubstitutions: model not defined.");
216
217 if (length < 0)
218 throw Exception("UniformizationSubstitutionCount::storeAllNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
219 if (length != currentLength_)
220 {
221 computeCounts_(length);
222 currentLength_ = length;
223 }
224
225 mat.resize(Eigen::Index(nbStates_), Eigen::Index(nbStates_));
226
227 const auto& ct = counts_[type - 1];
228 for (size_t i = 0; i < nbStates_; i++)
229 {
230 for (size_t j = 0; j < nbStates_; j++)
231 {
232 mat(Eigen::Index(i), Eigen::Index(j)) = isnan(ct(i, j)) ? 0 : ct(i, j);
233 }
234 }
235}
236
237/******************************************************************************/
238
239double UniformizationSubstitutionCount::getNumberOfSubstitutions(size_t initialState, size_t finalState, double length, size_t type) const
240{
241 if (!model_)
242 throw Exception("UniformizationSubstitutionCount::getNumberOfSubstitutions: model not defined.");
243
244 if (length < 0)
245 throw Exception("UniformizationSubstitutionCount::getNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
246 if (length != currentLength_)
247 {
248 computeCounts_(length);
249 currentLength_ = length;
250 }
251 return counts_[type - 1](initialState, finalState);
252}
253
254/******************************************************************************/
255
256std::vector<double> UniformizationSubstitutionCount::getNumberOfSubstitutionsPerType(size_t initialState, size_t finalState, double length) const
257{
258 if (!model_)
259 throw Exception("UniformizationSubstitutionCount::getNumberOfSubstitutionsPerTye: model not defined.");
260
261 if (length < 0)
262 throw Exception("UniformizationSubstitutionCount::getNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
263 if (length != currentLength_)
264 {
265 computeCounts_(length);
266 currentLength_ = length;
267 }
268 std::vector<double> v(getNumberOfSubstitutionTypes());
269 for (unsigned int t = 0; t < getNumberOfSubstitutionTypes(); ++t)
270 {
271 v[t] = counts_[t](initialState, finalState);
272 }
273 return v;
274}
275
276/******************************************************************************/
277
279 std::shared_ptr<const SubstitutionModelInterface> model)
280{
281 model_ = model;
282
283 if (!model_)
284 return;
285
286 // Check compatibility between model and substitution register:
287 if (model->alphabet().getAlphabetType() != register_->alphabet().getAlphabetType())
288 throw Exception("UniformizationSubstitutionCount::setSubstitutionModel: alphabets do not match between register and model.");
289
290 size_t n = model->getNumberOfStates();
291 if (n != nbStates_)
292 {
293 nbStates_ = n;
294 // Re-initialize all B matrices according to substitution register.
296 }
298
299 miu_ = 0;
300 for (size_t i = 0; i < nbStates_; ++i)
301 {
302 double diagQ = abs(model_->Qij(i, i));
303 if (diagQ > miu_)
304 miu_ = diagQ;
305 }
306
307 if (miu_ > 10000)
308 throw Exception("UniformizationSubstitutionCount::setSubstitutionModel(). The maximum diagonal values of generator is above 10000. Abort, chose another mapping method.");
309
310 // Recompute counts:
311 if (currentLength_ > 0)
313}
314
315/******************************************************************************/
316
318{
319 if (!model_)
320 return;
321
322 // Check compatibility between model and substitution register:
323 if (model_->getAlphabet()->getAlphabetType() != register_->getAlphabet()->getAlphabetType())
324 throw Exception("UniformizationSubstitutionCount::substitutionRegisterHasChanged: alphabets do not match between register and model.");
325
329
330 // Recompute counts:
331 if (currentLength_ > 0)
333}
334
335/******************************************************************************/
336
338{
339 if (!model_)
340 return;
341
342 if (weights_->getAlphabet()->getAlphabetType() != register_->getAlphabet()->getAlphabetType())
343 throw Exception("UniformizationSubstitutionCount::weightsHaveChanged. Incorrect alphabet type.");
344
345 // Recompute counts:
346 if (currentLength_ > 0)
348}
349
351{
352 if (!model_)
353 return;
354
355 if (distances_->getAlphabet()->getAlphabetType() != register_->getAlphabet()->getAlphabetType())
356 throw Exception("UniformizationSubstitutionCount::distancesHaveChanged. Incorrect alphabet type.");
357
358 // Recompute counts:
360
361 if (currentLength_ > 0)
363}
364
365/******************************************************************************/
Partial implementation of the SubstitutionCount interface.
std::shared_ptr< const SubstitutionRegisterInterface > register_
Partial implementation of the SubstitutionDistance interface.
std::shared_ptr< const AlphabetIndex2 > distances_
Partial implementation of the WeightedSubstitutionCount interface.
std::shared_ptr< const AlphabetIndex2 > weights_
static void add(MatrixA &A, const MatrixB &B)
static void mult(const Matrix< Scalar > &A, const Matrix< Scalar > &B, Matrix< Scalar > &O)
static void getId(size_t n, Matrix &O)
static void scale(Matrix &A, Scalar a, Scalar b=0)
static void fill(Matrix &M, Scalar x)
static T logFact(T n)
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:25
virtual size_t getNumberOfSubstitutionTypes() const
Short cut function, equivalent to getSubstitutionRegister().getNumberOfSubstitutionTypes().
double getNumberOfSubstitutions(size_t initialState, size_t finalState, double length, size_t type=1) const override
Get the number of susbstitutions on a branch, given the initial and final states, and the branch leng...
std::vector< RowMatrix< double > > bMatrices_
void storeAllNumbersOfSubstitutions(double length, size_t type, Eigen::MatrixXd &mat) const override
Stores the numbers of susbstitutions on a branch, for each initial and final states,...
UniformizationSubstitutionCount(std::shared_ptr< const SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=nullptr, std::shared_ptr< const AlphabetIndex2 > distances=nullptr)
void setSubstitutionModel(std::shared_ptr< const SubstitutionModelInterface > model) override
Set the substitution model associated with this count, if relevant.
std::vector< std::vector< RowMatrix< double > > > s_
std::unique_ptr< Matrix< double > > getAllNumbersOfSubstitutions(double length, size_t type=1) const override
Get the numbers of susbstitutions on a branch, for each initial and final states, and given the branc...
std::vector< RowMatrix< double > > counts_
std::vector< double > getNumberOfSubstitutionsPerType(size_t initialState, size_t finalState, double length) const override
Get the numbers of susbstitutions on a branch for all types, for an initial and final states,...
std::shared_ptr< const SubstitutionModelInterface > model_
std::string toString(T t)
bool isinf(const double &d)
Defines the basic types of data flow nodes.
double log(const ExtendedFloat &ef)
ExtendedFloat exp(const ExtendedFloat &ef)
ExtendedFloat abs(const ExtendedFloat &ef)