bpp-phyl3 3.0.0
AbstractSubstitutionModel.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
10
12
13// From SeqLib:
15
16using namespace bpp;
17using namespace std;
18
19/******************************************************************************/
20
21AbstractTransitionModel::AbstractTransitionModel(
22 std::shared_ptr<const Alphabet> alpha,
23 std::shared_ptr<const StateMapInterface> stateMap,
24 const string& prefix) :
26 alphabet_(alpha),
27 stateMap_(stateMap),
28 size_(alpha->getSize()),
29 rate_(1),
30 freq_(size_),
31 pijt_(size_, size_),
32 dpijt_(size_, size_),
33 d2pijt_(size_, size_),
34 verboseLevel_(0)
35{
37 for (auto& fr : freq_)
38 {
39 fr = 1.0 / static_cast<double>(size_);
40 }
41}
42
43/******************************************************************************/
44
46{
47 return rate_;
48}
49
50/******************************************************************************/
51
53{
54 if (rate <= 0)
55 throw Exception("Bad value for rate: " + TextTools::toString(rate));
56
57 if (hasParameter("rate"))
58 setParameterValue("rate", rate);
59 else
60 rate_ = rate;
61}
62
64{
66}
67
68/******************************************************************************/
69
70double AbstractTransitionModel::getInitValue(size_t i, int state) const
71{
72 if (i >= size_)
73 throw IndexOutOfBoundsException("AbstractTransitionModel::getInitValue", i, 0, size_ - 1);
74 if (state < 0 || !alphabet_->isIntInAlphabet(state))
75 throw BadIntException(state, "AbstractTransitionModel::getInitValue. Character " + alphabet_->intToChar(state) + " is not allowed in model.", alphabet_.get());
76 vector<int> states = alphabet_->getAlias(state);
77
78 for (size_t j = 0; j < states.size(); ++j)
79 {
80 if (getAlphabetStateAsInt(i) == states[j])
81 return 1.;
82 }
83 return 0.;
84}
85
86/******************************************************************************/
87
89{
90 map<int, double> freqs;
91 SequenceContainerTools::getFrequencies(data, freqs, pseudoCount);
92 // Re-compute generator and eigen values:
93 setFreq(freqs);
94}
95
96/******************************************************************************/
97
98void AbstractTransitionModel::setFreq(map<int, double>& freqs)
99{
100 for (auto i : freqs)
101 {
102 freq_[(size_t)i.first] = i.second;
103 }
104
105 // Re-compute generator and eigen values:
107}
108
109
110/******************************************************************************/
111
113 std::shared_ptr<const Alphabet> alpha,
114 std::shared_ptr<const StateMapInterface> stateMap,
115 const string& prefix) :
117 AbstractTransitionModel(alpha, stateMap, prefix),
118 isScalable_(true),
119 generator_(size_, size_),
120 computeFreq_(true),
121 exchangeability_(size_, size_),
122 eigenDecompose_(true),
123 eigenValues_(size_),
124 iEigenValues_(size_),
125 isDiagonalizable_(false),
126 rightEigenVectors_(size_, size_),
127 isNonSingular_(false),
128 leftEigenVectors_(size_, size_),
129 vPowGen_(),
130 tmpMat_(size_, size_)
131{}
132
133
134/******************************************************************************/
135
137{
138 // Compute eigen values and vectors:
140 {
141 // Look for null lines (such as stop lines)
142 // ie null diagonal elements
143
144 size_t nbStop = 0;
145 size_t salph = getNumberOfStates();
146 vector<bool> vnull(salph); // vector of the indices of lines with
147 // only zeros
148
149 for (size_t i = 0; i < salph; i++)
150 {
151 bool flag = (abs(generator_(i, i)) < NumConstants::TINY());
152
153 if (flag)
154 for (size_t j = 0; j < salph; j++)
155 {
156 if (abs(generator_(j, i)) >= NumConstants::TINY())
157 {
158 flag = false;
159 break;
160 }
161 }
162
163 if (flag)
164 {
165 nbStop++;
166 vnull[i] = true;
167 }
168 else
169 vnull[i] = false;
170 }
171
172 if (nbStop != 0)
173 {
174 size_t salphok = salph - nbStop;
175
176 RowMatrix<double> gk(salphok, salphok);
177 size_t gi = 0, gj = 0;
178
179 for (size_t i = 0; i < salph; i++)
180 {
181 if (!vnull[i])
182 {
183 gj = 0;
184 for (size_t j = 0; j < salph; j++)
185 {
186 if (!vnull[j])
187 {
188 gk(i - gi, j - gj) = generator_(i, j);
189 }
190 else
191 gj++;
192 }
193 }
194 else
195 gi++;
196 }
197
198 EigenValue<double> ev(gk);
201
202 for (size_t i = 0; i < nbStop; i++)
203 {
204 eigenValues_.push_back(0);
205 iEigenValues_.push_back(0);
206 }
207
208 RowMatrix<double> rev = ev.getV();
209 rightEigenVectors_.resize(salph, salph);
210 gi = 0;
211 for (size_t i = 0; i < salph; i++)
212 {
213 if (vnull[i])
214 {
215 gi++;
216 for (size_t j = 0; j < salph; j++)
217 {
218 rightEigenVectors_(i, j) = 0;
219 }
220
221 rightEigenVectors_(i, salphok + gi - 1) = 1;
222 }
223 else
224 {
225 for (size_t j = 0; j < salphok; j++)
226 {
227 rightEigenVectors_(i, j) = rev(i - gi, j);
228 }
229
230 for (size_t j = salphok; j < salph; j++)
231 {
232 rightEigenVectors_(i, j) = 0;
233 }
234 }
235 }
236 }
237 else
238 {
243 nbStop = 0;
244 }
245
247 try
248 {
250
251 // is it diagonalizable ?
252 isDiagonalizable_ = true;
253
254 if (!dynamic_cast<ReversibleSubstitutionModelInterface*>(this))
255 {
256 for (auto& vi : iEigenValues_)
257 {
258 if (abs(vi) > NumConstants::TINY())
259 {
260 isDiagonalizable_ = false;
261 break;
262 }
263 }
264 }
265
266 // looking for the vector of 0 eigenvalues
267
268 vector<size_t> vNullEv;
269 double fact = 0.1;
270
271 while (vNullEv.size() == 0 && fact < 1000)
272 {
273 fact *= 10;
274
275 for (size_t i = 0; i < salph - nbStop; i++)
276 {
277 if ((abs(eigenValues_[i]) < fact * NumConstants::NANO()) && (abs(iEigenValues_[i]) < NumConstants::NANO()))
278 vNullEv.push_back(i);
279 }
280 }
281
282 // pb to find unique null eigenvalue
283 isNonSingular_ = (vNullEv.size() == 1);
284
285 size_t nulleigen = 0;
286
287 double val;
288 if (vNullEv.size() > 1)
289 {
290 // look or check which non-stop right eigen vector elements are
291 // equal.
292 for (auto cnull : vNullEv)
293 {
294 size_t i = 0;
295 while (vnull[i])
296 i++;
297
298 val = rightEigenVectors_(i, cnull);
299 i++;
300
301 while (i < salph)
302 {
303 if (!vnull[i])
304 {
305 if (abs((rightEigenVectors_(i, cnull) - val) / val) > NumConstants::SMALL())
306 break;
307 }
308 i++;
309 }
310
311 if (i >= salph)
312 {
313 isNonSingular_ = true;
314 nulleigen = cnull;
315 break;
316 }
317 }
318 }
319 else
320 nulleigen = vNullEv[0];
321
322 if (isNonSingular_)
323 {
324 eigenValues_[nulleigen] = 0; // to avoid approximation errors on long long branches
325 iEigenValues_[nulleigen] = 0; // to avoid approximation errors on long long branches
326
327 if (computeFrequencies())
328 {
329 for (size_t i = 0; i < salph; i++)
330 {
331 freq_[i] = leftEigenVectors_(nulleigen, i);
332 }
333
334 double x = VectorTools::sum(freq_);
335 freq_ /= x;
336 }
337 }
338 else
339 {
340 if (verboseLevel_ > 0)
341 ApplicationTools::displayMessage("AbstractSubstitutionModel::updateMatrices : Unable to find eigenvector for eigenvalue 0. Taylor series used instead.");
342 isDiagonalizable_ = false;
343 }
344 }
345 // if rightEigenVectors_ is singular
346 catch (ZeroDivisionException& e)
347 {
348 if (verboseLevel_ > 0)
349 ApplicationTools::displayMessage("AbstractSubstitutionModel::updateMatrices : Singularity during diagonalization. Taylor series used instead.");
350 isNonSingular_ = false;
351 isDiagonalizable_ = false;
352 }
353
354 if (!isNonSingular_)
355 {
356 double min = generator_(0, 0);
357 for (size_t i = 1; i < salph; i++)
358 {
359 if (min > generator_(i, i))
360 min = generator_(i, i);
361 }
362
363 setScale(-1 / min);
364
365 if (vPowGen_.size() == 0)
366 vPowGen_.resize(30);
367
368
369 if (computeFrequencies())
370 {
371 MatrixTools::getId(salph, tmpMat_); // to compute the equilibrium frequency (Q+Id)^256
374
375 for (size_t i = 0; i < salph; i++)
376 {
377 freq_[i] = vPowGen_[0](0, i);
378 }
379 }
380
381 MatrixTools::getId(salph, vPowGen_[0]);
382 }
383
384 // normalization
385 normalize();
386
387 if (!isNonSingular_)
389 }
390}
391
392
393/******************************************************************************/
394
396{
397 if (t == 0)
398 {
400 }
401 else if (isNonSingular_)
402 {
404 {
405 MatrixTools::mult<double>(rightEigenVectors_, VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, pijt_);
406 }
407 else
408 {
409 std::vector<double> vdia(size_);
410 std::vector<double> vup(size_ - 1);
411 std::vector<double> vlo(size_ - 1);
412 double c = 0, s = 0;
413 double l = rate_ * t;
414 for (size_t i = 0; i < size_; i++)
415 {
416 vdia[i] = std::exp(eigenValues_[i] * l);
417 if (iEigenValues_[i] != 0)
418 {
419 s = std::sin(iEigenValues_[i] * l);
420 c = std::cos(iEigenValues_[i] * l);
421 vup[i] = vdia[i] * s;
422 vlo[i] = -vup[i];
423 vdia[i] *= c;
424 vdia[i + 1] = vdia[i]; // trick to avoid computation
425 i++;
426 }
427 else
428 {
429 if (i < size_ - 1)
430 {
431 vup[i] = 0;
432 vlo[i] = 0;
433 }
434 }
435 }
436 MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, pijt_);
437 }
438 }
439 else
440 {
442 double s = 1.0;
443 double v = rate_ * t;
444 size_t m = 0;
445 while (v > 0.5) // exp(r*t*A)=(exp(r*t/(2^m) A))^(2^m)
446 {
447 m += 1;
448 v /= 2;
449 }
450 for (size_t i = 1; i < vPowGen_.size(); i++)
451 {
452 s *= v / static_cast<double>(i); // v^n/n!
454 }
455
456 while (m > 0) // recover the 2^m
457 {
460 m--;
461 }
462 }
463
464
465 // Check to avoid numerical issues
466 // if (t<= NumConstants::SMALL())
467 for (size_t i = 0; i < size_; i++)
468 {
469 for (size_t j = 0; j < size_; j++)
470 {
471 if (pijt_(i, j) < 0.)
472 {
473 if (std::abs(pijt_(i, j)) > NumConstants::SMALL())
474 {
475 throw Exception("There is an issue in the computation of transition matrix of " + getName() + " : pijt_(" + to_string(i) + "," + to_string(j) + ", " + to_string(t) + ")=" + to_string(pijt_(i, j)));
476 }
477 pijt_(i, j) = 0.;
478 }
479 }
480 }
481 return pijt_;
482}
483
484/******************************************************************************/
485
487{
488 if (isNonSingular_)
489 {
491 {
493 }
494 else
495 {
496 std::vector<double> vdia(size_);
497 std::vector<double> vup(size_ - 1);
498 std::vector<double> vlo(size_ - 1);
499 double c, s, e;
500 double l = rate_ * t;
501 for (size_t i = 0; i < size_; i++)
502 {
503 e = std::exp(eigenValues_[i] * l);
504 if (iEigenValues_[i] != 0)
505 {
506 s = std::sin(iEigenValues_[i] * l);
507 c = std::cos(iEigenValues_[i] * l);
508 vdia[i] = rate_ * (eigenValues_[i] * c - iEigenValues_[i] * s) * e;
509 vup[i] = rate_ * (eigenValues_[i] * s + iEigenValues_[i] * c) * e;
510 vlo[i] = -vup[i];
511 vdia[i + 1] = vdia[i]; // trick to avoid computation
512 i++;
513 }
514 else
515 {
516 if (i < size_ - 1)
517 {
518 vup[i] = 0;
519 vlo[i] = 0;
520 }
521 }
522 }
523 MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, dpijt_);
524 }
525 }
526 else
527 {
529 double s = 1.0;
530 double v = rate_ * t;
531 size_t m = 0;
532 while (v > 0.5) // r*A*exp(t*r*A)=r*A*(exp(r*t/(2^m) A))^(2^m)
533 {
534 m += 1;
535 v /= 2;
536 }
537 for (size_t i = 1; i < vPowGen_.size(); i++)
538 {
539 s *= v / static_cast<double>(i);
541 }
542 while (m > 0) // recover the 2^m
543 {
546 m--;
547 }
551 }
552 return dpijt_;
553}
554
555/******************************************************************************/
556
558{
559 if (isNonSingular_)
560 {
562 {
564 }
565 else
566 {
567 std::vector<double> vdia(size_);
568 std::vector<double> vup(size_ - 1);
569 std::vector<double> vlo(size_ - 1);
570 double c, s, e;
571 double l = rate_ * t;
572 for (size_t i = 0; i < size_; i++)
573 {
574 e = std::exp(eigenValues_[i] * l);
575 if (iEigenValues_[i] != 0)
576 {
577 s = std::sin(iEigenValues_[i] * l);
578 c = std::cos(iEigenValues_[i] * l);
579 vdia[i] = NumTools::sqr(rate_)
581 - 2 * eigenValues_[i] * iEigenValues_[i] * s) * e;
582 vup[i] = NumTools::sqr(rate_)
584 - 2 * eigenValues_[i] * iEigenValues_[i] * c) * e;
585 vlo[i] = -vup[i];
586 vdia[i + 1] = vdia[i]; // trick to avoid computation
587 i++;
588 }
589 else
590 {
591 if (i < size_ - 1)
592 {
593 vup[i] = 0;
594 vlo[i] = 0;
595 }
596 }
597 }
598 MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, d2pijt_);
599 }
600 }
601 else
602 {
604 double s = 1.0;
605 double v = rate_ * t;
606 size_t m = 0;
607 while (v > 0.5) // r^2*A^2*exp(t*r*A)=r^2*A^2*(exp(r*t/(2^m) A))^(2^m)
608 {
609 m += 1;
610 v /= 2;
611 }
612 for (size_t i = 1; i < vPowGen_.size(); i++)
613 {
614 s *= v / static_cast<double>(i);
616 }
617 while (m > 0) // recover the 2^m
618 {
621 m--;
622 }
626 }
627 return d2pijt_;
628}
629
630/******************************************************************************/
631
633{
634 vector<double> v;
636 return -VectorTools::scalar<double, double>(v, freq_);
637}
638
639/******************************************************************************/
640
642{
643 if (isScalable_)
644 {
646 eigenValues_ *= scale;
647 iEigenValues_ *= scale;
648 }
649}
650
651
652/******************************************************************************/
653
655{
656 for (size_t i = 0; i < size_; i++)
657 {
658 double lambda = 0;
659 Vdouble& row = generator_.getRow(i);
660
661 for (size_t j = 0; j < size_; j++)
662 {
663 if (j != i)
664 lambda += row[j];
665 }
666 row[i] = -lambda;
667 }
668}
669
670
671/******************************************************************************/
672
674{
675 if (isScalable_)
676 setScale(1 / getScale());
677}
678
679/******************************************************************************/
680
682{
683 MatrixTools::hadamardMult(exchangeability_, freq_, generator_, false); // Diagonal elements of the exchangeability matrix will be ignored.
684
685 // Normalization:
686 setDiagonal();
687 normalize();
688
690}
691
692/******************************************************************************/
void addParameter_(Parameter *parameter)
void setParameterValue(const std::string &name, double value) override
std::string getNamespace() const override
bool hasParameter(const std::string &name) const override
virtual void updateMatrices_() override
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVecto...
RowMatrix< double > generator_
The generator matrix of the model.
bool isDiagonalizable_
boolean value for diagonalizability in R of the generator_
bool isNonSingular_
boolean value for non-singularity of rightEigenVectors_
std::vector< RowMatrix< double > > vPowGen_
vector of the powers of generator_ for Taylor development (if rightEigenVectors_ is singular).
Vdouble eigenValues_
The vector of eigen values.
AbstractSubstitutionModel(std::shared_ptr< const Alphabet > alpha, std::shared_ptr< const StateMapInterface > stateMap, const std::string &prefix)
void normalize()
normalize the generator
void setDiagonal()
set the diagonal of the generator such that sum on each line equals 0.
RowMatrix< double > exchangeability_
The exchangeability matrix of the model, defined as . When the model is reversible,...
const Matrix< double > & getdPij_dt(double t) const
Vdouble iEigenValues_
The vector of the imaginary part of the eigen values.
RowMatrix< double > leftEigenVectors_
The matrix made of left eigen vectors (by row) if rightEigenVectors_ is non-singular.
bool isScalable_
If the model is scalable (ie generator can be normalized automatically).
const Matrix< double > & getPij_t(double t) const
virtual void updateMatrices_()
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
RowMatrix< double > rightEigenVectors_
The matrix made of right eigen vectors (by column).
bool enableEigenDecomposition()
Tell if eigenValues and Vectors must be computed.
const Matrix< double > & getd2Pij_dt2(double t) const
void setScale(double scale)
Multiplies the current generator by the given scale.
RowMatrix< double > tmpMat_
For computational issues.
Partial implementation of the TransitionModel interface.
void setFreqFromData(const SequenceDataInterface &data, double pseudoCount=0) override
Set equilibrium frequencies equal to the frequencies estimated from the data.
void addRateParameter() override
add a "rate" parameter to the model, that handles the overall rate of the process.
RowMatrix< double > pijt_
These ones are for bookkeeping:
size_t size_
The number of states.
Vdouble freq_
The vector of equilibrium frequencies.
virtual void setRate(double rate) override
Set the rate of the model (must be positive).
int getAlphabetStateAsInt(size_t index) const override
virtual void updateMatrices_()=0
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
double rate_
The rate of the model (default: 1). The generator (and all its vectorial components) is independent o...
std::shared_ptr< const Alphabet > alphabet_
The alphabet relevant to this model.
size_t getNumberOfStates() const override
Get the number of states.
virtual double getRate() const override
The rate of the substitution process.
double getInitValue(size_t i, int state) const override
virtual void setFreq(std::map< int, double > &freqs) override
Set equilibrium frequencies.
static void displayMessage(const std::string &text)
virtual std::string getName() const =0
Get the name of the model.
const RowMatrix< Real > & getV() const
const std::vector< Real > & getImagEigenValues() const
const std::vector< Real > & getRealEigenValues() const
static Scalar inv(const Matrix< Scalar > &A, Matrix< Scalar > &O)
static void Taylor(const Matrix &A, size_t p, std::vector< RowMatrix< Scalar > > &vO)
static void pow(const Matrix &A, size_t p, Matrix &O)
static void copy(const MatrixA &A, MatrixO &O)
static void add(MatrixA &A, const MatrixB &B)
static void mult(const Matrix< Scalar > &A, const Matrix< Scalar > &B, Matrix< Scalar > &O)
static void getId(size_t n, Matrix &O)
static void diag(const std::vector< Scalar > &D, Matrix< Scalar > &O)
static void hadamardMult(const Matrix< Scalar > &A, const Matrix< Scalar > &B, Matrix< Scalar > &O)
static void scale(Matrix &A, Scalar a, Scalar b=0)
static double TINY()
static double NANO()
static double SMALL()
static T sqr(T a)
static const std::shared_ptr< IntervalConstraint > R_PLUS_STAR
Interface for reversible substitution models.
const std::vector< double > & getRow(size_t i) const
void resize(size_t nRows, size_t nCols)
static void getFrequencies(const SequenceContainerInterface &sc, std::map< int, double > &f, double pseudoCount=0)
static double exp(const T &x)
static T sum(const std::vector< T > &v1)
static std::vector< T > sqr(const std::vector< T > &v1)
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::string to_string(const NoDimension &)
ExtendedFloat abs(const ExtendedFloat &ef)