bpp-phyl3  3.0.0
AbstractSubstitutionModel.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
9 #include <Bpp/Text/TextTools.h>
10 
12 
13 // From SeqLib:
15 
16 using namespace bpp;
17 using namespace std;
18 
19 /******************************************************************************/
20 
22  std::shared_ptr<const Alphabet> alpha,
23  std::shared_ptr<const StateMapInterface> stateMap,
24  const string& prefix) :
26  alphabet_(alpha),
27  stateMap_(stateMap),
28  size_(alpha->getSize()),
29  rate_(1),
30  freq_(size_),
31  pijt_(size_, size_),
32  dpijt_(size_, size_),
33  d2pijt_(size_, size_),
34  verboseLevel_(0)
35 {
36  if (computeFrequencies())
37  for (auto& fr : freq_)
38  {
39  fr = 1.0 / static_cast<double>(size_);
40  }
41 }
42 
43 /******************************************************************************/
44 
46 {
47  return rate_;
48 }
49 
50 /******************************************************************************/
51 
53 {
54  if (rate <= 0)
55  throw Exception("Bad value for rate: " + TextTools::toString(rate));
56 
57  if (hasParameter("rate"))
58  setParameterValue("rate", rate);
59  else
60  rate_ = rate;
61 }
62 
64 {
66 }
67 
68 /******************************************************************************/
69 
70 double AbstractTransitionModel::getInitValue(size_t i, int state) const
71 {
72  if (i >= size_)
73  throw IndexOutOfBoundsException("AbstractTransitionModel::getInitValue", i, 0, size_ - 1);
74  if (state < 0 || !alphabet_->isIntInAlphabet(state))
75  throw BadIntException(state, "AbstractTransitionModel::getInitValue. Character " + alphabet_->intToChar(state) + " is not allowed in model.", alphabet_.get());
76  vector<int> states = alphabet_->getAlias(state);
77 
78  for (size_t j = 0; j < states.size(); ++j)
79  {
80  if (getAlphabetStateAsInt(i) == states[j])
81  return 1.;
82  }
83  return 0.;
84 }
85 
86 /******************************************************************************/
87 
89 {
90  map<int, double> freqs;
91  SequenceContainerTools::getFrequencies(data, freqs, pseudoCount);
92  // Re-compute generator and eigen values:
93  setFreq(freqs);
94 }
95 
96 /******************************************************************************/
97 
98 void AbstractTransitionModel::setFreq(map<int, double>& freqs)
99 {
100  for (auto i : freqs)
101  {
102  freq_[(size_t)i.first] = i.second;
103  }
104 
105  // Re-compute generator and eigen values:
106  updateMatrices_();
107 }
108 
109 
110 /******************************************************************************/
111 
113  std::shared_ptr<const Alphabet> alpha,
114  std::shared_ptr<const StateMapInterface> stateMap,
115  const string& prefix) :
117  AbstractTransitionModel(alpha, stateMap, prefix),
118  isScalable_(true),
119  generator_(size_, size_),
120  computeFreq_(true),
121  exchangeability_(size_, size_),
122  eigenDecompose_(true),
123  eigenValues_(size_),
124  iEigenValues_(size_),
125  isDiagonalizable_(false),
126  rightEigenVectors_(size_, size_),
127  isNonSingular_(false),
128  leftEigenVectors_(size_, size_),
129  vPowGen_(),
130  tmpMat_(size_, size_)
131 {}
132 
133 
134 /******************************************************************************/
135 
137 {
138  // Compute eigen values and vectors:
140  {
141  // Look for null lines (such as stop lines)
142  // ie null diagonal elements
143 
144  size_t nbStop = 0;
145  size_t salph = getNumberOfStates();
146  vector<bool> vnull(salph); // vector of the indices of lines with
147  // only zeros
148 
149  for (size_t i = 0; i < salph; i++)
150  {
151  bool flag = (abs(generator_(i, i)) < NumConstants::TINY());
152 
153  if (flag)
154  for (size_t j = 0; j < salph; j++)
155  {
156  if (abs(generator_(j, i)) >= NumConstants::TINY())
157  {
158  flag = false;
159  break;
160  }
161  }
162 
163  if (flag)
164  {
165  nbStop++;
166  vnull[i] = true;
167  }
168  else
169  vnull[i] = false;
170  }
171 
172  if (nbStop != 0)
173  {
174  size_t salphok = salph - nbStop;
175 
176  RowMatrix<double> gk(salphok, salphok);
177  size_t gi = 0, gj = 0;
178 
179  for (size_t i = 0; i < salph; i++)
180  {
181  if (!vnull[i])
182  {
183  gj = 0;
184  for (size_t j = 0; j < salph; j++)
185  {
186  if (!vnull[j])
187  {
188  gk(i - gi, j - gj) = generator_(i, j);
189  }
190  else
191  gj++;
192  }
193  }
194  else
195  gi++;
196  }
197 
198  EigenValue<double> ev(gk);
201 
202  for (size_t i = 0; i < nbStop; i++)
203  {
204  eigenValues_.push_back(0);
205  iEigenValues_.push_back(0);
206  }
207 
208  RowMatrix<double> rev = ev.getV();
209  rightEigenVectors_.resize(salph, salph);
210  gi = 0;
211  for (size_t i = 0; i < salph; i++)
212  {
213  if (vnull[i])
214  {
215  gi++;
216  for (size_t j = 0; j < salph; j++)
217  {
218  rightEigenVectors_(i, j) = 0;
219  }
220 
221  rightEigenVectors_(i, salphok + gi - 1) = 1;
222  }
223  else
224  {
225  for (size_t j = 0; j < salphok; j++)
226  {
227  rightEigenVectors_(i, j) = rev(i - gi, j);
228  }
229 
230  for (size_t j = salphok; j < salph; j++)
231  {
232  rightEigenVectors_(i, j) = 0;
233  }
234  }
235  }
236  }
237  else
238  {
240  rightEigenVectors_ = ev.getV();
243  nbStop = 0;
244  }
245 
247  try
248  {
250 
251  // is it diagonalizable ?
252  isDiagonalizable_ = true;
253 
254  if (!dynamic_cast<ReversibleSubstitutionModelInterface*>(this))
255  {
256  for (auto& vi : iEigenValues_)
257  {
258  if (abs(vi) > NumConstants::TINY())
259  {
260  isDiagonalizable_ = false;
261  break;
262  }
263  }
264  }
265 
266  // looking for the vector of 0 eigenvalues
267 
268  vector<size_t> vNullEv;
269  double fact = 0.1;
270 
271  while (vNullEv.size() == 0 && fact < 1000)
272  {
273  fact *= 10;
274 
275  for (size_t i = 0; i < salph - nbStop; i++)
276  {
277  if ((abs(eigenValues_[i]) < fact * NumConstants::NANO()) && (abs(iEigenValues_[i]) < NumConstants::NANO()))
278  vNullEv.push_back(i);
279  }
280  }
281 
282  // pb to find unique null eigenvalue
283  isNonSingular_ = (vNullEv.size() == 1);
284 
285  size_t nulleigen;
286 
287  double val;
288  if (vNullEv.size() > 1)
289  {
290  // look or check which non-stop right eigen vector elements are
291  // equal.
292  for (auto cnull : vNullEv)
293  {
294  size_t i = 0;
295  while (vnull[i])
296  i++;
297 
298  val = rightEigenVectors_(i, cnull);
299  i++;
300 
301  while (i < salph)
302  {
303  if (!vnull[i])
304  {
305  if (abs((rightEigenVectors_(i, cnull) - val) / val) > NumConstants::SMALL())
306  break;
307  }
308  i++;
309  }
310 
311  if (i >= salph)
312  {
313  isNonSingular_ = true;
314  nulleigen = cnull;
315  break;
316  }
317  }
318  }
319  else
320  nulleigen = vNullEv[0];
321 
322  if (isNonSingular_)
323  {
324  eigenValues_[nulleigen] = 0; // to avoid approximation errors on long long branches
325  iEigenValues_[nulleigen] = 0; // to avoid approximation errors on long long branches
326 
327  if (computeFrequencies())
328  {
329  for (size_t i = 0; i < salph; i++)
330  {
331  freq_[i] = leftEigenVectors_(nulleigen, i);
332  }
333 
334  double x = VectorTools::sum(freq_);
335  freq_ /= x;
336  }
337  }
338  else
339  {
340  if (verboseLevel_ > 0)
341  ApplicationTools::displayMessage("AbstractSubstitutionModel::updateMatrices : Unable to find eigenvector for eigenvalue 0. Taylor series used instead.");
342  isDiagonalizable_ = false;
343  }
344  }
345  // if rightEigenVectors_ is singular
346  catch (ZeroDivisionException& e)
347  {
348  if (verboseLevel_ > 0)
349  ApplicationTools::displayMessage("AbstractSubstitutionModel::updateMatrices : Singularity during diagonalization. Taylor series used instead.");
350  isNonSingular_ = false;
351  isDiagonalizable_ = false;
352  }
353 
354  if (!isNonSingular_)
355  {
356  double min = generator_(0, 0);
357  for (size_t i = 1; i < salph; i++)
358  {
359  if (min > generator_(i, i))
360  min = generator_(i, i);
361  }
362 
363  setScale(-1 / min);
364 
365  if (vPowGen_.size() == 0)
366  vPowGen_.resize(30);
367 
368 
369  if (computeFrequencies())
370  {
371  MatrixTools::getId(salph, tmpMat_); // to compute the equilibrium frequency (Q+Id)^256
374 
375  for (size_t i = 0; i < salph; i++)
376  {
377  freq_[i] = vPowGen_[0](0, i);
378  }
379  }
380 
381  MatrixTools::getId(salph, vPowGen_[0]);
382  }
383 
384  // normalization
385  normalize();
386 
387  if (!isNonSingular_)
389  }
390 }
391 
392 
393 /******************************************************************************/
394 
396 {
397  if (t == 0)
398  {
400  }
401  else if (isNonSingular_)
402  {
403  if (isDiagonalizable_)
404  {
405  MatrixTools::mult<double>(rightEigenVectors_, VectorTools::exp(eigenValues_ * (rate_ * t)), leftEigenVectors_, pijt_);
406  }
407  else
408  {
409  std::vector<double> vdia(size_);
410  std::vector<double> vup(size_ - 1);
411  std::vector<double> vlo(size_ - 1);
412  double c = 0, s = 0;
413  double l = rate_ * t;
414  for (size_t i = 0; i < size_; i++)
415  {
416  vdia[i] = std::exp(eigenValues_[i] * l);
417  if (iEigenValues_[i] != 0)
418  {
419  s = std::sin(iEigenValues_[i] * l);
420  c = std::cos(iEigenValues_[i] * l);
421  vup[i] = vdia[i] * s;
422  vlo[i] = -vup[i];
423  vdia[i] *= c;
424  vdia[i + 1] = vdia[i]; // trick to avoid computation
425  i++;
426  }
427  else
428  {
429  if (i < size_ - 1)
430  {
431  vup[i] = 0;
432  vlo[i] = 0;
433  }
434  }
435  }
436  MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, pijt_);
437  }
438  }
439  else
440  {
442  double s = 1.0;
443  double v = rate_ * t;
444  size_t m = 0;
445  while (v > 0.5) // exp(r*t*A)=(exp(r*t/(2^m) A))^(2^m)
446  {
447  m += 1;
448  v /= 2;
449  }
450  for (size_t i = 1; i < vPowGen_.size(); i++)
451  {
452  s *= v / static_cast<double>(i); // v^n/n!
454  }
455 
456  while (m > 0) // recover the 2^m
457  {
460  m--;
461  }
462  }
463 
464 
465  // Check to avoid numerical issues
466  // if (t<= NumConstants::SMALL())
467  for (size_t i = 0; i < size_; i++)
468  {
469  for (size_t j = 0; j < size_; j++)
470  {
471  if (pijt_(i, j) < 0.)
472  {
473  if (std::abs(pijt_(i, j)) > NumConstants::SMALL())
474  {
475  throw Exception("There is an issue in the computation of transition matrix of " + getName() + " : pijt_(" + to_string(i) + "," + to_string(j) + ", " + to_string(t) + ")=" + to_string(pijt_(i, j)));
476  }
477  pijt_(i, j) = 0.;
478  }
479  }
480  }
481  return pijt_;
482 }
483 
484 /******************************************************************************/
485 
487 {
488  if (isNonSingular_)
489  {
490  if (isDiagonalizable_)
491  {
493  }
494  else
495  {
496  std::vector<double> vdia(size_);
497  std::vector<double> vup(size_ - 1);
498  std::vector<double> vlo(size_ - 1);
499  double c, s, e;
500  double l = rate_ * t;
501  for (size_t i = 0; i < size_; i++)
502  {
503  e = std::exp(eigenValues_[i] * l);
504  if (iEigenValues_[i] != 0)
505  {
506  s = std::sin(iEigenValues_[i] * l);
507  c = std::cos(iEigenValues_[i] * l);
508  vdia[i] = rate_ * (eigenValues_[i] * c - iEigenValues_[i] * s) * e;
509  vup[i] = rate_ * (eigenValues_[i] * s + iEigenValues_[i] * c) * e;
510  vlo[i] = -vup[i];
511  vdia[i + 1] = vdia[i]; // trick to avoid computation
512  i++;
513  }
514  else
515  {
516  if (i < size_ - 1)
517  {
518  vup[i] = 0;
519  vlo[i] = 0;
520  }
521  }
522  }
523  MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, dpijt_);
524  }
525  }
526  else
527  {
529  double s = 1.0;
530  double v = rate_ * t;
531  size_t m = 0;
532  while (v > 0.5) // r*A*exp(t*r*A)=r*A*(exp(r*t/(2^m) A))^(2^m)
533  {
534  m += 1;
535  v /= 2;
536  }
537  for (size_t i = 1; i < vPowGen_.size(); i++)
538  {
539  s *= v / static_cast<double>(i);
541  }
542  while (m > 0) // recover the 2^m
543  {
546  m--;
547  }
551  }
552  return dpijt_;
553 }
554 
555 /******************************************************************************/
556 
558 {
559  if (isNonSingular_)
560  {
561  if (isDiagonalizable_)
562  {
564  }
565  else
566  {
567  std::vector<double> vdia(size_);
568  std::vector<double> vup(size_ - 1);
569  std::vector<double> vlo(size_ - 1);
570  double c, s, e;
571  double l = rate_ * t;
572  for (size_t i = 0; i < size_; i++)
573  {
574  e = std::exp(eigenValues_[i] * l);
575  if (iEigenValues_[i] != 0)
576  {
577  s = std::sin(iEigenValues_[i] * l);
578  c = std::cos(iEigenValues_[i] * l);
579  vdia[i] = NumTools::sqr(rate_)
581  - 2 * eigenValues_[i] * iEigenValues_[i] * s) * e;
582  vup[i] = NumTools::sqr(rate_)
584  - 2 * eigenValues_[i] * iEigenValues_[i] * c) * e;
585  vlo[i] = -vup[i];
586  vdia[i + 1] = vdia[i]; // trick to avoid computation
587  i++;
588  }
589  else
590  {
591  if (i < size_ - 1)
592  {
593  vup[i] = 0;
594  vlo[i] = 0;
595  }
596  }
597  }
598  MatrixTools::mult<double>(rightEigenVectors_, vdia, vup, vlo, leftEigenVectors_, d2pijt_);
599  }
600  }
601  else
602  {
604  double s = 1.0;
605  double v = rate_ * t;
606  size_t m = 0;
607  while (v > 0.5) // r^2*A^2*exp(t*r*A)=r^2*A^2*(exp(r*t/(2^m) A))^(2^m)
608  {
609  m += 1;
610  v /= 2;
611  }
612  for (size_t i = 1; i < vPowGen_.size(); i++)
613  {
614  s *= v / static_cast<double>(i);
616  }
617  while (m > 0) // recover the 2^m
618  {
621  m--;
622  }
626  }
627  return d2pijt_;
628 }
629 
630 /******************************************************************************/
631 
633 {
634  vector<double> v;
636  return -VectorTools::scalar<double, double>(v, freq_);
637 }
638 
639 /******************************************************************************/
640 
642 {
643  if (isScalable_)
644  {
646  eigenValues_ *= scale;
647  iEigenValues_ *= scale;
648  }
649 }
650 
651 
652 /******************************************************************************/
653 
655 {
656  for (size_t i = 0; i < size_; i++)
657  {
658  double lambda = 0;
659  Vdouble& row = generator_.getRow(i);
660 
661  for (size_t j = 0; j < size_; j++)
662  {
663  if (j != i)
664  lambda += row[j];
665  }
666  row[i] = -lambda;
667  }
668 }
669 
670 
671 /******************************************************************************/
672 
674 {
675  if (isScalable_)
676  setScale(1 / getScale());
677 }
678 
679 /******************************************************************************/
680 
682 {
683  MatrixTools::hadamardMult(exchangeability_, freq_, generator_, false); // Diagonal elements of the exchangeability matrix will be ignored.
684 
685  // Normalization:
686  setDiagonal();
687  normalize();
688 
690 }
691 
692 /******************************************************************************/
void addParameter_(Parameter *parameter)
void setParameterValue(const std::string &name, double value) override
std::string getNamespace() const override
bool hasParameter(const std::string &name) const override
virtual void updateMatrices_() override
Compute and diagonalize the matrix, and fill the eigenValues_, leftEigenVectors_ and rightEigenVecto...
RowMatrix< double > generator_
The generator matrix of the model.
bool isDiagonalizable_
boolean value for diagonalizability in R of the generator_
bool isNonSingular_
boolean value for non-singularity of rightEigenVectors_
std::vector< RowMatrix< double > > vPowGen_
vector of the powers of generator_ for Taylor development (if rightEigenVectors_ is singular).
Vdouble eigenValues_
The vector of eigen values.
AbstractSubstitutionModel(std::shared_ptr< const Alphabet > alpha, std::shared_ptr< const StateMapInterface > stateMap, const std::string &prefix)
void normalize()
normalize the generator
void setDiagonal()
set the diagonal of the generator such that sum on each line equals 0.
RowMatrix< double > exchangeability_
The exchangeability matrix of the model, defined as . When the model is reversible,...
const Matrix< double > & getdPij_dt(double t) const
Vdouble iEigenValues_
The vector of the imaginary part of the eigen values.
RowMatrix< double > leftEigenVectors_
The matrix made of left eigen vectors (by row) if rightEigenVectors_ is non-singular.
bool isScalable_
If the model is scalable (ie generator can be normalized automatically).
const Matrix< double > & getPij_t(double t) const
virtual void updateMatrices_()
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
RowMatrix< double > rightEigenVectors_
The matrix made of right eigen vectors (by column).
bool enableEigenDecomposition()
Tell if eigenValues and Vectors must be computed.
const Matrix< double > & getd2Pij_dt2(double t) const
void setScale(double scale)
Multiplies the current generator by the given scale.
RowMatrix< double > tmpMat_
For computational issues.
Partial implementation of the TransitionModel interface.
void setFreqFromData(const SequenceDataInterface &data, double pseudoCount=0) override
Set equilibrium frequencies equal to the frequencies estimated from the data.
void addRateParameter() override
add a "rate" parameter to the model, that handles the overall rate of the process.
RowMatrix< double > pijt_
These ones are for bookkeeping:
size_t size_
The number of states.
Vdouble freq_
The vector of equilibrium frequencies.
virtual void setRate(double rate) override
Set the rate of the model (must be positive).
int getAlphabetStateAsInt(size_t index) const override
virtual void updateMatrices_()=0
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
double rate_
The rate of the model (default: 1). The generator (and all its vectorial components) is independent o...
std::shared_ptr< const Alphabet > alphabet_
The alphabet relevant to this model.
size_t getNumberOfStates() const override
Get the number of states.
virtual double getRate() const override
The rate of the substitution process.
double getInitValue(size_t i, int state) const override
AbstractTransitionModel(std::shared_ptr< const Alphabet > alpha, std::shared_ptr< const StateMapInterface > stateMap, const std::string &prefix)
virtual void setFreq(std::map< int, double > &freqs) override
Set equilibrium frequencies.
static void displayMessage(const std::string &text)
virtual std::string getName() const =0
Get the name of the model.
const RowMatrix< Real > & getV() const
const std::vector< Real > & getImagEigenValues() const
const std::vector< Real > & getRealEigenValues() const
static Scalar inv(const Matrix< Scalar > &A, Matrix< Scalar > &O)
static void Taylor(const Matrix &A, size_t p, std::vector< RowMatrix< Scalar >> &vO)
static void pow(const Matrix &A, size_t p, Matrix &O)
static void copy(const MatrixA &A, MatrixO &O)
static void add(MatrixA &A, const MatrixB &B)
static void mult(const Matrix< Scalar > &A, const Matrix< Scalar > &B, Matrix< Scalar > &O)
static void getId(size_t n, Matrix &O)
static void diag(const std::vector< Scalar > &D, Matrix< Scalar > &O)
static void hadamardMult(const Matrix< Scalar > &A, const Matrix< Scalar > &B, Matrix< Scalar > &O)
static void scale(Matrix &A, Scalar a, Scalar b=0)
static double TINY()
static double NANO()
static double SMALL()
static T sqr(T a)
static const std::shared_ptr< IntervalConstraint > R_PLUS_STAR
Interface for reversible substitution models.
const std::vector< double > & getRow(size_t i) const
void resize(size_t nRows, size_t nCols)
static void getFrequencies(const SequenceContainerInterface &sc, std::map< int, double > &f, double pseudoCount=0)
static double exp(const T &x)
static T sum(const std::vector< T > &v1)
static std::vector< T > sqr(const std::vector< T > &v1)
std::string toString(T t)
Defines the basic types of data flow nodes.
std::vector< double > Vdouble
std::string to_string(const NoDimension &)
ExtendedFloat abs(const ExtendedFloat &ef)