bpp-phyl3  3.0.0
UniformizationSubstitutionCount.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <vector>
6 
8 #include "Bpp/Numeric/NumTools.h"
10 
11 using namespace bpp;
12 using namespace std;
13 
14 /******************************************************************************/
15 
17  std::shared_ptr<const SubstitutionModelInterface> model,
18  std::shared_ptr<const SubstitutionRegisterInterface> reg,
19  std::shared_ptr<const AlphabetIndex2> weights,
20  std::shared_ptr<const AlphabetIndex2> distances) :
24  model_(model),
25  nbStates_(model->getNumberOfStates()),
26  bMatrices_(reg->getNumberOfSubstitutionTypes()),
27  power_(),
28  s_(reg->getNumberOfSubstitutionTypes()),
29  miu_(0),
30  counts_(reg->getNumberOfSubstitutionTypes()),
31  currentLength_(0)
32 {
33  // Check compatibility between model and substitution register:
34  if (model->getAlphabet()->getAlphabetType() != reg->getAlphabet()->getAlphabetType())
35  throw Exception("UniformizationSubstitutionCount (constructor): alphabets do not match between register and model.");
36 
37  // Initialize all B matrices according to substitution register. This is done once for all,
38  // unless the number of states changes:
41 
42  for (unsigned int i = 0; i < nbStates_; ++i)
43  {
44  double diagQ = abs(model_->Qij(i, i));
45  if (diagQ > miu_)
46  miu_ = diagQ;
47  }
48 
49  if (miu_ > 10000)
50  throw Exception("UniformizationSubstitutionCount::UniformizationSubstitutionCount The maximum diagonal values of generator is above 10000. Abort, chose another mapping method");
51 }
52 
54  const StateMapInterface& stateMap,
55  std::shared_ptr<const SubstitutionRegisterInterface> reg,
56  std::shared_ptr<const AlphabetIndex2> weights,
57  std::shared_ptr<const AlphabetIndex2> distances) :
61  model_(0),
62  nbStates_(stateMap.getNumberOfModelStates()),
63  bMatrices_(reg->getNumberOfSubstitutionTypes()),
64  power_(),
65  s_(reg->getNumberOfSubstitutionTypes()),
66  miu_(0),
67  counts_(reg->getNumberOfSubstitutionTypes()),
68  currentLength_(0)
69 {}
70 
71 /******************************************************************************/
72 
74 {
75  size_t nbTypes = register_->getNumberOfSubstitutionTypes();
76  bMatrices_.resize(nbTypes);
77  counts_.resize(nbTypes);
78  s_.resize(nbTypes);
79 }
80 
81 
83 {
84  // Re-initialize all B matrices according to substitution register.
85  for (size_t i = 0; i < register_->getNumberOfSubstitutionTypes(); ++i)
86  {
87  bMatrices_[i].resize(nbStates_, nbStates_);
88  counts_[i].resize(nbStates_, nbStates_);
89  }
90 }
91 
93 {
94  vector<int> supportedStates = model_->getAlphabetStates();
95  for (size_t j = 0; j < nbStates_; ++j)
96  {
97  for (size_t k = 0; k < nbStates_; ++k)
98  {
99  size_t i = register_->getType(j, k);
100  if (i > 0 && k != j)
101  {
102  bMatrices_[i - 1](j, k) = model_->Qij(j, k);
103  }
104  }
105  }
106 
107  if (distances_)
109 }
110 
112 {
113  vector<int> supportedStates = model_->getAlphabetStates();
114  for (size_t j = 0; j < nbStates_; ++j)
115  {
116  for (size_t k = 0; k < nbStates_; ++k)
117  {
118  size_t i = register_->getType(j, k);
119  if (i > 0 && k != j)
120  {
121  bMatrices_[i - 1](j, k) *= distances_->getIndex(supportedStates[j], supportedStates[k]);
122  }
123  }
124  }
125 }
126 
127 
128 /******************************************************************************/
129 
131 {
132  double lam = miu_ * length;
135  RowMatrix<double> R(model_->generator());
136  MatrixTools::scale(R, 1. / miu_);
137  MatrixTools::add(R, I);
138 
139  // compute the stopping point
140  // use the tail of Poisson distribution
141  // can be approximated by 4 + 6 * sqrt(lam) + lam
142  size_t nMax = static_cast<size_t>(ceil(4 + 6 * sqrt(lam) + lam));
143 
144  // compute the powers of R
145  power_.resize(nMax + 1);
146  power_[0] = I;
147  for (size_t i = 1; i < nMax + 1; ++i)
148  {
149  MatrixTools::mult(power_[i - 1], R, power_[i]);
150  }
151 
152  for (size_t i = 0; i < register_->getNumberOfSubstitutionTypes(); ++i)
153  {
154  s_[i].resize(nMax + 1);
155  MatrixTools::mult(bMatrices_[i], power_[0], s_[i][0]);
157  for (size_t l = 1; l < nMax + 1; ++l)
158  {
159  MatrixTools::mult(R, s_[i][l - 1], s_[i][l]);
160  MatrixTools::mult(bMatrices_[i], power_[l], tmp);
161  MatrixTools::add(s_[i][l], tmp);
162  }
163  MatrixTools::fill(counts_[i], 0);
164  for (size_t l = 0; l < nMax + 1; ++l)
165  {
166  tmp = s_[i][l];
167  // double f = (pow(lam, static_cast<double>(l + 1)) * exp(-lam) / static_cast<double>(NumTools::fact(l + 1))) / miu_;
168  double logF = static_cast<double>(l + 1) * log(lam) - lam - log(miu_) - NumTools::logFact(static_cast<double>(l + 1));
169  MatrixTools::scale(tmp, exp(logF));
170  MatrixTools::add(counts_[i], tmp);
171  }
172  }
173 
174  // Now we must divide by pijt and account for putative weights:
175  vector<int> supportedStates = model_->getAlphabetStates();
176  RowMatrix<double> P = model_->getPij_t(length);
177  for (size_t i = 0; i < register_->getNumberOfSubstitutionTypes(); i++)
178  {
179  for (size_t j = 0; j < nbStates_; j++)
180  {
181  for (size_t k = 0; k < nbStates_; k++)
182  {
183  counts_[i](j, k) /= P(j, k);
184  if (std::isinf(counts_[i](j, k)) || std::isnan(counts_[i](j, k)) || (!weights_ && counts_[i](j, k) < 0.))
185  counts_[i](j, k) = 0;
186  if (weights_)
187  counts_[i](j, k) *= weights_->getIndex(supportedStates[j], supportedStates[k]);
188  }
189  }
190  }
191 }
192 
193 /******************************************************************************/
194 
195 unique_ptr<Matrix<double>> UniformizationSubstitutionCount::getAllNumbersOfSubstitutions(double length, size_t type) const
196 {
197  if (!model_)
198  throw Exception("UniformizationSubstitutionCount::getAllNumbersOfSubstitutions: model not defined.");
199 
200  if (length < 0)
201  throw Exception("UniformizationSubstitutionCount::getAllNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
202  if (length != currentLength_)
203  {
204  computeCounts_(length);
205  currentLength_ = length;
206  }
207  return make_unique<RowMatrix<double>>(counts_[type - 1]);
208 }
209 
210 /******************************************************************************/
211 
212 void UniformizationSubstitutionCount::storeAllNumbersOfSubstitutions(double length, size_t type, Eigen::MatrixXd& mat) const
213 {
214  if (!model_)
215  throw Exception("UniformizationSubstitutionCount::storeAllNumbersOfSubstitutions: model not defined.");
216 
217  if (length < 0)
218  throw Exception("UniformizationSubstitutionCount::storeAllNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
219  if (length != currentLength_)
220  {
221  computeCounts_(length);
222  currentLength_ = length;
223  }
224 
225  mat.resize(Eigen::Index(nbStates_), Eigen::Index(nbStates_));
226 
227  const auto& ct = counts_[type - 1];
228  for (size_t i = 0; i < nbStates_; i++)
229  {
230  for (size_t j = 0; j < nbStates_; j++)
231  {
232  mat(Eigen::Index(i), Eigen::Index(j)) = isnan(ct(i, j)) ? 0 : ct(i, j);
233  }
234  }
235 }
236 
237 /******************************************************************************/
238 
239 double UniformizationSubstitutionCount::getNumberOfSubstitutions(size_t initialState, size_t finalState, double length, size_t type) const
240 {
241  if (!model_)
242  throw Exception("UniformizationSubstitutionCount::getNumberOfSubstitutions: model not defined.");
243 
244  if (length < 0)
245  throw Exception("UniformizationSubstitutionCount::getNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
246  if (length != currentLength_)
247  {
248  computeCounts_(length);
249  currentLength_ = length;
250  }
251  return counts_[type - 1](initialState, finalState);
252 }
253 
254 /******************************************************************************/
255 
256 std::vector<double> UniformizationSubstitutionCount::getNumberOfSubstitutionsPerType(size_t initialState, size_t finalState, double length) const
257 {
258  if (!model_)
259  throw Exception("UniformizationSubstitutionCount::getNumberOfSubstitutionsPerTye: model not defined.");
260 
261  if (length < 0)
262  throw Exception("UniformizationSubstitutionCount::getNumbersOfSubstitutions. Negative branch length: " + TextTools::toString(length) + ".");
263  if (length != currentLength_)
264  {
265  computeCounts_(length);
266  currentLength_ = length;
267  }
268  std::vector<double> v(getNumberOfSubstitutionTypes());
269  for (unsigned int t = 0; t < getNumberOfSubstitutionTypes(); ++t)
270  {
271  v[t] = counts_[t](initialState, finalState);
272  }
273  return v;
274 }
275 
276 /******************************************************************************/
277 
279  std::shared_ptr<const SubstitutionModelInterface> model)
280 {
281  model_ = model;
282 
283  if (!model_)
284  return;
285 
286  // Check compatibility between model and substitution register:
287  if (model->alphabet().getAlphabetType() != register_->alphabet().getAlphabetType())
288  throw Exception("UniformizationSubstitutionCount::setSubstitutionModel: alphabets do not match between register and model.");
289 
290  size_t n = model->getNumberOfStates();
291  if (n != nbStates_)
292  {
293  nbStates_ = n;
294  // Re-initialize all B matrices according to substitution register.
295  initBMatrices_();
296  }
297  fillBMatrices_();
298 
299  miu_ = 0;
300  for (size_t i = 0; i < nbStates_; ++i)
301  {
302  double diagQ = abs(model_->Qij(i, i));
303  if (diagQ > miu_)
304  miu_ = diagQ;
305  }
306 
307  if (miu_ > 10000)
308  throw Exception("UniformizationSubstitutionCount::setSubstitutionModel(). The maximum diagonal values of generator is above 10000. Abort, chose another mapping method.");
309 
310  // Recompute counts:
311  if (currentLength_ > 0)
313 }
314 
315 /******************************************************************************/
316 
318 {
319  if (!model_)
320  return;
321 
322  // Check compatibility between model and substitution register:
323  if (model_->getAlphabet()->getAlphabetType() != register_->getAlphabet()->getAlphabetType())
324  throw Exception("UniformizationSubstitutionCount::substitutionRegisterHasChanged: alphabets do not match between register and model.");
325 
326  resetBMatrices_();
327  initBMatrices_();
328  fillBMatrices_();
329 
330  // Recompute counts:
331  if (currentLength_ > 0)
333 }
334 
335 /******************************************************************************/
336 
338 {
339  if (!model_)
340  return;
341 
342  if (weights_->getAlphabet()->getAlphabetType() != register_->getAlphabet()->getAlphabetType())
343  throw Exception("UniformizationSubstitutionCount::weightsHaveChanged. Incorrect alphabet type.");
344 
345  // Recompute counts:
346  if (currentLength_ > 0)
348 }
349 
351 {
352  if (!model_)
353  return;
354 
355  if (distances_->getAlphabet()->getAlphabetType() != register_->getAlphabet()->getAlphabetType())
356  throw Exception("UniformizationSubstitutionCount::distancesHaveChanged. Incorrect alphabet type.");
357 
358  // Recompute counts:
360 
361  if (currentLength_ > 0)
363 }
364 
365 /******************************************************************************/
Partial implementation of the SubstitutionCount interface.
std::shared_ptr< const SubstitutionRegisterInterface > register_
Partial implementation of the SubstitutionDistance interface.
std::shared_ptr< const AlphabetIndex2 > distances_
Partial implementation of the WeightedSubstitutionCount interface.
std::shared_ptr< const AlphabetIndex2 > weights_
static void add(MatrixA &A, const MatrixB &B)
static void mult(const Matrix< Scalar > &A, const Matrix< Scalar > &B, Matrix< Scalar > &O)
static void getId(size_t n, Matrix &O)
static void scale(Matrix &A, Scalar a, Scalar b=0)
static void fill(Matrix &M, Scalar x)
static T logFact(T n)
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:25
virtual size_t getNumberOfSubstitutionTypes() const
Short cut function, equivalent to getSubstitutionRegister().getNumberOfSubstitutionTypes().
double getNumberOfSubstitutions(size_t initialState, size_t finalState, double length, size_t type=1) const override
Get the number of susbstitutions on a branch, given the initial and final states, and the branch leng...
std::vector< RowMatrix< double > > bMatrices_
void storeAllNumbersOfSubstitutions(double length, size_t type, Eigen::MatrixXd &mat) const override
Stores the numbers of susbstitutions on a branch, for each initial and final states,...
UniformizationSubstitutionCount(std::shared_ptr< const SubstitutionModelInterface > model, std::shared_ptr< const SubstitutionRegisterInterface > reg, std::shared_ptr< const AlphabetIndex2 > weights=nullptr, std::shared_ptr< const AlphabetIndex2 > distances=nullptr)
void setSubstitutionModel(std::shared_ptr< const SubstitutionModelInterface > model) override
Set the substitution model associated with this count, if relevant.
std::vector< std::vector< RowMatrix< double > > > s_
std::unique_ptr< Matrix< double > > getAllNumbersOfSubstitutions(double length, size_t type=1) const override
Get the numbers of susbstitutions on a branch, for each initial and final states, and given the branc...
std::vector< RowMatrix< double > > counts_
std::vector< double > getNumberOfSubstitutionsPerType(size_t initialState, size_t finalState, double length) const override
Get the numbers of susbstitutions on a branch for all types, for an initial and final states,...
std::shared_ptr< const SubstitutionModelInterface > model_
std::string toString(T t)
bool isinf(const double &d)
Defines the basic types of data flow nodes.
double log(const ExtendedFloat &ef)
ExtendedFloat exp(const ExtendedFloat &ef)
ExtendedFloat abs(const ExtendedFloat &ef)