bpp-phyl3  3.0.0
AbstractWordSubstitutionModel.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
8 
10 
11 // From bpp-seq:
15 
17 
18 using namespace bpp;
19 
20 // From the STL:
21 #include <cmath>
22 #include <complex>
23 
24 using namespace std;
25 
26 /******************************************************************************/
27 
29  ModelList& modelList,
30  const std::string& prefix) :
33  modelList.getWordAlphabet(),
34  std::shared_ptr<const StateMapInterface>(new CanonicalStateMap(modelList.getWordAlphabet(), false)),
35  prefix),
36  newAlphabet_(true),
37  VSubMod_ (),
38  VnestedPrefix_(),
39  Vrate_ (modelList.size())
40 {
41  size_t i, j;
42  size_t n = modelList.size();
43 
44  // test whether two models are identical
45 
46  bool flag = 0;
47  i = 0;
48  j = 1;
49  while (!flag && i < (n - 1))
50  {
51  if (modelList.getModel(i) == modelList.getModel(j))
52  flag = 1;
53  else
54  {
55  j++;
56  if (j == n)
57  {
58  i++;
59  j = i + 1;
60  }
61  }
62  }
63 
64  if (!flag)
65  {
66  for (i = 0; i < n; i++)
67  {
68  VSubMod_.push_back(modelList.getModel(i));
69  VnestedPrefix_.push_back(modelList.getModel(i)->getNamespace());
70  VSubMod_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + VnestedPrefix_[i]);
72  }
73  }
74  else
75  {
76  string t = "";
77  for (i = 0; i < n; i++)
78  {
79  VSubMod_.push_back(modelList.getModel(0));
80  VnestedPrefix_.push_back(modelList.getModel(0)->getNamespace());
81  t += TextTools::toString(i + 1);
82  }
83  VSubMod_[0]->setNamespace(prefix + t + "_" + VnestedPrefix_[0]);
85  }
86 
87  for (i = 0; i < n; i++)
88  {
89  Vrate_[i] = 1.0 / static_cast<double>(n);
90  }
91 }
92 
94  std::shared_ptr<const Alphabet> alph,
95  std::shared_ptr<const StateMapInterface> stateMap,
96  const string& prefix) :
98  AbstractSubstitutionModel(alph, stateMap, prefix),
99  newAlphabet_(false),
100  VSubMod_ (),
101  VnestedPrefix_(),
102  Vrate_ (0)
103 {}
104 
106  unique_ptr<SubstitutionModelInterface> pmodel,
107  unsigned int num,
108  const std::string& prefix) :
110  AbstractSubstitutionModel(make_unique<WordAlphabet>(pmodel->getAlphabet(), num), 0, prefix),
111  newAlphabet_(true),
112  VSubMod_ (),
113  VnestedPrefix_(),
114  Vrate_ (num, 1.0 / num)
115 {
116  stateMap_ = std::shared_ptr<const StateMapInterface>(new CanonicalStateMap(getAlphabet(), false));
117 
118  size_t i;
119 
120  string t = "";
121  shared_ptr<SubstitutionModelInterface> pmodel2 = std::move(pmodel);
122  for (i = 0; i < num; i++)
123  {
124  VSubMod_.push_back(pmodel2);
125  VnestedPrefix_.push_back(pmodel2->getNamespace());
126  t += TextTools::toString(i + 1);
127  }
128 
129  pmodel2->setNamespace(prefix + t + "_" + VnestedPrefix_[0]);
130  addParameters_(pmodel2->getParameters());
131 }
132 
134  const AbstractWordSubstitutionModel& wrsm) :
137  newAlphabet_(wrsm.newAlphabet_),
138  VSubMod_ (),
139  VnestedPrefix_(wrsm.VnestedPrefix_),
140  Vrate_ (wrsm.Vrate_)
141 {
142  size_t i;
143  size_t num = wrsm.VSubMod_.size();
144 
145  if (wrsm.newAlphabet_)
146  alphabet_ = make_shared<WordAlphabet>(dynamic_cast<const WordAlphabet&>(wrsm.alphabet()));
147 
148  shared_ptr<SubstitutionModelInterface> pSM = nullptr;
149  if ((num > 1) & (wrsm.VSubMod_[0] == wrsm.VSubMod_[1]))
150  pSM.reset(wrsm.VSubMod_[0]->clone());
151 
152  for (i = 0; i < num; ++i)
153  {
154  VSubMod_.push_back(pSM ? pSM : shared_ptr<SubstitutionModelInterface>(wrsm.VSubMod_[i]->clone()));
155  }
156 }
157 
159  const AbstractWordSubstitutionModel& model)
160 {
163  newAlphabet_ = model.newAlphabet_;
165  Vrate_ = model.Vrate_;
166 
167  size_t i;
168  size_t num = model.VSubMod_.size();
169 
170  if (model.newAlphabet_)
171  alphabet_ = make_shared<WordAlphabet>(dynamic_cast<const WordAlphabet&>(model.alphabet()));
172 
173  shared_ptr<SubstitutionModelInterface> pSM = nullptr;
174  if ((num > 1) & (model.VSubMod_[0] == model.VSubMod_[1]))
175  pSM.reset(model.VSubMod_[0]->clone());
176 
177  for (i = 0; i < num; ++i)
178  {
179  VSubMod_[i] = (pSM ? pSM : shared_ptr<SubstitutionModelInterface>(model.VSubMod_[i]->clone()));
180  }
181 
182  return *this;
183 }
184 
185 void AbstractWordSubstitutionModel::setNamespace(const std::string& prefix)
186 {
188 
189  if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
190  {
191  string t = "";
192  for (size_t i = 0; i < VSubMod_.size(); i++)
193  {
194  t += TextTools::toString(i + 1);
195  }
196  VSubMod_[0]->setNamespace(prefix + t + "_" + VnestedPrefix_[0]);
197  }
198  else
199  {
200  for (size_t i = 0; i < VSubMod_.size(); i++)
201  {
202  VSubMod_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + VnestedPrefix_[i]);
203  }
204  }
205 }
206 
207 /******************************************************************************/
208 
210 {
211  // First we update position specific models. This need to be done
212  // here and not in fireParameterChanged, as some parameter aliases
213  // might have been defined and need to be resolved first.
214  if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
215  VSubMod_[0]->matchParametersValues(getParameters());
216  else
217  for (size_t i = 0; i < VSubMod_.size(); i++)
218  {
219  VSubMod_[i]->matchParametersValues(getParameters());
220  }
221 
222  size_t nbmod = VSubMod_.size();
223  vector<bool> vnull; // vector of the indices of lines with only zeros
224 
225  // Generator
226 
227  size_t i, j, n, l, k, m;
228 
229  vector<size_t> vsize;
230 
231  for (k = 0; k < nbmod; k++)
232  {
233  vsize.push_back(VSubMod_[k]->getNumberOfStates());
234  }
235 
236  RowMatrix<double> gk, exch;
237 
238  // First fill of the generator from simple position generators
239 
240  this->fillBasicGenerator_();
241 
242  // modification of generator_
243 
244  this->completeMatrices_();
245 
246  // sets diagonal terms
247 
248  setDiagonal();
249 
250  // at that point generator_ (and possibly freq_) are done for models
251  // without enableEigenDecomposition
252 
253  // Eigen values:
254 
256  {
258  }
259  else // compute freq_ if no eigenDecomposition
260  {
261  if (computeFrequencies())
262  {
263  size_t salph = getNumberOfStates();
264  for (auto& fr : freq_)
265  {
266  fr = 1;
267  }
268 
269  m = 1;
270  for (k = nbmod; k > 0; k--)
271  {
272  auto& pSM = VSubMod_[k - 1];
273  for (j = 0; j < vsize[k - 1]; ++j)
274  {
275  n = 0;
276  while (n < salph)
277  { // loop on prefix
278  for (l = 0; l < m; ++l)
279  { // loop on suffix
280  freq_[n + j * m + l] *= pSM->freq(j);
281  }
282  n += m * vsize[k - 1];
283  }
284  }
285  m *= vsize[k - 1];
286  }
287  // normalization
288  normalize();
289  }
290  }
291 
292  // compute the exchangeability_
293  for (i = 0; i < size_; i++)
294  {
295  for (j = 0; j < size_; j++)
296  {
297  exchangeability_(i, j) = generator_(i, j) / freq_[j];
298  }
299  }
300 }
301 
302 
304 {
305  size_t nbmod = VSubMod_.size();
306  size_t salph = getNumberOfStates();
307 
308 // Generator
309 
311 
312  vector<size_t> vsize;
313 
314  for (size_t k = 0; k < nbmod; k++)
315  {
316  vsize.push_back(VSubMod_[k]->getNumberOfStates());
317  }
318 
319  size_t m = 1;
320 
321  for (size_t k = nbmod; k > 0; k--)
322  {
323  gk = VSubMod_[k - 1]->generator();
324  for (size_t i = 0; i < vsize[k - 1]; i++)
325  {
326  const vector<double>& row_gi = gk.getRow(i);
327 
328  for (size_t j = 0; j < vsize[k - 1]; j++)
329  {
330  if (i != j)
331  {
332  size_t n = 0;
333  while (n < salph)
334  { // loop on prefix
335  for (size_t l = 0; l < m; l++)
336  { // loop on suffix
337  generator_(n + i * m + l, n + j * m + l) = row_gi[j] * Vrate_[k - 1];
338  }
339  n += m * vsize[k - 1];
340  }
341  }
342  }
343  }
344  m *= vsize[k - 1];
345  }
346 }
347 
348 
349 void AbstractWordSubstitutionModel::setFreq(std::map<int, double>& freqs)
350 {
351  map<int, double> tmpFreq;
352  size_t nbmod = VSubMod_.size();
353 
354  size_t i, j, s, k, d, size;
355  d = size = getNumberOfStates();
356 
357  if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
358  {
359  s = VSubMod_[0]->getAlphabet()->getSize();
360  for (j = 0; j < s; j++)
361  {
362  tmpFreq[static_cast<int>(j)] = 0;
363  }
364 
365  for (i = 0; i < nbmod; i++)
366  {
367  d /= s;
368  for (k = 0; k < size; k++)
369  {
370  tmpFreq[static_cast<int>((k / d) % s)] += freqs[static_cast<int>(k)];
371  }
372  }
373 
374  for (k = 0; k < s; k++)
375  {
376  tmpFreq[static_cast<int>(k)] /= static_cast<double>(nbmod);
377  }
378 
379  VSubMod_[0]->setFreq(tmpFreq);
381  }
382  else
383  for (i = 0; i < nbmod; i++)
384  {
385  tmpFreq.clear();
386  s = VSubMod_[i]->getAlphabet()->getSize();
387  d /= s;
388  for (j = 0; j < s; j++)
389  {
390  tmpFreq[static_cast<int>(j)] = 0;
391  }
392  for (k = 0; k < size; k++)
393  {
394  tmpFreq[static_cast<int>((k / d) % s)] += freqs[static_cast<int>(k)];
395  }
396  VSubMod_[i]->setFreq(tmpFreq);
398  }
399 }
void addParameters_(const ParameterList &parameters)
AbstractParameterAliasable & operator=(const AbstractParameterAliasable &ap)
void setNamespace(const std::string &prefix)
bool matchParametersValues(const ParameterList &parameters) override
const ParameterList & getParameters() const override
RowMatrix< double > generator_
The generator matrix of the model.
AbstractSubstitutionModel & operator=(const AbstractSubstitutionModel &model)
void normalize()
normalize the generator
void setDiagonal()
set the diagonal of the generator such that sum on each line equals 0.
RowMatrix< double > exchangeability_
The exchangeability matrix of the model, defined as . When the model is reversible,...
virtual void updateMatrices_()
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
bool enableEigenDecomposition()
Tell if eigenValues and Vectors must be computed.
std::shared_ptr< const StateMapInterface > stateMap_
The map of model states with alphabet states.
std::shared_ptr< const Alphabet > getAlphabet() const override
size_t size_
The number of states.
Vdouble freq_
The vector of equilibrium frequencies.
const Alphabet & alphabet() const override
std::shared_ptr< const Alphabet > alphabet_
The alphabet relevant to this model.
size_t getNumberOfStates() const override
Get the number of states.
Abstract Basal class for words of substitution models.
virtual void completeMatrices_()=0
Called by updateMatrices to handle specific modifications for inheriting classes.
virtual void setFreq(std::map< int, double > &freqs)
Estimation of the parameters of the models so that the equilibrium frequencies match the given ones.
virtual void fillBasicGenerator_()
First fill of the generator, from the position model.
void updateMatrices_()
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
bool newAlphabet_
boolean flag to check if a specific WordAlphabet has been built
AbstractWordSubstitutionModel(ModelList &modelList, const std::string &prefix)
Build a new AbstractWordSubstitutionModel object from a vector of pointers to SubstitutionModels.
std::vector< std::shared_ptr< SubstitutionModelInterface > > VSubMod_
AbstractWordSubstitutionModel & operator=(const AbstractWordSubstitutionModel &)
This class implements a state map where all resolved states are modeled.
Definition: StateMap.h:168
A list of models, for building a WordSubstitutionModel.
std::shared_ptr< SubstitutionModelInterface > getModel(size_t i)
const std::vector< double > & getRow(size_t i) const
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:25
std::string toString(T t)
Defines the basic types of data flow nodes.