bpp-phyl3 3.0.0
AbstractWordSubstitutionModel.cpp
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
8
10
11// From bpp-seq:
15
17
18using namespace bpp;
19
20// From the STL:
21#include <cmath>
22#include <complex>
23
24using namespace std;
25
26/******************************************************************************/
27
28AbstractWordSubstitutionModel::AbstractWordSubstitutionModel(
29 ModelList& modelList,
30 const std::string& prefix) :
33 modelList.getWordAlphabet(),
34 std::shared_ptr<const StateMapInterface>(new CanonicalStateMap(modelList.getWordAlphabet(), false)),
35 prefix),
36 newAlphabet_(true),
37 VSubMod_ (),
38 VnestedPrefix_(),
39 Vrate_ (modelList.size())
40{
41 size_t i, j;
42 size_t n = modelList.size();
43
44 // test whether two models are identical
45
46 bool flag = 0;
47 i = 0;
48 j = 1;
49 while (!flag && i < (n - 1))
50 {
51 if (modelList.getModel(i) == modelList.getModel(j))
52 flag = 1;
53 else
54 {
55 j++;
56 if (j == n)
57 {
58 i++;
59 j = i + 1;
60 }
61 }
62 }
63
64 if (!flag)
65 {
66 for (i = 0; i < n; i++)
67 {
68 VSubMod_.push_back(modelList.getModel(i));
69 VnestedPrefix_.push_back(modelList.getModel(i)->getNamespace());
70 VSubMod_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + VnestedPrefix_[i]);
72 }
73 }
74 else
75 {
76 string t = "";
77 for (i = 0; i < n; i++)
78 {
79 VSubMod_.push_back(modelList.getModel(0));
80 VnestedPrefix_.push_back(modelList.getModel(0)->getNamespace());
81 t += TextTools::toString(i + 1);
82 }
83 VSubMod_[0]->setNamespace(prefix + t + "_" + VnestedPrefix_[0]);
85 }
86
87 for (i = 0; i < n; i++)
88 {
89 Vrate_[i] = 1.0 / static_cast<double>(n);
90 }
91}
92
94 std::shared_ptr<const Alphabet> alph,
95 std::shared_ptr<const StateMapInterface> stateMap,
96 const string& prefix) :
98 AbstractSubstitutionModel(alph, stateMap, prefix),
99 newAlphabet_(false),
100 VSubMod_ (),
101 VnestedPrefix_(),
102 Vrate_ (0)
103{}
104
106 unique_ptr<SubstitutionModelInterface> pmodel,
107 unsigned int num,
108 const std::string& prefix) :
110 AbstractSubstitutionModel(make_unique<WordAlphabet>(pmodel->getAlphabet(), num), 0, prefix),
111 newAlphabet_(true),
112 VSubMod_ (),
113 VnestedPrefix_(),
114 Vrate_ (num, 1.0 / num)
115{
116 stateMap_ = std::shared_ptr<const StateMapInterface>(new CanonicalStateMap(getAlphabet(), false));
117
118 size_t i;
119
120 string t = "";
121 shared_ptr<SubstitutionModelInterface> pmodel2 = std::move(pmodel);
122 for (i = 0; i < num; i++)
123 {
124 VSubMod_.push_back(pmodel2);
125 VnestedPrefix_.push_back(pmodel2->getNamespace());
126 t += TextTools::toString(i + 1);
127 }
128
129 pmodel2->setNamespace(prefix + t + "_" + VnestedPrefix_[0]);
130 addParameters_(pmodel2->getParameters());
131}
132
134 const AbstractWordSubstitutionModel& wrsm) :
137 newAlphabet_(wrsm.newAlphabet_),
138 VSubMod_ (),
139 VnestedPrefix_(wrsm.VnestedPrefix_),
140 Vrate_ (wrsm.Vrate_)
141{
142 size_t i;
143 size_t num = wrsm.VSubMod_.size();
144
145 if (wrsm.newAlphabet_)
146 alphabet_ = make_shared<WordAlphabet>(dynamic_cast<const WordAlphabet&>(wrsm.alphabet()));
147
148 shared_ptr<SubstitutionModelInterface> pSM = nullptr;
149 if ((num > 1) & (wrsm.VSubMod_[0] == wrsm.VSubMod_[1]))
150 pSM.reset(wrsm.VSubMod_[0]->clone());
151
152 for (i = 0; i < num; ++i)
153 {
154 VSubMod_.push_back(pSM ? pSM : shared_ptr<SubstitutionModelInterface>(wrsm.VSubMod_[i]->clone()));
155 }
156}
157
160{
165 Vrate_ = model.Vrate_;
166
167 size_t i;
168 size_t num = model.VSubMod_.size();
169
170 if (model.newAlphabet_)
171 alphabet_ = make_shared<WordAlphabet>(dynamic_cast<const WordAlphabet&>(model.alphabet()));
172
173 shared_ptr<SubstitutionModelInterface> pSM = nullptr;
174 if ((num > 1) & (model.VSubMod_[0] == model.VSubMod_[1]))
175 pSM.reset(model.VSubMod_[0]->clone());
176
177 for (i = 0; i < num; ++i)
178 {
179 VSubMod_[i] = (pSM ? pSM : shared_ptr<SubstitutionModelInterface>(model.VSubMod_[i]->clone()));
180 }
181
182 return *this;
183}
184
185void AbstractWordSubstitutionModel::setNamespace(const std::string& prefix)
186{
188
189 if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
190 {
191 string t = "";
192 for (size_t i = 0; i < VSubMod_.size(); i++)
193 {
194 t += TextTools::toString(i + 1);
195 }
196 VSubMod_[0]->setNamespace(prefix + t + "_" + VnestedPrefix_[0]);
197 }
198 else
199 {
200 for (size_t i = 0; i < VSubMod_.size(); i++)
201 {
202 VSubMod_[i]->setNamespace(prefix + TextTools::toString(i + 1) + "_" + VnestedPrefix_[i]);
203 }
204 }
205}
206
207/******************************************************************************/
208
210{
211 // First we update position specific models. This need to be done
212 // here and not in fireParameterChanged, as some parameter aliases
213 // might have been defined and need to be resolved first.
214 if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
215 VSubMod_[0]->matchParametersValues(getParameters());
216 else
217 for (size_t i = 0; i < VSubMod_.size(); i++)
218 {
219 VSubMod_[i]->matchParametersValues(getParameters());
220 }
221
222 size_t nbmod = VSubMod_.size();
223 vector<bool> vnull; // vector of the indices of lines with only zeros
224
225 // Generator
226
227 size_t i, j, n, l, k, m;
228
229 vector<size_t> vsize;
230
231 for (k = 0; k < nbmod; k++)
232 {
233 vsize.push_back(VSubMod_[k]->getNumberOfStates());
234 }
235
236 RowMatrix<double> gk, exch;
237
238 // First fill of the generator from simple position generators
239
240 this->fillBasicGenerator_();
241
242 // modification of generator_
243
244 this->completeMatrices_();
245
246 // sets diagonal terms
247
248 setDiagonal();
249
250 // at that point generator_ (and possibly freq_) are done for models
251 // without enableEigenDecomposition
252
253 // Eigen values:
254
256 {
258 }
259 else // compute freq_ if no eigenDecomposition
260 {
261 if (computeFrequencies())
262 {
263 size_t salph = getNumberOfStates();
264 for (auto& fr : freq_)
265 {
266 fr = 1;
267 }
268
269 m = 1;
270 for (k = nbmod; k > 0; k--)
271 {
272 auto& pSM = VSubMod_[k - 1];
273 for (j = 0; j < vsize[k - 1]; ++j)
274 {
275 n = 0;
276 while (n < salph)
277 { // loop on prefix
278 for (l = 0; l < m; ++l)
279 { // loop on suffix
280 freq_[n + j * m + l] *= pSM->freq(j);
281 }
282 n += m * vsize[k - 1];
283 }
284 }
285 m *= vsize[k - 1];
286 }
287 // normalization
288 normalize();
289 }
290 }
291
292 // compute the exchangeability_
293 for (i = 0; i < size_; i++)
294 {
295 for (j = 0; j < size_; j++)
296 {
297 exchangeability_(i, j) = generator_(i, j) / freq_[j];
298 }
299 }
300}
301
302
304{
305 size_t nbmod = VSubMod_.size();
306 size_t salph = getNumberOfStates();
307
308 // Generator
309
311
312 vector<size_t> vsize;
313
314 for (size_t k = 0; k < nbmod; k++)
315 {
316 vsize.push_back(VSubMod_[k]->getNumberOfStates());
317 }
318
319 size_t m = 1;
320
321 for (size_t k = nbmod; k > 0; k--)
322 {
323 gk = VSubMod_[k - 1]->generator();
324 for (size_t i = 0; i < vsize[k - 1]; i++)
325 {
326 const vector<double>& row_gi = gk.getRow(i);
327
328 for (size_t j = 0; j < vsize[k - 1]; j++)
329 {
330 if (i != j)
331 {
332 size_t n = 0;
333 while (n < salph)
334 { // loop on prefix
335 for (size_t l = 0; l < m; l++)
336 { // loop on suffix
337 generator_(n + i * m + l, n + j * m + l) = row_gi[j] * Vrate_[k - 1];
338 }
339 n += m * vsize[k - 1];
340 }
341 }
342 }
343 }
344 m *= vsize[k - 1];
345 }
346}
347
348
349void AbstractWordSubstitutionModel::setFreq(std::map<int, double>& freqs)
350{
351 map<int, double> tmpFreq;
352 size_t nbmod = VSubMod_.size();
353
354 size_t i, j, s, k, d, size;
355 d = size = getNumberOfStates();
356
357 if (VSubMod_.size() < 2 || VSubMod_[0] == VSubMod_[1])
358 {
359 s = VSubMod_[0]->getAlphabet()->getSize();
360 for (j = 0; j < s; j++)
361 {
362 tmpFreq[static_cast<int>(j)] = 0;
363 }
364
365 for (i = 0; i < nbmod; i++)
366 {
367 d /= s;
368 for (k = 0; k < size; k++)
369 {
370 tmpFreq[static_cast<int>((k / d) % s)] += freqs[static_cast<int>(k)];
371 }
372 }
373
374 for (k = 0; k < s; k++)
375 {
376 tmpFreq[static_cast<int>(k)] /= static_cast<double>(nbmod);
377 }
378
379 VSubMod_[0]->setFreq(tmpFreq);
381 }
382 else
383 for (i = 0; i < nbmod; i++)
384 {
385 tmpFreq.clear();
386 s = VSubMod_[i]->getAlphabet()->getSize();
387 d /= s;
388 for (j = 0; j < s; j++)
389 {
390 tmpFreq[static_cast<int>(j)] = 0;
391 }
392 for (k = 0; k < size; k++)
393 {
394 tmpFreq[static_cast<int>((k / d) % s)] += freqs[static_cast<int>(k)];
395 }
396 VSubMod_[i]->setFreq(tmpFreq);
398 }
399}
void addParameters_(const ParameterList &parameters)
AbstractParameterAliasable & operator=(const AbstractParameterAliasable &ap)
void setNamespace(const std::string &prefix)
bool matchParametersValues(const ParameterList &parameters) override
const ParameterList & getParameters() const override
RowMatrix< double > generator_
The generator matrix of the model.
AbstractSubstitutionModel & operator=(const AbstractSubstitutionModel &model)
void normalize()
normalize the generator
void setDiagonal()
set the diagonal of the generator such that sum on each line equals 0.
RowMatrix< double > exchangeability_
The exchangeability matrix of the model, defined as . When the model is reversible,...
virtual void updateMatrices_()
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
bool enableEigenDecomposition()
Tell if eigenValues and Vectors must be computed.
std::shared_ptr< const StateMapInterface > stateMap_
The map of model states with alphabet states.
const Alphabet & alphabet() const override
size_t size_
The number of states.
Vdouble freq_
The vector of equilibrium frequencies.
std::shared_ptr< const Alphabet > getAlphabet() const override
std::shared_ptr< const Alphabet > alphabet_
The alphabet relevant to this model.
size_t getNumberOfStates() const override
Get the number of states.
Abstract Basal class for words of substitution models.
virtual void completeMatrices_()=0
Called by updateMatrices to handle specific modifications for inheriting classes.
virtual void setFreq(std::map< int, double > &freqs)
Estimation of the parameters of the models so that the equilibrium frequencies match the given ones.
virtual void fillBasicGenerator_()
First fill of the generator, from the position model.
void updateMatrices_()
Diagonalize the matrix, and fill the eigenValues_, iEigenValues_, leftEigenVectors_ and rightEigenVe...
bool newAlphabet_
boolean flag to check if a specific WordAlphabet has been built
AbstractWordSubstitutionModel(ModelList &modelList, const std::string &prefix)
Build a new AbstractWordSubstitutionModel object from a vector of pointers to SubstitutionModels.
std::vector< std::shared_ptr< SubstitutionModelInterface > > VSubMod_
AbstractWordSubstitutionModel & operator=(const AbstractWordSubstitutionModel &)
This class implements a state map where all resolved states are modeled.
Definition: StateMap.h:168
A list of models, for building a WordSubstitutionModel.
std::shared_ptr< SubstitutionModelInterface > getModel(size_t i)
const std::vector< double > & getRow(size_t i) const
Map the states of a given alphabet which have a model state.
Definition: StateMap.h:25
std::string toString(T t)
Defines the basic types of data flow nodes.