bpp-phyl3 3.0.0
CategorySubstitutionRegister.h
Go to the documentation of this file.
1// SPDX-FileCopyrightText: The Bio++ Development Group
2//
3// SPDX-License-Identifier: CECILL-2.1
4
5#ifndef BPP_PHYL_MAPPING_CATEGORYSUBSTITUTIONREGISTER_H
6#define BPP_PHYL_MAPPING_CATEGORYSUBSTITUTIONREGISTER_H
7
8
10
11namespace bpp
12{
27{
28protected:
29 bool within_;
31 mutable std::map<size_t, size_t> categories_;
32 std::vector<std::string> categoryNames_;
33 std::vector< std::vector<size_t>> index_;
34 std::vector< std::vector<size_t>> revIndex_;
35
37
38public:
45 CategorySubstitutionRegister(std::shared_ptr<const StateMapInterface> stateMap, bool within = false) :
47 within_(within),
51 index_(),
52 revIndex_(),
54 {}
55
56protected:
57 template<class T>
58 void setAlphabetCategories(const std::map<int, T>& categories)
59 {
60 // We need to convert alphabet states into model states.
61 std::map<size_t, T> modelCategories;
62 for (typename std::map<int, T>::const_iterator it = categories.begin(); it != categories.end(); ++it)
63 {
64 std::vector<size_t> states = stateMap().getModelStates(it->first);
65 for (auto st : states)
66 {
67 modelCategories[st] = it->second;
68 }
69 }
70 // Then we forward the model categories:
71 setModelCategories<T>(modelCategories);
72 }
73
74 template<class T>
75 void setModelCategories(const std::map<size_t, T>& categories)
76 {
77 // First index categories:
78 nbCategories_ = 0;
79 std::map<T, size_t> cats;
80 for (auto it = categories.begin(); it != categories.end(); ++it)
81 {
82 if (cats.find(it->second) == cats.end())
83 {
85 cats[it->second] = nbCategories_;
86 }
87 }
88
89 // Now creates categories:
90 categories_.clear();
92 for (size_t i = 0; i < stateMap().getNumberOfModelStates(); ++i)
93 {
94 auto it = categories.find(i);
95 if (it != categories.end())
96 {
97 categories_[i] = cats[it->second];
98 categoryNames_[cats[it->second] - 1] += stateMap().getStateDescription(i);
99 }
100 else
101 {
102 categories_[i] = 0;
103 }
104 }
105
106 size_t count = 1;
107 index_.resize(nbCategories_);
108 for (size_t i = 0; i < index_.size(); ++i)
109 {
110 index_[i].resize(nbCategories_);
111 for (size_t j = 0; j < index_.size(); ++j)
112 {
113 if (j != i)
114 {
115 index_[i][j] = count++;
116 std::vector<size_t> pos(2);
117 pos[0] = i;
118 pos[1] = j;
119 revIndex_.push_back(pos);
120 }
121 }
122 }
123 if (within_)
124 {
125 for (size_t i = 0; i < index_.size(); ++i)
126 {
127 index_[i][i] = count++;
128 std::vector<size_t> pos(2);
129 pos[0] = i;
130 pos[1] = i;
131 revIndex_.push_back(pos);
132 }
133 }
134 }
135
136public:
137 virtual size_t getCategory(size_t state) const
138 {
139 return categories_[state];
140 }
141
142 virtual size_t getCategoryFrom(size_t type) const
143 {
144 if (type <= nbCategories_ * (nbCategories_ - 1))
145 {
146 return revIndex_[type - 1][0] + 1;
147 }
148 else
149 {
150 if (within_)
151 return revIndex_[type - 1][0] + 1;
152 else
153 throw Exception("CategorySubstitutionRegister::getCategoryFrom. Bad substitution type.");
154 }
155 }
156
157 virtual size_t getCategoryTo(size_t type) const
158 {
159 if (type <= nbCategories_ * (nbCategories_ - 1))
160 {
161 return revIndex_[type - 1][1] + 1;
162 }
163 else
164 {
165 if (within_)
166 return revIndex_[type - 1][1] + 1;
167 else
168 throw Exception("CategorySubstitutionRegister::getCategoryTo. Bad substitution type.");
169 }
170 }
171
172 virtual std::string getCategoryName(size_t category) const
173 {
174 return categoryNames_[category - 1];
175 }
176
177 virtual bool allowWithin() const { return within_; }
178
179 bool isStationary() const
180 {
181 return stationarity_;
182 }
183
184 void setStationarity(bool stat)
185 {
186 stationarity_ = stat;
187 }
188
189 size_t getNumberOfCategories() const { return nbCategories_; }
190
192
193 virtual size_t getType(size_t fromState, size_t toState) const
194 {
195 size_t fromCat = categories_[fromState];
196 size_t toCat = categories_[toState];
197 if (fromCat > 0 && toCat > 0)
198 return index_[fromCat - 1][toCat - 1];
199 else
200 return 0;
201 }
202
203 std::string getTypeName(size_t type) const
204 {
205 return getCategoryName(getCategoryFrom(type)) + "->" + getCategoryName(getCategoryTo(type));
206 }
207};
208
209
219{
220public:
221 ComprehensiveSubstitutionRegister(std::shared_ptr<const StateMapInterface> stateMap, bool within = false) :
223 {
224 std::map<int, int> categories;
225 for (int i = 0; i < static_cast<int>(stateMap->getNumberOfModelStates()); ++i)
226 {
227 categories[i] = i;
228 }
229 setAlphabetCategories<int>(categories);
230 }
231
233};
234
245{
246public:
247 GCSubstitutionRegister(std::shared_ptr<const StateMapInterface> stateMap, bool within = false) :
249 {
250 std::map<int, int> categories;
251 categories[0] = 1;
252 categories[1] = 2;
253 categories[2] = 2;
254 categories[3] = 1;
255 setAlphabetCategories<int>(categories);
256 }
257
259};
260
261
277{
278private:
279 std::shared_ptr<const GeneticCode> genCode_;
280
281public:
282 GCSynonymousSubstitutionRegister(std::shared_ptr<const StateMapInterface> stateMap, std::shared_ptr<const GeneticCode> genCode, bool within = false) :
284 genCode_(genCode)
285 {
286 auto pCA = genCode_->getCodonAlphabet();
287
288 std::map<int, int> categories;
289 for (int i = 0; i < static_cast<int>(pCA->getSize()); i++)
290 {
291 int n = pCA->getThirdPosition(i);
292 switch (n)
293 {
294 case 0:
295 case 3:
296 categories[i] = 1;
297 break;
298 case 1:
299 case 2:
300 categories[i] = 2;
301 break;
302 }
303 }
304 setAlphabetCategories<int>(categories);
305 }
306
309 genCode_(reg.genCode_)
310 {}
311
313 {
315 genCode_ = reg.genCode_;
316 return *this;
317 }
318
320
321public:
322 size_t getType(size_t fromState, size_t toState) const
323 {
324 int x = stateMap().getAlphabetStateAsInt(fromState);
325 int y = stateMap().getAlphabetStateAsInt(toState);
326
327 auto pCA = genCode_->getCodonAlphabet();
328
329 if (genCode_->isStop(x) || genCode_->isStop( y) || !genCode_->areSynonymous(x, y))
330 return 0;
331
332 // only substitutions between 3rd positions
333
334 if ((pCA->getFirstPosition(x) != pCA->getFirstPosition(y)) ||
335 (pCA->getSecondPosition(x) != pCA->getSecondPosition(y)))
336 return 0;
337
338 size_t fromCat = categories_[fromState];
339 size_t toCat = categories_[toState];
340
341 if (fromCat > 0 && toCat > 0)
342 return index_[fromCat - 1][toCat - 1];
343 else
344 return 0;
345 }
346
347 std::string getTypeName (size_t type) const
348 {
349 switch (type)
350 {
351 case 0:
352 return "no AT<->GC substitution or non-synonymous substitution";
353 case 1:
354 return "AT->GC synonymous";
355 case 2:
356 return "GC->AT synonymous";
357 case 3:
358 return "AT->AT synonymous";
359 case 4:
360 return "GC->GC synonymous";
361 default:
362 throw Exception("GCSynonymousSubstitutionRegister::getTypeName. Bad substitution type.");
363 }
364 }
365};
366
367
380{
381private:
382 std::shared_ptr<const GeneticCode> genCode_;
383 size_t pos_;
384
385public:
387 std::shared_ptr<const StateMapInterface> stateMap,
388 std::shared_ptr<const GeneticCode> genCode,
389 size_t pos,
390 bool within = false) :
392 genCode_(genCode),
393 pos_(pos)
394 {
395 auto pCA = genCode_->getCodonAlphabet();
396
397 std::map<int, int> categories;
398 for (int i = 0; i < static_cast<int>(pCA->getSize()); i++)
399 {
400 int n = pCA->getNPosition(i, pos_);
401 switch (n)
402 {
403 case 0:
404 case 3:
405 categories[i] = 1;
406 break;
407 case 1:
408 case 2:
409 categories[i] = 2;
410 break;
411 }
412 }
413 setAlphabetCategories<int>(categories);
414 }
415
418 genCode_(reg.genCode_),
419 pos_(reg.pos_)
420 {}
421
423 {
425 genCode_ = reg.genCode_;
426 pos_ = reg.pos_;
427 return *this;
428 }
429
431
432public:
433 size_t getType(size_t fromState, size_t toState) const
434 {
435 int x = stateMap().getAlphabetStateAsInt(fromState);
436 int y = stateMap().getAlphabetStateAsInt(toState);
437
438 auto pCA = genCode_->getCodonAlphabet();
439
440 if (genCode_->isStop(x) || genCode_->isStop(y))
441 return 0;
442
443 // only substitutions between Nth positions
444
445 for (size_t pos = 0; pos < 3; pos++)
446 {
447 if (pos != pos_)
448 if (pCA->getNPosition(x, pos) != pCA->getNPosition(y, pos))
449 return 0;
450 }
451
452 size_t fromCat = categories_[fromState];
453 size_t toCat = categories_[toState];
454
455 if (fromCat > 0 && toCat > 0)
456 return index_[fromCat - 1][toCat - 1];
457 else
458 return 0;
459 }
460
461 std::string getTypeName (size_t type) const
462 {
463 auto p(TextTools::toString(pos_ + 1));
464
465 switch (type)
466 {
467 case 0:
468 return "no AT" + p + "<->GC" + p + " substitution";
469 case 1:
470 return "AT" + p + "->GC" + p;
471 case 2:
472 return "GC" + p + "->AT" + p;
473 case 3:
474 return "AT" + p + "->AT" + p;
475 case 4:
476 return "GC" + p + "->GC" + p;
477 default:
478 throw Exception("GCPositionSubstitutionRegister::getTypeName. Bad substitution type.");
479 }
480 }
481};
482} // end of namespace bpp.
483#endif // BPP_PHYL_MAPPING_CATEGORYSUBSTITUTIONREGISTER_H
const StateMapInterface & stateMap() const override
AbstractSubstitutionRegister & operator=(const AbstractSubstitutionRegister &asr)
The CategorySubstitutionRegisters.
void setAlphabetCategories(const std::map< int, T > &categories)
virtual size_t getCategoryFrom(size_t type) const
virtual size_t getCategoryTo(size_t type) const
virtual std::string getCategoryName(size_t category) const
std::vector< std::vector< size_t > > revIndex_
virtual size_t getCategory(size_t state) const
std::vector< std::vector< size_t > > index_
void setModelCategories(const std::map< size_t, T > &categories)
std::string getTypeName(size_t type) const
Get the name of a given substitution type.
CategorySubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, bool within=false)
Build a new substitution register with categories. This class is meant to be inherited.
virtual size_t getType(size_t fromState, size_t toState) const
Get the substitution type far a given pair of model states.
Distinguishes all types of substitutions.
ComprehensiveSubstitutionRegister * clone() const
ComprehensiveSubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, bool within=false)
Distinguishes AT->GC vs GC->AT on given codon position (0, 1, or 2)
GCPositionSubstitutionRegister & operator=(const GCPositionSubstitutionRegister &reg)
size_t getType(size_t fromState, size_t toState) const
Get the substitution type far a given pair of model states.
std::string getTypeName(size_t type) const
Get the name of a given substitution type.
GCPositionSubstitutionRegister * clone() const
GCPositionSubstitutionRegister(const GCPositionSubstitutionRegister &reg)
std::shared_ptr< const GeneticCode > genCode_
GCPositionSubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, std::shared_ptr< const GeneticCode > genCode, size_t pos, bool within=false)
Distinguishes AT<->GC from GC<->AT.
GCSubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, bool within=false)
GCSubstitutionRegister * clone() const
Distinguishes AT->GC vs GC->AT inside synonymous substitutions on third codon position.
GCSynonymousSubstitutionRegister(const GCSynonymousSubstitutionRegister &reg)
std::shared_ptr< const GeneticCode > genCode_
std::string getTypeName(size_t type) const
Get the name of a given substitution type.
size_t getType(size_t fromState, size_t toState) const
Get the substitution type far a given pair of model states.
GCSynonymousSubstitutionRegister * clone() const
GCSynonymousSubstitutionRegister & operator=(const GCSynonymousSubstitutionRegister &reg)
GCSynonymousSubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, std::shared_ptr< const GeneticCode > genCode, bool within=false)
virtual size_t getNumberOfModelStates() const =0
virtual std::string getStateDescription(size_t index) const =0
virtual int getAlphabetStateAsInt(size_t index) const =0
virtual std::vector< size_t > getModelStates(const std::string &code) const =0
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
Defines the basic types of data flow nodes.