bpp-phyl3  3.0.0
CategorySubstitutionRegister.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_PHYL_MAPPING_CATEGORYSUBSTITUTIONREGISTER_H
6 #define BPP_PHYL_MAPPING_CATEGORYSUBSTITUTIONREGISTER_H
7 
8 
9 #include "SubstitutionRegister.h"
10 
11 namespace bpp
12 {
27 {
28 protected:
29  bool within_;
30  size_t nbCategories_;
31  mutable std::map<size_t, size_t> categories_;
32  std::vector<std::string> categoryNames_;
33  std::vector< std::vector<size_t>> index_;
34  std::vector< std::vector<size_t>> revIndex_;
35 
37 
38 public:
45  CategorySubstitutionRegister(std::shared_ptr<const StateMapInterface> stateMap, bool within = false) :
47  within_(within),
48  nbCategories_(0),
49  categories_(),
51  index_(),
52  revIndex_(),
54  {}
55 
56 protected:
57  template<class T>
58  void setAlphabetCategories(const std::map<int, T>& categories)
59  {
60  // We need to convert alphabet states into model states.
61  std::map<size_t, T> modelCategories;
62  for (typename std::map<int, T>::const_iterator it = categories.begin(); it != categories.end(); ++it)
63  {
64  std::vector<size_t> states = stateMap().getModelStates(it->first);
65  for (auto st : states)
66  {
67  modelCategories[st] = it->second;
68  }
69  }
70  // Then we forward the model categories:
71  setModelCategories<T>(modelCategories);
72  }
73 
74  template<class T>
75  void setModelCategories(const std::map<size_t, T>& categories)
76  {
77  // First index categories:
78  nbCategories_ = 0;
79  std::map<T, size_t> cats;
80  for (auto it = categories.begin(); it != categories.end(); ++it)
81  {
82  if (cats.find(it->second) == cats.end())
83  {
84  ++nbCategories_;
85  cats[it->second] = nbCategories_;
86  }
87  }
88 
89  // Now creates categories:
90  categories_.clear();
92  for (size_t i = 0; i < stateMap().getNumberOfModelStates(); ++i)
93  {
94  auto it = categories.find(i);
95  if (it != categories.end())
96  {
97  categories_[i] = cats[it->second];
98  categoryNames_[cats[it->second] - 1] += stateMap().getStateDescription(i);
99  }
100  else
101  {
102  categories_[i] = 0;
103  }
104  }
105 
106  size_t count = 1;
107  index_.resize(nbCategories_);
108  for (size_t i = 0; i < index_.size(); ++i)
109  {
110  index_[i].resize(nbCategories_);
111  for (size_t j = 0; j < index_.size(); ++j)
112  {
113  if (j != i)
114  {
115  index_[i][j] = count++;
116  std::vector<size_t> pos(2);
117  pos[0] = i;
118  pos[1] = j;
119  revIndex_.push_back(pos);
120  }
121  }
122  }
123  if (within_)
124  {
125  for (size_t i = 0; i < index_.size(); ++i)
126  {
127  index_[i][i] = count++;
128  std::vector<size_t> pos(2);
129  pos[0] = i;
130  pos[1] = i;
131  revIndex_.push_back(pos);
132  }
133  }
134  }
135 
136 public:
137  virtual size_t getCategory(size_t state) const
138  {
139  return categories_[state];
140  }
141 
142  virtual size_t getCategoryFrom(size_t type) const
143  {
144  if (type <= nbCategories_ * (nbCategories_ - 1))
145  {
146  return revIndex_[type - 1][0] + 1;
147  }
148  else
149  {
150  if (within_)
151  return revIndex_[type - 1][0] + 1;
152  else
153  throw Exception("CategorySubstitutionRegister::getCategoryFrom. Bad substitution type.");
154  }
155  }
156 
157  virtual size_t getCategoryTo(size_t type) const
158  {
159  if (type <= nbCategories_ * (nbCategories_ - 1))
160  {
161  return revIndex_[type - 1][1] + 1;
162  }
163  else
164  {
165  if (within_)
166  return revIndex_[type - 1][1] + 1;
167  else
168  throw Exception("CategorySubstitutionRegister::getCategoryTo. Bad substitution type.");
169  }
170  }
171 
172  virtual std::string getCategoryName(size_t category) const
173  {
174  return categoryNames_[category - 1];
175  }
176 
177  virtual bool allowWithin() const { return within_; }
178 
179  bool isStationary() const
180  {
181  return stationarity_;
182  }
183 
184  void setStationarity(bool stat)
185  {
186  stationarity_ = stat;
187  }
188 
189  size_t getNumberOfCategories() const { return nbCategories_; }
190 
191  size_t getNumberOfSubstitutionTypes() const { return nbCategories_ * (nbCategories_ - 1) + (within_ ? nbCategories_ : 0); }
192 
193  virtual size_t getType(size_t fromState, size_t toState) const
194  {
195  size_t fromCat = categories_[fromState];
196  size_t toCat = categories_[toState];
197  if (fromCat > 0 && toCat > 0)
198  return index_[fromCat - 1][toCat - 1];
199  else
200  return 0;
201  }
202 
203  std::string getTypeName(size_t type) const
204  {
205  return getCategoryName(getCategoryFrom(type)) + "->" + getCategoryName(getCategoryTo(type));
206  }
207 };
208 
209 
219 {
220 public:
221  ComprehensiveSubstitutionRegister(std::shared_ptr<const StateMapInterface> stateMap, bool within = false) :
223  {
224  std::map<int, int> categories;
225  for (int i = 0; i < static_cast<int>(stateMap->getNumberOfModelStates()); ++i)
226  {
227  categories[i] = i;
228  }
229  setAlphabetCategories<int>(categories);
230  }
231 
233 };
234 
245 {
246 public:
247  GCSubstitutionRegister(std::shared_ptr<const StateMapInterface> stateMap, bool within = false) :
249  {
250  std::map<int, int> categories;
251  categories[0] = 1;
252  categories[1] = 2;
253  categories[2] = 2;
254  categories[3] = 1;
255  setAlphabetCategories<int>(categories);
256  }
257 
258  GCSubstitutionRegister* clone() const { return new GCSubstitutionRegister(*this); }
259 };
260 
261 
277 {
278 private:
279  std::shared_ptr<const GeneticCode> genCode_;
280 
281 public:
282  GCSynonymousSubstitutionRegister(std::shared_ptr<const StateMapInterface> stateMap, std::shared_ptr<const GeneticCode> genCode, bool within = false) :
284  genCode_(genCode)
285  {
286  auto pCA = genCode_->getCodonAlphabet();
287 
288  std::map<int, int> categories;
289  for (int i = 0; i < static_cast<int>(pCA->getSize()); i++)
290  {
291  int n = pCA->getThirdPosition(i);
292  switch (n)
293  {
294  case 0:
295  case 3:
296  categories[i] = 1;
297  break;
298  case 1:
299  case 2:
300  categories[i] = 2;
301  break;
302  }
303  }
304  setAlphabetCategories<int>(categories);
305  }
306 
309  genCode_(reg.genCode_)
310  {}
311 
313  {
315  genCode_ = reg.genCode_;
316  return *this;
317  }
318 
320 
321 public:
322  size_t getType(size_t fromState, size_t toState) const
323  {
324  int x = stateMap().getAlphabetStateAsInt(fromState);
325  int y = stateMap().getAlphabetStateAsInt(toState);
326 
327  auto pCA = genCode_->getCodonAlphabet();
328 
329  if (genCode_->isStop(x) || genCode_->isStop( y) || !genCode_->areSynonymous(x, y))
330  return 0;
331 
332  // only substitutions between 3rd positions
333 
334  if ((pCA->getFirstPosition(x) != pCA->getFirstPosition(y)) ||
335  (pCA->getSecondPosition(x) != pCA->getSecondPosition(y)))
336  return 0;
337 
338  size_t fromCat = categories_[fromState];
339  size_t toCat = categories_[toState];
340 
341  if (fromCat > 0 && toCat > 0)
342  return index_[fromCat - 1][toCat - 1];
343  else
344  return 0;
345  }
346 
347  std::string getTypeName (size_t type) const
348  {
349  switch (type)
350  {
351  case 0:
352  return "no AT<->GC substitution or non-synonymous substitution";
353  case 1:
354  return "AT->GC synonymous";
355  case 2:
356  return "GC->AT synonymous";
357  case 3:
358  return "AT->AT synonymous";
359  case 4:
360  return "GC->GC synonymous";
361  default:
362  throw Exception("GCSynonymousSubstitutionRegister::getTypeName. Bad substitution type.");
363  }
364  }
365 };
366 
367 
380 {
381 private:
382  std::shared_ptr<const GeneticCode> genCode_;
383  size_t pos_;
384 
385 public:
387  std::shared_ptr<const StateMapInterface> stateMap,
388  std::shared_ptr<const GeneticCode> genCode,
389  size_t pos,
390  bool within = false) :
392  genCode_(genCode),
393  pos_(pos)
394  {
395  auto pCA = genCode_->getCodonAlphabet();
396 
397  std::map<int, int> categories;
398  for (int i = 0; i < static_cast<int>(pCA->getSize()); i++)
399  {
400  int n = pCA->getNPosition(i, pos_);
401  switch (n)
402  {
403  case 0:
404  case 3:
405  categories[i] = 1;
406  break;
407  case 1:
408  case 2:
409  categories[i] = 2;
410  break;
411  }
412  }
413  setAlphabetCategories<int>(categories);
414  }
415 
418  genCode_(reg.genCode_),
419  pos_(reg.pos_)
420  {}
421 
423  {
425  genCode_ = reg.genCode_;
426  pos_ = reg.pos_;
427  return *this;
428  }
429 
431 
432 public:
433  size_t getType(size_t fromState, size_t toState) const
434  {
435  int x = stateMap().getAlphabetStateAsInt(fromState);
436  int y = stateMap().getAlphabetStateAsInt(toState);
437 
438  auto pCA = genCode_->getCodonAlphabet();
439 
440  if (genCode_->isStop(x) || genCode_->isStop(y))
441  return 0;
442 
443  // only substitutions between Nth positions
444 
445  for (size_t pos = 0; pos < 3; pos++)
446  {
447  if (pos != pos_)
448  if (pCA->getNPosition(x, pos) != pCA->getNPosition(y, pos))
449  return 0;
450  }
451 
452  size_t fromCat = categories_[fromState];
453  size_t toCat = categories_[toState];
454 
455  if (fromCat > 0 && toCat > 0)
456  return index_[fromCat - 1][toCat - 1];
457  else
458  return 0;
459  }
460 
461  std::string getTypeName (size_t type) const
462  {
463  auto p(TextTools::toString(pos_ + 1));
464 
465  switch (type)
466  {
467  case 0:
468  return "no AT" + p + "<->GC" + p + " substitution";
469  case 1:
470  return "AT" + p + "->GC" + p;
471  case 2:
472  return "GC" + p + "->AT" + p;
473  case 3:
474  return "AT" + p + "->AT" + p;
475  case 4:
476  return "GC" + p + "->GC" + p;
477  default:
478  throw Exception("GCPositionSubstitutionRegister::getTypeName. Bad substitution type.");
479  }
480  }
481 };
482 } // end of namespace bpp.
483 #endif // BPP_PHYL_MAPPING_CATEGORYSUBSTITUTIONREGISTER_H
AbstractSubstitutionRegister & operator=(const AbstractSubstitutionRegister &asr)
const StateMapInterface & stateMap() const override
The CategorySubstitutionRegisters.
void setAlphabetCategories(const std::map< int, T > &categories)
virtual size_t getCategoryFrom(size_t type) const
virtual size_t getCategoryTo(size_t type) const
virtual std::string getCategoryName(size_t category) const
std::vector< std::vector< size_t > > revIndex_
virtual size_t getCategory(size_t state) const
std::vector< std::vector< size_t > > index_
void setModelCategories(const std::map< size_t, T > &categories)
std::string getTypeName(size_t type) const
Get the name of a given substitution type.
CategorySubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, bool within=false)
Build a new substitution register with categories. This class is meant to be inherited.
virtual size_t getType(size_t fromState, size_t toState) const
Get the substitution type far a given pair of model states.
Distinguishes all types of substitutions.
ComprehensiveSubstitutionRegister * clone() const
ComprehensiveSubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, bool within=false)
Distinguishes AT->GC vs GC->AT on given codon position (0, 1, or 2)
GCPositionSubstitutionRegister & operator=(const GCPositionSubstitutionRegister &reg)
GCPositionSubstitutionRegister * clone() const
size_t getType(size_t fromState, size_t toState) const
Get the substitution type far a given pair of model states.
std::string getTypeName(size_t type) const
Get the name of a given substitution type.
GCPositionSubstitutionRegister(const GCPositionSubstitutionRegister &reg)
std::shared_ptr< const GeneticCode > genCode_
GCPositionSubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, std::shared_ptr< const GeneticCode > genCode, size_t pos, bool within=false)
Distinguishes AT<->GC from GC<->AT.
GCSubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, bool within=false)
GCSubstitutionRegister * clone() const
Distinguishes AT->GC vs GC->AT inside synonymous substitutions on third codon position.
GCSynonymousSubstitutionRegister(const GCSynonymousSubstitutionRegister &reg)
GCSynonymousSubstitutionRegister * clone() const
std::shared_ptr< const GeneticCode > genCode_
std::string getTypeName(size_t type) const
Get the name of a given substitution type.
size_t getType(size_t fromState, size_t toState) const
Get the substitution type far a given pair of model states.
GCSynonymousSubstitutionRegister(std::shared_ptr< const StateMapInterface > stateMap, std::shared_ptr< const GeneticCode > genCode, bool within=false)
GCSynonymousSubstitutionRegister & operator=(const GCSynonymousSubstitutionRegister &reg)
virtual size_t getNumberOfModelStates() const =0
virtual std::string getStateDescription(size_t index) const =0
virtual std::vector< size_t > getModelStates(const std::string &code) const =0
virtual int getAlphabetStateAsInt(size_t index) const =0
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
Defines the basic types of data flow nodes.