bpp-seq3  3.0.0
CodonAlphabet.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_SEQ_ALPHABET_CODONALPHABET_H
6 #define BPP_SEQ_ALPHABET_CODONALPHABET_H
7 
8 
9 #include "NucleicAlphabet.h"
10 #include "WordAlphabet.h"
11 
12 // From the STL:
13 #include <string>
14 #include <memory>
15 
16 namespace bpp
17 {
29  public virtual CoreWordAlphabet,
30  public AbstractAlphabet
31 {
32 protected:
33  std::shared_ptr<const NucleicAlphabet> nAlph_;
34 
35 public:
36  // Constructor and destructor.
37 
43  CodonAlphabet(std::shared_ptr<const NucleicAlphabet> alpha) :
45  nAlph_(alpha)
46  {
47  build_();
48  }
49 
51  AbstractAlphabet(bia),
52  nAlph_(bia.nAlph_)
53  {}
54 
56  {
58  nAlph_ = bia.nAlph_;
59 
60  return *this;
61  }
62 
63  CodonAlphabet* clone() const override
64  {
65  return new CodonAlphabet(*this);
66  }
67 
68  virtual ~CodonAlphabet()
69  {}
70 
71  std::string getAlphabetType() const override
72  {
73  return "Codon(letter=" + nAlph_->getAlphabetType() + ")";
74  }
75 
76 private:
/**
 * Predicate on the string description of a state.
 * charToInt() maps a 3-character state to the value of getSize() when this
 * returns true. Defined outside this header.
 */
82  bool containsUnresolved(const std::string& state) const override;
83 
/**
 * Predicate on the string description of a state.
 * charToInt() returns -1 when this returns true and containsUnresolved()
 * returned false. Defined outside this header.
 */
84  bool containsGap(const std::string& state) const override;
85 
/**
 * Initialisation helper invoked by the constructor that takes a nucleic
 * alphabet. Defined outside this header.
 * NOTE(review): presumably registers the codon states -- confirm in the
 * implementation file.
 */
86  void build_();
87 
90 public:
96  unsigned int getNumberOfTypes() const override { return 65; }
97 
98  unsigned int getSize() const override
99  {
100  return 64;
101  }
102 
103  int getUnknownCharacterCode() const override
104  {
105  return 64;
106  }
107 
108  bool isUnresolved(int state) const override
109  {
110  return state >= 64;
111  }
112 
113  bool isUnresolved(const std::string& state) const override
114  {
115  return isUnresolved(charToInt(state));
116  }
117 
/**
 * Tells if a given (potentially unresolved) state can be resolved in
 * another resolved state. Defined outside this header.
 */
118  bool isResolvedIn(int state1, int state2) const override;
119 
/**
 * Get all resolved states that match a generic state, as int codes.
 * Defined outside this header.
 */
120  std::vector<int> getAlias(int state) const override;
121 
/**
 * String-description counterpart of getAlias(int).
 * Defined outside this header.
 */
122  std::vector<std::string> getAlias(const std::string& state) const override;
123 
124  int getGeneric(const std::vector<int>& states) const override
125  {
126  return states[0];
127  }
128 
129  std::string getGeneric(const std::vector<std::string>& states) const override
130  {
131  return states[0];
132  }
133 
134  int charToInt(const std::string& state) const override
135  {
136  if (state.size() != 3)
137  throw BadCharException(state, "CodonAlphabet::charToInt", this);
138  if (containsUnresolved(state))
139  return static_cast<int>(getSize());
140  if (containsGap(state))
141  return -1;
142  else return AbstractAlphabet::charToInt(state);
143  }
144 
160  int getCodon(int pos1, int pos2, int pos3) const
161  {
162  return (nAlph_->isUnresolved(pos1)
163  || nAlph_->isUnresolved(pos2)
164  || nAlph_->isUnresolved(pos3)) ? getUnknownCharacterCode()
165  : pos3 + 4 * pos2 + 16 * pos1;
166  }
167 
183  std::string getCodon(const std::string& pos1, const std::string& pos2, const std::string& pos3) const
184  {
185  return pos1 + pos2 + pos3;
186  }
187 
194  int getFirstPosition(int codon) const
195  {
196  return isUnresolved(codon) ? nAlph_->charToInt("N") : codon / 16;
197  }
198 
205  int getSecondPosition(int codon) const
206  {
207  return isUnresolved(codon) ? nAlph_->charToInt("N") : (codon / 4) % 4;
208  }
209 
210 
217  int getThirdPosition(int codon) const
218  {
219  return isUnresolved(codon) ? nAlph_->charToInt("N") : codon % 4;
220  }
221 
228  std::string getFirstPosition (const std::string& codon) const
229  {
230  return codon.substr(0, 1);
231  }
232 
233 
240  std::string getSecondPosition(const std::string& codon) const
241  {
242  return codon.substr(1, 1);
243  }
244 
245 
252  std::string getThirdPosition(const std::string& codon) const
253  {
254  return codon.substr(2, 1);
255  }
256 
257 
263  unsigned int getLength() const override
264  {
265  return 3;
266  }
267 
268  bool hasUniqueAlphabet() const override
269  {
270  return true;
271  }
272 
273  std::shared_ptr<const Alphabet> getNAlphabet(size_t n) const override
274  {
275  return nAlph_;
276  }
277 
278  int getWord(const Sequence& seq, size_t pos = 0) const override
279  {
280  if (seq.size() < pos + 3)
281  throw IndexOutOfBoundsException("CodonAlphabet::getWord", pos, 0, seq.size() - 3);
282  return getCodon(seq[pos], seq[pos + 1], seq[pos + 2]);
283  }
284 
295  std::string getWord(const std::vector<std::string>& vpos, size_t pos = 0) const override
296  {
297  if (vpos.size() < pos + 3)
298  throw IndexOutOfBoundsException("CodonAlphabet::getWord", pos, 0, vpos.size() - 3);
299 
300  return getCodon(vpos[pos], vpos[pos + 1], vpos[pos + 2]);
301  }
302 
303  int getWord(const std::vector<int>& vpos, size_t pos = 0) const override
304  {
305  if (vpos.size() < pos + 3)
306  throw IndexOutOfBoundsException("CodonAlphabet::getWord", pos, 0, vpos.size() - 3);
307 
308  return getCodon(vpos[pos], vpos[pos + 1], vpos[pos + 2]);
309  }
310 
311 
312  int getNPosition(int codon, size_t pos) const override
313  {
314  if (isUnresolved(codon))
315  return nAlph_->getUnknownCharacterCode();
316  else
317  return pos == 0 ? codon / 16 :
318  (pos == 1 ? (codon / 4) % 4
319  : codon % 4);
320  }
321 
328  std::vector<int> getPositions(int word) const override
329  {
330  if (isUnresolved(word))
331  {
332  int n = nAlph_->getUnknownCharacterCode();
333  return std::vector<int>{n, n, n};
334  }
335  else
336  return std::vector<int>{word / 16, (word / 4) % 4, word % 4}
337  ;
338  }
339 
340 
348  std::string getNPosition(const std::string& codon, size_t pos) const override
349  {
350  return codon.substr(pos, 1);
351  }
352 
359  std::vector<std::string> getPositions(const std::string& word) const override
360  {
361  return std::vector<std::string>{word.substr(0, 1), word.substr(1, 1), word.substr(2, 1)};
362  }
363 
/**
 * Translate a whole sequence from letters alphabet to words alphabet.
 * Defined outside this header.
 * NOTE(review): the unnamed size_t argument (default 0) is presumably the
 * start position in the input sequence -- confirm in the implementation.
 */
373  std::unique_ptr<SequenceInterface> translate(const SequenceInterface& sequence, size_t = 0) const override;
374 
/**
 * Translate a whole sequence from words alphabet to letters alphabet.
 * Defined outside this header.
 */
383  std::unique_ptr<SequenceInterface> reverse(const SequenceInterface& sequence) const override;
384 
385  /*
386  *
387  * @}
388  */
389 
/**
 * Get the number of G+C in codon, given the codon's int description.
 * Defined outside this header.
 */
396  int getGCinCodon(int codon) const;
397 
401  std::shared_ptr<const NucleicAlphabet> getNucleicAlphabet() const
402  {
403  return nAlph_;
404  }
405 
410  unsigned int getStateCodingSize() const override { return 3; }
412 };
413 } // end of namespace bpp.
414 #endif // BPP_SEQ_ALPHABET_CODONALPHABET_H
A partial implementation of the Alphabet interface.
AbstractAlphabet & operator=(const AbstractAlphabet &alph)
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
size_t size() const override
Get the number of elements in the list.
Definition: SymbolList.h:124
An alphabet exception thrown when trying to specify a bad char to the alphabet.
Codon alphabet class.
Definition: CodonAlphabet.h:31
std::vector< int > getAlias(int state) const override
Get all resolved states that match a generic state.
bool isResolvedIn(int state1, int state2) const override
Tells if a given (potentially unresolved) state can be resolved in another resolved state.
std::vector< int > getPositions(int word) const override
Get the int codes of each position of a word given its int description.
bool isUnresolved(const std::string &state) const override
std::unique_ptr< SequenceInterface > reverse(const SequenceInterface &sequence) const override
Translate a whole sequence from words alphabet to letters alphabet.
int getGCinCodon(int codon) const
Get the number of G+C in codon.
int getWord(const Sequence &seq, size_t pos=0) const override
bool isUnresolved(int state) const override
bool containsGap(const std::string &state) const override
int getThirdPosition(int codon) const
Get the int code of the third position of a codon given its int description.
int getWord(const std::vector< int > &vpos, size_t pos=0) const override
Get the int code for a word given the int code of the underlying positions.
int charToInt(const std::string &state) const override
Give the int description of a state given its string description.
std::string getNPosition(const std::string &codon, size_t pos) const override
Get the char code of the Nth position of a codon given its char description.
int getNPosition(int codon, size_t pos) const override
Get the int code of the n-position of a word given its int description.
unsigned int getLength() const override
std::string getWord(const std::vector< std::string > &vpos, size_t pos=0) const override
Get the char code for a word given the char code of the underlying positions.
unsigned int getStateCodingSize() const override
Get the size of the string coding a state.
int getUnknownCharacterCode() const override
std::string getAlphabetType() const override
Identification method.
Definition: CodonAlphabet.h:71
int getFirstPosition(int codon) const
Get the int code of the first position of a codon given its int description.
CodonAlphabet * clone() const override
Definition: CodonAlphabet.h:63
std::unique_ptr< SequenceInterface > translate(const SequenceInterface &sequence, size_t=0) const override
Translate a whole sequence from letters alphabet to words alphabet.
std::string getGeneric(const std::vector< std::string > &states) const override
Get the generic state that matches a set of states.
std::shared_ptr< const NucleicAlphabet > nAlph_
Definition: CodonAlphabet.h:33
std::shared_ptr< const Alphabet > getNAlphabet(size_t n) const override
CodonAlphabet & operator=(const CodonAlphabet &bia)
Definition: CodonAlphabet.h:55
std::string getFirstPosition(const std::string &codon) const
Get the char code of the first position of a codon given its char description.
virtual ~CodonAlphabet()
Definition: CodonAlphabet.h:68
CodonAlphabet(std::shared_ptr< const NucleicAlphabet > alpha)
Builds a new codon alphabet from a nucleic alphabet.
Definition: CodonAlphabet.h:43
std::string getCodon(const std::string &pos1, const std::string &pos2, const std::string &pos3) const
Get the char code for a codon given the char code of the three underlying positions.
unsigned int getNumberOfTypes() const override
Get the number of distinct states in alphabet (e.g. return 15 for DNA alphabet). This is the number o...
Definition: CodonAlphabet.h:96
std::string getSecondPosition(const std::string &codon) const
Get the char code of the second position of a codon given its char description.
unsigned int getSize() const override
Definition: CodonAlphabet.h:98
std::string getThirdPosition(const std::string &codon) const
Get the char code of the third position of a codon given its char description.
CodonAlphabet(const CodonAlphabet &bia)
Definition: CodonAlphabet.h:50
std::vector< std::string > getPositions(const std::string &word) const override
Get the char codes of each position of a word given its char description.
std::shared_ptr< const NucleicAlphabet > getNucleicAlphabet() const
int getCodon(int pos1, int pos2, int pos3) const
Get the int code for a codon given the int code of the three underlying positions.
bool containsUnresolved(const std::string &state) const override
int getSecondPosition(int codon) const
Get the int code of the second position of a codon given its int description.
int getGeneric(const std::vector< int > &states) const override
Get the generic state that matches a set of states.
bool hasUniqueAlphabet() const override
The interface class for word alphabets.
Definition: WordAlphabet.h:25
The sequence interface.
Definition: Sequence.h:34
A basic implementation of the Sequence interface.
Definition: Sequence.h:117
This alphabet is used to deal with NumericAlphabet.