bpp-seq3  3.0.0
WordAlphabet.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_SEQ_ALPHABET_WORDALPHABET_H
6 #define BPP_SEQ_ALPHABET_WORDALPHABET_H
7 
8 
9 #include "AbstractAlphabet.h"
10 
11 // From the STL:
12 #include <string>
13 #include <vector>
14 
15 #include "../Sequence.h"
16 
17 namespace bpp
18 {
25 {
26 public:
28 
29  virtual ~CoreWordAlphabet() {}
30 
31  virtual unsigned int getLength() const = 0;
32 
33  virtual unsigned int getSize() const = 0;
34 
35  virtual bool hasUniqueAlphabet() const = 0;
36 
37  virtual std::shared_ptr<const Alphabet> getNAlphabet(size_t n) const = 0;
38 
39  virtual int getWord(const Sequence& seq, size_t pos = 0) const = 0;
40 
41 
51  virtual int getWord(const std::vector<int>& vint, size_t pos = 0) const = 0;
52 
63  virtual std::string getWord(const std::vector<std::string>& vpos, size_t pos = 0) const = 0;
64 
72  virtual int getNPosition(int word, size_t n) const = 0;
73 
80  virtual std::vector<int> getPositions(int word) const = 0;
81 
89  virtual std::string getNPosition(const std::string& word, size_t n) const = 0;
90 
97  virtual std::vector<std::string> getPositions(const std::string& word) const = 0;
98 
108  virtual std::unique_ptr<SequenceInterface> translate(const SequenceInterface& sequence, size_t pos = 0) const = 0;
109 
118  virtual std::unique_ptr<SequenceInterface> reverse(const SequenceInterface& sequence) const = 0;
119 
120 private:
127  virtual bool containsUnresolved(const std::string& state) const = 0;
128 
129  virtual bool containsGap(const std::string& state) const = 0;
130 
132 };
133 
134 
149  public virtual CoreWordAlphabet,
150  public AbstractAlphabet
151 {
152 protected:
153  std::vector< std::shared_ptr<const Alphabet>> vAbsAlph_;
154 
155 public:
156  // Constructor and destructor.
165  WordAlphabet(const std::vector< std::shared_ptr<const Alphabet>>& vAlpha);
166 
174  WordAlphabet(std::shared_ptr<const Alphabet> pAlpha, size_t num);
175 
177 
179  {
181  vAbsAlph_ = bia.vAbsAlph_;
182  return *this;
183  }
184 
// Polymorphic copy: builds a new WordAlphabet with the copy constructor and
// returns it as a raw pointer (presumably owned by the caller - confirm).
185  WordAlphabet* clone() const override { return new WordAlphabet(*this); }
189 
// Virtual destructor with no work of its own; members clean up after themselves.
190  virtual ~WordAlphabet() = default;
191 
192 public:
208  std::string getName(const std::string& state) const override;
209 
// Converts a word given as a string into its int code.
// - Throws BadCharException when the string length differs from the number of
//   positions of the word (vAbsAlph_.size()).
// - A word flagged by containsUnresolved() maps to getSize(); this test comes
//   first, so it wins over the gap test.
// - A word flagged by containsGap() maps to -1.
// - Anything else is delegated to AbstractAlphabet::charToInt().
210  int charToInt(const std::string& state) const override
211  {
212  if (vAbsAlph_.size() != state.size())
213  throw BadCharException(state, "WordAlphabet::charToInt", this);
214  if (containsUnresolved(state))
215  return static_cast<int>(getSize());
216  return containsGap(state) ? -1 : AbstractAlphabet::charToInt(state);
219  }
220 
// Alphabet size: the getNumberOfChars() count minus two.
// NOTE(review): the two excluded entries are presumably the gap word and the
// unknown word - confirm against build_().
221  unsigned int getSize() const override { return getNumberOfChars() - 2; }
225 
229  bool isResolvedIn(int state1, int state2) const override;
230 
236  bool hasUniqueAlphabet() const override;
237 
// Length of the word, i.e. the number of per-position alphabets held in vAbsAlph_.
242  unsigned int getLength() const override { return static_cast<unsigned int>(vAbsAlph_.size()); }
246 
247 
// Number of resolved states plus one for the unresolved state:
// getNumberOfChars() - 1, which is one more than getSize().
252  unsigned int getNumberOfTypes() const override { return getNumberOfChars() - 1; }
256 
257  std::string getAlphabetType() const override;
258 
// Int code of the unknown word: getSize() converted to int.
259  int getUnknownCharacterCode() const override { return static_cast<int>(getSize()); }
263 
264  bool isUnresolved(int state) const override { return state == getUnknownCharacterCode(); }
265  bool isUnresolved(const std::string& state) const override { return charToInt(state) == getUnknownCharacterCode(); }
266 
267  std::vector<int> getAlias(int state) const override;
268 
269  std::vector<std::string> getAlias(const std::string& state) const override;
270 
271  int getGeneric(const std::vector<int>& states) const override;
272 
273  std::string getGeneric(const std::vector<std::string>& states) const override;
274 
275 private:
281  bool containsUnresolved(const std::string& state) const override;
282  bool containsGap(const std::string& state) const override;
283  void build_();
286 public:
// Returns the alphabet used at position n of the word.
// Throws IndexOutOfBoundsException when n is not a valid index into vAbsAlph_.
// The exception now names this function; it previously reported
// "WordAlphabet::getNPosition", a copy-paste from the sibling method.
299  std::shared_ptr<const Alphabet> getNAlphabet(size_t n) const override
300  {
301  if (n >= vAbsAlph_.size())
302  throw IndexOutOfBoundsException("WordAlphabet::getNAlphabet", n, 0, vAbsAlph_.size());
303 
304  return vAbsAlph_[n];
305  }
306 
317  virtual int getWord(const Sequence& seq, size_t pos = 0) const override;
318 
319 
330  virtual int getWord(const std::vector<int>& vint, size_t pos = 0) const override;
331 
343  virtual std::string getWord(const std::vector<std::string>& vpos, size_t pos = 0) const override;
344 
// Returns the int code, in the alphabet of position n, of the n-th letter of
// the word whose int code is `word`.
// Throws IndexOutOfBoundsException when n is not a valid position.
352  int getNPosition(int word, size_t n) const override
353  {
354  if (!(n < vAbsAlph_.size()))
355  throw IndexOutOfBoundsException("WordAlphabet::getNPosition", n, 0, vAbsAlph_.size());
356 
357  // Spell the word out, then keep only the single character at position n.
358  const std::string letter = intToChar(word).substr(n, 1);
359  return vAbsAlph_[n]->charToInt(letter);
359  }
360 
// Returns the int code of every letter of the word whose int code is `word`.
// Letter i is translated with the alphabet stored at vAbsAlph_[i].
367  std::vector<int> getPositions(int word) const override
368  {
369  const std::string letters = intToChar(word);
370  std::vector<int> codes;
371  for (size_t pos = 0; pos != letters.size(); ++pos)
372  {
373  codes.push_back(vAbsAlph_[pos]->charToInt(std::string(1, letters[pos])));
374  }
375 
376  return codes;
377  }
// Returns the n-th letter of `word` as a one-character string.
// Throws BadCharException when n is not a valid position (n >= word length),
// or when charToInt() rejects the word (for instance because its length
// differs from the number of positions).
385  std::string getNPosition(const std::string& word, size_t n) const override
386  {
387  // Valid positions are 0 .. size()-1. The former `n > size()` test let
388  // n == size() through, which silently returned an empty string.
387  if (n >= vAbsAlph_.size())
388  throw BadCharException("", "WordAlphabet::getNPosition", this);
389  // Called only for validation: the result is discarded, a bad word throws.
390  charToInt(word);
391 
392  return word.substr(n, 1);
393  }
394 
395 
// Splits `word` into its letters, one single-character string per position.
// charToInt() is called first purely as validation; it throws on a bad word.
402  std::vector<std::string> getPositions(const std::string& word) const override
403  {
404  charToInt(word);
405  std::vector<std::string> letters;
406  for (const char letter : word)
407  {
408  letters.push_back(std::string(1, letter));
409  }
410 
411  return letters;
412  }
413 
423  std::unique_ptr<SequenceInterface> translate(const SequenceInterface& sequence, size_t = 0) const override;
424 
433  std::unique_ptr<SequenceInterface> reverse(const SequenceInterface& sequence) const override;
434 
// Size of the string coding one state: one character per position of the
// word, hence vAbsAlph_.size().
441  unsigned int getStateCodingSize() const override { return static_cast<unsigned int>(vAbsAlph_.size()); }
446 };
447 } // end of namespace bpp.
448 #endif // BPP_SEQ_ALPHABET_WORDALPHABET_H
A partial implementation of the Alphabet interface.
unsigned int getNumberOfChars() const
Get the number of supported characters in this alphabet, including generic characters (e....
AbstractAlphabet & operator=(const AbstractAlphabet &alph)
std::string intToChar(int state) const
Give the string description of a state given its int description.
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
An alphabet exception thrown when trying to specify a bad char to the alphabet.
The interface class for word alphabets.
Definition: WordAlphabet.h:25
virtual std::string getNPosition(const std::string &word, size_t n) const =0
Get the char code of the n-position of a word given its char description.
virtual bool containsUnresolved(const std::string &state) const =0
virtual int getWord(const std::vector< int > &vint, size_t pos=0) const =0
Get the int code for a word given the int code of the underlying positions.
virtual std::vector< std::string > getPositions(const std::string &word) const =0
Get the char codes of each position of a word given its char description.
virtual std::shared_ptr< const Alphabet > getNAlphabet(size_t n) const =0
virtual int getWord(const Sequence &seq, size_t pos=0) const =0
virtual std::unique_ptr< SequenceInterface > translate(const SequenceInterface &sequence, size_t pos=0) const =0
Translate a whole sequence from letters alphabet to words alphabet.
virtual std::unique_ptr< SequenceInterface > reverse(const SequenceInterface &sequence) const =0
Translate a whole sequence from words alphabet to letters alphabet.
virtual unsigned int getSize() const =0
virtual std::vector< int > getPositions(int word) const =0
Get the int codes of each position of a word given its int description.
virtual ~CoreWordAlphabet()
Definition: WordAlphabet.h:29
virtual bool containsGap(const std::string &state) const =0
virtual int getNPosition(int word, size_t n) const =0
Get the int code of the n-position of a word given its int description.
virtual unsigned int getLength() const =0
virtual bool hasUniqueAlphabet() const =0
virtual std::string getWord(const std::vector< std::string > &vpos, size_t pos=0) const =0
Get the char code for a word given the char code of the underlying positions.
The sequence interface.
Definition: Sequence.h:34
A basic implementation of the Sequence interface.
Definition: Sequence.h:117
The base class for word alphabets.
Definition: WordAlphabet.h:151
bool isUnresolved(int state) const override
Definition: WordAlphabet.h:264
unsigned int getLength() const override
Returns the length of the word.
Definition: WordAlphabet.h:242
WordAlphabet(const WordAlphabet &bia)
Definition: WordAlphabet.h:176
bool containsGap(const std::string &state) const override
virtual ~WordAlphabet()
Definition: WordAlphabet.h:190
bool containsUnresolved(const std::string &state) const override
virtual int getWord(const Sequence &seq, size_t pos=0) const override
Get the int code for a word given the int code of the underlying positions.
std::vector< std::shared_ptr< const Alphabet > > vAbsAlph_
Definition: WordAlphabet.h:153
unsigned int getNumberOfTypes() const override
Returns the number of resolved states + one for unresolved.
Definition: WordAlphabet.h:252
unsigned int getStateCodingSize() const override
Get the size of the string coding a state.
Definition: WordAlphabet.h:441
std::vector< int > getAlias(int state) const override
Get all resolved states that match a generic state.
bool isResolvedIn(int state1, int state2) const override
Tells if a given (potentially unresolved) state can be resolved in another resolved state.
bool hasUniqueAlphabet() const override
Returns true if the alphabets of all the letters in the word are of the same type.
std::string getAlphabetType() const override
Identification method.
int getUnknownCharacterCode() const override
Definition: WordAlphabet.h:259
int getNPosition(int word, size_t n) const override
Get the int code of the n-position of a word given its int description.
Definition: WordAlphabet.h:352
int getGeneric(const std::vector< int > &states) const override
Get the generic state that match a set of states.
std::unique_ptr< SequenceInterface > translate(const SequenceInterface &sequence, size_t=0) const override
Translate a whole sequence from letters alphabet to words alphabet.
std::unique_ptr< SequenceInterface > reverse(const SequenceInterface &sequence) const override
Translate a whole sequence from words alphabet to letters alphabet.
WordAlphabet(const std::vector< std::shared_ptr< const Alphabet >> &vAlpha)
Builds a new word alphabet from a vector of Alphabets.
int charToInt(const std::string &state) const override
Give the int description of a state given its string description.
Definition: WordAlphabet.h:210
std::string getNPosition(const std::string &word, size_t n) const override
Get the char code of the n-position of a word given its char description.
Definition: WordAlphabet.h:385
unsigned int getSize() const override
Definition: WordAlphabet.h:221
std::vector< std::string > getPositions(const std::string &word) const override
Get the char codes of each position of a word given its char description.
Definition: WordAlphabet.h:402
std::vector< int > getPositions(int word) const override
Get the int codes of each position of a word given its int description.
Definition: WordAlphabet.h:367
std::string getName(const std::string &state) const override
Get the complete name of a state given its string description.
std::shared_ptr< const Alphabet > getNAlphabet(size_t n) const override
Get the pointer to the Alphabet at the n-position.
Definition: WordAlphabet.h:299
WordAlphabet * clone() const override
Definition: WordAlphabet.h:185
bool isUnresolved(const std::string &state) const override
Definition: WordAlphabet.h:265
WordAlphabet & operator=(const WordAlphabet &bia)
Definition: WordAlphabet.h:178
This alphabet is used to deal with NumericAlphabet.