bpp-seq3  3.0.0
NucleicAlphabet.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_SEQ_ALPHABET_NUCLEICALPHABET_H
6 #define BPP_SEQ_ALPHABET_NUCLEICALPHABET_H
7 
8 #include <iostream>
9 #include <map>
10 #include <typeinfo>
11 
12 #include "LetterAlphabet.h"
13 #include "NucleicAlphabetState.h"
14 
15 namespace bpp
16 {
24  public LetterAlphabet
25 {
26 private:
27  std::map<int, size_t> binCodes_;
28  void updateBinMaps_(size_t pos, const NucleicAlphabetState& st)
29  {
30  if (binCodes_.find(st.getBinaryCode()) == binCodes_.end())
31  binCodes_[st.getBinaryCode()] = pos;
32  }
33 
34 public:
36 
38 
40  {
42  binCodes_ = bia.binCodes_;
43  return *this;
44  }
45 
46  virtual NucleicAlphabet* clone() const = 0;
47 
48  virtual ~NucleicAlphabet() {}
49 
50 protected:
56  {
57  NucleicAlphabetState* nst = dynamic_cast<NucleicAlphabetState*>(st);
58  if (!nst)
59  throw Exception("NucleicAlphabet::registerState. Incorrect alphabet type.");
62  }
63 
64  void setState(size_t pos, AlphabetState* st)
65  {
66  NucleicAlphabetState* nst = dynamic_cast<NucleicAlphabetState*>(st);
67  if (!nst)
68  throw Exception("NucleicAlphabet::setState. Incorrect alphabet type.");
69  LetterAlphabet::setState(pos, nst);
70  updateBinMaps_(pos, *nst);
71  }
72 
75 public:
80  const NucleicAlphabetState& getStateAt(size_t stateIndex) const
81  {
82  return dynamic_cast<const NucleicAlphabetState&>(
84  );
85  }
86  NucleicAlphabetState& getStateAt(size_t stateIndex)
87  {
88  return dynamic_cast<NucleicAlphabetState&>(
90  );
91  }
92  const NucleicAlphabetState& getState(const std::string& letter) const
93  {
94  return dynamic_cast<const NucleicAlphabetState&>(
96  );
97  }
98  const NucleicAlphabetState& getState(int num) const
99  {
100  return dynamic_cast<const NucleicAlphabetState&>(
102  );
103  }
120  {
121  std::map<int, size_t>::const_iterator it = binCodes_.find(code);
122  if (it == binCodes_.end())
123  throw BadIntException(code, "NucleicAlphabet::getState(unsigned char): Binary code not in alphabet", this);
124  return getStateAt(it->second);
125  }
126 
148  int subtract(int s1, int s2) const
149  {
150  return getStateByBinCode(getState(s1).getBinaryCode() & ~getState(s2).getBinaryCode()).getNum();
151  }
152 
173  std::string subtract(const std::string& s1, const std::string& s2) const
174  {
175  return intToChar(subtract(charToInt(s1), charToInt(s2)));
176  }
177 
199  int getOverlap(int s1, int s2) const
200  {
201  return getStateByBinCode(getState(s1).getBinaryCode() & getState(s2).getBinaryCode()).getNum();
202  }
203 
224  std::string getOverlap(const std::string& s1, const std::string& s2) const
225  {
226  return intToChar(getOverlap(charToInt(s1), charToInt(s2)));
227  }
228 
231 public:
232  // return 4 : A, C, G, T (or U)
233  unsigned int getSize() const { return 4; }
234 
235  // return 15 : gap isn't included, generic unresolved bases (N, X, ?, O, 0) count for one
236  unsigned int getNumberOfTypes() const { return 15; }
237 
238  int getUnknownCharacterCode() const { return 14; }
239 
240  bool isUnresolved(int state) const { return state > 3; }
241  bool isUnresolved(const std::string& state) const { return charToInt(state) > 3; }
242 };
243 } // end of namespace bpp.
244 #endif // BPP_SEQ_ALPHABET_NUCLEICALPHABET_H
virtual AlphabetState & getStateAt(size_t stateIndex)
Get a state at a position in the alphabet_ vector.
unsigned int getNumberOfChars() const
Get the number of supported characters in this alphabet, including generic characters (e....
const AlphabetState & getState(const std::string &letter) const
Get a state by its letter.
std::string intToChar(int state) const
Give the string description of a state given its int description.
This is the base class to describe states in an Alphabet.
Definition: AlphabetState.h:22
int getNum() const
Get the state's number.
Definition: AlphabetState.h:47
An alphabet exception thrown when trying to specify a bad int to the alphabet.
Specialized partial implementation of Alphabet using single letters.
void registerState(AlphabetState *st)
Add a state to the Alphabet.
LetterAlphabet & operator=(const LetterAlphabet &bia)
void setState(size_t pos, AlphabetState *st)
Set a state in the Alphabet.
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
This is the base class to describe states in a NucleicAlphabet.
int getBinaryCode() const
Get the state's binary representation.
The abstract base class for nucleic alphabets.
NucleicAlphabetState & getStateAt(size_t stateIndex)
Get a state at a position in the alphabet_ vector.
void setState(size_t pos, AlphabetState *st)
Set a state in the Alphabet.
virtual NucleicAlphabet * clone() const =0
int getOverlap(int s1, int s2) const
Get the overlap between two states.
const NucleicAlphabetState & getStateByBinCode(int code) const
Get a state by its binary representation.
int getUnknownCharacterCode() const
const NucleicAlphabetState & getState(const std::string &letter) const
Get a state by its letter.
unsigned int getNumberOfTypes() const
Get the number of distinct states in alphabet (e.g. return 15 for DNA alphabet). This is the number o...
bool isUnresolved(int state) const
const NucleicAlphabetState & getState(int num) const
Get a state by its num.
void registerState(AlphabetState *st)
Add a state to the Alphabet.
bool isUnresolved(const std::string &state) const
std::string subtract(const std::string &s1, const std::string &s2) const
Subtract states.
NucleicAlphabet & operator=(const NucleicAlphabet &bia)
std::string getOverlap(const std::string &s1, const std::string &s2) const
Get the overlap between two states.
unsigned int getSize() const
Get the number of resolved states in the alphabet (e.g. return 4 for DNA alphabet)....
void updateBinMaps_(size_t pos, const NucleicAlphabetState &st)
const NucleicAlphabetState & getStateAt(size_t stateIndex) const
Get a state at a position in the alphabet_ vector.
int subtract(int s1, int s2) const
Subtract states.
std::map< int, size_t > binCodes_
NucleicAlphabet(const NucleicAlphabet &bia)
This alphabet is used to deal with NumericAlphabet.