bpp-seq3  3.0.0
AbstractAlphabet.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Text/TextTools.h>
6 #include <Bpp/Utils/MapTools.h>
7 
8 #include "AbstractAlphabet.h"
9 #include "AlphabetExceptions.h"
10 
11 using namespace bpp;
12 
13 // From the STL:
14 #include <ctype.h>
15 #include <map>
16 #include <iostream>
17 
18 using namespace std;
19 
20 /******************************************************************************/
21 
22 void AbstractAlphabet::updateMaps_(size_t pos, const AlphabetState& st)
23 {
24  if (letters_.find(st.getLetter()) == letters_.end())
25  letters_[st.getLetter()] = pos;
26  else
27  throw Exception("AbstractAlphabet::updateMaps_. A state with the same character code already exists! " + st.getLetter() + ".");
28  if (nums_.find(st.getNum()) == nums_.end())
29  nums_[st.getNum()] = pos;
30  else
31  nums_[st.getNum()] = min(pos, nums_[st.getNum()]);
32 }
33 
34 /******************************************************************************/
35 
37 {
38  // Add the state to the vector
39  alphabet_.push_back(st);
40  // Update the maps
41  updateMaps_(alphabet_.size() - 1, *st);
42 }
43 
44 /******************************************************************************/
45 
47 {
48  if (pos > alphabet_.size())
49  throw IndexOutOfBoundsException("AbstractAlphabet::setState: incorrect position", pos, 0, alphabet_.size());
50  // Delete the state if not empty
51  if (alphabet_[pos] != 0)
52  delete alphabet_[pos];
53  // Put the state in the vector
54  alphabet_[pos] = st;
55  // Update the maps
56  updateMaps_(pos, *st);
57 }
58 
59 /******************************************************************************/
60 
61 const AlphabetState& AbstractAlphabet::getState(const std::string& letter) const
62 {
63  map<string, size_t>::const_iterator it = letters_.find(letter);
64  if (it == letters_.end())
65  throw BadCharException(letter, "AbstractAlphabet::getState(string): Specified base unknown", this);
66  return *(alphabet_[it->second]);
67 }
68 
69 /******************************************************************************/
70 
71 size_t AbstractAlphabet::getStateIndex(const std::string& letter) const
72 {
73  map<string, size_t>::const_iterator it = letters_.find(letter);
74  if (it == letters_.end())
75  throw BadCharException(letter, "AbstractAlphabet::getStateIndex(string): Specified base unknown", this);
76  return it->second;
77 }
78 
79 /******************************************************************************/
80 
82 {
83  map<int, size_t>::const_iterator it = nums_.find(num);
84  if (it == nums_.end())
85  throw BadIntException(num, "AbstractAlphabet::getState(int): Specified base unknown", this);
86  return *(alphabet_[it->second]);
87 }
88 
89 /******************************************************************************/
90 
91 size_t AbstractAlphabet::getStateIndex(int num) const
92 {
93  map<int, size_t>::const_iterator it = nums_.find(num);
94  if (it == nums_.end())
95  throw BadIntException(num, "AbstractAlphabet::getStateIndex(int): Specified base unknown", this);
96  return it->second;
97 }
98 
99 /******************************************************************************/
100 
101 AlphabetState& AbstractAlphabet::getState(const std::string& letter)
102 {
103  map<string, size_t>::iterator it = letters_.find(letter);
104  if (it == letters_.end())
105  throw BadCharException(letter, "AbstractAlphabet::getState(string): Specified base unknown", this);
106  return *(alphabet_[it->second]);
107 }
108 
109 /******************************************************************************/
110 
112 {
113  map<int, size_t>::iterator it = nums_.find(num);
114  if (it == nums_.end())
115  throw BadIntException(num, "AbstractAlphabet::getState(int): Specified base unknown", this);
116  return *(alphabet_[it->second]);
117 }
118 
119 /******************************************************************************/
120 
122 {
123  if (pos > alphabet_.size())
124  throw IndexOutOfBoundsException("AbstractAlphabet::getStateAt: incorrect position", pos, 0, alphabet_.size());
125  return *(alphabet_[pos]);
126 }
127 
128 /******************************************************************************/
129 
131 {
132  if (pos > alphabet_.size())
133  throw IndexOutOfBoundsException("AbstractAlphabet::getStateAt: incorrect position", pos, 0, alphabet_.size());
134  return *(alphabet_[pos]);
135 }
136 
137 /******************************************************************************/
138 
139 std::string AbstractAlphabet::getName(const std::string& state) const
140 {
141  return (getState(state)).getName();
142 }
143 
144 /******************************************************************************/
145 
146 std::string AbstractAlphabet::getName(int state) const
147 {
148  return (getState(state)).getName();
149 }
150 
151 /******************************************************************************/
152 
153 int AbstractAlphabet::charToInt(const std::string& state) const
154 {
155  return getState(state).getNum();
156 }
157 
158 /******************************************************************************/
159 
160 std::string AbstractAlphabet::intToChar(int state) const
161 {
162  return (getState(state)).getLetter();
163 }
164 
165 /******************************************************************************/
166 
168 {
169  map<int, size_t>::const_iterator it = nums_.find(state);
170  if (it != nums_.end())
171  return true;
172  return false;
173 }
174 
175 /******************************************************************************/
176 
177 bool AbstractAlphabet::isCharInAlphabet(const std::string& state) const
178 {
179  map<string, size_t>::const_iterator it = letters_.find(state);
180  if (it != letters_.end())
181  return true;
182  return false;
183 }
184 
185 /******************************************************************************/
186 
187 bool AbstractAlphabet::isResolvedIn(int state1, int state2) const
188 {
189  if (state1 < 0 || !isIntInAlphabet(state1))
190  throw BadIntException(state1, "AbstractAlphabet::isResolvedIn(int, int): Specified base " + intToChar(state1) + " is unknown.", this);
191 
192  if (state2 < 0 || !isIntInAlphabet(state2))
193  throw BadIntException(state2, "AbstractAlphabet::isResolvedIn(int, int): Specified base " + intToChar(state2) + " is unknown.", this);
194 
195  if (isUnresolved(state2))
196  throw BadIntException(state2, "AbstractAlphabet::isResolvedIn(int, int): Unresolved base " + intToChar(state2), this);
197 
198  return state1 == state2;
199 }
200 
201 /******************************************************************************/
202 
203 std::vector<int> AbstractAlphabet::getAlias(int state) const
204 {
205  if (!isIntInAlphabet(state))
206  throw BadIntException(state, "AbstractAlphabet::getAlias(int): Specified base unknown.", this);
207  vector<int> v(1);
208  v[0] = state;
209  return v;
210 }
211 
212 /******************************************************************************/
213 
214 std::vector<std::string> AbstractAlphabet::getAlias(const std::string& state) const
215 {
216  if (!isCharInAlphabet(state))
217  throw BadCharException(state, "AbstractAlphabet::getAlias(char): Specified base unknown.", this);
218  vector<string> v(1);
219  v[0] = state;
220  return v;
221 }
222 
223 /******************************************************************************/
224 
225 int AbstractAlphabet::getGeneric(const std::vector<int>& states) const
226 {
227  map<int, int> m;
228  for (size_t i = 0; i < states.size(); ++i)
229  {
230  vector<int> tmp_s = this->getAlias(states[i]); // get the states for generic characters
231  for (size_t j = 0; j < tmp_s.size(); ++j)
232  {
233  m[tmp_s[j]]++; // add each state to the list
234  }
235  }
236  vector<int> ve = MapTools::getKeys(m);
237 
238  string key;
239  for (size_t i = 0; i < ve.size(); ++i)
240  {
241  if (!isIntInAlphabet(ve[i]))
242  throw BadIntException(ve[i], "AbstractAlphabet::getGeneric(const vector<int>): Specified base unknown.", this);
243  key += "_" + TextTools::toString(ve[i]);
244  }
245  int v;
246  if (ve.size() == 1)
247  {
248  v = ve[0];
249  }
250  else
251  {
252  v = this->getUnknownCharacterCode();
253  }
254  return v;
255 }
256 
257 /******************************************************************************/
258 
259 std::string AbstractAlphabet::getGeneric(const std::vector<std::string>& states) const
260 {
261  map<string, int> m;
262  for (size_t i = 0; i < states.size(); ++i)
263  {
264  vector<string> tmp_s = this->getAlias(states[i]); // get the states for generic characters
265  for (size_t j = 0; j < tmp_s.size(); ++j)
266  {
267  m[tmp_s[j]]++; // add each state to the list
268  }
269  }
270  vector<string> ve = MapTools::getKeys(m);
271 
272  string key;
273  for (size_t i = 0; i < ve.size(); ++i)
274  {
275  if (!isCharInAlphabet(ve[i]))
276  throw BadCharException(ve[i], "AbstractAlphabet::getAlias(const vector<string>): Specified base unknown.", this);
277  key += TextTools::toString(ve[i]);
278  }
279  string v;
280  if (ve.size() == 1)
281  {
282  v = ve[0];
283  }
284  else
285  {
286  throw CharStateNotSupportedException("AbstractAlphabet::getAlias(const vector<string>): No generic char state.", this);
287  }
288  return v;
289 }
290 
291 /******************************************************************************/
292 
293 const std::vector<int>& AbstractAlphabet::getSupportedInts() const
294 {
295  if (intList_.size() != alphabet_.size())
296  {
297  intList_.resize(alphabet_.size());
298  charList_.resize(alphabet_.size());
299  for (size_t i = 0; i < alphabet_.size(); ++i)
300  {
301  intList_[i] = alphabet_[i]->getNum();
302  charList_[i] = alphabet_[i]->getLetter();
303  }
304  }
305  return intList_;
306 }
307 
308 /******************************************************************************/
309 
310 const std::vector<std::string>& AbstractAlphabet::getSupportedChars() const
311 {
312  if (charList_.size() != alphabet_.size())
313  {
314  intList_.resize(alphabet_.size());
315  charList_.resize(alphabet_.size());
316  for (size_t i = 0; i < alphabet_.size(); ++i)
317  {
318  intList_[i] = alphabet_[i]->getNum();
319  charList_[i] = alphabet_[i]->getLetter();
320  }
321  }
322  return charList_;
323 }
324 
325 /******************************************************************************/
326 
327 const std::vector<std::string>& AbstractAlphabet::getResolvedChars() const
328 {
329  charList_.clear();
330  for (size_t i = 0; i < alphabet_.size(); ++i)
331  {
332  // well, non-gap chars also
333  if (!isGap(alphabet_[i]->getLetter()) and !isUnresolved(alphabet_[i]->getLetter()))
334  charList_.push_back(alphabet_[i]->getLetter());
335  }
336 
337  return charList_;
338 }
std::string getName(const std::string &state) const
Get the complete name of a state given its string description.
const std::vector< int > & getSupportedInts() const
void updateMaps_(size_t pos, const AlphabetState &st)
Update the private maps letters_ and nums_ when adding a state.
virtual AlphabetState & getStateAt(size_t stateIndex)
Get a state at a position in the alphabet_ vector.
const std::vector< std::string > & getResolvedChars() const
const AlphabetState & getState(const std::string &letter) const
Get a state by its letter.
virtual void setState(size_t pos, AlphabetState *st)
Set a state in the Alphabet.
std::string intToChar(int state) const
Give the string description of a state given its int description.
const std::vector< std::string > & getSupportedChars() const
size_t getStateIndex(int state) const
virtual void registerState(AlphabetState *st)
Add a state to the Alphabet.
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
bool isResolvedIn(int state1, int state2) const
Tells if a given (potentially unresolved) state can be resolved in another resolved state.
int getGeneric(const std::vector< int > &states) const
Get the generic state that matches a set of states.
std::vector< int > getAlias(int state) const
Get all resolved states that match a generic state.
bool isIntInAlphabet(int state) const
Tell if a state (specified by its int description) is allowed by the alphabet.
bool isCharInAlphabet(const std::string &state) const
Tell if a state (specified by its string description) is allowed by the alphabet.
This is the base class to describe states in an Alphabet.
Definition: AlphabetState.h:22
const std::string & getLetter() const
Get the letter(s) corresponding to the state.
Definition: AlphabetState.h:63
int getNum() const
Get the state's number.
Definition: AlphabetState.h:47
An alphabet exception thrown when trying to specify a bad char to the alphabet.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
Exception thrown in case no character is available for a certain state in an alphabet.
static std::vector< Key > getKeys(const std::map< Key, T, Cmp > &myMap)
std::string toString(T t)
This alphabet is used to deal NumericAlphabet.