bpp-seq3  3.0.0
RNY.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "RNY.h" // class's header file
6 
7 // From Utils:
8 #include <Bpp/Text/TextTools.h>
9 #include "AlphabetTools.h"
10 
11 #include <iostream>
12 using namespace std;
13 using namespace bpp;
14 
15 /****************************************************************************************/
16 
17 RNY::RNY(shared_ptr<const NucleicAlphabet> na) : nuclalph_(na)
18 {
19  // Initialization:
20  vector<AlphabetState*> states(351, nullptr);
21 
22  // Alphabet content definition:
23 
24  string s1;
25 
27  s1 = "RCT-";
28  else
29  s1 = "RCU-";
30 
31  string s2;
32 
34  s2 = "AGCT-";
35  else
36  s2 = "AGCU-";
37 
38  string s3 = "AGY-";
39  string s = " ";
40 
41 
42  // NNN (0->35)
43 
44  for (size_t i = 0; i < 3; ++i)
45  {
46  for (size_t j = 0; j < 4; ++j)
47  {
48  for (size_t k = 0; k < 3; ++k)
49  {
50  size_t l = i * 12 + j * 3 + k;
51  s[0] = s1[i];
52  s[1] = s2[j];
53  s[2] = s3[k];
54  states[l] = new AlphabetState(static_cast<int>(l), s, s);
55  }
56  }
57  }
58 
59  // NN- (50->83)
60 
61  for (size_t i = 0; i < 3; ++i)
62  {
63  for (size_t j = 0; j < 4; ++j)
64  {
65  size_t l = 50 + 12 * i + j * 3;
66  s[0] = s1[i];
67  s[1] = s2[j];
68  s[2] = s3[3];
69  states[l] = new AlphabetState(static_cast<int>(l), s, s);
70  }
71  }
72 
73  // N-N (100->126)
74 
75  for (size_t i = 0; i < 3; ++i)
76  {
77  for (size_t k = 0; k < 3; ++k)
78  {
79  size_t l = 100 + 12 * i + k;
80  s[0] = s1[i];
81  s[1] = s2[4];
82  s[2] = s3[k];
83  states[l] = new AlphabetState(static_cast<int>(l), s, s);
84  }
85  }
86 
87  // N-- (150->152)
88 
89  for (size_t i = 0; i < 3; ++i)
90  {
91  size_t l = 150 + 12 * i;
92  s[0] = s1[i];
93  s[1] = s2[4];
94  s[2] = s3[3];
95  states[l] = new AlphabetState(static_cast<int>(l), s, s);
96  }
97 
98  // -NN (200->211)
99 
100  for (size_t j = 0; j < 4; ++j)
101  {
102  for (size_t k = 0; k < 3; ++k)
103  {
104  size_t l = 200 + j * 3 + k;
105  s[0] = s1[3];
106  s[1] = s2[j];
107  s[2] = s3[k];
108  states[l] = new AlphabetState(static_cast<int>(l), s, s);
109  }
110  }
111 
112 
113  // -N- (250->253)
114 
115  for (size_t j = 0; j < 4; ++j)
116  {
117  size_t l = 250 + 3 * j;
118  s[0] = s1[3];
119  s[1] = s2[j];
120  s[2] = s3[3];
121  states[l] = new AlphabetState(static_cast<int>(l), s, s);
122  }
123 
124  // --N (300->302)
125 
126  for (size_t k = 0; k < 3; ++k)
127  {
128  size_t l = 300 + k;
129  s[0] = s1[3];
130  s[1] = s2[4];
131  s[2] = s3[k];
132  states[l] = new AlphabetState(static_cast<int>(l), s, s);
133  }
134 
135 
136  // --- (350)
137 
138  s[0] = s1[3];
139  s[1] = s2[4];
140  s[2] = s3[3];
141  states[350] = new AlphabetState(350, s, s);
142 
143  // Register all states:
144  for (size_t i = 0; i < states.size(); ++i)
145  {
146  if (states[i])
147  registerState(states[i]);
148  }
149 }
150 
151 /****************************************************************************************/
152 
153 vector<int> RNY::getAlias(int state) const
154 {
155  if (!isIntInAlphabet(state))
156  throw BadIntException(state, "RNY::getAlias(int): Specified base unknown.", this);
157  vector<int> v;
158 
159  int qs = state / 50;
160  int rs = state % 50;
161  int i, j, k;
162 
163  switch (qs)
164  {
165  case 0: // NNN
166  v.resize(1);
167  v[0] = rs;
168  break;
169  case 1: // NN-
170  v.resize(3);
171  for (k = 0; k < 3; ++k)
172  {
173  v[static_cast<size_t>(k)] = k + rs;
174  }
175  break;
176  case 2: // N-N
177  v.resize(4);
178  for (j = 0; j < 4; ++j)
179  {
180  v[static_cast<size_t>(j)] = 3 * j + rs;
181  }
182  break;
183  case 3: // N--
184  v.resize(12);
185  for (j = 0; j < 4; ++j)
186  {
187  for (k = 0; k < 3; ++k)
188  {
189  v[static_cast<size_t>(3 * j + k)] = rs + 3 * j + k;
190  }
191  }
192  break;
193  case 4: // -NN
194  v.resize(3);
195  for (i = 0; i < 3; ++i)
196  {
197  v[static_cast<size_t>(i)] = 12 * i + rs;
198  }
199  break;
200  case 5: // -N-
201  v.resize(9);
202  for (i = 0; i < 3; ++i)
203  {
204  for (k = 0; k < 3; ++k)
205  {
206  v[static_cast<size_t>(3 * i + k)] = rs + 12 * i + k;
207  }
208  }
209  break;
210  case 6: // --N
211  v.resize(12);
212  for (i = 0; i < 3; ++i)
213  {
214  for (j = 0; j < 4; ++j)
215  {
216  v[static_cast<size_t>(4 * i + j)] = rs + 12 * i + 3 * j;
217  }
218  }
219  break;
220  case 7: // ---
221  v.resize(36);
222  for (i = 0; i < 3; ++i)
223  {
224  for (j = 0; j < 4; ++j)
225  {
226  for (k = 0; k < 3; ++k)
227  {
228  v[static_cast<size_t>(12 * i + 3 * j + k)] = 12 * i + 3 * j + k;
229  }
230  }
231  }
232  break;
233  }
234  return v;
235 }
236 
237 /****************************************************************************************/
238 
239 bool RNY::isResolvedIn(int state1, int state2) const
240 {
241  if (!isIntInAlphabet(state1))
242  throw BadIntException(state1, "RNY::isResolvedIn(int, int): Specified base unknown.", this);
243 
244  if (!isIntInAlphabet(state2))
245  throw BadIntException(state2, "RNY::isResolvedIn(int, int): Specified base unknown.", this);
246 
247  if (isUnresolved(state2))
248  throw BadIntException(state2, "RNY::isResolvedIn(int, int): Unresolved base.", this);
249 
250  int qs = state1 / 50;
251  int rs = state1 % 50;
252 
253  switch (qs)
254  {
255  case 0: // NNN
256  return state2 == rs;
257  case 1: // NN-
258  return (state2 < rs + 3) && (state2 >= rs);
259  case 2: // N-N
260  return (state2 - rs) % 3 == 0 && (state2 >= rs) && (state2 < rs + 12);
261  case 3: // N--
262  return (state2 >= rs) && (state2 < rs + 12);
263  case 4: // -NN
264  return (state2 - rs) % 12 == 0 && (state2 >= rs) && (state2 < rs + 36);
265  case 5: // -N-
266  return (state2 - rs) % 12 < 3 && (state2 >= rs) && (state2 < rs + 27);
267  case 6: // --N
268  return (state2 - rs) % 3 == 0 && (state2 >= rs) && (state2 < rs + 36);
269  case 7: // ---
270  return state2 >= 0;
271  default:
272  throw BadIntException(state1, "RNY:isResolvedIn : this should not happen.", this);
273  }
274 }
275 
276 /****************************************************************************************/
277 
278 vector<string> RNY::getAlias(const string& state) const
279 {
280  if (!isCharInAlphabet(state))
281  throw BadCharException(state, "RNY::getAlias(int): Specified base unknown.", this);
282 
283  vector<int> v = getAlias(charToInt(state));
284  vector<string> s;
285  size_t size = v.size();
286  s.resize(size);
287 
288  for (size_t i = 0; i < size; i++)
289  {
290  s[i] = AbstractAlphabet::intToChar(v[i]);
291  }
292  return s;
293 }
294 
295 /****************************************************************************************/
296 
297 string RNY::getRNY(const string& pos1, const string& pos2, const string& pos3) const
298 {
299  string tr;
300 
301  if (pos1 == "A" || pos1 == "G")
302  tr = "R";
303  else
304  tr = pos1;
305 
306  tr += pos2;
307 
308  if (pos3 == "T" || pos3 == "U" || pos3 == "C")
309  tr += "Y";
310  else
311  tr += pos3;
312 
313  // teste triplet;
314  charToInt(tr);
315  return tr;
316 }
317 
318 /**************************************************************************************/
319 
320 int RNY::getRNY(int i, int j, int k, const Alphabet& alph) const
321 {
323  {
324  throw AlphabetException("RNY::getRNY : Sequence must be Nucleic", &alph);
325  }
326 
327  char li = alph.intToChar(i)[0];
328  char lj = alph.intToChar(j)[0];
329  char lk = alph.intToChar(k)[0];
330 
331  int r = 0;
332  int s = 0;
333 
334  switch (li)
335  {
336  case 'A':
337  case 'G':
338  r += 0;
339  break;
340  case 'C':
341  r += 1;
342  break;
343  case 'T':
344  case 'U':
345  r += 2;
346  break;
347  case '-':
348  case 'N':
349  s += 1;
350  break;
351  default:
352  throw BadCharException(&li, "RNY::getRNY(int,int;int,alph): Specified base unknown.", this);
353  }
354 
355  r *= 4;
356  s *= 2;
357 
358  switch (lj)
359  {
360  case 'A':
361  r += 0;
362  break;
363  case 'G':
364  r += 1;
365  break;
366  case 'C':
367  r += 2;
368  break;
369  case 'T':
370  case 'U':
371  r += 3;
372  break;
373  case '-':
374  case 'N':
375  s += 1;
376  break;
377  default:
378  throw BadCharException(&lj, "RNY::getRNY(int,int;int,alph): Specified base unknown.", this);
379  }
380 
381  r *= 3;
382  s *= 2;
383 
384  switch (lk)
385  {
386  case 'A':
387  r += 0;
388  break;
389  case 'G':
390  r += 1;
391  break;
392  case 'C':
393  case 'T':
394  case 'U':
395  r += 2;
396  break;
397  case '-':
398  case 'N':
399  s += 1;
400  break;
401  default:
402  throw BadCharException(&lk, "RNY::getRNY(int,int;int,alph): Specified base unknown.", this);
403  }
404 
405  return 50 * s + r;
406 }
407 
408 /****************************************************************************************/
409 bool RNY::isGap(int state) const
410 {
411  return state == 350;
412 }
413 
414 bool RNY::containsGap(const string& state) const
415 {
416  return state.find("-") != string::npos;
417 }
418 
419 bool RNY::isUnresolved(const string& state) const
420 {
421  return containsGap(state);
422 }
423 
424 bool RNY::isUnresolved(int state) const
425 {
426  return state >= 50 && state != 350;
427 }
428 
429 /****************************************************************************************/
430 
431 int RNY::charToInt(const string& state) const
432 {
433  if (state.size() != 3)
434  throw BadCharException(state, "RNY::charToInt", this);
435  else
436  return AbstractAlphabet::charToInt(state);
437 }
438 
439 
440 /************************************************************/
441 
442 string RNY::intToChar(int state) const
443 {
444  int i, j, k, l;
445  for (i = 0; i < 3; ++i)
446  {
447  for (j = 0; j < 4; ++j)
448  {
449  for (k = 0; k < 3; ++k)
450  {
451  l = i * 12 + j * 3 + k;
452  if (getState(l).getNum() == state)
453  return getState(l).getLetter();
454  }
455  }
456  }
457 
458  // NN- (50->83)
459 
460  for (i = 0; i < 3; ++i)
461  {
462  for (j = 0; j < 4; ++j)
463  {
464  l = 50 + 12 * i + j * 3;
465  if (getState(l).getNum() == state)
466  return getState(l).getLetter();
467  }
468  }
469 
470  // N-N (100->126)
471 
472  for (i = 0; i < 3; ++i)
473  {
474  for (k = 0; k < 3; ++k)
475  {
476  l = 100 + 12 * i + k;
477  if (getState(l).getNum() == state)
478  return getState(l).getLetter();
479  }
480  }
481 
482  // N-- (150->152)
483 
484  for (i = 0; i < 3; ++i)
485  {
486  l = 150 + 12 * i;
487  if (getState(l).getNum() == state)
488  return getState(l).getLetter();
489  }
490 
491  // -NN (200->211)
492 
493  for (j = 0; j < 4; ++j)
494  {
495  for (k = 0; k < 3; ++k)
496  {
497  l = 200 + j * 3 + k;
498  if (getState(l).getNum() == state)
499  return getState(l).getLetter();
500  }
501  }
502 
503 
504  // -N- (250->253)
505 
506  for (j = 0; j < 4; ++j)
507  {
508  l = 250 + 3 * j;
509  if (getState(l).getNum() == state)
510  return getState(l).getLetter();
511  }
512 
513  // --N (300->302)
514 
515  for (k = 0; k < 3; ++k)
516  {
517  l = 300 + k;
518  if (getState(l).getNum() == state)
519  return getState(l).getLetter();
520  }
521 
522 
523  // --- (350)
524 
525  l = 350;
526  if (getState(l).getNum() == state)
527  return getState(l).getLetter();
528 
529  throw BadIntException(state, "RNY::intToChar: Specified base unknown", this);
530  return "XXX";
531 }
const AlphabetState & getState(const std::string &letter) const
Get a state by its letter.
std::string intToChar(int state) const
Give the string description of a state given its int description.
virtual void registerState(AlphabetState *st)
Add a state to the Alphabet.
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
bool isIntInAlphabet(int state) const
Tell if a state (specified by its int description) is allowed by the the alphabet.
bool isCharInAlphabet(const std::string &state) const
Tell if a state (specified by its string description) is allowed by the the alphabet.
The alphabet exception base class.
This is the base class to describe states in an Alphabet.
Definition: AlphabetState.h:22
const std::string & getLetter() const
Get the letter(s) corresponding to the state.
Definition: AlphabetState.h:63
static bool isNucleicAlphabet(const Alphabet &alphabet)
static bool isRNAAlphabet(const Alphabet &alphabet)
static bool isDNAAlphabet(const Alphabet &alphabet)
The Alphabet interface.
Definition: Alphabet.h:99
virtual std::string intToChar(int state) const =0
Give the string description of a state given its int description.
An alphabet exception thrown when trying to specify a bad char to the alphabet.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
bool isGap(int state) const
Definition: RNY.cpp:409
bool containsGap(const std::string &state) const
Definition: RNY.cpp:414
bool isUnresolved(int state) const
Definition: RNY.cpp:424
std::vector< int > getAlias(int state) const
Get all resolved states that match a generic state.
Definition: RNY.cpp:153
int charToInt(const std::string &state) const
Give the int description of a state given its string description.
Definition: RNY.cpp:431
std::string intToChar(int state) const
Give the string description of a state given its int description.
Definition: RNY.cpp:442
std::string getRNY(const std::string &, const std::string &, const std::string &) const
Get the char code for a triplet given the char code of the three underlying positions.
Definition: RNY.cpp:297
bool isResolvedIn(int state1, int state2) const
Tells if a given (potentially unresolved) state can be resolved in another resolved state.
Definition: RNY.cpp:239
This alphabet is used to deal NumericAlphabet.