bpp-seq3  3.0.0
RNA.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Text/TextTools.h>
6 #include <Bpp/Utils/MapTools.h>
7 
8 #include "RNA.h"
9 
10 using namespace bpp;
11 
12 // From STL:
13 #include <map>
14 
15 using namespace std;
16 
17 /******************************************************************************/
18 // class constructor
19 RNA::RNA(bool exclamationMarkCountsAsGap)
20 {
21  // Alphabet content definition
22  // all unresolved bases use n°14
23  registerState(new NucleicAlphabetState(-1, "-", 0, "Gap"));
24  registerState(new NucleicAlphabetState( 0, "A", 1, "Adenine"));
25  registerState(new NucleicAlphabetState( 1, "C", 2, "Cytosine"));
26  registerState(new NucleicAlphabetState( 2, "G", 4, "Guanine"));
27  registerState(new NucleicAlphabetState( 3, "U", 8, "Uracile"));
28  registerState(new NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine"));
29  registerState(new NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)"));
30  registerState(new NucleicAlphabetState( 6, "W", 9, "Adenine or Uracile"));
31  registerState(new NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine"));
32  registerState(new NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Uracile)"));
33  registerState(new NucleicAlphabetState( 9, "K", 12, "Guanine or Uracile"));
34  registerState(new NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine"));
35  registerState(new NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Uracile"));
36  registerState(new NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Uracile"));
37  registerState(new NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Uracile"));
38  registerState(new NucleicAlphabetState(14, "N", 15, "Unresolved base"));
39  registerState(new NucleicAlphabetState(14, "X", 15, "Unresolved base"));
40  registerState(new NucleicAlphabetState(14, "O", 15, "Unresolved base"));
41  registerState(new NucleicAlphabetState(14, "0", 15, "Unresolved base"));
42  registerState(new NucleicAlphabetState(14, "?", 15, "Unresolved base"));
43  if (exclamationMarkCountsAsGap)
44  registerState(new NucleicAlphabetState(-1, "!", 0, "Frameshift"));
45  else
46  registerState(new NucleicAlphabetState(14, "!", 15, "Unresolved base"));
47 }
48 
49 /******************************************************************************/
50 
51 bool RNA::isResolvedIn(int state1, int state2) const
52 {
53  if (!isIntInAlphabet(state1))
54  throw BadIntException(state1, "RNA::isResolvedIn(int, int): Specified base unknown.", this);
55 
56  if (!isIntInAlphabet(state2))
57  throw BadIntException(state2, "RNA::isResolvedIn(int, int): Specified base unknown.", this);
58 
59  if (isUnresolved(state2))
60  throw BadIntException(state2, "RNA::isResolvedIn(int, int): Unresolved base.", this);
61 
62  if (state1 == -1)
63  return state2 == -1;
64 
65  const NucleicAlphabetState& st1 = getState(state1);
66 
67  switch (state2)
68  {
69  case 0:
70  return st1.getBinaryCode() & 1;
71  case 1:
72  return st1.getBinaryCode() & 2;
73  case 2:
74  return st1.getBinaryCode() & 4;
75  case 3:
76  return st1.getBinaryCode() & 8;
77  default:
78  throw BadIntException(state2, "RNA::isResolvedIn : this should not happen", this);
79  }
80 }
81 
82 /******************************************************************************/
83 
84 std::vector<int> RNA::getAlias(int state) const
85 {
86  if (!isIntInAlphabet(state))
87  throw BadIntException(state, "RNA::getAlias(int): Specified base unknown.", this);
88  vector<int> v;
89  const NucleicAlphabetState& st = getState(state);
90  if (state == -1)
91  v.push_back(-1);
92  if (st.getBinaryCode() & 1)
93  v.push_back(0);
94  if (st.getBinaryCode() & 2)
95  v.push_back(1);
96  if (st.getBinaryCode() & 4)
97  v.push_back(2);
98  if (st.getBinaryCode() & 8)
99  v.push_back(3);
100  return v;
101 }
102 
103 
104 /******************************************************************************/
105 
106 std::vector<std::string> RNA::getAlias(const std::string& state) const
107 {
108  string locstate = TextTools::toUpper(state);
109  if (!isCharInAlphabet(locstate))
110  throw BadCharException(locstate, "RNA::getAlias(int): Specified base unknown.", this);
111  vector<int> vi = this->getAlias(this->charToInt(state));
112  vector<string> v;
113  for (unsigned int i = 0; i < vi.size(); i++)
114  {
115  v.push_back(this->intToChar(vi[i]));
116  }
117  return v;
118 }
119 
120 /******************************************************************************/
121 
122 int RNA::getGeneric(const std::vector<int>& states) const
123 {
124  int v = 0;
125  for (size_t i = 0; i < states.size(); ++i)
126  {
127  if (!isIntInAlphabet(states[i]))
128  throw BadIntException(states[i], "RNA::getGeneric(const vector<int>& states): Specified base unknown.", this);
129  v |= getState(states[i]).getBinaryCode();
130  }
131  return getStateByBinCode(v).getNum();
132 }
133 
134 /******************************************************************************/
135 
136 std::string RNA::getGeneric(const std::vector<std::string>& states) const
137 {
138  vector<int> vi;
139  for (unsigned int i = 0; i < states.size(); ++i)
140  {
141  if (!isCharInAlphabet(states[i]))
142  throw BadCharException(states[i], "DNA::getGeneric(const vector<string>& states): Specified base unknown.", this);
143  vi.push_back(this->charToInt(states[i]));
144  }
145  return intToChar(getGeneric(vi));
146 }
147 
148 /******************************************************************************/
An alphabet exception thrown when trying to specify a bad char to the alphabet.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
This is the base class to describe states in a NucleicAlphabet.
int getBinaryCode() const
Get the state's binary representation.
int getGeneric(const std::vector< int > &states) const
Get the generic state that matches a set of states.
Definition: RNA.cpp:122
bool isResolvedIn(int state1, int state2) const
Tells if a given (potentially unresolved) state can be resolved in another resolved state.
Definition: RNA.cpp:51
std::vector< int > getAlias(int state) const
Get all resolved states that match a generic state.
Definition: RNA.cpp:84
RNA(bool exclamationMarkCountsAsGap=false)
Definition: RNA.cpp:19
std::string toUpper(const std::string &s)
This alphabet is used to deal with NumericAlphabet.