bpp-seq3  3.0.0
DNA.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Text/TextTools.h>
6 #include <Bpp/Utils/MapTools.h>
7 
8 #include "AlphabetState.h"
9 #include "DNA.h"
10 
11 using namespace bpp;
12 
13 // From STL:
14 #include <map>
15 
16 using namespace std;
17 
18 /******************************************************************************/
19 
20 DNA::DNA(bool exclamationMarkCountsAsGap)
21 {
22  // Alphabet content definition
23  // all unresolved bases use n°14
24  registerState(new NucleicAlphabetState(-1, "-", 0, "Gap"));
25  registerState(new NucleicAlphabetState( 0, "A", 1, "Adenine"));
26  registerState(new NucleicAlphabetState( 1, "C", 2, "Cytosine"));
27  registerState(new NucleicAlphabetState( 2, "G", 4, "Guanine"));
28  registerState(new NucleicAlphabetState( 3, "T", 8, "Thymine"));
29  registerState(new NucleicAlphabetState( 4, "M", 3, "Adenine or Cytosine"));
30  registerState(new NucleicAlphabetState( 5, "R", 5, "Purine (Adenine or Guanine)"));
31  registerState(new NucleicAlphabetState( 6, "W", 9, "Adenine or Thymine"));
32  registerState(new NucleicAlphabetState( 7, "S", 6, "Cytosine or Guanine"));
33  registerState(new NucleicAlphabetState( 8, "Y", 10, "Pyrimidine (Cytosine or Thymine)"));
34  registerState(new NucleicAlphabetState( 9, "K", 12, "Guanine or Thymine"));
35  registerState(new NucleicAlphabetState(10, "V", 7, "Adenine or Cytosine or Guanine"));
36  registerState(new NucleicAlphabetState(11, "H", 11, "Adenine or Cytosine or Thymine"));
37  registerState(new NucleicAlphabetState(12, "D", 13, "Adenine or Guanine or Thymine"));
38  registerState(new NucleicAlphabetState(13, "B", 14, "Cytosine or Guanine or Thymine"));
39  registerState(new NucleicAlphabetState(14, "N", 15, "Unresolved base"));
40  registerState(new NucleicAlphabetState(14, "X", 15, "Unresolved base"));
41  registerState(new NucleicAlphabetState(14, "O", 15, "Unresolved base"));
42  registerState(new NucleicAlphabetState(14, "0", 15, "Unresolved base"));
43  registerState(new NucleicAlphabetState(14, "?", 15, "Unresolved base"));
44  if (exclamationMarkCountsAsGap)
45  registerState(new NucleicAlphabetState(-1, "!", 0, "Frameshift"));
46  else
47  registerState(new NucleicAlphabetState(14, "!", 15, "Unresolved base"));
48 }
49 
50 /******************************************************************************/
51 
52 bool DNA::isResolvedIn(int state1, int state2) const
53 {
54  if (state1 < 0 || !isIntInAlphabet(state1))
55  throw BadIntException(state1, "AbstractAlphabet::isResolvedIn(int, int): Specified base " + intToChar(state1) + " is unknown.", this);
56 
57  if (state2 < 0 || !isIntInAlphabet(state2))
58  throw BadIntException(state2, "AbstractAlphabet::isResolvedIn(int, int): Specified base " + intToChar(state2) + " is unknown.", this);
59 
60  if (isUnresolved(state2))
61  throw BadIntException(state2, "AbstractAlphabet::isResolvedIn(int, int): Unresolved base " + intToChar(state2), this);
62 
63  if (state1 == -1)
64  return state2 == -1;
65 
66  const NucleicAlphabetState& st1 = getState(state1);
67 
68  switch (state2)
69  {
70  case 0:
71  return st1.getBinaryCode() & 1;
72  case 1:
73  return st1.getBinaryCode() & 2;
74  case 2:
75  return st1.getBinaryCode() & 4;
76  case 3:
77  return st1.getBinaryCode() & 8;
78  default:
79  throw BadIntException(state2, "DNA::isResolvedIn : this should not happen", this);
80  }
81 }
82 
83 /******************************************************************************/
84 
85 std::vector<int> DNA::getAlias(int state) const
86 {
87  if (!isIntInAlphabet(state))
88  throw BadIntException(state, "DNA::getAlias(int): Specified base unknown.", this);
89  vector<int> v;
90  const NucleicAlphabetState& st = getState(state);
91  if (state == -1)
92  v.push_back(-1);
93  if (st.getBinaryCode() & 1)
94  v.push_back(0);
95  if (st.getBinaryCode() & 2)
96  v.push_back(1);
97  if (st.getBinaryCode() & 4)
98  v.push_back(2);
99  if (st.getBinaryCode() & 8)
100  v.push_back(3);
101  return v;
102 }
103 
104 
105 /******************************************************************************/
106 
107 std::vector<std::string> DNA::getAlias(const std::string& state) const
108 {
109  string locstate = TextTools::toUpper(state);
110  if (!isCharInAlphabet(locstate))
111  throw BadCharException(locstate, "DNA::getAlias(int): Specified base unknown.", this);
112  vector<int> vi = this->getAlias(this->charToInt(state));
113  vector<string> v;
114  for (unsigned int i = 0; i < vi.size(); i++)
115  {
116  v.push_back(this->intToChar(vi[i]));
117  }
118  return v;
119 }
120 
121 /******************************************************************************/
122 
123 int DNA::getGeneric(const std::vector<int>& states) const
124 {
125  int v = 0;
126  for (size_t i = 0; i < states.size(); ++i)
127  {
128  if (!isIntInAlphabet(states[i]))
129  throw BadIntException(states[i], "DNA::getGeneric(const vector<int>& states): Specified base unknown.", this);
130  v |= getState(states[i]).getBinaryCode();
131  }
132  return getStateByBinCode(v).getNum();
133 }
134 
135 /******************************************************************************/
136 
137 std::string DNA::getGeneric(const std::vector<std::string>& states) const
138 {
139  vector<int> vi;
140  for (unsigned int i = 0; i < states.size(); ++i)
141  {
142  if (!isCharInAlphabet(states[i]))
143  throw BadCharException(states[i], "DNA::getGeneric(const vector<string>& states): Specified base unknown.", this);
144  vi.push_back(this->charToInt(states[i]));
145  }
146  return intToChar(getGeneric(vi));
147 }
148 
149 /******************************************************************************/
An alphabet exception thrown when trying to specify a bad char to the alphabet.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
std::vector< int > getAlias(int state) const
Get all resolved states that match a generic state.
Definition: DNA.cpp:85
DNA(bool exclamationMarkCountsAsGap=false)
Definition: DNA.cpp:20
bool isResolvedIn(int state1, int state2) const
Tells if a given (potentially unresolved) state can be resolved in another resolved state.
Definition: DNA.cpp:52
int getGeneric(const std::vector< int > &states) const
Get the generic state that matches a set of states.
Definition: DNA.cpp:123
This is the base class to describe states in a NucleicAlphabet.
int getBinaryCode() const
Get the state's binary representation.
std::string toUpper(const std::string &s)
This alphabet is used to deal with NumericAlphabet.