bpp-seq3  3.0.0
CodonAlphabet.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Text/TextTools.h>
6 
7 #include "CodonAlphabet.h"
8 
9 using namespace bpp;
10 
11 // From the STL:
12 #include <iostream>
13 
14 using namespace std;
15 
16 
18 {
 // NOTE(review): the line carrying this constructor's signature / initializer list
 // (listing line 17) is absent from this extract; only the body is visible here.
 //
 // Builds the 66 states of the alphabet: one gap state, 64 three-letter states
 // and one unresolved state, then registers them all.
19  vector<AlphabetState*> states(66);
20 
 // Slot 0: the gap state, numeric code -1, letters "---".
21  states[0] = new AlphabetState(-1, "---", "gap");
22 
 // Slots 1..64: every combination of three codes in 0..3, spelled by concatenating
 // nAlph_->intToChar() of each position. `i` counts 0..63 and is used as the numeric
 // code, while the state is stored one slot further (i + 1) because of the gap in slot 0.
23  size_t i = 0;
24  for (int i1 = 0; i1 < 4; ++i1)
25  {
26  for (int i2 = 0; i2 < 4; ++i2)
27  {
28  for (int i3 = 0; i3 < 4; ++i3)
29  {
30  string s = nAlph_->intToChar(i1) + nAlph_->intToChar(i2) + nAlph_->intToChar(i3);
31  states[i + 1] = new AlphabetState(static_cast<int>(i), s, s);
32  i++;
33  }
34  }
35  }
36 
37 
 // Slot 65: the generic "NNN" state, numeric code 64.
38  states[65] = new AlphabetState(static_cast<int>(64), "NNN", "Unresolved");
39 
40  // Now register all states once for all:
 // NOTE(review): the raw pointers are handed to registerState(); presumably it takes
 // ownership of them — confirm against its declaration.
41  for (i = 0; i < states.size(); ++i)
42  {
43  registerState(states[i]);
44  }
45 }
46 
47 int CodonAlphabet::getGCinCodon(int codon) const
48 {
49  int i = 0;
50  int j = getFirstPosition(codon);
51  if (j == 1 || j == 2)
52  i++;
53  j = getSecondPosition(codon);
54  if (j == 1 || j == 2)
55  i++;
56  j = getThirdPosition(codon);
57  if (j == 1 || j == 2)
58  i++;
59 
60  return i;
61 }
62 
63 bool CodonAlphabet::containsUnresolved(const std::string& state) const
64 {
65  if (state.length() != 3)
66  throw BadCharException(state, "CodonAlphabet::containsUnresolved", this);
67 
68  for (size_t i = 0; i < 3; i++)
69  {
70  if (nAlph_->isUnresolved(state.substr(i, 1)))
71  {
72  return true;
73  }
74  }
75  return false;
76 }
77 
78 /******************************************************************************/
79 
80 bool CodonAlphabet::containsGap(const std::string& state) const
81 {
82  if (state.length() != 3)
83  throw BadCharException(state, "CodonAlphabet::containsGap", this);
84 
85  for (size_t i = 0; i < 3; i++)
86  {
87  if (nAlph_->isGap(state.substr(i, 1)))
88  return true;
89  }
90 
91  return false;
92 }
93 
94 
95 /****************************************************************************************/
96 
97 unique_ptr<SequenceInterface> CodonAlphabet::translate(const SequenceInterface& sequence, size_t pos) const
98 {
99  vector<int> content;
100 
101  size_t s = sequence.size();
102  size_t i = pos;
103 
104  while (i + 3 <= s)
105  {
106  content.push_back(getWord(sequence, i));
107  i += 3;
108  }
109 
110  auto alphaPtr = shared_from_this();
111  return make_unique<Sequence>(sequence.getName(), content, alphaPtr);
112 }
113 
114 /****************************************************************************************/
115 
116 unique_ptr<SequenceInterface> CodonAlphabet::reverse(const SequenceInterface& sequence) const
117 {
118  auto alphaPtr = getNAlphabet(0);
119  auto seqPtr = make_unique<Sequence>(sequence.getName(), "", alphaPtr);
120 
121  size_t s = sequence.size();
122  for (size_t i = 0; i < s; i++)
123  {
124  seqPtr->append(getPositions(sequence[i]));
125  }
126 
127  return seqPtr;
128 }
129 
130 /****************************************************************************************/
131 
132 std::vector<int> CodonAlphabet::getAlias(int state) const
133 {
134  if (!isIntInAlphabet(state))
135  throw BadIntException(state, "WordAlphabet::getAlias(int): Specified base unknown.", this);
136  vector<int> v;
137 
138  if (state == 64)
139  {
140  v.resize(64);
141  for (size_t i = 0; i < 64; ++i)
142  {
143  v[i] = static_cast<int>(i);
144  }
145  }
146  else
147  {
148  v.resize(1); v[0] = state;
149  }
150  return v;
151 }
152 
153 /******************************************************************************/
154 
155 std::vector<std::string> CodonAlphabet::getAlias(const std::string& state) const
156 {
157  string locstate = TextTools::toUpper(state);
158  if (!isCharInAlphabet(locstate))
159  throw BadCharException(locstate, "CodonAlphabet::getAlias(string): Specified base unknown.", this);
160  vector<string> v;
161 
162  if (locstate == "NNN")
163  {
164  v.resize(64);
165  for (size_t i = 0; i < 64; ++i)
166  {
167  v[i] = intToChar(static_cast<int>(i));
168  }
169  }
170  else
171  {
172  v.resize(1); v[0] = state;
173  }
174  return v;
175 }
176 
177 
178 bool CodonAlphabet::isResolvedIn(int state1, int state2) const
179 {
180  if (state1 < 0 || !isIntInAlphabet(state1))
181  throw BadIntException(state1, "CodonAlphabet::isResolvedIn(int, int): Specified base " + intToChar(state1) + " is unknown.", this);
182 
183  if (state2 < 0 || !isIntInAlphabet(state2))
184  throw BadIntException(state2, "CodonAlphabet::isResolvedIn(int, int): Specified base " + intToChar(state2) + " is unknown.", this);
185 
186  if (isUnresolved(state2))
187  throw BadIntException(state2, "CodonAlphabet::isResolvedIn(int, int): Unresolved base " + intToChar(state2), this);
188 
189  return (state1 == 64) ? (state2 >= 0) : (state1 == state2);
190 }
This is the base class to describe states in an Alphabet.
Definition: AlphabetState.h:22
An alphabet exception thrown when trying to specify a bad char to the alphabet.
An alphabet exception thrown when trying to specify a bad int to the alphabet.
std::vector< int > getAlias(int state) const override
Get all resolved states that match a generic state.
bool isResolvedIn(int state1, int state2) const override
Tells if a given (potentially unresolved) state can be resolved in another resolved state.
std::unique_ptr< SequenceInterface > reverse(const SequenceInterface &sequence) const override
Translate a whole sequence from words alphabet to letters alphabet.
int getGCinCodon(int codon) const
Get the number of G+C in codon.
bool containsGap(const std::string &state) const override
std::unique_ptr< SequenceInterface > translate(const SequenceInterface &sequence, size_t=0) const override
Translate a whole sequence from letters alphabet to words alphabet.
bool containsUnresolved(const std::string &state) const override
virtual const std::string & getName() const =0
Get the name of this sequence.
virtual size_t size() const =0
Get the number of elements in the list.
The sequence interface.
Definition: Sequence.h:34
std::string toUpper(const std::string &s)
This alphabet is used to deal with NumericAlphabet.