bpp-seq3  3.0.0
GeneticCode.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include "../Alphabet/AlphabetTools.h"
6 #include "../SequenceTools.h"
7 #include "GeneticCode.h"
8 
9 using namespace bpp;
10 using namespace std;
11 
12 /**********************************************************************************************/
13 
14 StopCodonException::StopCodonException(const std::string& text, const std::string& codon) :
15  Exception("StopCodonException: " + text + "(" + codon + ")"),
16  codon_(codon) {}
17 
18 /**********************************************************************************************/
19 
20 int GeneticCode::translate(int state) const
21 {
22  if (isStop(state))
23  throw StopCodonException("GeneticCode::translate().", codonAlphabet_->intToChar(state));
24 
25  map<int, int>::const_iterator it = tlnTable_.find(state);
26  if (it == tlnTable_.end())
27  throw BadIntException(state, "GeneticCode::translate().", codonAlphabet_.get());
28 
29  return it->second;
30 }
31 
32 /**********************************************************************************************/
33 
34 std::string GeneticCode::translate(const std::string& state) const
35 {
36  int x = codonAlphabet_->charToInt(state);
37  return proteicAlphabet_->intToChar(translate(x));
38 }
39 
40 /**********************************************************************************************/
41 
42 vector<int> GeneticCode::getSynonymous(int aminoacid) const
43 {
44  // test:
45  proteicAlphabet_->intToChar(aminoacid);
46 
47  vector<int> synonyms;
48  for (int i = 0; i < static_cast<int>(codonAlphabet_->getSize()); ++i)
49  {
50  try
51  {
52  if (translate(i) == aminoacid)
53  synonyms.push_back(i);
54  }
55  catch (StopCodonException&)
56  {}
57  }
58  return synonyms;
59 }
60 
61 /**********************************************************************************************/
62 
63 std::vector<std::string> GeneticCode::getSynonymous(const std::string& aminoacid) const
64 {
65  // test:
66  int aa = proteicAlphabet_->charToInt(aminoacid);
67 
68  vector<string> synonyms;
69  for (int i = 0; i < static_cast<int>(codonAlphabet_->getSize()); ++i)
70  {
71  try
72  {
73  if (translate(i) == aa)
74  synonyms.push_back(codonAlphabet_->intToChar(i));
75  }
76  catch (StopCodonException&)
77  {}
78  }
79  return synonyms;
80 }
81 
82 /**********************************************************************************************/
83 
85 {
86  if (isStop(val))
87  return false;
88 
89  vector<int> codon = codonAlphabet_->getPositions(val);
90  int acid = translate(val);
91 
92  // test all the substitution on third codon position
93  for (int an = 0; an < 4; an++)
94  {
95  if (an == codon[2])
96  continue;
97  vector<int> mutcodon = codon;
98  mutcodon[2] = an;
99  int intcodon = codonAlphabet_->getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
100  if (isStop(intcodon))
101  return false;
102  int altacid = translate(intcodon);
103  if (altacid != acid) // if non-synonymous
104  {
105  return false;
106  }
107  }
108 
109  return true;
110 }
111 
112 /**********************************************************************************************/
113 
114 unique_ptr<Sequence> GeneticCode::getCodingSequence(
115  const SequenceInterface& sequence,
116  bool lookForInitCodon,
117  bool includeInitCodon) const
118 {
119  size_t initPos = 0;
120  size_t stopPos = sequence.size();
122  {
123  // Look for AUG(or ATG) codon:
124  if (lookForInitCodon)
125  {
126  for (size_t i = 0; i < sequence.size(); i++)
127  {
128  vector<int> pos = codonAlphabet_->getPositions(sequence[i]);
129  if (pos[0] == 0 && pos[1] == 3 && pos[2] == 2)
130  {
131  initPos = includeInitCodon ? i : i + 1;
132  break;
133  }
134  }
135  }
136  // Look for stop codon:
137  for (size_t i = initPos; i < sequence.size(); i++)
138  {
139  if (isStop(sequence[i]))
140  {
141  stopPos = i;
142  break;
143  }
144  }
145  }
146  else if (AlphabetTools::isNucleicAlphabet(sequence.alphabet()))
147  {
148  // Look for AUG(or ATG) codon:
149  if (lookForInitCodon)
150  {
151  for (size_t i = 0; i < sequence.size() - 2; i++)
152  {
153  if (sequence[i] == 0 && sequence[i + 1] == 3 && sequence[i + 2] == 2)
154  {
155  initPos = includeInitCodon ? i : i + 3;
156  break;
157  }
158  }
159  }
160  // Look for stop codon:
161  shared_ptr<const NucleicAlphabet> nucAlpha = codonAlphabet_->getNucleicAlphabet();
162  for (size_t i = initPos; i < sequence.size() - 2; i += 3)
163  {
164  string codon = nucAlpha->intToChar(sequence[i])
165  + nucAlpha->intToChar(sequence[i + 1])
166  + nucAlpha->intToChar(sequence[i + 2]);
167  if (isStop(codon))
168  {
169  stopPos = i;
170  break;
171  }
172  }
173  }
174  else
175  throw AlphabetMismatchException("Sequence must have alphabet of type nucleic or codon in GeneticCode::getCodingSequence.", 0, &sequence.alphabet());
176 
177  return SequenceTools::subseq<Sequence>(sequence, initPos, stopPos - 1);
178 }
179 
180 /**********************************************************************************************/
Exception thrown when two alphabets do not match.
static bool isNucleicAlphabet(const Alphabet &alphabet)
static bool isCodonAlphabet(const Alphabet &alphabet)
An alphabet exception thrown when trying to specify a bad int to the alphabet.
virtual size_t size() const =0
Get the number of elements in the list.
virtual const Alphabet & alphabet() const =0
Get the alphabet associated to the list.
bool isFourFoldDegenerated(int codon) const
Definition: GeneticCode.cpp:84
std::unique_ptr< Sequence > getCodingSequence(const SequenceInterface &sequence, bool lookForInitCodon=false, bool includeInitCodon=false) const
Get the subsequence corresponding to the coding part of a given sequence.
std::vector< int > getSynonymous(int aminoacid) const
Definition: GeneticCode.cpp:42
std::shared_ptr< const CodonAlphabet > codonAlphabet_
Definition: GeneticCode.h:52
int translate(int state) const override
Translate a given state coded as an int from source alphabet to target alphabet.
Definition: GeneticCode.cpp:20
std::map< int, int > tlnTable_
Definition: GeneticCode.h:54
std::shared_ptr< const ProteicAlphabet > proteicAlphabet_
Definition: GeneticCode.h:53
virtual bool isStop(int state) const =0
Tells if a particular codon is a stop codon.
The sequence interface.
Definition: Sequence.h:34
Exception thrown when a stop codon is found.
Definition: GeneticCode.h:22
StopCodonException(const std::string &text, const std::string &codon)
Definition: GeneticCode.cpp:14
This alphabet is used to deal with NumericAlphabet.