bpp-seq3  3.0.0
CodonSiteTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Numeric/NumTools.h>
7 #include <Bpp/Utils/MapTools.h>
8 
10 #include "Alphabet/CodonAlphabet.h"
11 #include "Alphabet/DNA.h"
12 #include "CodonSiteTools.h"
15 #include "SymbolListTools.h"
16 
17 using namespace bpp;
18 
19 // From the STL:
20 #include <cmath>
21 
22 using namespace std;
23 
24 /******************************************************************************/
25 
26 bool CodonSiteTools::hasGapOrStop(const Site& site, const GeneticCode& gCode)
27 {
28  // Alphabet checking
30  throw AlphabetException("CodonSiteTools::hasGapOrStop: alphabet is not CodonAlphabet", site.getAlphabet());
31  for (size_t i = 0; i < site.size(); i++)
32  {
33  if (site[i] < 0)
34  return true;
35  }
36  return false;
37 }
38 
39 /******************************************************************************/
40 
41 bool CodonSiteTools::hasStop(const Site& site, const GeneticCode& gCode)
42 {
43  // Alphabet checking
45  throw AlphabetException("CodonSiteTools::hasStop: alphabet is not CodonAlphabet", site.getAlphabet());
46  for (size_t i = 0; i < site.size(); i++)
47  {
48  if (gCode.isStop(site[i]))
49  return true;
50  }
51  return false;
52 }
53 
54 /******************************************************************************/
55 
57 {
58  // Alphabet checking
60  throw AlphabetException("CodonSiteTools::isMonoSitePolymorphic: alphabet is not CodonAlphabet", site.getAlphabet());
61  // Empty site checking
62  if (site.size() == 0)
63  throw EmptySiteException("CodonSiteTools::isMonoSitePolymorphic: Incorrect specified site", &site);
64 
65  // Global polymorphism checking
67  return false;
68  // initialisation of the 3 sub-sites of the codon
69  vector<int> pos1, pos2, pos3;
70  auto ca = dynamic_pointer_cast<const CodonAlphabet>(site.getAlphabet());
71  for (size_t i = 0; i < site.size(); i++)
72  {
73  pos1.push_back(ca->getFirstPosition(site[i]));
74  pos2.push_back(ca->getSecondPosition(site[i]));
75  pos3.push_back(ca->getThirdPosition(site[i]));
76  }
77  shared_ptr<const Alphabet> na = ca->getNucleicAlphabet();
78  Site s1(pos1, na), s2(pos2, na), s3(pos3, na);
79  // polymorphism checking for each sub-sites
80  size_t nbpol = 0;
82  nbpol++;
84  nbpol++;
86  nbpol++;
87  if (nbpol > 1)
88  return false;
89  return true;
90 }
91 
92 /******************************************************************************/
93 
95 {
96  // Alphabet checking
98  throw AlphabetException("CodonSiteTools::isSynonymousPolymorphic: alphabet is not CodonAlphabet", site.getAlphabet());
99  if (!site.alphabet().equals(gCode.sourceAlphabet()))
100  throw AlphabetMismatchException("CodonSiteTools::isSynonymousPolymorphic: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getSourceAlphabet());
101  // Empty site checking
102  if (site.size() == 0)
103  throw EmptySiteException("CodonSiteTools::isSynonymousPolymorphic: Incorrect specified site", &site);
104 
105  // Global polymorphism checking
106  if (SymbolListTools::isConstant(site))
107  return false;
108 
109  // Synonymous polymorphism checking
110  map<int, size_t> counts;
111  SymbolListTools::getCounts(site, counts);
112  map<int, size_t> aas;
113  size_t cdat = 0;
114  for (auto it = counts.begin(); it != counts.end(); ++it)
115  {
116  if (!site.getAlphabet()->isUnresolved(it->first))
117  {
118  cdat += it->second;
119  aas[gCode.translate(it->first)]++;
120  if (aas.size() > 1)
121  return false;
122  }
123  }
124  return cdat > 1; // If only one sequence is non-missing, then the site is not considered to be polymorphic.
125 }
126 
127 /******************************************************************************/
128 
129 unique_ptr<Site> CodonSiteTools::generateCodonSiteWithoutRareVariant(const Site& site, const GeneticCode& gCode, double freqmin)
130 {
131  // Alphabet checking
133  throw AlphabetException("CodonSiteTools::generateCodonSiteWithoutRareVariant: alphabet is not CodonAlphabet", site.getAlphabet());
134  // Empty site checking
135  if (site.size() == 0)
136  throw EmptySiteException("CodonSiteTools::generateCodonSiteWithoutRareVariant: Incorrect specified site", &site);
137 
138  if (SymbolListTools::isConstant(site))
139  {
140  auto noRareVariant = make_unique<Site>(site);
141  return noRareVariant;
142  }
143  else
144  {
145  // Computation
146  map<int, double> freqcodon;
147  SymbolListTools::getFrequencies(site, freqcodon);
148  auto ca = dynamic_pointer_cast<const CodonAlphabet>(site.getAlphabet());
149  shared_ptr<const Alphabet> na = ca->getNucleicAlphabet();
150  int newcodon = -1;
151  for (map<int, double>::iterator it = freqcodon.begin(); it != freqcodon.end(); it++)
152  {
153  if (it->second > freqmin && !gCode.isStop(it->first))
154  {
155  newcodon = it->first;
156  break;
157  }
158  }
159  vector<int> pos1, pos2, pos3;
160  for (size_t i = 0; i < site.size(); i++)
161  {
162  pos1.push_back(ca->getFirstPosition(site[i]));
163  pos2.push_back(ca->getSecondPosition(site[i]));
164  pos3.push_back(ca->getThirdPosition(site[i]));
165  }
166  Site s1(pos1, na), s2(pos2, na), s3(pos3, na);
167  map<int, double> freq1;
169  map<int, double> freq2;
171  map<int, double> freq3;
173  vector<int> codon;
174  for (size_t i = 0; i < site.size(); i++)
175  {
176  if (freq1[s1.getValue(i)] > freqmin && freq2[s2.getValue(i)] > freqmin && freq3[s3.getValue(i)] > freqmin)
177  {
178  codon.push_back(site.getValue(i));
179  }
180  else
181  codon.push_back(newcodon);
182  }
183  auto alphaPtr = dynamic_pointer_cast<const Alphabet>(ca);
184  auto noRareVariant = make_unique<Site>(codon, alphaPtr);
185  return noRareVariant;
186  }
187 }
188 
189 /******************************************************************************/
190 
191 size_t CodonSiteTools::numberOfDifferences(int i, int j, const CodonAlphabet& ca)
192 {
193  size_t nbdif = 0;
194  if (ca.getFirstPosition(i) != ca.getFirstPosition(j))
195  nbdif++;
196  if (ca.getSecondPosition(i) != ca.getSecondPosition(j))
197  nbdif++;
198  if (ca.getThirdPosition(i) != ca.getThirdPosition(j))
199  nbdif++;
200  return nbdif;
201 }
202 
203 /******************************************************************************/
204 
205 double CodonSiteTools::numberOfSynonymousDifferences(int i, int j, const GeneticCode& gCode, bool minchange)
206 {
207  auto ca = gCode.getCodonAlphabet();
208 
209  vector<int> ci = ca->getPositions(i);
210  vector<int> cj = ca->getPositions(j);
211 
212  switch (numberOfDifferences(i, j, *ca))
213  {
214  case 0: return 0;
215  case 1:
216  {
217  if (gCode.areSynonymous(i, j))
218  return 1;
219  return 0;
220  }
221  case 2:
222  {
223  if (gCode.areSynonymous(i, j))
224  return 2;
225  vector<double> path(2, 0); // Vector of number of synonymous changes per path (2 here)
226  vector<double> weight(2, 1); // Weight to exclude path through stop codon
227 
228  if (ci[0] == cj[0])
229  {
230  int trans1 = ca->getCodon(ci[0], cj[1], ci[2]); // transitory codon between NcNiNi et NcNjNj: NcNjNi, Nc = identical site
231  int trans2 = ca->getCodon(ci[0], ci[1], cj[2]); // transitory codon between NcNiNi et NcNjNj: NcNiNj, Nc = identical site
232 
233  if (!gCode.isStop(trans1))
234  {
235  if (gCode.areSynonymous(i, trans1))
236  path[0]++;
237  if (gCode.areSynonymous(trans1, j))
238  path[0]++;
239  }
240  else
241  weight[0] = 0;
242  if (!gCode.isStop(trans2))
243  {
244  if (gCode.areSynonymous(i, trans2))
245  path[1]++;
246  if (gCode.areSynonymous(trans2, j))
247  path[1]++;
248  }
249  else
250  weight[1] = 0;
251  }
252  if (ci[1] == cj[1])
253  {
254  int trans1 = ca->getCodon(cj[0], ci[1], ci[2]); // transitory codon between NiNcNi et NjNcNj: NjNcNi, Nc = identical site
255  int trans2 = ca->getCodon(ci[0], ci[1], cj[2]); // transitory codon between NiNcNi et NjNcNj: NiNcNj, Nc = identical site
256  if (!gCode.isStop(trans1))
257  {
258  if (gCode.areSynonymous(i, trans1))
259  path[0]++;
260  if (gCode.areSynonymous(trans1, j))
261  path[0]++;
262  }
263  else
264  weight[0] = 0;
265  if (!gCode.isStop(trans2))
266  {
267  if (gCode.areSynonymous(i, trans2))
268  path[1]++;
269  if (gCode.areSynonymous(trans2, j))
270  path[1]++;
271  }
272  else
273  weight[1] = 0;
274  }
275  if (ci[2] == cj[2])
276  {
277  int trans1 = ca->getCodon(cj[0], ci[1], ci[2]); // transitory codon between NiNiNc et NjNjNc: NjNiNc, Nc = identical site
278  int trans2 = ca->getCodon(ci[0], cj[1], ci[2]); // transitory codon between NiNiNc et NjNjNc: NiNjNc, Nc = identical site
279  if (!gCode.isStop(trans1))
280  {
281  if (gCode.areSynonymous(i, trans1))
282  path[0]++;
283  if (gCode.areSynonymous(trans1, j))
284  path[0]++;
285  }
286  else
287  weight[0] = 0;
288  if (!gCode.isStop(trans2))
289  {
290  if (gCode.areSynonymous(i, trans2))
291  path[1]++;
292  if (gCode.areSynonymous(trans2, j))
293  path[1]++;
294  }
295  else
296  weight[1] = 0;
297  }
298  if (minchange)
299  return VectorTools::max(path);
300 
301  double nbdif = 0;
302  for (size_t k = 0; k < 2; k++)
303  {
304  nbdif += path[k] * weight[k];
305  }
306 
307  return nbdif / VectorTools::sum(weight);
308  }
309  case 3:
310  {
311  vector<double> path(6, 0);
312  vector<double> weight(6, 1);
313  // First transitory codons
314  int trans100 = ca->getCodon(cj[0], ci[1], ci[2]);
315  int trans010 = ca->getCodon(ci[0], cj[1], ci[2]);
316  int trans001 = ca->getCodon(ci[0], ci[1], cj[2]);
317  // Second transitory codons
318  int trans110 = ca->getCodon(cj[0], cj[1], ci[2]);
319  int trans101 = ca->getCodon(cj[0], ci[1], cj[2]);
320  int trans011 = ca->getCodon(ci[0], cj[1], cj[2]);
321  // Paths
322  if (!gCode.isStop(trans100))
323  {
324  if (gCode.areSynonymous(i, trans100))
325  {
326  path[0]++; path[1]++;
327  }
328  if (!gCode.isStop(trans110))
329  {
330  if (gCode.areSynonymous(trans100, trans110))
331  path[0]++;
332  if (gCode.areSynonymous(trans110, j))
333  path[0]++;
334  }
335  else
336  weight[0] = 0;
337  if (!gCode.isStop(trans101))
338  {
339  if (gCode.areSynonymous(trans100, trans101))
340  path[1]++;
341  if (gCode.areSynonymous(trans101, j))
342  path[1]++;
343  }
344  else
345  weight[1] = 0;
346  }
347  else
348  {
349  weight[0] = 0; weight[1] = 0;
350  }
351  if (!gCode.isStop(trans010))
352  {
353  if (gCode.areSynonymous(i, trans010))
354  {
355  path[2]++; path[3]++;
356  }
357  if (!gCode.isStop(trans110))
358  {
359  if (gCode.areSynonymous(trans010, trans110))
360  path[2]++;
361  if (gCode.areSynonymous(trans110, j))
362  path[2]++;
363  }
364  else
365  weight[2] = 0;
366  if (!gCode.isStop(trans011))
367  {
368  if (gCode.areSynonymous(trans010, trans011))
369  path[3]++;
370  if (gCode.areSynonymous(trans011, j))
371  path[3]++;
372  }
373  else
374  weight[3] = 0;
375  }
376  else
377  {
378  weight[2] = 0; weight[3] = 0;
379  }
380  if (!gCode.isStop(trans001))
381  {
382  if (gCode.areSynonymous(i, trans001))
383  {
384  path[4]++; path[5]++;
385  }
386  if (!gCode.isStop(trans101))
387  {
388  if (gCode.areSynonymous(trans001, trans101))
389  path[4]++;
390  if (gCode.areSynonymous(trans101, j))
391  path[4]++;
392  }
393  else
394  weight[4] = 0;
395  if (!gCode.isStop(trans011))
396  {
397  if (gCode.areSynonymous(trans001, trans011))
398  path[5]++;
399  if (gCode.areSynonymous(trans011, j))
400  path[5]++;
401  }
402  else
403  weight[5] = 0;
404  }
405  else
406  {
407  weight[4] = 0; weight[5] = 0;
408  }
409  if (minchange)
410  return VectorTools::max(path);
411 
412  double nbdif = 0;
413  for (size_t k = 0; k < 6; k++)
414  {
415  nbdif += path[k] * weight[k];
416  }
417 
418  return nbdif / VectorTools::sum(weight);
419  }
420  }
421  // This line is never reached but sends a warning if not there:
422  return 0.;
423 }
424 
425 /******************************************************************************/
426 
427 double CodonSiteTools::piSynonymous(const Site& site, const GeneticCode& gCode, bool minchange)
428 {
429  // Alphabet checking
431  throw AlphabetException("CodonSiteTools::piSynonymous: alphabet is not CodonAlphabet", site.getAlphabet());
432  if (!site.alphabet().equals(gCode.sourceAlphabet()))
433  throw AlphabetMismatchException("CodonSiteTools::piSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getCodonAlphabet());
434  // Empty site checking
435  if (site.size() == 0)
436  throw EmptySiteException("CodonSiteTools::piSynonymous: Incorrect specified site", &site);
437 
438  // General polymorphism checking
439  if (SymbolListTools::isConstant(site))
440  return 0;
441 
442  // Computation
443  map<int, double> freq;
445  double pi = 0;
446  for (map<int, double>::iterator it1 = freq.begin(); it1 != freq.end(); it1++)
447  {
448  for (map<int, double>::iterator it2 = freq.begin(); it2 != freq.end(); it2++)
449  {
450  pi += (it1->second) * (it2->second) * (numberOfSynonymousDifferences(it1->first, it2->first, gCode, minchange));
451  }
452  }
453  double n = static_cast<double>(site.size());
454  return pi * n / (n - 1);
455 }
456 
457 /******************************************************************************/
458 
459 double CodonSiteTools::piNonSynonymous(const Site& site, const GeneticCode& gCode, bool minchange)
460 {
461  // Alphabet checking
463  throw AlphabetException("CodonSiteTools::piNonSynonymous: alphabet is not CodonAlphabet", site.getAlphabet());
464  if (!site.alphabet().equals(gCode.sourceAlphabet()))
465  throw AlphabetMismatchException("CodonSiteTools::piNonSynonymous: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getCodonAlphabet());
466  // Empty site checking
467  if (site.size() == 0)
468  throw EmptySiteException("CodonSiteTools::piSynonymous: Incorrect specified site", &site);
469 
470  // General polymorphism checking
471  if (SymbolListTools::isConstant(site))
472  return 0;
473  if (isSynonymousPolymorphic(site, gCode))
474  return 0;
475  // Computation
476  map<int, double> freq;
478  auto ca = dynamic_pointer_cast<const CodonAlphabet>(site.getAlphabet());
479  double pi = 0;
480  for (map<int, double>::iterator it1 = freq.begin(); it1 != freq.end(); it1++)
481  {
482  for (map<int, double>::iterator it2 = freq.begin(); it2 != freq.end(); it2++)
483  {
484  double nbtot = static_cast<double>(numberOfDifferences(it1->first, it2->first, *ca));
485  double nbsyn = numberOfSynonymousDifferences(it1->first, it2->first, gCode, minchange);
486  pi += (it1->second) * (it2->second) * (nbtot - nbsyn);
487  }
488  }
489 
490  double n = static_cast<double>(site.size());
491  return pi * n / (n - 1);
492 }
493 
494 /******************************************************************************/
495 
496 double CodonSiteTools::numberOfSynonymousPositions(int i, const GeneticCode& gCode, double ratio)
497 {
498  auto ca = gCode.getCodonAlphabet();
499  if (gCode.isStop(i))
500  return 0;
501  if (ca->isUnresolved(i))
502  return 0;
503  double nbsynpos = 0.0;
504  vector<int> codon = ca->getPositions(i);
505  int acid = gCode.translate(i);
506  for (size_t pos = 0; pos < 3; ++pos)
507  {
508  for (int an = 0; an < 4; ++an)
509  {
510  if (an == codon[pos])
511  continue;
512  vector<int> mutcodon = codon;
513  mutcodon[pos] = an;
514  int intcodon = ca->getCodon(mutcodon[0], mutcodon[1], mutcodon[2]);
515  if (gCode.isStop(intcodon))
516  continue;
517  int altacid = gCode.translate(intcodon);
518  if (altacid == acid) // if synonymous
519  {
520  if (((codon[pos] == 0 || codon[pos] == 2) && (mutcodon[pos] == 1 || mutcodon[pos] == 3)) ||
521  ((codon[pos] == 1 || codon[pos] == 3) && (mutcodon[pos] == 0 || mutcodon[pos] == 2))) // if it is a transversion
522  {
523  nbsynpos = nbsynpos + 1 / (ratio + 2);
524  }
525  else // if transition
526  {
527  nbsynpos = nbsynpos + ratio / (ratio + 2);
528  }
529  }
530  }
531  }
532  return nbsynpos;
533 }
534 
535 /******************************************************************************/
536 
537 double CodonSiteTools::meanNumberOfSynonymousPositions(const Site& site, const GeneticCode& gCode, double ratio)
538 {
539  // Alphabet checking
540  auto alphabet = site.getAlphabet();
541  if (!AlphabetTools::isCodonAlphabet(*alphabet))
542  throw AlphabetException("CodonSiteTools::meanNumberOfSynonymousPositions: alphabet is not CodonAlphabet", alphabet);
543  if (!site.alphabet().equals(gCode.sourceAlphabet()))
544  throw AlphabetMismatchException("CodonSiteTools::meanNumberOfSynonymousPositions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getCodonAlphabet());
545  // Empty site checking
546  if (site.size() == 0)
547  throw EmptySiteException("CodonSiteTools::meanNumberOfSynonymousPositions: Incorrect specified site", &site);
548 
549  // Computation
550 
551  double nbSyn = 0;
552  map<int, double> freqs;
553  SymbolListTools::getFrequencies(site, freqs);
554  double total = 0;
555  for (const auto& it : freqs)
556  {
557  int state = it.first;
558  if (!alphabet->isUnresolved(state) &&
559  !alphabet->isGap(state))
560  {
561  double freq = it.second;
562  total += freq;
563  nbSyn += freq * numberOfSynonymousPositions(state, gCode, ratio);
564  }
565  }
566  return nbSyn / total;
567 }
568 
569 /******************************************************************************/
570 
571 size_t CodonSiteTools::numberOfSubstitutions(const Site& site, const GeneticCode& gCode, double freqmin)
572 {
573  // Alphabet checking
575  throw AlphabetException("CodonSiteTools::numberOfSubstitutions: alphabet is not CodonAlphabet", site.getAlphabet());
576  // Empty site checking
577  if (site.size() == 0)
578  throw EmptySiteException("CodonSiteTools::numberOfSubstitutions: Incorrect specified site", &site);
579 
580  if (SymbolListTools::isConstant(site))
581  return 0;
582  unique_ptr<Site> newsite;
583  if (freqmin > 1. / static_cast<double>(site.size()))
584  newsite = CodonSiteTools::generateCodonSiteWithoutRareVariant(site, gCode, freqmin);
585  else
586  newsite = make_unique<Site>(site);
587  // Computation
588  if (SymbolListTools::hasGap(*newsite))
589  return 0;
590  vector<int> pos1, pos2, pos3;
591 
592  auto ca = dynamic_pointer_cast<const CodonAlphabet>(site.getAlphabet());
593 
594  for (size_t i = 0; i < newsite->size(); i++)
595  {
596  pos1.push_back(ca->getFirstPosition(newsite->getValue(i)));
597  pos2.push_back(ca->getSecondPosition(newsite->getValue(i)));
598  pos3.push_back(ca->getThirdPosition(newsite->getValue(i)));
599  }
600 
601  shared_ptr<const Alphabet> na = ca->getNucleicAlphabet();
602 
603  Site s1(pos1, na), s2(pos2, na), s3(pos3, na);
604  size_t Scodon = SymbolListTools::getNumberOfDistinctCharacters(*newsite) - 1;
606  if (Scodon >= Sbase)
607  return Scodon;
608  else
609  return Sbase;
610 }
611 
612 /******************************************************************************/
613 
614 size_t CodonSiteTools::numberOfNonSynonymousSubstitutions(const Site& site, const GeneticCode& gCode, double freqmin)
615 {
616  // Alphabet checking
618  throw AlphabetException("CodonSiteTools::numberOfNonSynonymousSubstitutions: alphabet is not CodonAlphabet", site.getAlphabet());
619  if (!site.alphabet().equals(gCode.sourceAlphabet()))
620  throw AlphabetMismatchException("CodonSiteTools::numberOfNonSynonymousSubstitutions: site and genetic code have not the same codon alphabet.", site.getAlphabet(), gCode.getCodonAlphabet());
621  // Empty site checking
622  if (site.size() == 0)
623  throw EmptySiteException("CodonSiteTools::numberOfNonSynonymousSubstitutions: Incorrect specified site", &site);
624 
625  if (SymbolListTools::isConstant(site))
626  return 0;
627  unique_ptr<Site> newsite;
628  if (freqmin > 1. / static_cast<double>(site.size()))
629  newsite = generateCodonSiteWithoutRareVariant(site, gCode, freqmin);
630  else
631  newsite = make_unique<Site>(site);
632  if (SymbolListTools::hasGap(*newsite))
633  return 0;
634  // computation
635  map<int, size_t> count;
637  size_t NaSup = 0;
638  size_t Nminmin = 10;
639 
640  auto ca = dynamic_pointer_cast<const CodonAlphabet>(site.getAlphabet());
641 
642  for (map<int, size_t>::iterator it1 = count.begin(); it1 != count.end(); it1++)
643  {
644  size_t Nmin = 10;
645  for (map<int, size_t>::iterator it2 = count.begin(); it2 != count.end(); it2++)
646  {
647  size_t Ntot = numberOfDifferences(it1->first, it2->first, *ca);
648  size_t Ns = (size_t)numberOfSynonymousDifferences(it1->first, it2->first, gCode, true);
649  if (Nmin > Ntot - Ns && it1->first != it2->first)
650  Nmin = Ntot - Ns;
651  }
652  NaSup += Nmin;
653  if (Nmin < Nminmin)
654  Nminmin = Nmin;
655  }
656  return NaSup - Nminmin;
657 }
658 
659 /******************************************************************************/
660 
661 vector<size_t> CodonSiteTools::fixedDifferences(const Site& siteIn, const Site& siteOut, int i, int j, const GeneticCode& gCode)
662 {
663  // Alphabet checking
665  throw AlphabetException("CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteIn)", siteIn.getAlphabet());
667  throw AlphabetException("CodonSiteTools::fixedDifferences: alphabet is not CodonAlphabet (siteOut)", siteOut.getAlphabet());
668  if (!siteIn.alphabet().equals(gCode.sourceAlphabet()))
669  throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteIn and genetic code have not the same codon alphabet.", siteIn.getAlphabet(), gCode.getCodonAlphabet());
670  if (!siteOut.alphabet().equals(gCode.sourceAlphabet()))
671  throw AlphabetMismatchException("CodonSiteTools::fixedDifferences: siteOut and genetic code have not the same codon alphabet.", siteOut.getAlphabet(), gCode.getCodonAlphabet());
672  // Empty site checking
673  if (siteIn.size() == 0)
674  throw EmptySiteException("CodonSiteTools::getFixedDifferences Incorrect specified site", &siteIn);
675  if (siteOut.size() == 0)
676  throw EmptySiteException("CodonSiteTools::getFixedDifferences Incorrect specified site", &siteOut);
677 
678  auto ca = gCode.getCodonAlphabet();
679 
680  size_t Ntot = numberOfDifferences(i, j, *ca);
681  size_t Ns = static_cast<size_t>(numberOfSynonymousDifferences(i, j, gCode, true));
682  size_t Na = Ntot - Ns;
683  size_t Nfix = Ntot;
684  vector<int> pos1in, pos2in, pos3in, pos1out, pos2out, pos3out;
685 
686  for (size_t k = 0; k < siteIn.size(); k++)
687  {
688  pos1in.push_back(ca->getFirstPosition(siteIn[k]));
689  pos2in.push_back(ca->getSecondPosition(siteIn[k]));
690  pos3in.push_back(ca->getThirdPosition(siteIn[k]));
691  pos1out.push_back(ca->getFirstPosition(siteOut[k]));
692  pos2out.push_back(ca->getSecondPosition(siteOut[k]));
693  pos3out.push_back(ca->getThirdPosition(siteOut[k]));
694  }
695  shared_ptr<const Alphabet> na = ca->getNucleicAlphabet();
696 
697  Site s1in(pos1in, na), s2in(pos2in, na), s3in(pos3in, na);
698  Site s1out(pos1out, na), s2out(pos2out, na), s3out(pos3out, na);
699  bool test1 = false;
700  bool test2 = false;
701  bool test3 = false;
702  if ( (!SymbolListTools::isConstant(s1in) || !SymbolListTools::isConstant(s1out)) && ca->getFirstPosition(i) != ca->getFirstPosition(j) )
703  {
704  test1 = true;
705  Nfix--;
706  }
707  if ( (!SymbolListTools::isConstant(s2in) || !SymbolListTools::isConstant(s2out)) && ca->getSecondPosition(i) != ca->getSecondPosition(j) )
708  {
709  test2 = true;
710  Nfix--;
711  }
712  if ( (!SymbolListTools::isConstant(s3in) || !SymbolListTools::isConstant(s3out)) && ca->getThirdPosition(i) != ca->getThirdPosition(j) )
713  {
714  test3 = true;
715  Nfix--;
716  }
717  // Suppression of differences when not fixed
718  vector<size_t> v(2);
719  if (Nfix == 0)
720  {
721  v[0] = 0;
722  v[1] = 0;
723  return v;
724  }
725  if (Nfix < Ntot)
726  {
727  if (Na == 0)
728  Ns = Nfix;
729  if (Ns == 0)
730  Na = Nfix;
731  else
732  {
733  if (Ntot == 3)
734  {
735  if (Nfix == 1)
736  {
737  if (test1 && test2)
738  {
739  Na = 0; Ns = 1;
740  }
741  if (test1 && test3)
742  {
743  Na = 1; Ns = 0;
744  }
745  if (test2 && test3)
746  {
747  Na--; Ns--;
748  }
749  }
750  }
751  if (Nfix == 2)
752  {
753  if (test1)
754  {
755  Na = 1; Ns = 1;
756  }
757  if (test2)
758  Na--;
759  if (test3)
760  Ns--;
761  }
762  }
763  if (Ntot == 2)
764  {
765  if (test1)
766  {
767  if (ca->getSecondPosition(i) == ca->getSecondPosition(j))
768  {
769  if (Na > 0) // jdutheil on 19/12/16: added these checks to avoid negative Na/Ns
770  Na--;
771  }
772  else
773  {
774  if (Ns > 0)
775  Ns--;
776  }
777  }
778  if (test2)
779  Na--;
780  if (test3)
781  Ns--;
782  }
783  }
784  // cout << Na << " " << Ns << endl;
785  v[0] = Ns;
786  v[1] = Na;
787  return v;
788 }
789 
790 /******************************************************************************/
791 
793 {
794  if (!SymbolListTools::isConstant(site, true))
795  {
797  if (!(CodonSiteTools::isSynonymousPolymorphic(site, gCode)))
798  return false;
799 
800  for (size_t i = 0; i < site.size(); i++)
801  {
802  if (!(gCode.isFourFoldDegenerated(site.getValue(i))))
803  {
804  return false;
805  }
806  }
807  }
808  else
809  {
810  for (size_t i = 0; i < site.size(); i++)
811  {
812  if (!(gCode.isFourFoldDegenerated(site.getValue(i))))
813  {
814  return false;
815  }
816  }
817  }
818  return true;
819 }
820 
821 /******************************************************************************/
const Alphabet & alphabet() const override
Get the alphabet associated to the list.
Definition: SymbolList.h:122
const T & getValue(size_t pos) const override
checked access to a character in list.
Definition: SymbolList.h:180
std::shared_ptr< const Alphabet > getAlphabet() const override
Get the alphabet associated to the list.
Definition: SymbolList.h:120
size_t size() const override
Get the number of elements in the list.
Definition: SymbolList.h:124
The alphabet exception base class.
Exception thrown when two alphabets do not match.
static bool isCodonAlphabet(const Alphabet &alphabet)
virtual bool equals(const Alphabet &alphabet) const =0
Comparison of alphabets.
Codon alphabet class.
Definition: CodonAlphabet.h:31
int getThirdPosition(int codon) const
Get the int code of the third position of a codon given its int description.
int getFirstPosition(int codon) const
Get the int code of the first position of a codon given its int description.
int getSecondPosition(int codon) const
Get the int code of the second position of a codon given its int description.
static double piNonSynonymous(const Site &site, const GeneticCode &gCode, bool minchange=false)
Compute the non-synonymous pi per codon site.
static double numberOfSynonymousPositions(int i, const GeneticCode &gCode, double ratio=1.0)
Return the number of synonymous positions of a codon.
static bool hasGapOrStop(const Site &site, const GeneticCode &gCode)
Method to know if a codon site contains gap(s) or stop codons.
static double piSynonymous(const Site &site, const GeneticCode &gCode, bool minchange=false)
Compute the synonymous pi per codon site.
static bool isFourFoldDegenerated(const Site &site, const GeneticCode &gCode)
static bool isSynonymousPolymorphic(const Site &site, const GeneticCode &gCode)
Method to know if polymorphism at a codon site is synonymous.
static std::vector< size_t > fixedDifferences(const Site &siteIn, const Site &siteOut, int i, int j, const GeneticCode &gCode)
Return a vector with the number of fixed synonymous and non-synonymous differences per codon site.
static size_t numberOfSubstitutions(const Site &site, const GeneticCode &gCode, double freqmin=0.)
Return the number of substitutions per codon site.
static double numberOfSynonymousDifferences(int i, int j, const GeneticCode &gCode, bool minchange=false)
Compute the number of synonymous differences between two codons.
static std::unique_ptr< Site > generateCodonSiteWithoutRareVariant(const Site &site, const GeneticCode &gCode, double freqmin)
generate a codon site without rare variants
static size_t numberOfDifferences(int i, int j, const CodonAlphabet &ca)
Compute the number of differences between two codons.
static bool hasStop(const Site &site, const GeneticCode &gCode)
Method to know if a codon site contains stop codon or not.
static double meanNumberOfSynonymousPositions(const Site &site, const GeneticCode &gCode, double ratio=1)
Return the mean number of synonymous positions per codon site.
static size_t numberOfNonSynonymousSubstitutions(const Site &site, const GeneticCode &gCode, double freqmin=0.)
Return the number of Non Synonymous substitutions per codon site.
static bool isMonoSitePolymorphic(const Site &site)
Method to know if a polymorphic codon site is polymorphic at only one site.
Exception sent when a empty site is found.
Partial implementation of the Transliterator interface for genetic code object.
Definition: GeneticCode.h:50
bool isFourFoldDegenerated(int codon) const
Definition: GeneticCode.cpp:84
virtual std::shared_ptr< const CodonAlphabet > getCodonAlphabet() const
Alias for getSourceAlphabet return a pointer toward a CodonAlphabet.
Definition: GeneticCode.h:104
int translate(int state) const override
Translate a given state coded as a int from source alphabet to target alphabet.
Definition: GeneticCode.cpp:20
std::shared_ptr< const Alphabet > getSourceAlphabet() const override
Get the source alphabet.
Definition: GeneticCode.h:74
const Alphabet & sourceAlphabet() const override
Get the source alphabet.
Definition: GeneticCode.h:76
bool areSynonymous(int i, int j) const
Tell if two codons are synonymous, that is, if they encode the same amino-acid.
Definition: GeneticCode.h:205
virtual bool isStop(int state) const =0
Tells is a particular codon is a stop codon.
The Site class.
Definition: Site.h:73
static bool isConstant(const IntSymbolListInterface &site, bool ignoreUnknown=false, bool unresolvedRaisesException=true)
Tell if a site is constant, that is displaying the same state in all sequences that do not present a ...
static void getCounts(const IntSymbolListInterface &list, std::map< int, count_type > &counts)
Count all states in the list.
static void getFrequencies(const CruxSymbolListInterface &list, std::map< int, double > &frequencies, bool resolveUnknowns=false)
Get all states frequencies in the list.
static size_t getNumberOfDistinctCharacters(const IntSymbolListInterface &list)
Give the number of distinct characters at a list.
static bool hasGap(const IntSymbolListInterface &site)
static T sum(const std::vector< T > &v1)
static T max(const std::vector< T > &v)
std::size_t count(const std::string &s, const std::string &pattern)
This alphabet is used to deal NumericAlphabet.