bpp-seq3  3.0.0
SequenceTools.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
7 
9 #include "SequenceTools.h"
10 #include "StringSequenceTools.h"
11 
12 using namespace bpp;
13 
14 // From the STL:
15 #include <ctype.h>
16 #include <cmath>
17 #include <list>
18 #include <iostream>
19 
20 using namespace std;
21 
26 
27 /******************************************************************************/
28 
30 {
31  // Site's size and content checking
32  if (seq1.getAlphabet()->getAlphabetType() != seq2.getAlphabet()->getAlphabetType())
33  return false;
34  if (seq1.size() != seq2.size())
35  return false;
36  else
37  {
38  for (size_t i = 0; i < seq1.size(); i++)
39  {
40  if (seq1[i] != seq2[i])
41  return false;
42  }
43  return true;
44  }
45 }
46 
47 /******************************************************************************/
48 
50 {
51  // Alphabet type checking
54  {
55  NAR = &DNARep_;
56  }
57  else if (AlphabetTools::isRNAAlphabet(seq.alphabet()))
58  {
59  NAR = &RNARep_;
60  }
61  else
62  {
63  throw AlphabetException("SequenceTools::complement: Sequence must be nucleic.", seq.getAlphabet());
64  }
65  for (size_t i = 0; i < seq.size(); ++i)
66  {
67  seq.setElement(i, NAR->translate(seq.getValue(i)));
68  }
69 }
70 
71 /******************************************************************************/
72 
73 unique_ptr<Sequence> SequenceTools::getComplement(const SequenceInterface& sequence)
74 {
75  // Alphabet type checking
77  if (AlphabetTools::isDNAAlphabet(sequence.alphabet()))
78  {
79  NAR = &DNARep_;
80  }
81  else if (AlphabetTools::isRNAAlphabet(sequence.alphabet()))
82  {
83  NAR = &RNARep_;
84  }
85  else
86  {
87  throw AlphabetException ("SequenceTools::getComplement: Sequence must be nucleic.", sequence.getAlphabet());
88  }
89 
90  return NAR->translate(sequence);
91 }
92 
93 /******************************************************************************/
94 
95 unique_ptr<Sequence> SequenceTools::transcript(const Sequence& sequence)
96 {
97  // Alphabet type checking
98  if (!AlphabetTools::isDNAAlphabet(sequence.alphabet()))
99  {
100  throw AlphabetException ("SequenceTools::transcript : Sequence must be DNA", sequence.getAlphabet());
101  }
102 
103  return transc_.translate(sequence);
104 }
105 
106 /******************************************************************************/
107 
108 unique_ptr<Sequence> SequenceTools::reverseTranscript(const Sequence& sequence)
109 {
110  // Alphabet type checking
111  if (!AlphabetTools::isRNAAlphabet(sequence.alphabet()))
112  {
113  throw AlphabetException ("SequenceTools::reverseTranscript : Sequence must be RNA", sequence.getAlphabet());
114  }
115 
116  return transc_.reverse(sequence);
117 }
118 
119 /******************************************************************************/
120 
122 {
123  size_t seq_size = seq.size(); // store seq size for efficiency
124  int tmp_state = 0; // to store one state when swapping positions
125  size_t j = seq_size; // symmetric position iterator from sequence end
126  for (size_t i = 0; i < seq_size / 2; ++i)
127  {
128  j = seq_size - 1 - i;
129  tmp_state = seq.getValue(i);
130  seq.setElement(i, seq.getValue(j));
131  seq.setElement(j, tmp_state);
132  }
133 }
134 
135 /******************************************************************************/
136 
137 unique_ptr<SequenceInterface> SequenceTools::getInvert(const SequenceInterface& sequence)
138 {
139  auto iSeq = unique_ptr<SequenceInterface>(sequence.clone());
140  invert(*iSeq);
141  return iSeq;
142 }
143 
144 /******************************************************************************/
145 
147 {
148  // Alphabet type checking
151  {
152  NAR = &DNARep_;
153  }
154  else if (AlphabetTools::isRNAAlphabet(seq.alphabet()))
155  {
156  NAR = &RNARep_;
157  }
158  else
159  {
160  throw AlphabetException("SequenceTools::invertComplement: Sequence must be nucleic.", seq.getAlphabet());
161  }
162  // for (size_t i = 0 ; i < seq.size() ; i++) {
163  // seq.setElement(i, NAR->translate(seq.getValue(i)));
164  // }
165  size_t seq_size = seq.size(); // store seq size for efficiency
166  int tmp_state = 0; // to store one state when swapping positions
167  size_t j = seq_size; // symmetric position iterator from sequence end
168  for (size_t i = 0; i < seq_size / 2; ++i)
169  {
170  j = seq_size - 1 - i;
171  tmp_state = seq.getValue(i);
172  seq.setElement(i, NAR->translate(seq.getValue(j)));
173  seq.setElement(j, NAR->translate(tmp_state));
174  }
175  if (seq_size % 2) // treat the state in the middle of odd sequences
176  {
177  seq.setElement(seq_size / 2, NAR->translate(seq.getValue(seq_size / 2)));
178  }
179 }
180 
181 /******************************************************************************/
182 
184  const SequenceInterface& seq1,
185  const SequenceInterface& seq2,
186  bool ignoreGaps)
187 {
188  if (seq1.alphabet().getAlphabetType() != seq2.alphabet().getAlphabetType())
189  throw AlphabetMismatchException("SequenceTools::getPercentIdentity", seq1.getAlphabet(), seq2.getAlphabet());
190  if (seq1.size() != seq2.size())
191  throw SequenceNotAlignedException("SequenceTools::getPercentIdentity", &seq2);
192  int gap = seq1.alphabet().getGapCharacterCode();
193  size_t id = 0;
194  size_t tot = 0;
195  for (size_t i = 0; i < seq1.size(); ++i)
196  {
197  int x = seq1.getValue(i);
198  int y = seq2.getValue(i);
199  if (ignoreGaps)
200  {
201  if (x != gap && y != gap)
202  {
203  tot++;
204  if (x == y)
205  id++;
206  }
207  }
208  else
209  {
210  tot++;
211  if (x == y)
212  id++;
213  }
214  }
215  return static_cast<double>(id) / static_cast<double>(tot) * 100.;
216 }
217 
218 /******************************************************************************/
219 
221 {
222  size_t count = 0;
223  auto& alpha = seq.alphabet();
224  for (size_t i = 0; i < seq.size(); ++i)
225  {
226  if (!alpha.isGap(seq[i]))
227  count++;
228  }
229  return count;
230 }
231 
232 /******************************************************************************/
233 
235 {
236  size_t count = 0;
237  auto& alpha = seq.alphabet();
238  for (size_t i = 0; i < seq.size(); ++i)
239  {
240  if (!alpha.isGap(seq[i]) && !alpha.isUnresolved(seq[i]))
241  count++;
242  }
243  return count;
244 }
245 
246 /******************************************************************************/
247 
248 unique_ptr<SequenceInterface> SequenceTools::getSequenceWithCompleteSites(const SequenceInterface& seq)
249 {
250  auto& alpha = seq.alphabet();
251  vector<int> content;
252  for (size_t i = 0; i < seq.size(); ++i)
253  {
254  if (!(alpha.isGap(seq[i]) || alpha.isUnresolved(seq[i])))
255  content.push_back(seq[i]);
256  }
257  auto newSeq = unique_ptr<SequenceInterface>(seq.clone());
258  newSeq->setContent(content);
259  return newSeq;
260 }
261 
262 /******************************************************************************/
263 
265 {
266  size_t count = 0;
267  auto alphaPtr = seq.getAlphabet();
268  for (size_t i = 0; i < seq.size(); ++i)
269  {
270  if (alphaPtr->isUnresolved(seq[i]))
271  count++;
272  }
273  return count;
274 }
275 
276 /******************************************************************************/
277 
278 unique_ptr<SequenceInterface> SequenceTools::getSequenceWithoutGaps(const SequenceInterface& seq)
279 {
280  auto alphaPtr = seq.getAlphabet();
281  vector<int> content;
282  for (size_t i = 0; i < seq.size(); ++i)
283  {
284  if (!alphaPtr->isGap(seq[i]))
285  content.push_back(seq[i]);
286  }
287  auto newSeq = unique_ptr<SequenceInterface>(seq.clone());
288  newSeq->setContent(content);
289  return newSeq;
290 }
291 
292 /******************************************************************************/
293 
295 {
296  auto alphaPtr = seq.getAlphabet();
297  for (size_t i = seq.size(); i > 0; --i)
298  {
299  if (alphaPtr->isGap(seq[i - 1]))
300  seq.deleteElement(i - 1);
301  }
302 }
303 
304 /******************************************************************************/
305 
306 unique_ptr<SequenceInterface> SequenceTools::getSequenceWithoutStops(const SequenceInterface& seq, const GeneticCode& gCode)
307 {
308  auto calpha = dynamic_pointer_cast<const CodonAlphabet>(seq.getAlphabet());
309  if (!calpha)
310  throw Exception("SequenceTools::getSequenceWithoutStops. Input sequence should have a codon alphabet.");
311  vector<int> content;
312  for (size_t i = 0; i < seq.size(); ++i)
313  {
314  if (!gCode.isStop(seq[i]))
315  content.push_back(seq[i]);
316  }
317  auto newSeq = unique_ptr<SequenceInterface>(seq.clone());
318  newSeq->setContent(content);
319  return newSeq;
320 }
321 
322 /******************************************************************************/
323 
325 {
326  auto calpha = dynamic_pointer_cast<const CodonAlphabet>(seq.getAlphabet());
327  if (!calpha)
328  throw Exception("SequenceTools::removeStops. Input sequence should have a codon alphabet.");
329  for (size_t i = seq.size(); i > 0; --i)
330  {
331  if (gCode.isStop(seq[i - 1]))
332  seq.deleteElement(i - 1);
333  }
334 }
335 
336 /******************************************************************************/
337 
339 {
340  auto calpha = dynamic_pointer_cast<const CodonAlphabet>(seq.getAlphabet());
341  if (!calpha)
342  throw Exception("SequenceTools::replaceStopsWithGaps. Input sequence should have a codon alphabet.");
343  int gap = calpha->getGapCharacterCode();
344  for (size_t i = 0; i < seq.size(); ++i)
345  {
346  if (gCode.isStop(seq[i]))
347  seq.setElement(i, gap);
348  }
349 }
350 
351 /******************************************************************************/
352 
353 unique_ptr<BowkerTest> SequenceTools::bowkerTest(
354  const SequenceInterface& seq1,
355  const SequenceInterface& seq2)
356 {
357  if (seq1.size() != seq2.size())
358  throw SequenceNotAlignedException("SequenceTools::bowkerTest.", &seq2);
359  size_t n = seq1.size();
360  auto alphaPtr = seq1.getAlphabet();
361  unsigned int r = alphaPtr->getSize();
362 
363  // Compute contingency table:
364  RowMatrix<double> array(r, r);
365  int x, y;
366  for (size_t i = 0; i < n; ++i)
367  {
368  x = seq1[i];
369  y = seq2[i];
370  if (!alphaPtr->isGap(x) && !alphaPtr->isUnresolved(x)
371  && !alphaPtr->isGap(y) && !alphaPtr->isUnresolved(y))
372  {
373  array(static_cast<size_t>(x), static_cast<size_t>(y))++;
374  }
375  }
376 
377  // Compute Bowker's statistic:
378  double sb2 = 0, nij, nji;
379  for (unsigned int i = 1; i < r; ++i)
380  {
381  for (unsigned int j = 0; j < i; ++j)
382  {
383  nij = array(i, j);
384  nji = array(j, i);
385  if (nij != 0 || nji != 0)
386  {
387  sb2 += pow(nij - nji, 2) / (nij + nji);
388  }
389  // Else: we should display a warning there.
390  }
391  }
392 
393  // Compute p-value:
394  double pvalue = 1. - RandomTools::pChisq(sb2, (r - 1) * r / 2);
395 
396  // Return results:
397  auto test = make_unique<BowkerTest>();
398  test->setStatistic(sb2);
399  test->setPValue(pvalue);
400  return test;
401 }
402 
403 /******************************************************************************/
404 
406  const SequenceInterface& seq,
407  std::vector<unique_ptr<SequenceInterface>>& hap,
408  unsigned int level)
409 {
410  vector< vector< int >> states(seq.size());
411  list< unique_ptr<Sequence>> t_hap;
412  auto alphaPtr = seq.getAlphabet();
413  unsigned int hap_count = 1;
414  // Vector of available states at each position
415  for (size_t i = 0; i < seq.size(); ++i)
416  {
417  vector<int> st = alphaPtr->getAlias(seq[i]);
418  if (!st.size())
419  {
420  st.push_back(alphaPtr->getGapCharacterCode());
421  }
422  if (st.size() <= level)
423  {
424  states[i] = st;
425  }
426  else
427  {
428  states[i] = vector<int>(1, seq[i]);
429  }
430  }
431  // Combinatorial haplotypes building (the use of tree may be more accurate)
432  t_hap.push_back(make_unique<Sequence>(seq.getName() + "_hap" + TextTools::toString(hap_count++), "", alphaPtr));
433  for (size_t i = 0; i < states.size(); ++i)
434  {
435  for (auto it = t_hap.begin(); it != t_hap.end(); it++)
436  {
437  for (size_t j = 0; j < states[i].size(); ++j)
438  {
439  auto tmp_seq = unique_ptr<Sequence>((**it).clone());
440  tmp_seq->setName(seq.getName() + "_hap");
441  if (j < states[i].size() - 1)
442  {
443  tmp_seq->setName(tmp_seq->getName() + TextTools::toString(hap_count++));
444  tmp_seq->addElement(states[i][j]);
445  t_hap.insert(it, std::move(tmp_seq));
446  }
447  else
448  {
449  (**it).addElement(states[i][j]);
450  }
451  }
452  }
453  }
454  for (auto it = t_hap.rbegin(); it != t_hap.rend(); it++)
455  {
456  hap.push_back(std::move(*it));
457  }
458 }
459 
460 /******************************************************************************/
461 
462 unique_ptr<Sequence> SequenceTools::combineSequences(
463  const SequenceInterface& s1,
464  const SequenceInterface& s2)
465 {
466  if (s1.alphabet().getAlphabetType() != s2.alphabet().getAlphabetType())
467  {
468  throw AlphabetMismatchException("SequenceTools::combineSequences(const Sequence& s1, const Sequence& s2): s1 and s2 don't have same Alphabet.", s1.getAlphabet(), s2.getAlphabet());
469  }
470  auto alphaPtr = s1.getAlphabet();
471  vector<int> st;
472  vector<int> seq;
473  size_t length = NumTools::max(s1.size(), s2.size());
474  for (size_t i = 0; i < length; ++i)
475  {
476  if (i < s1.size())
477  st.push_back(s1.getValue(i));
478  if (i < s2.size())
479  st.push_back(s2.getValue(i));
480  seq.push_back(alphaPtr->getGeneric(st));
481  st.clear();
482  }
483  auto s = make_unique<Sequence>(s1.getName() + "+" + s2.getName(), seq, alphaPtr);
484  return s;
485 }
486 
487 /******************************************************************************/
488 
489 unique_ptr<Sequence> SequenceTools::subtractHaplotype(
490  const SequenceInterface& s,
491  const SequenceInterface& h,
492  string name,
493  unsigned int level)
494 {
495  auto alphaPtr = s.getAlphabet();
496  if (name.size() == 0)
497  name = s.getName() + "_haplotype";
498  string seq;
499  if (s.size() != h.size())
500  throw SequenceNotAlignedException("SequenceTools::subtractHaplotype: haplotype must be aligned with the sequence.", &h);
501  for (unsigned int i = 0; i < s.size(); ++i)
502  {
503  string c;
504  vector<int> nucs = alphaPtr->getAlias(s.getValue(i));
505  if (nucs.size() > 1)
506  {
507  std::ignore = remove(nucs.begin(), nucs.end(), h.getValue(i));
508  nucs = vector<int>(nucs.begin(), nucs.end() - 1);
509  }
510  else
511  {
512  nucs = vector<int>(nucs.begin(), nucs.end());
513  }
514  c = alphaPtr->intToChar(alphaPtr->getGeneric(nucs));
515  if (level <= nucs.size() && (alphaPtr->isUnresolved(s.getValue(i)) || alphaPtr->isUnresolved(h.getValue(i))))
516  {
517  c = alphaPtr->intToChar(alphaPtr->getUnknownCharacterCode());
518  }
519  seq += c;
520  }
521  auto hap = make_unique<Sequence>(name, seq, alphaPtr);
522  return hap;
523 }
524 
525 /******************************************************************************/
526 
527 unique_ptr<Sequence> SequenceTools::RNYslice(const SequenceInterface& seq, int ph)
528 {
529  // Alphabet type checking
531  {
532  throw AlphabetException ("SequenceTools::transcript : Sequence must be DNA", seq.getAlphabet());
533  }
534 
535  if ((ph < 1) || (ph > 3))
536  throw Exception("Bad phase for RNYSlice: " + TextTools::toString(ph) + ". Should be between 1 and 3.");
537 
538  size_t s = seq.size();
539  size_t n = (s - static_cast<size_t>(ph) + 3) / 3;
540 
541  vector<int> content(n);
542 
543  int tir = seq.getAlphabet()->getGapCharacterCode();
544  size_t j;
545 
546  for (size_t i = 0; i < n; i++)
547  {
548  j = i * 3 + static_cast<size_t>(ph) - 1;
549 
550  if (j == 0)
551  content[i] = RNY_->getRNY(tir, seq[0], seq[1], *seq.getAlphabet());
552  else
553  {
554  if (j == s - 1)
555  content[i] = RNY_->getRNY(seq[j - 1], seq[j], tir, *seq.getAlphabet());
556  else
557  content[i] = RNY_->getRNY(seq[j - 1], seq[j], seq[j + 1], *seq.getAlphabet());
558  }
559  }
560 
561  // New sequence creating, and sense reversing
562  auto alphaPtr = dynamic_pointer_cast<const Alphabet>(RNY_);
563  auto seqPtr = make_unique<Sequence>(seq.getName(), content, seq.getComments(), alphaPtr);
564 
565  // Send result
566  return seqPtr;
567 }
568 
569 unique_ptr<Sequence> SequenceTools::RNYslice(const SequenceInterface& seq)
570 {
571  // Alphabet type checking
573  {
574  throw AlphabetException ("SequenceTools::transcript : Sequence must be DNA", seq.getAlphabet());
575  }
576 
577  size_t n = seq.size();
578 
579  vector<int> content(n);
580 
581  int tir = seq.getAlphabet()->getGapCharacterCode();
582 
583  if (seq.size() >= 2)
584  {
585  content[0] = RNY_->getRNY(tir, seq[0], seq[1], *seq.getAlphabet());
586 
587  for (unsigned int i = 1; i < n - 1; i++)
588  {
589  content[i] = RNY_->getRNY(seq[i - 1], seq[i], seq[i + 1],
590  *seq.getAlphabet());
591  }
592 
593  content[n - 1] = RNY_->getRNY(seq[n - 2], seq[n - 1], tir, *seq.getAlphabet());
594  }
595 
596  // New sequence creating, and sense reversing
597  auto alphaPtr = dynamic_pointer_cast<const Alphabet>(RNY_);
598  auto seqPtr = make_unique<Sequence>(seq.getName(), content, seq.getComments(), alphaPtr);
599 
600  // Send result
601  return seqPtr;
602 }
603 
604 /******************************************************************************/
605 
606 
608  SequenceInterface& sequence,
609  const GeneticCode& gCode,
610  bool checkInit,
611  bool checkStop,
612  bool includeInit,
613  bool includeStop)
614 {
615  auto alphabet = dynamic_pointer_cast<const CodonAlphabet>(sequence.getAlphabet());
616  if (!alphabet)
617  throw AlphabetException("SequenceTools::getCDS. Sequence is not a codon sequence.", sequence.getAlphabet());
618  if (checkInit)
619  {
620  size_t i;
621  for (i = 0; i < sequence.size() && !gCode.isStart(sequence[i]); ++i)
622  {}
623  for (size_t j = 0; includeInit ? j < i : j <= i; ++j)
624  {
625  sequence.deleteElement(j);
626  }
627  }
628  if (checkStop)
629  {
630  size_t i;
631  for (i = 0; i < sequence.size() && !gCode.isStop(sequence[i]); ++i)
632  {}
633  for (size_t j = includeStop ? i + 1 : i; j < sequence.size(); ++j)
634  {
635  sequence.deleteElement(j);
636  }
637  }
638 }
639 
640 /******************************************************************************/
641 
643  const SequenceInterface& seq,
644  const SequenceInterface& motif,
645  bool strict)
646 {
647  if (motif.size() > seq.size())
648  return seq.size();
649  for (size_t seqi = 0; seqi < seq.size() - motif.size() + 1; seqi++)
650  {
651  bool match = false;
652  for (size_t moti = 0; moti < motif.size(); moti++)
653  {
654  if (strict)
655  {
656  match = seq.getValue(seqi + moti) == motif.getValue(moti);
657  }
658  else
659  {
660  match = AlphabetTools::match(seq.getAlphabet(), seq.getValue(seqi + moti), motif.getValue(moti));
661  }
662  if (!match)
663  {
664  break;
665  }
666  }
667  if (match)
668  {
669  return seqi;
670  }
671  }
672  return seq.size();
673 }
674 
675 /******************************************************************************/
676 
677 unique_ptr<Sequence> SequenceTools::getRandomSequence(shared_ptr<const Alphabet>& alphabet, size_t length)
678 {
679  int s = static_cast<int>(alphabet->getSize());
680  vector<int> content(length);
681  for (size_t i = 0; i < length; ++i)
682  {
684  }
685  return make_unique<Sequence>("random", content, alphabet);
686 }
687 
688 /******************************************************************************/
const Alphabet & alphabet() const override
Get the alphabet associated to the list.
Definition: SymbolList.h:122
std::shared_ptr< const Alphabet > getAlphabet() const override
Get the alphabet associated to the list.
Definition: SymbolList.h:120
The alphabet exception base class.
Exception thrown when two alphabets do not match.
static bool match(std::shared_ptr< const Alphabet > alphabet, int i, int j)
Tell if two characters match according to a given alphabet.
static bool isRNAAlphabet(const Alphabet &alphabet)
static std::shared_ptr< const DNA > DNA_ALPHABET
Definition: AlphabetTools.h:34
static bool isDNAAlphabet(const Alphabet &alphabet)
static std::shared_ptr< const RNA > RNA_ALPHABET
Definition: AlphabetTools.h:35
virtual std::string getAlphabetType() const =0
Identification method.
virtual int getGapCharacterCode() const =0
virtual const Comments & getComments() const =0
Get the comments.
virtual const std::string & getName() const =0
Get the name of this sequence.
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get the alphabet associated to the list.
virtual void deleteElement(size_t pos)=0
Remove the element at position 'pos'.
virtual size_t size() const =0
Get the number of elements in the list.
virtual const Alphabet & alphabet() const =0
Get the alphabet associated to the list.
Partial implementation of the Transliterator interface for genetic code object.
Definition: GeneticCode.h:50
virtual bool isStart(int state) const
Tells is a particular codon is a start codon.
Definition: GeneticCode.h:164
virtual bool isStop(int state) const =0
Tells is a particular codon is a stop codon.
virtual void setElement(size_t pos, const std::string &c)=0
Set the element at position 'pos' to character 'c'.
Replication between to nucleic acids.
int translate(int state) const override
Translate a given state coded as a int from source alphabet to target alphabet.
static T max(T a, T b)
Definition: RNY.h:30
static double pChisq(double x, double v)
static intType giveIntRandomNumberBetweenZeroAndEntry(intType entry)
The sequence interface.
Definition: Sequence.h:34
SequenceInterface * clone() const override=0
Exception thrown when a sequence is not align with others.
static bool areSequencesIdentical(const SequenceInterface &seq1, const SequenceInterface &seq2)
static void getCDS(SequenceInterface &sequence, const GeneticCode &gCode, bool checkInit, bool checkStop, bool includeInit=true, bool includeStop=true)
Extract CDS part from a codon sequence. Optionally check for intiator and stop codons,...
static std::unique_ptr< Sequence > getRandomSequence(std::shared_ptr< const Alphabet > &alphabet, size_t length)
Get a random sequence of given size and alphabet, with all state with equal probability.
static size_t findFirstOf(const SequenceInterface &seq, const SequenceInterface &motif, bool strict=true)
Find the position of a motif in a sequence.
static std::unique_ptr< Sequence > transcript(const Sequence &sequence)
Get the transcription sequence of a DNA sequence.
static void invert(SequenceInterface &seq)
Inverse a sequence from 5'->3' to 3'->5' and vice-versa.
static NucleicAcidsReplication DNARep_
Definition: SequenceTools.h:67
static size_t getNumberOfSites(const SequenceInterface &seq)
static size_t getNumberOfCompleteSites(const SequenceInterface &seq)
static std::unique_ptr< BowkerTest > bowkerTest(const SequenceInterface &seq1, const SequenceInterface &seq2)
Bowker's test for homogeneity.
static std::unique_ptr< Sequence > combineSequences(const SequenceInterface &s1, const SequenceInterface &s2)
Combine two sequences.
static void removeGaps(SequenceInterface &seq)
Remove gaps from a sequence.
static void getPutativeHaplotypes(const SequenceInterface &seq, std::vector< std::unique_ptr< SequenceInterface >> &hap, unsigned int level=2)
Get all putatives haplotypes from an heterozygous sequence.
static std::unique_ptr< SequenceInterface > getSequenceWithoutStops(const SequenceInterface &seq, const GeneticCode &gCode)
Get a copy of the codon sequence without stops.
static std::unique_ptr< Sequence > RNYslice(const SequenceInterface &sequence, int ph)
Get the RNY decomposition of a DNA sequence.
static NucleicAcidsReplication RNARep_
Definition: SequenceTools.h:68
static std::unique_ptr< Sequence > reverseTranscript(const Sequence &sequence)
Get the reverse-transcription sequence of a RNA sequence.
static void removeStops(SequenceInterface &seq, const GeneticCode &gCode)
Remove stops from a codon sequence.
static double getPercentIdentity(const SequenceInterface &seq1, const SequenceInterface &seq2, bool ignoreGaps=false)
static void invertComplement(SequenceInterface &seq)
Inverse and complement a sequence.
static void complement(SequenceInterface &seq)
Complement the nucleotide sequence itself.
static void replaceStopsWithGaps(SequenceInterface &seq, const GeneticCode &gCode)
Replace stop codons by gaps.
static std::unique_ptr< SequenceInterface > getSequenceWithCompleteSites(const SequenceInterface &seq)
keep only complete sites in a sequence.
static std::unique_ptr< SequenceInterface > getInvert(const SequenceInterface &sequence)
Inverse a sequence from 5'->3' to 3'->5' and vice-versa.
static size_t getNumberOfUnresolvedSites(const SequenceInterface &seq)
static std::unique_ptr< SequenceInterface > getSequenceWithoutGaps(const SequenceInterface &seq)
Get a copy of the sequence without gaps.
static std::shared_ptr< RNY > RNY_
Definition: SequenceTools.h:66
static NucleicAcidsReplication transc_
Definition: SequenceTools.h:69
static std::unique_ptr< Sequence > subtractHaplotype(const SequenceInterface &s, const SequenceInterface &h, std::string name="", unsigned int level=1)
Subtract haplotype from an heterozygous sequence.
static std::unique_ptr< Sequence > getComplement(const SequenceInterface &sequence)
Get the complementary sequence of a nucleotide sequence.
A basic implementation of the Sequence interface.
Definition: Sequence.h:117
virtual const T & getValue(size_t pos) const =0
checked access to a character in list.
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)
This alphabet is used to deal NumericAlphabet.