bpp-seq3  3.0.0
SiteContainerTools.h
Go to the documentation of this file.
1 #ifndef BPP_SEQ_CONTAINER_SITECONTAINERTOOLS_H
2 #define BPP_SEQ_CONTAINER_SITECONTAINERTOOLS_H
3 
4 // Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
5 // SPDX-FileCopyrightText: The Bio++ Development Group
6 //
7 // SPDX-License-Identifier: CECILL-2.1
8 
9 #include "SiteContainer.h"
10 #include "VectorSiteContainer.h"
12 #include "SequenceContainerTools.h"
13 #include "AlignmentData.h"
14 #include "../AlphabetIndex/AlphabetIndex2.h"
15 #include "../DistanceMatrix.h"
16 #include "../GeneticCode/GeneticCode.h"
17 #include "../SiteTools.h"
18 #include "../CodonSiteTools.h"
19 #include "../Site.h"
22 
23 // From the STL:
24 #include <vector>
25 #include <map>
26 #include <memory>
27 
28 namespace bpp
29 {
// A list of site positions; the selection helpers below iterate over it and
// fetch sites.site(pos) for every entry, in order.
30 using SiteSelection = std::vector<size_t>;
31 
36 {
37 public:
39  virtual ~SiteContainerTools() {}
40 
41 public:
51  template<class SiteType, class SequenceType>
52  static std::unique_ptr<TemplateVectorSiteContainer<SiteType, SequenceType>>
54  {
55  std::vector<std::string> sequenceKeys = sites.getSequenceKeys();
56  std::shared_ptr<const Alphabet> alphaPtr = sites.getAlphabet();
57  auto selectedSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, alphaPtr);
58  for (size_t i = 0; i < sites.getNumberOfSites(); ++i)
59  {
60  if (!SiteTools::hasGap(sites.site(i))) // This calls the method dedicated to basic sites
61  {
62  std::unique_ptr<SiteType> sitePtr(sites.site(i).clone());
63  selectedSites->addSite(sitePtr, false);
64  }
65  }
66  selectedSites->setSequenceNames(sites.getSequenceNames(), false);
67  return selectedSites;
68  }
69 
70 
81  template<class SiteType, class SequenceType>
82  static std::unique_ptr<TemplateVectorSiteContainer<SiteType, SequenceType>>
84  {
85  std::vector<std::string> sequenceKeys = sites.getSequenceKeys();
86  std::shared_ptr<const Alphabet> alphaPtr = sites.getAlphabet();
87  auto selectedSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, alphaPtr);
88  for (size_t i = 0; i < sites.getNumberOfSites(); ++i)
89  {
90  if (SiteTools::isComplete(sites.site(i))) // This calls the method dedicated to basic sites
91  {
92  std::unique_ptr<SiteType> sitePtr(sites.site(i).clone());
93  selectedSites->addSite(sitePtr, false);
94  }
95  }
96  selectedSites->setSequenceNames(sites.getSequenceNames(), false);
97  return selectedSites;
98  }
99 
100 
110  template<class SiteType, class SequenceType>
111  static std::unique_ptr<TemplateSiteContainerInterface<SiteType, SequenceType, std::string>>
113  {
114  if (sites.getNumberOfSequences() == 0)
115  throw Exception("SiteContainerTools::removeGapOnlySites. Container is empty.");
116  std::vector<std::string> sequenceKeys = sites.getSequenceKeys();
117  auto alphaPtr = sites.getAlphabet();
118  auto newContainer = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, alphaPtr);
119  for (size_t i = 0; i < sites.getNumberOfSites(); ++i)
120  {
121  const Site& site = sites.site(i);
122  if (!SiteTools::isGapOnly(site))
123  {
124  auto site2 = std::unique_ptr<SiteType>(site.clone());
125  newContainer->addSite(site2, false);
126  }
127  }
128  return newContainer;
129  }
130 
131 
137  template<class SiteType, class SequenceType, class HashType>
139  {
140  if (sites.getNumberOfSequences() == 0)
141  throw Exception("SiteContainerTools::removeGapOnlySites. Container is empty.");
142 
143  size_t n = sites.getNumberOfSites();
144  size_t i = n;
145  while (i > 1)
146  {
147  ApplicationTools::displayGauge(n - i + 1, n);
148  const SiteType* site = &sites.site(i - 1); // Note (jdutheil 18/12/22: for some reason a ref here does not work, resorting to pointer)
149  if (SiteTools::isGapOnly(*site))
150  {
151  size_t end = i;
152  while (SiteTools::isGapOnly(*site) && i > 1)
153  {
154  --i;
155  site = &sites.site(i - 1);
156  }
157  sites.deleteSites(i, end - i);
158  }
159  else
160  {
161  --i;
162  }
163  }
165  const Site& site = sites.site(0);
166  if (SiteTools::isGapOnly(site))
167  sites.deleteSite(0);
168  }
169 
170 
180  template<class SiteType, class SequenceType>
181  static std::unique_ptr<TemplateVectorSiteContainer<SiteType, SequenceType>>
183  {
184  if (sites.getNumberOfSequences() == 0)
185  throw Exception("SiteContainerTools::removeGapOrUnresolvedOnlySites. Container is empty.");
186 
187  std::vector<std::string> sequenceKeys = sites.getSequenceKeys();
188  auto alphaPtr = sites.getAlphabet();
189  auto newContainer = std::make_unique<TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, alphaPtr);
190  for (size_t i = 0; i < sites.getNumberOfSites(); ++i)
191  {
192  const Site& site = sites.site(i);
194  {
195  auto site2 = std::unique_ptr<SiteType>(site.clone());
196  newContainer->addSite(site2, false);
197  }
198  }
199  return newContainer;
200  }
201 
202 
208  template<class SiteType, class SequenceType, class HashType>
210  {
211  if (sites.getNumberOfSequences() == 0)
212  throw Exception("SiteContainerTools::removeGapOrUnresolvedOnlySites. Container is empty.");
213 
214  size_t n = sites.getNumberOfSites();
215  size_t i = n;
216  while (i > 1)
217  {
218  ApplicationTools::displayGauge(n - i + 1, n);
219  const SiteType& site = sites.site(i - 1);
220  if (SiteTools::isGapOnly(site))
221  {
222  size_t end = i;
223  while (SiteTools::isGapOrUnresolvedOnly(site) && i > 1)
224  {
225  --i;
226  site = &sites.site(i - 1);
227  }
228  sites.deleteSites(i, end - i);
229  }
230  else
231  {
232  --i;
233  }
234  }
236  const SiteType& site = sites.site(0);
238  sites.deleteSite(0);
239  }
240 
248  template<class SiteType, class SequenceType>
249  static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
252  double maxFreqGaps)
253  {
254  if (sites.getNumberOfSequences() == 0)
255  throw Exception("SiteContainerTools::removeGapSites. Container is empty.");
256 
257  std::vector<std::string> sequenceKeys = sites.getSequenceKeys();
258  auto newContainer = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sequenceKeys, sites.getAlphabet());
259  for (size_t i = 0; i < sites.getNumberOfSites(); ++i)
260  {
261  std::map<int, double> freq;
262  const Site& site = sites.site(i);
263  SiteTools::getFrequencies(site, freq);
264  if (freq[-1] <= maxFreqGaps)
265  {
266  auto site2 = std::make_unique<SiteType>(site.clone());
267  newContainer->addSite(site2, false);
268  }
269  }
270  newContainer->setSequenceNames(sites.getSequenceNames(), false);
271  return newContainer;
272  }
273 
274 
281  template<class SiteType, class SequenceType, class HashType>
282  static void removeGapSites(
284  double maxFreqGaps)
285  {
286  if (sites.getNumberOfSequences() == 0)
287  throw Exception("SiteContainerTools::removeGapSites. Container is empty.");
288 
289  for (size_t i = sites.getNumberOfSites(); i > 0; --i)
290  {
291  std::map<int, double> freq;
292  SiteTools::getFrequencies(sites.site(i - 1), freq);
293  if (freq[-1] > maxFreqGaps)
294  {
295  sites.deleteSite(i - 1);
296  }
297  }
298  }
299 
300 
311  static std::unique_ptr<SiteContainerInterface> getSitesWithoutStopCodon(
312  const SiteContainerInterface& sites,
313  const GeneticCode& gCode)
314  {
315  std::shared_ptr<const CodonAlphabet> pca = std::dynamic_pointer_cast<const CodonAlphabet>(sites.getAlphabet());
316  if (!pca)
317  throw AlphabetException("Not a Codon Alphabet", sites.getAlphabet().get());
318  if (sites.getNumberOfSequences() == 0)
319  throw Exception("SiteContainerTools::getSitesWithoutStopCodon. Container is empty.");
320 
321  std::vector<std::string> sequenceKeys = sites.getSequenceKeys();
322  auto alphaP = sites.getAlphabet();
323  auto newContainer = std::make_unique<VectorSiteContainer>(sequenceKeys, alphaP);
324  for (size_t i = 0; i < sites.getNumberOfSites(); ++i)
325  {
326  const Site& site = sites.site(i);
327  if (!CodonSiteTools::hasStop(site, gCode))
328  {
329  std::unique_ptr<Site> site2(site.clone());
330  newContainer->addSite(site2, false);
331  }
332  }
333  newContainer->setSequenceNames(sites.getSequenceNames(), false);
334  return newContainer;
335  }
336 
344  SiteContainerInterface& sites,
345  const GeneticCode& gCode)
346  {
347  std::shared_ptr<const CodonAlphabet> pca = std::dynamic_pointer_cast<const CodonAlphabet>(sites.getAlphabet());
348  if (!pca)
349  throw AlphabetException("Not a Codon Alphabet", sites.getAlphabet().get());
350  if (sites.getNumberOfSequences() == 0)
351  throw Exception("SiteContainerTools::removeSitesWithStopCodon. Container is empty.");
352 
353  for (size_t i = sites.getNumberOfSites(); i > 0; --i)
354  {
355  const Site& site = sites.site(i - 1);
356  if (CodonSiteTools::hasStop(site, gCode))
357  sites.deleteSite(i - 1);
358  }
359  }
360 
369  {
370  throw Exception("SiteContainerTools::removeSitesWithStopCodon. Method not supported for probabilistic sequences.");
371  }
372 
384  template<class SiteType, class SequenceType, class HashType>
385  static void getSelectedSites(
387  const SiteSelection& selection,
389  {
390  for (auto pos : selection)
391  {
392  auto sitePtr = std::unique_ptr<SiteType>(sites.site(pos).clone());
393  outputSites.addSite(sitePtr, false);
394  }
395  outputSites.setSequenceNames(sites.getSequenceNames(), true);
396  }
397 
398 
408  template<class SiteType, class SequenceType>
409  static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
412  const SiteSelection& selection)
413  {
414  auto alphaPtr = sites.getAlphabet();
415  auto outputSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sites.getSequenceKeys(), alphaPtr);
416  outputSites->setComments(sites.getComments());
417  getSelectedSites<SiteType, SequenceType, std::string>(sites, selection, *outputSites);
418  return outputSites;
419  }
420 
421 
432  static std::unique_ptr<AlignmentDataInterface>
434  const AlignmentDataInterface& sites,
435  const SiteSelection& selection)
436  {
437  try
438  {
439  auto& sc = dynamic_cast<const SiteContainerInterface&>(sites);
440  auto sel = getSelectedSites<Site, Sequence>(sc, selection);
441  return std::move(sel);
442  }
443  catch (std::bad_cast& e) {}
444 
445  try
446  {
447  auto& psc = dynamic_cast<const ProbabilisticSiteContainerInterface&>(sites);
448  auto sel = getSelectedSites<ProbabilisticSite, ProbabilisticSequence>(psc, selection);
449  return std::move(sel);
450  }
451  catch (std::bad_cast& e) {}
452 
453  throw Exception("SiteContainerTools::getSelectedSites : unsupported container type.");
454  }
455 
456 
469  template<class SiteType, class SequenceType, class HashType>
470  static void getSelectedPositions(
472  const SiteSelection& selection,
474  {
475  size_t wsize = sites.getAlphabet()->getStateCodingSize();
476  if (wsize > 1)
477  {
478  if (selection.size() % wsize != 0)
479  throw IOException("SiteContainerTools::getSelectedPositions: Positions selection is not compatible with the alphabet in use in the container.");
480  SiteSelection selection2;
481  for (size_t i = 0; i < selection.size(); i += wsize)
482  {
483  if (selection[i] % wsize != 0)
484  throw IOException("SiteContainerTools::getSelectedPositions: Positions selection is not compatible with the alphabet in use in the container.");
485 
486  for (size_t j = 1; j < wsize; ++j)
487  {
488  if (selection[i + j] != (selection[i + j - 1] + 1))
489  throw IOException("SiteContainerTools::getSelectedPositions: Positions selection is not compatible with the alphabet in use in the container.");
490  }
491  selection2.push_back(selection[i] / wsize);
492  }
493  getSelectedSites(sites, selection2, outputSites);
494  }
495  else
496  {
497  getSelectedSites(sites, selection, outputSites);
498  }
499  }
500 
501 
513  template<class SiteType, class SequenceType>
514  static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
517  const SiteSelection& selection)
518  {
519  auto alphaPtr = sites.getAlphabet();
520  auto outputSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sites.getSequenceKeys(), alphaPtr);
521  outputSites->setComments(sites.getComments());
522  getSelectedPositions<SiteType, SequenceType, std::string>(sites, selection, *outputSites);
523  return outputSites;
524  }
525 
526 
/**
 * @brief Create the consensus sequence of the alignment.
 *
 * Declaration only; the definition is not part of this header.
 *
 * @param sc             The alignment to summarize.
 * @param name           Name given to the returned sequence (default "consensus").
 * @param ignoreGap      Defaults to true. NOTE(review): presumably gaps are
 *                       skipped when picking the consensus state - confirm.
 * @param resolveUnknown Defaults to false. NOTE(review): exact effect not
 *                       visible here - confirm against the implementation.
 * @return The consensus sequence.
 */
539  static std::unique_ptr<Sequence> getConsensus(
540  const SiteContainerInterface& sc,
541  const std::string& name = "consensus",
542  bool ignoreGap = true,
543  bool resolveUnknown = false);
544 
554 
562 
571 
/**
 * @brief Resolve a container with "." notations.
 *
 * Declaration only; the definition is not part of this header.
 *
 * @param dottedAln        The alignment using the "." notation.
 * @param resolvedAlphabet Alphabet of the returned container
 *                         (NOTE(review): presumably - confirm).
 * @return A new, resolved container.
 */
600  static std::unique_ptr<SiteContainerInterface> resolveDottedAlignment(
601  const SiteContainerInterface& dottedAln,
602  std::shared_ptr<const Alphabet>& resolvedAlphabet);
603 
/**
 * @brief Get the index of each sequence position in an aligned sequence.
 *
 * @param seq The aligned sequence.
 * @return A map between positions (NOTE(review): key/value orientation and
 *         index base are not visible here - confirm).
 */
620  static std::map<size_t, size_t> getSequencePositions(const Sequence& seq);
621 
/**
 * @brief Get the index of each alignment position in an aligned sequence.
 *
 * @param seq The aligned sequence.
 * @return A map between positions (NOTE(review): key/value orientation and
 *         index base are not visible here - confirm).
 */
631  static std::map<size_t, size_t> getAlignmentPositions(const Sequence& seq);
632 
/**
 * @brief Container-wide variant of getSequencePositions(): results are written
 * into a matrix instead of being returned.
 *
 * @param sites     The alignment to analyse.
 * @param positions Output matrix (NOTE(review): presumably one row per
 *                  sequence and one column per site - confirm).
 */
642  static void getSequencePositions(
643  const SiteContainerInterface& sites,
644  Matrix<size_t>& positions);
/**
 * @brief Translate alignment positions from an aligned sequence to the same
 * sequence in a different alignment.
 *
 * @param seq1 The sequence as aligned in the first alignment.
 * @param seq2 The same sequence as aligned in the second alignment.
 * @return A map between positions of the two alignments.
 */
660  static std::map<size_t, size_t> translateAlignment(
661  const Sequence& seq1,
662  const Sequence& seq2);
663 
/**
 * @brief Translate sequence positions from a sequence to another in the same
 * alignment.
 *
 * @param sequences The alignment holding both sequences.
 * @param i1        Index of the first sequence in the alignment.
 * @param i2        Index of the second sequence in the alignment.
 * @return A map between positions of the two sequences.
 */
675  static std::map<size_t, size_t> translateSequence(
676  const SiteContainerInterface& sequences,
677  size_t i1,
678  size_t i2);
679 
/**
 * @brief Align two sequences using the Needleman-Wunsch dynamic algorithm.
 *
 * @param seq1 The first sequence.
 * @param seq2 The second sequence.
 * @param s    The two-dimensional alphabet index used to score pairs of states.
 * @param gap  Gap score (NOTE(review): sign convention not visible here - confirm).
 * @return The resulting pairwise alignment.
 */
694  static std::unique_ptr<AlignedSequenceContainer> alignNW(
695  const Sequence& seq1,
696  const Sequence& seq2,
697  const AlphabetIndex2& s,
698  double gap);
699 
/**
 * @brief Needleman-Wunsch alignment of two sequences with separate scores for
 * opening and extending a gap.
 *
 * @param seq1      The first sequence.
 * @param seq2      The second sequence.
 * @param s         The two-dimensional alphabet index used to score pairs of states.
 * @param opening   Gap opening score (NOTE(review): sign convention - confirm).
 * @param extending Gap extension score (NOTE(review): sign convention - confirm).
 * @return The resulting pairwise alignment.
 */
715  static std::unique_ptr<AlignedSequenceContainer> alignNW(
716  const Sequence& seq1,
717  const Sequence& seq2,
718  const AlphabetIndex2& s,
719  double opening,
720  double extending);
721 
735  template<class SiteType, class SequenceType, class HashType>
736  static void sampleSites(
738  size_t nbSites,
740  std::shared_ptr< std::vector<size_t>> index = nullptr)
741  {
742  for (size_t i = 0; i < nbSites; ++i)
743  {
744  size_t pos = static_cast<size_t>(RandomTools::giveIntRandomNumberBetweenZeroAndEntry(static_cast<int>(sites.getNumberOfSites())));
745  auto s = std::unique_ptr<SiteType>(sites.site(pos).clone());
746  outSites.addSite(s, false);
747 
748  if (index)
749  index->push_back(pos);
750  }
751  outSites.setSequenceNames(sites.getSequenceNames(),true);
752  }
753 
754 
768  template<class SiteType, class SequenceType>
769  static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
772  size_t nbSites,
773  std::shared_ptr< std::vector<size_t>> index = nullptr)
774  {
775  auto sampledSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sites.getAlphabet());
776  sampleSites<SiteType, SequenceType, std::string>(sites, nbSites, *sampledSites, index);
777  return sampledSites;
778  }
779 
780 
792  template<class SiteType, class SequenceType, class HashType>
793  static void bootstrapSites(
796  {
797  sampleSites(sites, sites.getNumberOfSites(), outputSites, nullptr);
798  }
799 
800 
812  template<class SiteType, class SequenceType>
813  static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType>>
815  {
816  auto outputSites = std::make_unique< TemplateVectorSiteContainer<SiteType, SequenceType>>(sites.getAlphabet());
817  bootstrapSites<SiteType, SequenceType, std::string>(sites, *outputSites);
818  return outputSites;
819  }
820 
821 
/**
 * @brief Compute the similarity/distance score between two aligned sequences.
 *
 * @param seq1            The first sequence.
 * @param seq2            The second sequence.
 * @param dist            Defaults to false. NOTE(review): presumably selects a
 *                        distance instead of a similarity - confirm.
 * @param gapOption       One of the SIMILARITY_* option names
 *                        (default SIMILARITY_NODOUBLEGAP).
 * @param unresolvedAsGap Defaults to true. NOTE(review): presumably unresolved
 *                        characters are handled like gaps - confirm.
 * @return The score.
 */
843  static double computeSimilarity(
844  const SequenceInterface& seq1,
845  const SequenceInterface& seq2,
846  bool dist = false,
847  const std::string& gapOption = SIMILARITY_NODOUBLEGAP,
848  bool unresolvedAsGap = true);
849 
/**
 * @brief Compute the similarity matrix of an alignment.
 *
 * @param sites           The alignment.
 * @param dist            Defaults to false. NOTE(review): presumably selects
 *                        distances instead of similarities - confirm.
 * @param gapOption       One of the SIMILARITY_* option names
 *                        (default SIMILARITY_NOFULLGAP).
 * @param unresolvedAsGap Defaults to true. NOTE(review): presumably unresolved
 *                        characters are handled like gaps - confirm.
 * @return The matrix of pairwise scores.
 */
872  static std::unique_ptr<DistanceMatrix> computeSimilarityMatrix(
873  const SiteContainerInterface& sites,
874  bool dist = false,
875  const std::string& gapOption = SIMILARITY_NOFULLGAP,
876  bool unresolvedAsGap = true);
877 
// Option names accepted as 'gapOption' by computeSimilarity() and
// computeSimilarityMatrix(). Their values are defined outside this header.
878  static const std::string SIMILARITY_ALL;
879  static const std::string SIMILARITY_NOFULLGAP;
880  static const std::string SIMILARITY_NODOUBLEGAP;
881  static const std::string SIMILARITY_NOGAP;
882 
903  template<class SiteType, class SequenceType, class HashType>
904  static void merge(
907  bool leavePositionAsIs = false)
908  {
909  if (seqCont1.getAlphabet()->getAlphabetType() != seqCont2.getAlphabet()->getAlphabetType())
910  throw AlphabetMismatchException("SiteContainerTools::merge.", seqCont1.getAlphabet(), seqCont2.getAlphabet());
911 
912  std::vector<HashType> seqKeys1 = seqCont1.getSequenceKeys();
913  std::vector<HashType> seqKeys2 = seqCont2.getSequenceKeys();
915  bool del = false;
916  if (seqKeys1 == seqKeys2)
917  {
918  seqCont2bis = &seqCont2;
919  }
920  else
921  {
922  // We shall reorder sequences first:
923  auto seqCont2ter = seqCont2.createEmptyContainer();
924  SequenceContainerTools::getSelectedSequences(seqCont2, seqKeys1, *seqCont2ter);
925  seqCont2bis = seqCont2ter;
926  del = true;
927  }
928 
929  if (leavePositionAsIs)
930  {
931  for (size_t i = 0; i < seqCont2bis->getNumberOfSites(); ++i)
932  {
933  std::unique_ptr<Site> site(seqCont2bis->site(i).clone());
934  seqCont1.addSite(site, false);
935  }
936  }
937  else
938  {
939  int offset = static_cast<int>(seqCont1.getNumberOfSites());
940  for (size_t i = 0; i < seqCont2bis->getNumberOfSites(); ++i)
941  {
942  std::unique_ptr<Site> site(seqCont2bis->site(i).clone());
943  site->setCoordinate(offset + site->getCoordinate());
944  seqCont1.addSite(site, false);
945  }
946  }
947 
948  if (del)
949  delete seqCont2bis;
950  }
951 
952 
/**
 * @brief Compare an alignment to a reference alignment, and compute the column
 * scores.
 *
 * @param positions1 Position matrix of one alignment, see getSequencePositions().
 * @param positions2 Position matrix of the other alignment.
 * @param na         Defaults to 0. NOTE(review): presumably the score used
 *                   where none can be computed - confirm.
 * @return One score per column (NOTE(review): of which alignment - confirm).
 */
966  static std::vector<int> getColumnScores(const Matrix<size_t>& positions1, const Matrix<size_t>& positions2, int na = 0);
967 
/**
 * @brief Compare an alignment to a reference alignment, and compute the
 * sum-of-pairs scores.
 *
 * @param positions1 Position matrix of one alignment, see getSequencePositions().
 * @param positions2 Position matrix of the other alignment.
 * @param na         Defaults to 0. NOTE(review): presumably the score used
 *                   where none can be computed - confirm.
 * @return One score per column (NOTE(review): of which alignment - confirm).
 */
981  static std::vector<double> getSumOfPairsScores(const Matrix<size_t>& positions1, const Matrix<size_t>& positions2, double na = 0);
982 };
983 } // end of namespace bpp.
984 #endif // BPP_SEQ_CONTAINER_SITECONTAINERTOOLS_H
The alphabet exception base class.
Two-dimensional alphabet index interface.
Exception thrown when two alphabets do not match.
static void displayGauge(size_t iter, size_t total, char symbol='>', const std::string &mes="")
static bool hasStop(const Site &site, const GeneticCode &gCode)
Method to know if a codon site contains stop codon or not.
virtual const Comments & getComments() const =0
Get the comments.
Partial implementation of the Transliterator interface for genetic code object.
Definition: GeneticCode.h:50
static intType giveIntRandomNumberBetweenZeroAndEntry(intType entry)
static void getSelectedSequences(const TemplateSequenceContainerInterface< SequenceType, HashType > &sequences, const SequenceSelection &selection, TemplateSequenceContainerInterface< SequenceType, HashType > &outputCont)
Add a specified set of sequences from a container to another.
The sequence interface.
Definition: Sequence.h:34
A basic implementation of the Sequence interface.
Definition: Sequence.h:117
Some utility methods to deal with site containers.
static void changeUnresolvedCharactersToGaps(SiteContainerInterface &sites)
Change all unresolved characters to gaps in a SiteContainer, according to its alphabet.
static std::unique_ptr< SiteContainerInterface > getSitesWithoutStopCodon(const SiteContainerInterface &sites, const GeneticCode &gCode)
Get a site set without stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception...
static void getSelectedSites(const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, const SiteSelection &selection, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites)
Extract a specified set of sites.
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > getSelectedSites(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, const SiteSelection &selection)
Create a new container with a specified set of sites.
static void removeGapSites(TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, double maxFreqGaps)
Remove sites with a given amount of gaps.
static std::vector< int > getColumnScores(const Matrix< size_t > &positions1, const Matrix< size_t > &positions2, int na=0)
Compare an alignment to a reference alignment, and compute the column scores.
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > getSelectedPositions(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, const SiteSelection &selection)
Create a new container with a specified set of positions.
static void removeGapOrUnresolvedOnlySites(TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites)
Remove gap/unresolved-only sites from a SiteContainer.
static void bootstrapSites(const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites)
Bootstrap sites in an alignment.
static const std::string SIMILARITY_NOFULLGAP
static std::map< size_t, size_t > translateSequence(const SiteContainerInterface &sequences, size_t i1, size_t i2)
Translate sequence positions from a sequence to another in the same alignment.
static const std::string SIMILARITY_NOGAP
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > getCompleteSites(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
Retrieves complete sites.
static void sampleSites(const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, size_t nbSites, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outSites, std::shared_ptr< std::vector< size_t >> index=nullptr)
Sample sites in an alignment.
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > bootstrapSites(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
Bootstrap sites in an alignment.
static void getSelectedPositions(const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, const SiteSelection &selection, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites)
Extract a specified set of positions.
static std::map< size_t, size_t > getAlignmentPositions(const Sequence &seq)
Get the index of each alignment position in an aligned sequence.
static std::unique_ptr< AlignmentDataInterface > getSelectedSites(const AlignmentDataInterface &sites, const SiteSelection &selection)
Create a new container with a specified set of sites.
static std::vector< double > getSumOfPairsScores(const Matrix< size_t > &positions1, const Matrix< size_t > &positions2, double na=0)
Compare an alignment to a reference alignment, and compute the sum-of-pairs scores.
static std::unique_ptr< Sequence > getConsensus(const SiteContainerInterface &sc, const std::string &name="consensus", bool ignoreGap=true, bool resolveUnknown=false)
Create the consensus sequence of the alignment.
static void changeGapsToUnknownCharacters(SiteContainerInterface &sites)
Change all gaps to unknown state in a SiteContainer, according to its alphabet.
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > sampleSites(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, size_t nbSites, std::shared_ptr< std::vector< size_t >> index=nullptr)
Sample sites in an alignment.
static std::unique_ptr< AlignedSequenceContainer > alignNW(const Sequence &seq1, const Sequence &seq2, const AlphabetIndex2 &s, double gap)
Align two sequences using the Needleman-Wunsch dynamic algorithm.
static std::unique_ptr< TemplateSiteContainerInterface< SiteType, SequenceType, std::string > > removeGapOnlySites(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
Get a site set without gap-only sites.
static void removeSitesWithStopCodon(ProbabilisticSiteContainerInterface &sites, const GeneticCode &gCode)
Remove sites with stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception.
static void removeSitesWithStopCodon(SiteContainerInterface &sites, const GeneticCode &gCode)
Remove sites with stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception.
static std::unique_ptr< DistanceMatrix > computeSimilarityMatrix(const SiteContainerInterface &sites, bool dist=false, const std::string &gapOption=SIMILARITY_NOFULLGAP, bool unresolvedAsGap=true)
Compute the similarity matrix of an alignment.
static void removeGapOnlySites(TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites)
Remove gap-only sites from a SiteContainer.
static std::unique_ptr< SiteContainerInterface > resolveDottedAlignment(const SiteContainerInterface &dottedAln, std::shared_ptr< const Alphabet > &resolvedAlphabet)
Resolve a container with "." notations.
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > getSitesWithoutGaps(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
Retrieves sites without gaps.
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > removeGapOrUnresolvedOnlySites(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
Get a site set without gap/unresolved-only sites from a SiteContainer.
static double computeSimilarity(const SequenceInterface &seq1, const SequenceInterface &seq2, bool dist=false, const std::string &gapOption=SIMILARITY_NODOUBLEGAP, bool unresolvedAsGap=true)
Compute the similarity/distance score between two aligned sequences.
static std::map< size_t, size_t > translateAlignment(const Sequence &seq1, const Sequence &seq2)
Translate alignment positions from an aligned sequence to the same sequence in a different alignment.
static void merge(TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &seqCont1, const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &seqCont2, bool leavePositionAsIs=false)
Add the content of a site container to an existing one.
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > removeGapSites(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, double maxFreqGaps)
Extract sites, from a SiteContainer, with less than a given amount of gaps.
static const std::string SIMILARITY_NODOUBLEGAP
static std::map< size_t, size_t > getSequencePositions(const Sequence &seq)
Get the index of each sequence position in an aligned sequence.
static const std::string SIMILARITY_ALL
The Site class.
Definition: Site.h:73
Site * clone() const
Definition: Site.h:184
static bool isGapOnly(const IntSymbolListInterface &site)
static void getFrequencies(const CruxSymbolListInterface &list, std::map< int, double > &frequencies, bool resolveUnknowns=false)
Get all states frequencies in the list.
static bool hasGap(const IntSymbolListInterface &site)
static bool isComplete(const IntSymbolListInterface &site)
static bool isGapOrUnresolvedOnly(const IntSymbolListInterface &site)
The Container of Aligned Values interface.
Definition: AlignmentData.h:26
virtual void setSequenceNames(const std::vector< std::string > &names, bool updateKeys)=0
Batch-set all sequence names.
virtual std::vector< std::string > getSequenceNames() const =0
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
virtual std::vector< HashType > getSequenceKeys() const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get a pointer toward the container's alphabet.
virtual void deleteSite(size_t sitePosition)=0
Delete a site from the container.
virtual void addSite(std::unique_ptr< SiteType > &site, bool checkCoordinate)=0
Add a site in the container.
TemplateSiteContainerInterface< SiteType, SequenceType, HashType > * createEmptyContainer() const override=0
Return a copy of this container, but with no data inside.
virtual const SiteType & site(size_t sitePosition) const override=0
Get a site from the container.
virtual void deleteSites(size_t sitePosition, size_t length) override=0
Remove a continuous range of sites in the container.
virtual size_t getNumberOfSites() const override=0
Get the number of aligned positions in the container.
This alphabet is used to deal with NumericAlphabet.
std::vector< size_t > SiteSelection