bpp-seq3  3.0.0
bpp::SiteContainerTools Class Reference

Some utility methods to deal with site containers. More...

#include <Bpp/Seq/Container/SiteContainerTools.h>

+ Collaboration diagram for bpp::SiteContainerTools:

Public Member Functions

 SiteContainerTools ()
 
virtual ~SiteContainerTools ()
 

Static Public Member Functions

template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > getSitesWithoutGaps (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
 Retrieves sites without gaps. More...
 
template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > getCompleteSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
 Retrieves complete sites. More...
 
template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateSiteContainerInterface< SiteType, SequenceType, std::string > > removeGapOnlySites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
 Get a site set without gap-only sites. More...
 
template<class SiteType , class SequenceType , class HashType >
static void removeGapOnlySites (TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites)
 Remove gap-only sites from a SiteContainer. More...
 
template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > removeGapOrUnresolvedOnlySites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
 Get a site set without gap/unresolved-only sites from a SiteContainer. More...
 
template<class SiteType , class SequenceType , class HashType >
static void removeGapOrUnresolvedOnlySites (TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites)
 Remove gap/unresolved-only sites from a SiteContainer. More...
 
template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > removeGapSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, double maxFreqGaps)
 Extract sites, from a SiteContainer, with less than a given amount of gaps. More...
 
template<class SiteType , class SequenceType , class HashType >
static void removeGapSites (TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, double maxFreqGaps)
 Remove sites with a given amount of gaps. More...
 
static std::unique_ptr< SiteContainerInterface > getSitesWithoutStopCodon (const SiteContainerInterface &sites, const GeneticCode &gCode)
 Get a site set without stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception. More...
 
static void removeSitesWithStopCodon (SiteContainerInterface &sites, const GeneticCode &gCode)
 Remove sites with stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception. More...
 
static void removeSitesWithStopCodon (ProbabilisticSiteContainerInterface &sites, const GeneticCode &gCode)
 Remove sites with stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception. More...
 
template<class SiteType , class SequenceType , class HashType >
static void getSelectedSites (const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, const SiteSelection &selection, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites)
 Extract a specified set of sites. More...
 
template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > getSelectedSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, const SiteSelection &selection)
 Create a new container with a specified set of sites. More...
 
static std::unique_ptr< AlignmentDataInterface > getSelectedSites (const AlignmentDataInterface &sites, const SiteSelection &selection)
 Create a new container with a specified set of sites. More...
 
template<class SiteType , class SequenceType , class HashType >
static void getSelectedPositions (const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, const SiteSelection &selection, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites)
 Extract a specified set of positions. More...
 
template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > getSelectedPositions (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, const SiteSelection &selection)
 Create a new container with a specified set of positions. More...
 
static std::unique_ptr< Sequence > getConsensus (const SiteContainerInterface &sc, const std::string &name="consensus", bool ignoreGap=true, bool resolveUnknown=false)
 Create the consensus sequence of the alignment. More...
 
static void changeGapsToUnknownCharacters (SiteContainerInterface &sites)
 Change all gaps to unknown state in a SiteContainer, according to its alphabet. More...
 
static void changeGapsToUnknownCharacters (ProbabilisticSiteContainerInterface &sites)
 Change all gaps to unknown state in a ProbabilisticSiteContainer, according to its alphabet. More...
 
static void changeUnresolvedCharactersToGaps (SiteContainerInterface &sites)
 Change all unresolved characters to gaps in a SiteContainer, according to its alphabet. More...
 
static std::unique_ptr< SiteContainerInterface > resolveDottedAlignment (const SiteContainerInterface &dottedAln, std::shared_ptr< const Alphabet > &resolvedAlphabet)
 Resolve a container with "." notations. More...
 
static std::map< size_t, size_t > translateAlignment (const Sequence &seq1, const Sequence &seq2)
 Translate alignment positions from an aligned sequence to the same sequence in a different alignment. More...
 
static std::map< size_t, size_t > translateSequence (const SiteContainerInterface &sequences, size_t i1, size_t i2)
 Translate sequence positions from a sequence to another in the same alignment. More...
 
static std::unique_ptr< AlignedSequenceContainer > alignNW (const Sequence &seq1, const Sequence &seq2, const AlphabetIndex2 &s, double gap)
 Align two sequences using the Needleman-Wunsch dynamic algorithm. More...
 
static std::unique_ptr< AlignedSequenceContainer > alignNW (const Sequence &seq1, const Sequence &seq2, const AlphabetIndex2 &s, double opening, double extending)
 Align two sequences using the Needleman-Wunsch dynamic algorithm. More...
 
template<class SiteType , class SequenceType , class HashType >
static void sampleSites (const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, size_t nbSites, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outSites, std::shared_ptr< std::vector< size_t >> index=nullptr)
 Sample sites in an alignment. More...
 
template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > sampleSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, size_t nbSites, std::shared_ptr< std::vector< size_t >> index=nullptr)
 Sample sites in an alignment. More...
 
template<class SiteType , class SequenceType , class HashType >
static void bootstrapSites (const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites)
 Bootstrap sites in an alignment. More...
 
template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > bootstrapSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites)
 Bootstrap sites in an alignment. More...
 
static double computeSimilarity (const SequenceInterface &seq1, const SequenceInterface &seq2, bool dist=false, const std::string &gapOption=SIMILARITY_NODOUBLEGAP, bool unresolvedAsGap=true)
 Compute the similarity/distance score between two aligned sequences. More...
 
static std::unique_ptr< DistanceMatrix > computeSimilarityMatrix (const SiteContainerInterface &sites, bool dist=false, const std::string &gapOption=SIMILARITY_NOFULLGAP, bool unresolvedAsGap=true)
 Compute the similarity matrix of an alignment. More...
 
template<class SiteType , class SequenceType , class HashType >
static void merge (TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &seqCont1, const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &seqCont2, bool leavePositionAsIs=false)
 Add the content of a site container to an existing one. More...
 
static std::vector< int > getColumnScores (const Matrix< size_t > &positions1, const Matrix< size_t > &positions2, int na=0)
 Compare an alignment to a reference alignment, and compute the column scores. More...
 
static std::vector< double > getSumOfPairsScores (const Matrix< size_t > &positions1, const Matrix< size_t > &positions2, double na=0)
 Compare an alignment to a reference alignment, and compute the sum-of-pairs scores. More...
 
Sequences coordinates.
See also
SequenceWalker For an alternative approach.
static std::map< size_t, size_t > getSequencePositions (const Sequence &seq)
 Get the index of each sequence position in an aligned sequence. More...
 
static std::map< size_t, size_t > getAlignmentPositions (const Sequence &seq)
 Get the index of each alignment position in an aligned sequence. More...
 
static void getSequencePositions (const SiteContainerInterface &sites, Matrix< size_t > &positions)
 Fill a numeric matrix with the size of the alignment, containing each sequence position. More...
 

Static Public Attributes

static const std::string SIMILARITY_ALL = "all sites"
 
static const std::string SIMILARITY_NOFULLGAP = "no full gap"
 
static const std::string SIMILARITY_NODOUBLEGAP = "no double gap"
 
static const std::string SIMILARITY_NOGAP = "no gap"
 

Detailed Description

Some utility methods to deal with site containers.

Definition at line 35 of file SiteContainerTools.h.

Constructor & Destructor Documentation

◆ SiteContainerTools()

bpp::SiteContainerTools::SiteContainerTools ( )
inline

Definition at line 38 of file SiteContainerTools.h.

◆ ~SiteContainerTools()

virtual bpp::SiteContainerTools::~SiteContainerTools ( )
inline virtual

Definition at line 39 of file SiteContainerTools.h.

Member Function Documentation

◆ alignNW() [1/2]

std::unique_ptr< AlignedSequenceContainer > SiteContainerTools::alignNW ( const Sequence & seq1,
const Sequence & seq2,
const AlphabetIndex2 & s,
double  gap 
)
static

Align two sequences using the Needleman-Wunsch dynamic algorithm.

If the input sequences contain gaps, they will be ignored.

See also
BLOSUM50, DefaultNucleotideScore for score matrices.
Parameters
seq1: The first sequence.
seq2: The second sequence.
s: The score matrix to use.
gap: Gap penalty.
Returns
A new SiteContainer instance.
Exceptions
AlphabetMismatchException: If the sequences and the score matrix do not share the same alphabet.

Definition at line 335 of file SiteContainerTools.cpp.

References bpp::Sequence::clone(), bpp::AlphabetIndex2::getAlphabet(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), bpp::AlphabetIndex2::getIndex(), and bpp::SequenceTools::removeGaps().

◆ alignNW() [2/2]

unique_ptr< AlignedSequenceContainer > SiteContainerTools::alignNW ( const Sequence & seq1,
const Sequence & seq2,
const AlphabetIndex2 & s,
double  opening,
double  extending 
)
static

Align two sequences using the Needleman-Wunsch dynamic algorithm.

If the input sequences contain gaps, they will be ignored.

See also
BLOSUM50, DefaultNucleotideScore for score matrices.
Parameters
seq1: The first sequence.
seq2: The second sequence.
s: The score matrix to use.
opening: Gap opening penalty.
extending: Gap extending penalty.
Returns
A new SiteContainer instance.
Exceptions
AlphabetMismatchException: If the sequences and the score matrix do not share the same alphabet.

Definition at line 435 of file SiteContainerTools.cpp.

References bpp::Sequence::clone(), bpp::AlphabetIndex2::getAlphabet(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), bpp::AlphabetIndex2::getIndex(), and bpp::SequenceTools::removeGaps().

◆ bootstrapSites() [1/2]

template<class SiteType , class SequenceType , class HashType >
static void bpp::SiteContainerTools::bootstrapSites ( const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  sites,
TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  outputSites 
)
inline static

Bootstrap sites in an alignment.

Original site positions will be kept. The resulting container will hence probably have duplicated positions. You may wish to call the reindexSites() method on the returned container.

Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.

Parameters
sites: An input alignment to sample.
outputSites: A container that will contain the sampled alignment.

Definition at line 793 of file SiteContainerTools.h.

References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), and sampleSites().

◆ bootstrapSites() [2/2]

template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType> > bpp::SiteContainerTools::bootstrapSites ( const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &  sites)
inline static

Bootstrap sites in an alignment.

Original site positions will be kept. The resulting container will hence probably have duplicated positions. You may wish to call the reindexSites() method on the returned container.

Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.

Parameters
sites: An input alignment to sample.
Returns
A container that contains the sampled alignment.

Definition at line 814 of file SiteContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet().

◆ changeGapsToUnknownCharacters() [1/2]

void SiteContainerTools::changeGapsToUnknownCharacters ( ProbabilisticSiteContainerInterface & sites)
static

◆ changeGapsToUnknownCharacters() [2/2]

void SiteContainerTools::changeGapsToUnknownCharacters ( SiteContainerInterface & sites)
static

◆ changeUnresolvedCharactersToGaps()

void SiteContainerTools::changeUnresolvedCharactersToGaps ( SiteContainerInterface & sites)
static

◆ computeSimilarity()

double SiteContainerTools::computeSimilarity ( const SequenceInterface & seq1,
const SequenceInterface & seq2,
bool  dist = false,
const std::string &  gapOption = SIMILARITY_NODOUBLEGAP,
bool  unresolvedAsGap = true 
)
static

Compute the similarity/distance score between two aligned sequences.

The similarity measures are computed as the proportion of identical matches. The distance between the two sequences is defined as 1 - similarity. This function can be used with any type of alphabet.

Parameters
seq1: The first sequence.
seq2: The second sequence.
dist: Shall we return a distance instead of similarity?
gapOption: How to deal with gaps:
  • SIMILARITY_ALL: all positions are used.
  • SIMILARITY_NODOUBLEGAP: ignore all positions with a gap in the two sequences.
  • SIMILARITY_NOGAP: ignore all positions with a gap in at least one of the two sequences.
unresolvedAsGap: Tell if unresolved characters must be considered as gaps when counting. If set to true, the gap option will also apply to unresolved characters.
Returns
The proportion of matches between the two sequences.
Exceptions
SequenceNotAlignedException: If the two sequences do not have the same length.
AlphabetMismatchException: If the two sequences do not share the same alphabet type.
Exception: If an invalid gapOption is passed.

Definition at line 580 of file SiteContainerTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

◆ computeSimilarityMatrix()

std::unique_ptr< DistanceMatrix > SiteContainerTools::computeSimilarityMatrix ( const SiteContainerInterface & sites,
bool  dist = false,
const std::string &  gapOption = SIMILARITY_NOFULLGAP,
bool  unresolvedAsGap = true 
)
static

Compute the similarity matrix of an alignment.

The similarity measures are computed as the proportion of identical matches. The distance between the two sequences is defined as 1 - similarity. This function can be used with any type of alphabet. Several options concerning gaps and unresolved characters are proposed:

  • SIMILARITY_ALL: all positions are used.
  • SIMILARITY_NOFULLGAP: ignore positions with a gap in all the sequences in the alignment.
  • SIMILARITY_NODOUBLEGAP: ignore all positions with a gap in the two sequences for each pair.
  • SIMILARITY_NOGAP: ignore all positions with a gap in at least one of the two sequences for each pair.
See also
computeSimilarityMatrix
Parameters
sites: The input alignment.
dist: Shall we return a distance instead of similarity?
gapOption: How to deal with gaps.
unresolvedAsGap: Tell if unresolved characters must be considered as gaps when counting. If set to true, the gap option will also apply to unresolved characters.
Returns
All pairwise similarity measures.

Definition at line 640 of file SiteContainerTools.cpp.

References bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), and bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames().

◆ getAlignmentPositions()

std::map< size_t, size_t > SiteContainerTools::getAlignmentPositions ( const Sequence & seq)
static

Get the index of each alignment position in an aligned sequence.

If the sequence contains no gap, the translated and the original positions are the same. Position numbers start at 1.

Parameters
seq: The sequence to translate.
Returns
A map with original alignment positions as keys, and translated positions as values.

Definition at line 221 of file SiteContainerTools.cpp.

References count(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), and bpp::AbstractTemplateSymbolList< T >::size().

◆ getColumnScores()

vector< int > SiteContainerTools::getColumnScores ( const Matrix< size_t > &  positions1,
const Matrix< size_t > &  positions2,
int  na = 0 
)
static

Compare an alignment to a reference alignment, and compute the column scores.

Calculations are made according to formula for the "CS" score in Thompson et al 1999, Nucleic Acids Research (1999):27(13);2682–2690.

Parameters
positions1: Alignment index for the test alignment.
positions2: Alignment index for the reference alignment.
na: The score to use if the tested column is full of gaps.
Returns
A vector of scores, each 0 or 1.
See also
getSequencePositions for creating the alignment indexes.
Warning
The indexes for the two alignments must have the sequences in the exact same order!
Author
Julien Dutheil

Definition at line 711 of file SiteContainerTools.cpp.

References bpp::Matrix< class >::getNumberOfColumns(), bpp::Matrix< class >::getNumberOfRows(), and bpp::TextTools::toString().

◆ getCompleteSites()

template<class SiteType , class SequenceType >
static std::unique_ptr<TemplateVectorSiteContainer<SiteType, SequenceType> > bpp::SiteContainerTools::getCompleteSites ( const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &  sites)
inline static

Retrieves complete sites.

This function builds a new VectorSiteContainer instance with only complete sites, i.e. sites with fully resolved states (no gap, no unknown characters). The container passed as input is not modified, all sites are copied.

Parameters
sites: The container to analyse.
Returns
A pointer toward a new SiteContainer with only complete sites.

Definition at line 83 of file SiteContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::SymbolListTools::isComplete(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().

Referenced by bpp::SequenceApplicationTools::getSitesToAnalyse().

◆ getConsensus()

unique_ptr< Sequence > SiteContainerTools::getConsensus ( const SiteContainerInterface & sc,
const std::string &  name = "consensus",
bool  ignoreGap = true,
bool  resolveUnknown = false 
)
static

Create the consensus sequence of the alignment.

In case of ambiguity (for instance an AATT site), one state will be chosen arbitrarily.

Parameters
sc: A site container.
name: The name of the sequence object that will be created.
ignoreGap: Tell if gaps must be counted or not. If not (true option), only fully gapped sites will result in a gap in the consensus sequence.
resolveUnknown: Tell if unknown characters must be resolved. In a DNA sequence for instance, N will be counted as A=1/4, T=1/4, G=1/4 and C=1/4. Otherwise it will be counted as N=1. If this option is set to true, a consensus sequence will never contain an unknown character.
Returns
A new Sequence object with the consensus sequence.

Definition at line 28 of file SiteContainerTools.cpp.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::SymbolListTools::getFrequencies(), bpp::SimpleTemplateSiteContainerIterator< SiteType, SequenceType, HashType >::hasMoreSites(), and bpp::SimpleTemplateSiteContainerIterator< SiteType, SequenceType, HashType >::nextSite().

◆ getSelectedPositions() [1/2]

template<class SiteType , class SequenceType , class HashType >
static void bpp::SiteContainerTools::getSelectedPositions ( const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  sites,
const SiteSelection & selection,
TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  outputSites 
)
inline static

Extract a specified set of positions.

A SiteContainer is filled with the specified positions. Positions are specified by their indices, beginning at 0, and are converted to site positions given the length of the words of the alphabet.

Parameters
sites: The container from which sequences are to be taken.
selection: The positions to retrieve.
outputSites: A container where to add the selected positions. The container must have the same alphabet, number of sequences and sequence keys as the input container.

Definition at line 470 of file SiteContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), and getSelectedSites().

◆ getSelectedPositions() [2/2]

template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType> > bpp::SiteContainerTools::getSelectedPositions ( const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &  sites,
const SiteSelection & selection 
)
inline static

Create a new container with a specified set of positions.

Positions are specified by their indices, beginning at 0, and are converted to site positions given the length of the words of the alphabet.

Parameters
sites: The container from which sequences are to be taken.
selection: The positions of all sites to retrieve.
Returns
A VectorSiteContainer with the selected positions. Comments from the original container will be copied.

Definition at line 515 of file SiteContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::Commentable::getComments(), and bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys().

◆ getSelectedSites() [1/3]

static std::unique_ptr<AlignmentDataInterface> bpp::SiteContainerTools::getSelectedSites ( const AlignmentDataInterface & sites,
const SiteSelection & selection 
)
inline static

Create a new container with a specified set of sites.

Sites are specified by their indices, beginning at 0. Sites may be selected multiple times. This version takes as input a generic AlignmentData object, and will try various casts.

Parameters
sites: The container from which sequences are to be taken.
selection: The positions of all sites to retrieve.
Returns
A container of the same type as the input one, with the selected sites. Comments from the original container will be copied.

Definition at line 433 of file SiteContainerTools.h.

◆ getSelectedSites() [2/3]

template<class SiteType , class SequenceType , class HashType >
static void bpp::SiteContainerTools::getSelectedSites ( const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  sites,
const SiteSelection & selection,
TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  outputSites 
)
inline static

Extract a specified set of sites.

A SiteContainer is filled with specified sites.

Sites are specified by their indices, beginning at 0. Sites may be selected multiple times.

Parameters
sites: The container from which sequences are to be taken.
selection: The positions of all sites to retrieve.
outputSites: A container where to add the selected sites. The container must have the same alphabet, number of sequences and sequence keys as the input container.

Definition at line 385 of file SiteContainerTools.h.

References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::addSite(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::TemplateSequenceDataInterface< HashType >::setSequenceNames(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().

Referenced by bpp::SequenceApplicationTools::getProbabilisticSiteContainer(), getSelectedPositions(), and bpp::SequenceApplicationTools::getSiteContainer().

◆ getSelectedSites() [3/3]

template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType> > bpp::SiteContainerTools::getSelectedSites ( const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &  sites,
const SiteSelection & selection 
)
inline static

Create a new container with a specified set of sites.

Sites are specified by their indices, beginning at 0. Sites may be selected multiple times.

Parameters
sites: The container from which sequences are to be taken.
selection: The positions of all sites to retrieve.
Returns
A VectorSiteContainer with the selected sites. Comments from the original container will be copied.

Definition at line 410 of file SiteContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::Commentable::getComments(), and bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys().

◆ getSequencePositions() [1/2]

std::map< size_t, size_t > SiteContainerTools::getSequencePositions ( const Sequence & seq)
static

Get the index of each sequence position in an aligned sequence.

If the sequence contains no gap, the translated and the original positions are the same. Position numbers start at 1.

Parameters
seq: The sequence to translate.
Returns
A map with original sequence positions as keys, and translated positions as values.

Definition at line 201 of file SiteContainerTools.cpp.

References count(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), and bpp::AbstractTemplateSymbolList< T >::size().

◆ getSequencePositions() [2/2]

void SiteContainerTools::getSequencePositions ( const SiteContainerInterface & sites,
Matrix< size_t > &  positions 
)
static

Fill a numeric matrix with the size of the alignment, containing each sequence position.

Positions start at 1, gaps have "position" 0.

Parameters
sites: The input alignment.
positions: A matrix object which is going to be resized and filled with the corresponding positions.
Author
Julien Dutheil

Definition at line 684 of file SiteContainerTools.cpp.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::Matrix< class >::resize(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().

◆ getSitesWithoutGaps()

template<class SiteType , class SequenceType >
static std::unique_ptr<TemplateVectorSiteContainer<SiteType, SequenceType> > bpp::SiteContainerTools::getSitesWithoutGaps ( const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &  sites)
inline static

Retrieves sites without gaps.

This function builds a new VectorSiteContainer instance with only sites without gaps. The container passed as input is not modified, all sites are copied.

Parameters
sites: The container to analyse.
Returns
A pointer toward a new SiteContainer with only sites with no gaps.

Definition at line 53 of file SiteContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::SymbolListTools::hasGap(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().

Referenced by bpp::SequenceApplicationTools::getSitesToAnalyse().

◆ getSitesWithoutStopCodon()

static std::unique_ptr<SiteContainerInterface> bpp::SiteContainerTools::getSitesWithoutStopCodon ( const SiteContainerInterface & sites,
const GeneticCode & gCode 
)
inline static

Get a site set without stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception.

This function builds a new BasicSiteContainer instance without sites that have at least one stop codon. The container passed as input is not modified, all sites are copied.

Parameters
sites: The container to analyse.
gCode: The genetic code to use to determine stop codons.
Returns
A pointer toward a new SiteContainer.

Definition at line 311 of file SiteContainerTools.h.

References bpp::Site::clone(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::CodonSiteTools::hasStop(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().

◆ getSumOfPairsScores()

vector< double > SiteContainerTools::getSumOfPairsScores ( const Matrix< size_t > &  positions1,
const Matrix< size_t > &  positions2,
double  na = 0 
)
static

Compare an alignment to a reference alignment, and compute the sum-of-pairs scores.

Calculations are made according to formula for the "SPS" score in Thompson et al 1999, Nucleic Acids Research (1999):27(13);2682–2690.

Parameters
positions1: Alignment index for the test alignment.
positions2: Alignment index for the reference alignment.
na: The score to use if the tested column is not testable, that is, if it does not contain at least two residues.
Returns
A vector of scores, between 0 and 1 (+ na value).
See also
getSequencePositions for creating the alignment indexes.
Warning
The indexes for the two alignments must have the sequences in the exact same order!
Author
Julien Dutheil

Definition at line 764 of file SiteContainerTools.cpp.

References bpp::Matrix< class >::getNumberOfColumns(), bpp::Matrix< class >::getNumberOfRows(), and bpp::TextTools::toString().

◆ merge()

template<class SiteType , class SequenceType , class HashType >
static void bpp::SiteContainerTools::merge ( TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  seqCont1,
const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  seqCont2,
bool  leavePositionAsIs = false 
)
inlinestatic

Add the content of a site container to an existing one.

The input containers are supposed to have unique sequence names. If it is not the case, several things can happen:

  • If the two containers have exactly the same keys in the same order, then the content of the second one will be added as is to the first one.
  • If the second container does not have exactly the same sequence keys, or has them in a different order, then a reordered selection of the second container is created first; in that case, only the first sequence with a given name will be used and duplicated. In any case, note that the second container should always contain all the sequence names from the first one, otherwise an exception will be thrown.
Author
Julien Dutheil
Parameters
seqCont1: First container.
seqCont2: Second container. This container must contain sequences with the same names as in seqCont1. Additional sequences will be ignored.
leavePositionAsIs: Tells whether site positions should be left unchanged. Otherwise (the default), the size of container 1 is added to the positions in container 2.
Exceptions
AlphabetMismatchException: If the alphabets of the two containers do not match.
Exception: If sequence names do not match.

Definition at line 904 of file SiteContainerTools.h.

References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::addSite(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::createEmptyContainer(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::SequenceContainerTools::getSelectedSequences(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().

◆ removeGapOnlySites() [1/2]

template<class SiteType , class SequenceType >
static std::unique_ptr<TemplateSiteContainerInterface<SiteType, SequenceType, std::string> > bpp::SiteContainerTools::removeGapOnlySites ( const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &  sites)
inlinestatic

Get a site set without gap-only sites.

This function builds a new VectorSiteContainer instance without sites with only gaps. The container passed as input is not modified, all sites are copied.

Parameters
sites: The container to analyse.
Returns
A pointer toward a new SiteContainer.

Definition at line 112 of file SiteContainerTools.h.

References bpp::Site::clone(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::SymbolListTools::isGapOnly(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().

◆ removeGapOnlySites() [2/2]

◆ removeGapOrUnresolvedOnlySites() [1/2]

template<class SiteType , class SequenceType >
static std::unique_ptr<TemplateVectorSiteContainer<SiteType, SequenceType> > bpp::SiteContainerTools::removeGapOrUnresolvedOnlySites ( const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &  sites)
inlinestatic

Get a site set without gap/unresolved-only sites from a SiteContainer.

This function builds a new VectorSiteContainer instance without sites with only gaps or unresolved characters. The container passed as input is not modified; all sites are copied.

Parameters
sites: The container to analyse.
Returns
A pointer toward a new SiteContainer.

Definition at line 182 of file SiteContainerTools.h.

References bpp::Site::clone(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::SymbolListTools::isGapOrUnresolvedOnly(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().

◆ removeGapOrUnresolvedOnlySites() [2/2]

◆ removeGapSites() [1/2]

template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType> > bpp::SiteContainerTools::removeGapSites ( const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &  sites,
double  maxFreqGaps 
)
inlinestatic

◆ removeGapSites() [2/2]

template<class SiteType , class SequenceType , class HashType >
static void bpp::SiteContainerTools::removeGapSites ( TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  sites,
double  maxFreqGaps 
)
inlinestatic

◆ removeSitesWithStopCodon() [1/2]

static void bpp::SiteContainerTools::removeSitesWithStopCodon ( ProbabilisticSiteContainerInterface sites,
const GeneticCode gCode 
)
inlinestatic

Remove sites with stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception.

Note: this method is currently not implemented for probabilistic objects. An exception is thrown when called.

Parameters
sites: The container to analyse.
gCode: The genetic code to use to determine stop codons.

Definition at line 368 of file SiteContainerTools.h.

◆ removeSitesWithStopCodon() [2/2]

◆ resolveDottedAlignment()

unique_ptr< SiteContainerInterface > SiteContainerTools::resolveDottedAlignment ( const SiteContainerInterface dottedAln,
std::shared_ptr< const Alphabet > &  resolvedAlphabet 
)
static

Resolve a container with "." notations.

ATGCCGTTGG
.C...A..C.
..A....C..

will result in

ATGCCGTTGG
ACCCCATTCG
ATACCGTCGG

for instance. The first sequence is here called the "reference" sequence. It need not be the first in the container. The alphabet of the input alignment must be an instance of the DefaultAlphabet class, the only one which supports dot characters. A new alignment is created and returned, with the specified alphabet.

If several sequences that may be considered as reference are found, the first one is used.

Parameters
dottedAln: The input alignment.
resolvedAlphabet: The alphabet of the output alignment.
Returns
A pointer toward a dynamically created SiteContainer with the specified alphabet (can be a DefaultAlphabet).
Exceptions
AlphabetException: If the alphabet of the input alignment is not of class DefaultAlphabet, or if one character does not match with the output alphabet.
Exception: If no reference sequence was found, or if the input alignment contains no sequence.

Definition at line 140 of file SiteContainerTools.cpp.

References bpp::TemplateSequenceDataInterface< HashType >::alphabet(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::IntSymbolList::getChar(), bpp::Sequence::getChar(), bpp::Commentable::getComments(), bpp::AbstractCoreSite::getCoordinate(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::AlphabetTools::isDefaultAlphabet(), bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site(), and bpp::AbstractTemplateSymbolList< T >::size().

Referenced by bpp::NexusIOSequence::appendAlignmentFromStream().

◆ sampleSites() [1/2]

template<class SiteType , class SequenceType , class HashType >
static void bpp::SiteContainerTools::sampleSites ( const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  sites,
size_t  nbSites,
TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &  outSites,
std::shared_ptr< std::vector< size_t >>  index = nullptr 
)
inlinestatic

Sample sites in an alignment.

Original site positions will be kept. The resulting container will hence probably have duplicated positions. You may wish to call the reindexSites() method on the returned container.

Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.

Parameters
sites: An input alignment to sample.
nbSites: The size of the resulting container.
index: [out] If non-null, the underlying vector will be appended with the original site indices.
outSites: A container where the sample will be added.

Definition at line 736 of file SiteContainerTools.h.

References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::addSite(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::RandomTools::giveIntRandomNumberBetweenZeroAndEntry(), bpp::TemplateSequenceDataInterface< HashType >::setSequenceNames(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().

Referenced by bootstrapSites().

◆ sampleSites() [2/2]

template<class SiteType , class SequenceType >
static std::unique_ptr< TemplateVectorSiteContainer<SiteType, SequenceType> > bpp::SiteContainerTools::sampleSites ( const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &  sites,
size_t  nbSites,
std::shared_ptr< std::vector< size_t >>  index = nullptr 
)
inlinestatic

Sample sites in an alignment.

Original site positions will be kept. The resulting container will hence probably have duplicated positions. You may wish to call the reindexSites() method on the returned container.

Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.

Parameters
sites: An input alignment to sample.
nbSites: The size of the resulting container.
index: [out] If non-null, the underlying vector will be appended with the original site indices.
Returns
A container with the sampled sites.

Definition at line 770 of file SiteContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet().

◆ translateAlignment()

std::map< size_t, size_t > SiteContainerTools::translateAlignment ( const Sequence seq1,
const Sequence seq2 
)
static

Translate alignment positions from an aligned sequence to the same sequence in a different alignment.

Takes each position (starting at 1) in sequence 1, and looks for the corresponding position in sequence 2. The two sequences must be the same, except for the gaps. If no sequence contains gaps, or if the gaps are at the same place in both sequences, the translated positions will be the same as the original positions.

Parameters
seq1: The sequence to translate.
seq2: The reference sequence.
Returns
A map with original alignment positions as keys, and translated positions as values.
Exceptions
AlphabetMismatchException: If the sequences do not share the same alphabet.
Exception: If the sequences do not match.

Definition at line 241 of file SiteContainerTools.cpp.

References bpp::AbstractTemplateSymbolList< T >::getAlphabet(), bpp::AbstractTemplateSymbolList< T >::size(), and bpp::TextTools::toString().

◆ translateSequence()

std::map< size_t, size_t > SiteContainerTools::translateSequence ( const SiteContainerInterface sequences,
size_t  i1,
size_t  i2 
)
static

Translate sequence positions from a sequence to another in the same alignment.

Takes each position (starting at 1) in sequence 1, and looks for the corresponding position in sequence 2 at the same site. If no corresponding position is available (i.e. if there is a gap in sequence 2 at the corresponding position), 0 is returned.

Parameters
sequences: The alignment to use.
i1: The index of the sequence to translate.
i2: The index of the reference sequence.
Returns
A map with original sequence positions as keys, and translated positions as values.

Definition at line 308 of file SiteContainerTools.cpp.

References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().

Member Data Documentation

◆ SIMILARITY_ALL

const string SiteContainerTools::SIMILARITY_ALL = "all sites"
static

Definition at line 878 of file SiteContainerTools.h.

◆ SIMILARITY_NODOUBLEGAP

const string SiteContainerTools::SIMILARITY_NODOUBLEGAP = "no double gap"
static

Definition at line 880 of file SiteContainerTools.h.

◆ SIMILARITY_NOFULLGAP

const string SiteContainerTools::SIMILARITY_NOFULLGAP = "no full gap"
static

Definition at line 879 of file SiteContainerTools.h.

◆ SIMILARITY_NOGAP

const string SiteContainerTools::SIMILARITY_NOGAP = "no gap"
static

Definition at line 881 of file SiteContainerTools.h.


The documentation for this class was generated from the following files: