bpp-seq3
3.0.0
|
Some utility methods to deal with site containers. More...
#include <Bpp/Seq/Container/SiteContainerTools.h>
Public Member Functions | |
SiteContainerTools () | |
virtual | ~SiteContainerTools () |
Static Public Member Functions | |
template<class SiteType , class SequenceType > | |
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > | getSitesWithoutGaps (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites) |
Retrieves sites without gaps. More... | |
template<class SiteType , class SequenceType > | |
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > | getCompleteSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites) |
Retrieves complete sites. More... | |
template<class SiteType , class SequenceType > | |
static std::unique_ptr< TemplateSiteContainerInterface< SiteType, SequenceType, std::string > > | removeGapOnlySites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites) |
Get a site set without gap-only sites. More... | |
template<class SiteType , class SequenceType , class HashType > | |
static void | removeGapOnlySites (TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites) |
Remove gap-only sites from a SiteContainer. More... | |
template<class SiteType , class SequenceType > | |
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > | removeGapOrUnresolvedOnlySites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites) |
Get a site set without gap/unresolved-only sites from a SiteContainer. More... | |
template<class SiteType , class SequenceType , class HashType > | |
static void | removeGapOrUnresolvedOnlySites (TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites) |
Remove gap/unresolved-only sites from a SiteContainer. More... | |
template<class SiteType , class SequenceType > | |
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > | removeGapSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, double maxFreqGaps) |
Extract sites, from a SiteContainer, with less than a given amount of gaps. More... | |
template<class SiteType , class SequenceType , class HashType > | |
static void | removeGapSites (TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, double maxFreqGaps) |
Remove sites with a given amount of gaps. More... | |
static std::unique_ptr< SiteContainerInterface > | getSitesWithoutStopCodon (const SiteContainerInterface &sites, const GeneticCode &gCode) |
Get a site set without stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception. More... | |
static void | removeSitesWithStopCodon (SiteContainerInterface &sites, const GeneticCode &gCode) |
Remove sites with stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception. More... | |
static void | removeSitesWithStopCodon (ProbabilisticSiteContainerInterface &sites, const GeneticCode &gCode) |
Remove sites with stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception. More... | |
template<class SiteType , class SequenceType , class HashType > | |
static void | getSelectedSites (const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, const SiteSelection &selection, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites) |
Extract a specified set of sites. More... | |
template<class SiteType , class SequenceType > | |
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > | getSelectedSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, const SiteSelection &selection) |
Create a new container with a specified set of sites. More... | |
static std::unique_ptr< AlignmentDataInterface > | getSelectedSites (const AlignmentDataInterface &sites, const SiteSelection &selection) |
Create a new container with a specified set of sites. More... | |
template<class SiteType , class SequenceType , class HashType > | |
static void | getSelectedPositions (const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, const SiteSelection &selection, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites) |
Extract a specified set of positions. More... | |
template<class SiteType , class SequenceType > | |
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > | getSelectedPositions (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, const SiteSelection &selection) |
Create a new container with a specified set of positions. More... | |
static std::unique_ptr< Sequence > | getConsensus (const SiteContainerInterface &sc, const std::string &name="consensus", bool ignoreGap=true, bool resolveUnknown=false) |
Create the consensus sequence of the alignment. More... | |
static void | changeGapsToUnknownCharacters (SiteContainerInterface &sites) |
Change all gaps to unknown state in a SiteContainer, according to its alphabet. More... | |
static void | changeGapsToUnknownCharacters (ProbabilisticSiteContainerInterface &sites) |
Change all gaps to unknown state in a ProbabilisticSiteContainer, according to its alphabet. More... | |
static void | changeUnresolvedCharactersToGaps (SiteContainerInterface &sites) |
Change all unresolved characters to gaps in a SiteContainer, according to its alphabet. More... | |
static std::unique_ptr< SiteContainerInterface > | resolveDottedAlignment (const SiteContainerInterface &dottedAln, std::shared_ptr< const Alphabet > &resolvedAlphabet) |
Resolve a container with "." notations. More... | |
static std::map< size_t, size_t > | translateAlignment (const Sequence &seq1, const Sequence &seq2) |
Translate alignment positions from an aligned sequence to the same sequence in a different alignment. More... | |
static std::map< size_t, size_t > | translateSequence (const SiteContainerInterface &sequences, size_t i1, size_t i2) |
Translate sequence positions from a sequence to another in the same alignment. More... | |
static std::unique_ptr< AlignedSequenceContainer > | alignNW (const Sequence &seq1, const Sequence &seq2, const AlphabetIndex2 &s, double gap) |
Align two sequences using the Needleman-Wunsch dynamic algorithm. More... | |
static std::unique_ptr< AlignedSequenceContainer > | alignNW (const Sequence &seq1, const Sequence &seq2, const AlphabetIndex2 &s, double opening, double extending) |
Align two sequences using the Needleman-Wunsch dynamic algorithm. More... | |
template<class SiteType , class SequenceType , class HashType > | |
static void | sampleSites (const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, size_t nbSites, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outSites, std::shared_ptr< std::vector< size_t >> index=nullptr) |
Sample sites in an alignment. More... | |
template<class SiteType , class SequenceType > | |
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > | sampleSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites, size_t nbSites, std::shared_ptr< std::vector< size_t >> index=nullptr) |
Sample sites in an alignment. More... | |
template<class SiteType , class SequenceType , class HashType > | |
static void | bootstrapSites (const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &sites, TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &outputSites) |
Bootstrap sites in an alignment. More... | |
template<class SiteType , class SequenceType > | |
static std::unique_ptr< TemplateVectorSiteContainer< SiteType, SequenceType > > | bootstrapSites (const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sites) |
Bootstrap sites in an alignment. More... | |
static double | computeSimilarity (const SequenceInterface &seq1, const SequenceInterface &seq2, bool dist=false, const std::string &gapOption=SIMILARITY_NODOUBLEGAP, bool unresolvedAsGap=true) |
Compute the similarity/distance score between two aligned sequences. More... | |
static std::unique_ptr< DistanceMatrix > | computeSimilarityMatrix (const SiteContainerInterface &sites, bool dist=false, const std::string &gapOption=SIMILARITY_NOFULLGAP, bool unresolvedAsGap=true) |
Compute the similarity matrix of an alignment. More... | |
template<class SiteType , class SequenceType , class HashType > | |
static void | merge (TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &seqCont1, const TemplateSiteContainerInterface< SiteType, SequenceType, HashType > &seqCont2, bool leavePositionAsIs=false) |
Add the content of a site container to an existing one. More... | |
static std::vector< int > | getColumnScores (const Matrix< size_t > &positions1, const Matrix< size_t > &positions2, int na=0) |
Compare an alignment to a reference alignment, and compute the column scores. More... | |
static std::vector< double > | getSumOfPairsScores (const Matrix< size_t > &positions1, const Matrix< size_t > &positions2, double na=0) |
Compare an alignment to a reference alignment, and compute the sum-of-pairs scores. More... | |
Sequences coordinates. | |
| |
static std::map< size_t, size_t > | getSequencePositions (const Sequence &seq) |
Get the index of each sequence position in an aligned sequence. More... | |
static std::map< size_t, size_t > | getAlignmentPositions (const Sequence &seq) |
Get the index of each alignment position in an aligned sequence. More... | |
static void | getSequencePositions (const SiteContainerInterface &sites, Matrix< size_t > &positions) |
Fill a numeric matrix with the size of the alignment, containing each sequence position. More... | |
Static Public Attributes | |
static const std::string | SIMILARITY_ALL = "all sites" |
static const std::string | SIMILARITY_NOFULLGAP = "no full gap" |
static const std::string | SIMILARITY_NODOUBLEGAP = "no double gap" |
static const std::string | SIMILARITY_NOGAP = "no gap" |
Some utility methods to deal with site containers.
Definition at line 35 of file SiteContainerTools.h.
|
inline |
Definition at line 38 of file SiteContainerTools.h.
|
inlinevirtual |
Definition at line 39 of file SiteContainerTools.h.
|
static |
Align two sequences using the Needleman-Wunsch dynamic algorithm.
If the input sequences contain gaps, they will be ignored.
seq1 | The first sequence. |
seq2 | The second sequence. |
s | The score matrix to use. |
gap | Gap penalty. |
AlphabetMismatchException | If the sequences and the score matrix do not share the same alphabet. |
Definition at line 335 of file SiteContainerTools.cpp.
References bpp::Sequence::clone(), bpp::AlphabetIndex2::getAlphabet(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), bpp::AlphabetIndex2::getIndex(), and bpp::SequenceTools::removeGaps().
|
static |
Align two sequences using the Needleman-Wunsch dynamic algorithm.
If the input sequences contain gaps, they will be ignored.
seq1 | The first sequence. |
seq2 | The second sequence. |
s | The score matrix to use. |
opening | Gap opening penalty. |
extending | Gap extending penalty. |
AlphabetMismatchException | If the sequences and the score matrix do not share the same alphabet. |
Definition at line 435 of file SiteContainerTools.cpp.
References bpp::Sequence::clone(), bpp::AlphabetIndex2::getAlphabet(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), bpp::AlphabetIndex2::getIndex(), and bpp::SequenceTools::removeGaps().
|
inlinestatic |
Bootstrap sites in an alignment.
Original site positions will be kept. The resulting container will hence probably have duplicated positions. You may wish to call the reindexSites() method on the returned container.
Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.
sites | An input alignment to sample. |
outputSites | A container that will contain the sampled alignment. |
Definition at line 793 of file SiteContainerTools.h.
References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), and sampleSites().
|
inlinestatic |
Bootstrap sites in an alignment.
Original site positions will be kept. The resulting container will hence probably have duplicated positions. You may wish to call the reindexSites() method on the returned container.
Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.
sites | An input alignment to sample. |
Definition at line 814 of file SiteContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet().
|
static |
Change all gaps to unknown state in a ProbabilisticSiteContainer, according to its alphabet.
This changes in each sequence all sites that sum to 0 into sites where all values equal 1.
sites | The container to be modified. |
Definition at line 97 of file SiteContainerTools.cpp.
References bpp::VectorTools::fill(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::SymbolListTools::hasGap(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::setSite(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site(), bpp::VectorTools::sum(), and bpp::NumConstants::TINY().
|
static |
Change all gaps to unknown state in a SiteContainer, according to its alphabet.
For DNA alphabets, this changes all '-' to 'N'. For Protein alphabets, this changes all '-' to 'X'.
sites | The container to be modified. |
Definition at line 75 of file SiteContainerTools.cpp.
References bpp::Site::clone(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::SymbolListTools::hasGap(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::setSite(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
static |
Change all unresolved characters to gaps in a SiteContainer, according to its alphabet.
For DNA alphabets, this changes all 'N', 'M', 'R', etc. to '-'.
sites | The container to be modified. |
Definition at line 119 of file SiteContainerTools.cpp.
References bpp::Site::clone(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::SymbolListTools::hasUnresolved(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::setSite(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
static |
Compute the similarity/distance score between two aligned sequences.
The similarity measures are computed as the proportion of identical match. The distance between the two sequences is defined as 1 - similarity. This function can be used with any type of alphabet.
seq1 | The first sequence. |
seq2 | The second sequence. |
dist | Shall we return a distance instead of similarity? |
gapOption | How to deal with gaps:
|
unresolvedAsGap | Tell if unresolved characters must be considered as gaps when counting. If set to yes, the gap option will also apply to unresolved characters. |
SequenceNotAlignedException | If the two sequences do not have the same length. |
AlphabetMismatchException | If the two sequences do not share the same alphabet type. |
Exception | If an invalid gapOption is passed. |
Definition at line 580 of file SiteContainerTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
static |
Compute the similarity matrix of an alignment.
The similarity measures are computed as the proportion of identical match. The distance between the two sequences is defined as 1 - similarity. This function can be used with any type of alphabet. Several options concerning gaps and unresolved characters are proposed:
sites | The input alignment. |
dist | Shall we return a distance instead of similarity? |
gapOption | How to deal with gaps. |
unresolvedAsGap | Tell if unresolved characters must be considered as gaps when counting. If set to yes, the gap option will also apply to unresolved characters. |
Definition at line 640 of file SiteContainerTools.cpp.
References bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), and bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames().
|
static |
Get the index of each alignment position in an aligned sequence.
If the sequence contains no gap, the translated and the original positions are the same. Position numbers start at 1.
seq | The sequence to translate. |
Definition at line 221 of file SiteContainerTools.cpp.
References count(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), and bpp::AbstractTemplateSymbolList< T >::size().
|
static |
Compare an alignment to a reference alignment, and compute the column scores.
Calculations are made according to the formula for the "CS" score in Thompson et al 1999, Nucleic Acids Research (1999):27(13);2682–2690.
positions1 | Alignment index for the test alignment. |
positions2 | Alignment index for the reference alignment. |
na | The score to use if the tested column is full of gaps. |
Definition at line 711 of file SiteContainerTools.cpp.
References bpp::Matrix< class >::getNumberOfColumns(), bpp::Matrix< class >::getNumberOfRows(), and bpp::TextTools::toString().
|
inlinestatic |
Retrieves complete sites.
This function builds a new VectorSiteContainer instance with only complete sites, i.e. site with fully resolved states (no gap, no unknown characters). The container passed as input is not modified, all sites are copied.
sites | The container to analyse. |
Definition at line 83 of file SiteContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::SymbolListTools::isComplete(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
Referenced by bpp::SequenceApplicationTools::getSitesToAnalyse().
|
static |
Create the consensus sequence of the alignment.
In case of ambiguity (for instance an AATT site), one state will be chosen arbitrarily.
sc | a site container |
name | the name of the sequence object that will be created. |
ignoreGap | Tell if gap must be counted or not. If not (true option), only fully gapped sites will result in a gap in the consensus sequence. |
resolveUnknown | Tell if unknown characters must be resolved. In a DNA sequence for instance, N will be counted as A=1/4, T=1/4, G=1/4 and C=1/4. Otherwise it will be counted as N=1. If this option is set to true, a consensus sequence will never contain an unknown character. |
Definition at line 28 of file SiteContainerTools.cpp.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::SymbolListTools::getFrequencies(), bpp::SimpleTemplateSiteContainerIterator< SiteType, SequenceType, HashType >::hasMoreSites(), and bpp::SimpleTemplateSiteContainerIterator< SiteType, SequenceType, HashType >::nextSite().
|
inlinestatic |
Extract a specified set of positions.
A SiteContainer is filled with the specified positions. Positions are specified by their index, beginning at 0, and are converted to site positions given the length of the words of the alphabet.
sites | The container from which sequences are to be taken. |
selection | The positions to retrieve. |
outputSites | A container where to add the selected positions. The container must have the same alphabet, number of sequences and sequence keys from the input container. |
Definition at line 470 of file SiteContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), and getSelectedSites().
|
inlinestatic |
Create a new container with a specified set of positions.
Positions are specified by their index, beginning at 0, and are converted to site positions given the length of the words of the alphabet.
sites | The container from which sequences are to be taken. |
selection | The positions of all sites to retrieve. |
Definition at line 515 of file SiteContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::Commentable::getComments(), and bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys().
|
inlinestatic |
Create a new container with a specified set of sites.
Sites are specified by their index, beginning at 0. Sites may be selected multiple times. This version takes as input a generic AlignmentData object, and will try various casts.
sites | The container from which sequences are to be taken. |
selection | The positions of all sites to retrieve. |
Definition at line 433 of file SiteContainerTools.h.
|
inlinestatic |
Extract a specified set of sites.
A SiteContainer is filled with specified sites.
Sites are specified by their index, beginning at 0. Sites may be selected multiple times.
sites | The container from which sequences are to be taken. |
selection | The positions of all sites to retrieve. |
outputSites | A container where to add the selected sites. The container must have the same alphabet, number of sequences and sequence keys from the input container. |
Definition at line 385 of file SiteContainerTools.h.
References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::addSite(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::TemplateSequenceDataInterface< HashType >::setSequenceNames(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
Referenced by bpp::SequenceApplicationTools::getProbabilisticSiteContainer(), getSelectedPositions(), and bpp::SequenceApplicationTools::getSiteContainer().
|
inlinestatic |
Create a new container with a specified set of sites.
Sites are specified by their index, beginning at 0. Sites may be selected multiple times.
sites | The container from which sequences are to be taken. |
selection | The positions of all sites to retrieve. |
Definition at line 410 of file SiteContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::Commentable::getComments(), and bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys().
|
static |
Get the index of each sequence position in an aligned sequence.
If the sequence contains no gap, the translated and the original positions are the same. Position numbers start at 1.
seq | The sequence to translate. |
Definition at line 201 of file SiteContainerTools.cpp.
References count(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), and bpp::AbstractTemplateSymbolList< T >::size().
|
static |
Fill a numeric matrix with the size of the alignment, containing each sequence position.
Positions start at 1, gaps have "position" 0.
sites | The input alignment. |
positions | A matrix object which is going to be resized and filled with the corresponding positions. |
Definition at line 684 of file SiteContainerTools.cpp.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::Matrix< class >::resize(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().
|
inlinestatic |
Retrieves sites without gaps.
This function builds a new VectorSiteContainer instance with only sites without gaps. The container passed as input is not modified, all sites are copied.
sites | The container to analyse. |
Definition at line 53 of file SiteContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::SymbolListTools::hasGap(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
Referenced by bpp::SequenceApplicationTools::getSitesToAnalyse().
|
inlinestatic |
Get a site set without stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception.
This function builds a new BasicSiteContainer instance without sites that have at least a stop codon. The container passed as input is not modified, all sites are copied.
sites | The container to analyse. |
gCode | the genetic code to use to determine stop codons. |
Definition at line 311 of file SiteContainerTools.h.
References bpp::Site::clone(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::CodonSiteTools::hasStop(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
static |
Compare an alignment to a reference alignment, and compute the sum-of-pairs scores.
Calculations are made according to the formula for the "SPS" score in Thompson et al 1999, Nucleic Acids Research (1999):27(13);2682–2690.
positions1 | Alignment index for the test alignment. |
positions2 | Alignment index for the reference alignment. |
na | The score to use if the tested column is not testable, that is not containing at least two residues. |
Definition at line 764 of file SiteContainerTools.cpp.
References bpp::Matrix< class >::getNumberOfColumns(), bpp::Matrix< class >::getNumberOfRows(), and bpp::TextTools::toString().
|
inlinestatic |
Add the content of a site container to an existing one.
The input containers are supposed to have unique sequence names. If it is not the case, several things can happen:
seqCont1 | First container. |
seqCont2 | Second container. This container must contain sequences with the same names as in seqCont1. Additional sequences will be ignored. |
leavePositionAsIs | Tell if site positions should be left unchanged. Otherwise (the default), the size of container 1 is added to the positions in container 2. |
AlphabetMismatchException | If the alphabet in the 2 containers do not match. |
Exception | If sequence names do not match. |
Definition at line 904 of file SiteContainerTools.h.
References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::addSite(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::createEmptyContainer(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::SequenceContainerTools::getSelectedSequences(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
inlinestatic |
Get a site set without gap-only sites.
This function builds a new VectorSiteContainer instance without sites with only gaps. The container passed as input is not modified, all sites are copied.
sites | The container to analyse. |
Definition at line 112 of file SiteContainerTools.h.
References bpp::Site::clone(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::SymbolListTools::isGapOnly(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
inlinestatic |
Remove gap-only sites from a SiteContainer.
sites | The container where the sites have to be removed. |
Definition at line 138 of file SiteContainerTools.h.
References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::deleteSite(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::deleteSites(), bpp::ApplicationTools::displayGauge(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::SymbolListTools::isGapOnly(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
inlinestatic |
Get a site set without gap/unresolved-only sites from a SiteContainer.
This function builds a new VectorSiteContainer instance without sites with only gaps or unresolved characters. The container passed as input is not modified, all sites are copied.
sites | The container to analyse. |
Definition at line 182 of file SiteContainerTools.h.
References bpp::Site::clone(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::SymbolListTools::isGapOrUnresolvedOnly(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
inlinestatic |
Remove gap/unresolved-only sites from a SiteContainer.
sites | The container where the sites have to be removed. |
Definition at line 209 of file SiteContainerTools.h.
References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::deleteSite(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::deleteSites(), bpp::ApplicationTools::displayGauge(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::SymbolListTools::isGapOnly(), bpp::SymbolListTools::isGapOrUnresolvedOnly(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
inlinestatic |
Extract sites, from a SiteContainer, with less than a given amount of gaps.
sites | The container from which the sites have to be removed. |
maxFreqGaps | The maximum frequency of gaps in each site. |
Definition at line 250 of file SiteContainerTools.h.
References bpp::Site::clone(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::SymbolListTools::getFrequencies(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
inlinestatic |
Remove sites with a given amount of gaps.
sites | The container from which the sites have to be removed. |
maxFreqGaps | The maximum frequency of gaps in each site. |
Definition at line 282 of file SiteContainerTools.h.
References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::deleteSite(), bpp::SymbolListTools::getFrequencies(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
|
inlinestatic |
Remove sites with stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception.
Note: this method is currently not implemented for probabilistic objects. An exception is thrown when called.
sites | The container to analyse. |
gCode | the genetic code to use to determine stop codons. |
Definition at line 368 of file SiteContainerTools.h.
|
inlinestatic |
Remove sites with stop codons, if the alphabet is a CodonAlphabet, otherwise throws an Exception.
sites | The container to analyse. |
gCode | the genetic code to use to determine stop codons. |
Definition at line 343 of file SiteContainerTools.h.
References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::deleteSite(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::CodonSiteTools::hasStop(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
Referenced by bpp::SequenceApplicationTools::getSitesToAnalyse().
|
static |
Resolve a container with "." notations.
will result in
for instance. The first sequence is here called the "reference" sequence. It need not be the first in the container. The alphabet of the input alignment must be an instance of the DefaultAlphabet class, the only one which supports dot characters. A new alignment is created and returned, with the specified alphabet.
If several sequences that may be considered as reference are found, the first one is used.
dottedAln | The input alignment. |
resolvedAlphabet | The alphabet of the output alignment. |
AlphabetException | If the alphabet of the input alignment is not of class DefaultAlphabet, or if one character does not match with the output alphabet. |
Exception | If no reference sequence was found, or if the input alignment contains no sequence. |
Definition at line 140 of file SiteContainerTools.cpp.
References bpp::TemplateSequenceDataInterface< HashType >::alphabet(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::IntSymbolList::getChar(), bpp::Sequence::getChar(), bpp::Commentable::getComments(), bpp::AbstractCoreSite::getCoordinate(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), bpp::AlphabetTools::isDefaultAlphabet(), bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site(), and bpp::AbstractTemplateSymbolList< T >::size().
Referenced by bpp::NexusIOSequence::appendAlignmentFromStream().
|
inline static |
Sample sites in an alignment.
Original site positions will be kept. The resulting container will hence probably have duplicated positions. You may wish to call the reindexSites() method on the returned container.
Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.
sites | An input alignment to sample. |
nbSites | The size of the resulting container. |
index | [out] If non-null, the original site indices will be appended to the underlying vector. |
outSites | A container where the sample will be added. |
Definition at line 736 of file SiteContainerTools.h.
References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::addSite(), bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceNames(), bpp::RandomTools::giveIntRandomNumberBetweenZeroAndEntry(), bpp::TemplateSequenceDataInterface< HashType >::setSequenceNames(), and bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::site().
Referenced by bootstrapSites().
|
inline static |
Sample sites in an alignment.
Original site positions will be kept. The resulting container will hence probably have duplicated positions. You may wish to call the reindexSites() method on the returned container.
Note: This method will be optimal with a container with vertical storage like VectorSiteContainer.
sites | An input alignment to sample. |
nbSites | The size of the resulting container. |
index | [out] If non-null, the original site indices will be appended to the underlying vector. |
Definition at line 770 of file SiteContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet().
|
static |
Translate alignment positions from an aligned sequence to the same sequence in a different alignment.
Takes each position (starting at 1) in sequence 1, and looks for the corresponding position in sequence 2. The two sequences must be the same, except for the gaps. If no sequence contains gaps, or if the gaps are at the same place in both sequences, the translated positions will be the same as the original positions.
seq1 | The sequence to translate. |
seq2 | The reference sequence. |
AlphabetMismatchException | If the sequences do not share the same alphabet. |
Exception | If the sequences do not match. |
Definition at line 241 of file SiteContainerTools.cpp.
References bpp::AbstractTemplateSymbolList< T >::getAlphabet(), bpp::AbstractTemplateSymbolList< T >::size(), and bpp::TextTools::toString().
|
static |
Translate sequence positions from a sequence to another in the same alignment.
Takes each position (starting at 1) in sequence 1, and looks for the corresponding position in sequence 2 at the same site. If no corresponding position is available (i.e. if there is a gap in sequence 2 at the corresponding position), 0 is returned.
sequences | The alignment to use. |
i1 | The index of the sequence to translate. |
i2 | The index of the reference sequence. |
Definition at line 308 of file SiteContainerTools.cpp.
References bpp::TemplateSiteContainerInterface< SiteType, SequenceType, HashType >::getNumberOfSites(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().
|
static |
Definition at line 878 of file SiteContainerTools.h.
|
static |
Definition at line 880 of file SiteContainerTools.h.
|
static |
Definition at line 879 of file SiteContainerTools.h.
|
static |
Definition at line 881 of file SiteContainerTools.h.