bpp-seq3  3.0.0
bpp::SequenceContainerTools Class Reference

Utility methods dealing with sequence containers. More...

#include <Bpp/Seq/Container/SequenceContainerTools.h>

Public Member Functions

 SequenceContainerTools ()
 
virtual ~SequenceContainerTools ()
 

Static Public Member Functions

template<class SequenceType , class HashType >
static std::unique_ptr< TemplateSequenceContainerInterface< SequenceType, HashType > > createContainerOfSpecifiedSize (std::shared_ptr< const Alphabet > &alphabet, size_t size)
 Create a container with $n$ void sequences. More...
 
template<class SequenceType , class HashType >
static std::unique_ptr< TemplateSequenceContainerInterface< SequenceType, HashType > > createContainerWithSequenceNames (std::shared_ptr< const Alphabet > &alphabet, const std::vector< std::string > &seqNames)
 Create a container with specified names. More...
 
template<class ContFrom , class ContTo , class SequenceType >
static void convertContainer (const ContFrom &input, ContTo &output)
 Generic function which creates a new container from another one, by specifying the class of sequence to be stored. More...
 
template<class SequenceType , class HashType >
static void getSelectedSequences (const TemplateSequenceContainerInterface< SequenceType, HashType > &sequences, const SequenceSelection &selection, TemplateSequenceContainerInterface< SequenceType, HashType > &outputCont)
 Add a specified set of sequences from a container to another. More...
 
template<class SequenceType , class HashType >
static void getSelectedSequences (const TemplateSequenceContainerInterface< SequenceType, HashType > &sequences, const std::vector< std::string > &selection, TemplateSequenceContainerInterface< SequenceType, HashType > &outputCont, bool strict=true)
 Add a specified set of sequences from a container to another. More...
 
template<class SequenceType , class HashType >
static void keepOnlySelectedSequences (TemplateSequenceContainerInterface< SequenceType, HashType > &sequences, const SequenceSelection &selection)
 Remove all sequences that are not in a given selection from a given container. More...
 
template<class SequenceType , class HashType >
static bool sequencesHaveTheSameLength (const TemplateSequenceContainerInterface< SequenceType, HashType > &sc)
 Check if all sequences in a SequenceContainer have the same length. More...
 
static void getCounts (const SequenceContainerInterface &sc, std::map< int, unsigned int > &f)
 Compute base counts. More...
 
static void getFrequencies (const SequenceContainerInterface &sc, std::map< int, double > &f, double pseudoCount=0)
 Compute base frequencies of a BasicSequenceContainer. More...
 
static void getFrequencies (const ProbabilisticSequenceContainerInterface &sc, std::map< int, double > &f, double pseudoCount=0)
 Compute base frequencies of a ProbabilisticSequenceContainer. More...
 
static void getFrequencies (const SequenceDataInterface &sc, std::map< int, double > &f, double pseudoCount=0)
 Compute base frequencies of an object implementing the SequenceDataInterface. More...
 
template<class SequenceType , class HashType >
static void append (TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont1, const TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont2)
 Append all the sequences of a SequenceContainer to the end of another. More...
 
template<class SequenceType , class HashType >
static void mergeByKey (const TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont1, const TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont2, TemplateSequenceContainerInterface< SequenceType, HashType > &outputCont)
 Concatenate the sequences from two containers. More...
 
template<class SequenceType , class HashType >
static void convertAlphabet (const TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont, TemplateSequenceContainerInterface< SequenceType, HashType > &outputCont)
 Convert a SequenceContainer to a new alphabet. More...
 
template<class SequenceType >
static std::unique_ptr< TemplateSequenceContainerInterface< SequenceType > > getCodonPosition (const TemplateSequenceContainerInterface< SequenceType, std::string > &sequences, size_t pos)
 Extract a certain position (1, 2 or 3) from a container of codon sequences and return the resulting nucleotide container. More...
 
Work with sequence names

Note that in case the container is indexed by sequence names, methods working directly on sequence keys will be more efficient!

template<class SequenceType , class HashType >
static bool hasSequenceWithName (const TemplateSequenceContainerInterface< SequenceType, HashType > &sc, const std::string &name)
 Tells whether a sequence with the given name is present in the container. More...
 

Detailed Description

Utility methods dealing with sequence containers.

Definition at line 29 of file SequenceContainerTools.h.

Constructor & Destructor Documentation

◆ SequenceContainerTools()

bpp::SequenceContainerTools::SequenceContainerTools ( )
inline

Definition at line 32 of file SequenceContainerTools.h.

◆ ~SequenceContainerTools()

virtual bpp::SequenceContainerTools::~SequenceContainerTools ( )
inline virtual

Definition at line 33 of file SequenceContainerTools.h.

Member Function Documentation

◆ append()

template<class SequenceType , class HashType >
static void bpp::SequenceContainerTools::append ( TemplateSequenceContainerInterface< SequenceType, HashType > &  seqCont1,
const TemplateSequenceContainerInterface< SequenceType, HashType > &  seqCont2 
)
inline static

Append all the sequences of a SequenceContainer to the end of another.

Parameters
seqCont1: The SequenceContainer in which the sequences will be added.
seqCont2: The SequenceContainer from which the sequences are taken.

Definition at line 403 of file SequenceContainerTools.h.

References bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::addSequence(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().

◆ convertAlphabet()

template<class SequenceType , class HashType >
static void bpp::SequenceContainerTools::convertAlphabet ( const TemplateSequenceContainerInterface< SequenceType, HashType > &  seqCont,
TemplateSequenceContainerInterface< SequenceType, HashType > &  outputCont 
)
inline static

◆ convertContainer()

template<class ContFrom , class ContTo , class SequenceType >
static void bpp::SequenceContainerTools::convertContainer ( const ContFrom &  input,
ContTo &  output 
)
inline static

Generic function which creates a new container from another one, by specifying the class of sequence to be stored.

Compared to several copy constructors, this function allows changing the inner sequence class used for storing sequences. The function uses the addSequence method, so it can also be used to concatenate containers.

Parameters
input: The container to copy.
output: The container where new sequences will be appended.

Definition at line 125 of file SequenceContainerTools.h.

◆ createContainerOfSpecifiedSize()

template<class SequenceType , class HashType >
static std::unique_ptr< TemplateSequenceContainerInterface<SequenceType, HashType> > bpp::SequenceContainerTools::createContainerOfSpecifiedSize ( std::shared_ptr< const Alphabet > &  alphabet,
size_t  size 
)
inline static

Create a container with $n$ void sequences.

A new VectorSequenceContainer with the specified alphabet is created. The destruction of this new container is up to the user. Sequences are named "0", "1", ..., "n-1" and have no content or comments.

Parameters
alphabet: The alphabet to use in the container.
size: The number of sequences in the container.
Returns
A pointer to a newly created container.

Definition at line 78 of file SequenceContainerTools.h.

References bpp::TextTools::toString().

◆ createContainerWithSequenceNames()

template<class SequenceType , class HashType >
static std::unique_ptr< TemplateSequenceContainerInterface<SequenceType, HashType> > bpp::SequenceContainerTools::createContainerWithSequenceNames ( std::shared_ptr< const Alphabet > &  alphabet,
const std::vector< std::string > &  seqNames 
)
inline static

Create a container with specified names.

A new VectorSequenceContainer with the specified alphabet is created. The destruction of this new container is up to the user. Sequences have the specified names and no content or comments.

Parameters
alphabet: The alphabet to use in the container.
seqNames: The names of the sequences.
Returns
A pointer to a newly created container.
Exceptions
Exception: If the sequence names are not unique.

Definition at line 102 of file SequenceContainerTools.h.

◆ getCodonPosition()

template<class SequenceType >
static std::unique_ptr< TemplateSequenceContainerInterface<SequenceType> > bpp::SequenceContainerTools::getCodonPosition ( const TemplateSequenceContainerInterface< SequenceType, std::string > &  sequences,
size_t  pos 
)
inline static

Extract a certain position (1, 2 or 3) from a container of codon sequences and return the resulting nucleotide container.

Parameters
sequences: The input sequence container, with codon alphabet.
pos: The codon position to retrieve.
Returns
A SequenceContainer with a nucleotide alphabet.
Exceptions
AlphabetException: If input sequences are not registered with a codon alphabet.

Definition at line 478 of file SequenceContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().

◆ getCounts()

static void bpp::SequenceContainerTools::getCounts ( const SequenceContainerInterface &  sc,
std::map< int, unsigned int > &  f 
)
inline static

Compute base counts.

Example of usage: getting the GC count from a sequence container.

map<int, unsigned int> counts;
SequenceContainerTools::getCounts(myContainer, counts); // myContainer is previously defined.
int GCcontent = counts[1] + counts[2];

States are stored as their int code.

Definition at line 272 of file SequenceContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence(), and bpp::AbstractTemplateSymbolList< T >::size().

◆ getFrequencies() [1/3]

static void bpp::SequenceContainerTools::getFrequencies ( const ProbabilisticSequenceContainerInterface &  sc,
std::map< int, double > &  f,
double  pseudoCount = 0 
)
inline static

Compute base frequencies of a ProbabilisticSequenceContainer.

Example of usage: getting the GC content from a sequence container.

map<int, double> freqs;
SequenceContainerTools::getFrequencies(myContainer, freqs); // myContainer is previously defined.
double GCcontent = (freqs[1] + freqs[2]) / (freqs[0] + freqs[1] + freqs[2] + freqs[3]);

States are stored as their int code.

Definition at line 339 of file SequenceContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::SymbolListTools::getCounts(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence(), and bpp::ProbabilisticSymbolList::size().

◆ getFrequencies() [2/3]

static void bpp::SequenceContainerTools::getFrequencies ( const SequenceContainerInterface &  sc,
std::map< int, double > &  f,
double  pseudoCount = 0 
)
inline static

Compute base frequencies of a BasicSequenceContainer.

Example of usage: getting the GC content from a sequence container.

map<int, double> freqs;
SequenceContainerTools::getFrequencies(myContainer, freqs); // myContainer is previously defined.
double GCcontent = (freqs[1] + freqs[2]) / (freqs[0] + freqs[1] + freqs[2] + freqs[3]);

States are stored as their int code.

Definition at line 297 of file SequenceContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::SymbolListTools::getCounts(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence(), and bpp::AbstractTemplateSymbolList< T >::size().

Referenced by getFrequencies().

◆ getFrequencies() [3/3]

static void bpp::SequenceContainerTools::getFrequencies ( const SequenceDataInterface &  sc,
std::map< int, double > &  f,
double  pseudoCount = 0 
)
inline static

Compute base frequencies of an object implementing the SequenceDataInterface.

This method will try to cast the input data and call the corresponding method, if any. An exception will be thrown if the cast fails.

Definition at line 375 of file SequenceContainerTools.h.

References getFrequencies().

◆ getSelectedSequences() [1/2]

template<class SequenceType , class HashType >
static void bpp::SequenceContainerTools::getSelectedSequences ( const TemplateSequenceContainerInterface< SequenceType, HashType > &  sequences,
const SequenceSelection &  selection,
TemplateSequenceContainerInterface< SequenceType, HashType > &  outputCont 
)
inline static

Add a specified set of sequences from a container to another.

Sequences are specified by their position, beginning at 0. Name verification will be performed, only if the output container is not empty, based on the assumption that the container passed as argument is a correct one. Redundant selection is not checked, so be careful with what you're doing!

Author
Julien Dutheil
Parameters
sequences: The container from which sequences are to be taken.
selection: The positions of all sequences to retrieve.
outputCont: A container where the selection should be added.
Exceptions
Exception: In case of bad sequence name, alphabet mismatch, etc.

Definition at line 150 of file SequenceContainerTools.h.

References bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::addSequence(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().

Referenced by bpp::SiteContainerTools::merge().

◆ getSelectedSequences() [2/2]

template<class SequenceType , class HashType >
static void bpp::SequenceContainerTools::getSelectedSequences ( const TemplateSequenceContainerInterface< SequenceType, HashType > &  sequences,
const std::vector< std::string > &  selection,
TemplateSequenceContainerInterface< SequenceType, HashType > &  outputCont,
bool  strict = true 
)
inline static

Add a specified set of sequences from a container to another.

Sequences are specified by their names. Name verification will be performed, only if the output container is not empty, based on the assumption that the container passed as argument is a correct one. Redundant selection is not checked, so be careful with what you're doing!

Author
Julien Dutheil
Parameters
sequences: The container from which sequences are to be taken.
selection: The names of all sequences to retrieve.
outputCont: A container where the selection should be added.
strict: If true, trying to select a sequence that is not present will raise an exception. If false, only available sequences will be added.
Exceptions
Exception: In case of bad sequence name, alphabet mismatch, etc.

Definition at line 181 of file SequenceContainerTools.h.

References bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::addSequence(), bpp::TemplateSequenceDataInterface< HashType >::hasSequence(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().

◆ hasSequenceWithName()

template<class SequenceType , class HashType >
static bool bpp::SequenceContainerTools::hasSequenceWithName ( const TemplateSequenceContainerInterface< SequenceType, HashType > &  sc,
const std::string &  name 
)
inline static

Tells whether a sequence with the given name is present in the container.

Parameters
sc: The sequence container to check.
name: The query sequence name.

Definition at line 51 of file SequenceContainerTools.h.

References bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().

◆ keepOnlySelectedSequences()

template<class SequenceType , class HashType >
static void bpp::SequenceContainerTools::keepOnlySelectedSequences ( TemplateSequenceContainerInterface< SequenceType, HashType > &  sequences,
const SequenceSelection &  selection 
)
inline static

Remove all sequences that are not in a given selection from a given container.

A new VectorSequenceContainer is created with specified sequences. The destruction of the container is up to the user. Sequences are specified by their position, beginning at 0. Redundant selection is not checked, so be careful with what you're doing!

Parameters
sequences: The container from which sequences are to be taken.
selection: The positions of all sequences to retrieve.
Returns
A new container with all selected sequences.

Definition at line 219 of file SequenceContainerTools.h.

References bpp::VectorTools::diff(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::removeSequence().

◆ mergeByKey()

template<class SequenceType , class HashType >
static void bpp::SequenceContainerTools::mergeByKey ( const TemplateSequenceContainerInterface< SequenceType, HashType > &  seqCont1,
const TemplateSequenceContainerInterface< SequenceType, HashType > &  seqCont2,
TemplateSequenceContainerInterface< SequenceType, HashType > &  outputCont 
)
inline static

Concatenate the sequences from two containers.

Author
Julien Dutheil
Parameters
seqCont1: First container.
seqCont2: Second container. This container must contain sequences with the same keys as in seqCont1. Additional sequences will be ignored.
outputCont: Output sequence container to which concatenated sequences will be added.
Exceptions
AlphabetMismatchException: If the alphabets of the 3 containers do not match.

Definition at line 429 of file SequenceContainerTools.h.

References bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::addSequence(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().

◆ sequencesHaveTheSameLength()

template<class SequenceType , class HashType >
static bool bpp::SequenceContainerTools::sequencesHaveTheSameLength ( const TemplateSequenceContainerInterface< SequenceType, HashType > &  sc)
inline static

Check if all sequences in a SequenceContainer have the same length.

Parameters
sc: The container to check.
Returns
True if all sequences have the same length.

Definition at line 243 of file SequenceContainerTools.h.


The documentation for this class was generated from the following file: