bpp-seq3
3.0.0
|
Utility methods dealing with sequence containers. More...
#include <Bpp/Seq/Container/SequenceContainerTools.h>
Public Member Functions | |
SequenceContainerTools () | |
virtual | ~SequenceContainerTools () |
Static Public Member Functions | |
template<class SequenceType , class HashType > | |
static std::unique_ptr< TemplateSequenceContainerInterface< SequenceType, HashType > > | createContainerOfSpecifiedSize (std::shared_ptr< const Alphabet > &alphabet, size_t size) |
Create a container with void sequences. More... | |
template<class SequenceType , class HashType > | |
static std::unique_ptr< TemplateSequenceContainerInterface< SequenceType, HashType > > | createContainerWithSequenceNames (std::shared_ptr< const Alphabet > &alphabet, const std::vector< std::string > &seqNames) |
Create a container with specified names. More... | |
template<class ContFrom , class ContTo , class SequenceType > | |
static void | convertContainer (const ContFrom &input, ContTo &output) |
Generic function which creates a new container from another one, by specifying the class of sequence to be stored. More... | |
template<class SequenceType , class HashType > | |
static void | getSelectedSequences (const TemplateSequenceContainerInterface< SequenceType, HashType > &sequences, const SequenceSelection &selection, TemplateSequenceContainerInterface< SequenceType, HashType > &outputCont) |
Add a specified set of sequences from a container to another. More... | |
template<class SequenceType , class HashType > | |
static void | getSelectedSequences (const TemplateSequenceContainerInterface< SequenceType, HashType > &sequences, const std::vector< std::string > &selection, TemplateSequenceContainerInterface< SequenceType, HashType > &outputCont, bool strict=true) |
Add a specified set of sequences from a container to another. More... | |
template<class SequenceType , class HashType > | |
static void | keepOnlySelectedSequences (TemplateSequenceContainerInterface< SequenceType, HashType > &sequences, const SequenceSelection &selection) |
Remove all sequences that are not in a given selection from a given container. More... | |
template<class SequenceType , class HashType > | |
static bool | sequencesHaveTheSameLength (const TemplateSequenceContainerInterface< SequenceType, HashType > &sc) |
Check if all sequences in a SequenceContainer have the same length. More... | |
static void | getCounts (const SequenceContainerInterface &sc, std::map< int, unsigned int > &f) |
Compute base counts. More... | |
static void | getFrequencies (const SequenceContainerInterface &sc, std::map< int, double > &f, double pseudoCount=0) |
Compute base frequencies of a BasicSequenceContainer. More... | |
static void | getFrequencies (const ProbabilisticSequenceContainerInterface &sc, std::map< int, double > &f, double pseudoCount=0) |
Compute base frequencies of a ProbabilisticSequenceContainer. More... | |
static void | getFrequencies (const SequenceDataInterface &sc, std::map< int, double > &f, double pseudoCount=0) |
Compute base frequencies of an object implementing the SequenceDataInterface. More... | |
template<class SequenceType , class HashType > | |
static void | append (TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont1, const TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont2) |
Append all the sequences of a SequenceContainer to the end of another. More... | |
template<class SequenceType , class HashType > | |
static void | mergeByKey (const TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont1, const TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont2, TemplateSequenceContainerInterface< SequenceType, HashType > &outputCont) |
Concatenate the sequences from two containers. More... | |
template<class SequenceType , class HashType > | |
static void | convertAlphabet (const TemplateSequenceContainerInterface< SequenceType, HashType > &seqCont, TemplateSequenceContainerInterface< SequenceType, HashType > &outputCont) |
Convert a SequenceContainer to a new alphabet. More... | |
template<class SequenceType > | |
static std::unique_ptr< TemplateSequenceContainerInterface< SequenceType > > | getCodonPosition (const TemplateSequenceContainerInterface< SequenceType, std::string > &sequences, size_t pos) |
Extract a certain position (1, 2 or 3) from a container of codon sequences and return the resulting nucleotide container. More... | |
Work with sequence names | |
Note that in case the container is indexed by sequence names, methods working directly on sequence keys will be more efficient! | |
template<class SequenceType , class HashType > | |
static bool | hasSequenceWithName (const TemplateSequenceContainerInterface< SequenceType, HashType > &sc, const std::string &name) |
Tells whether a sequence with the given name is present in the container. More... | |
Utility methods dealing with sequence containers.
Definition at line 29 of file SequenceContainerTools.h.
|
inline |
Definition at line 32 of file SequenceContainerTools.h.
|
inline virtual |
Definition at line 33 of file SequenceContainerTools.h.
|
inline static |
Append all the sequences of a SequenceContainer to the end of another.
seqCont1 | The SequenceContainer in which the sequences will be added. |
seqCont2 | The SequenceContainer from which the sequences are taken. |
Definition at line 403 of file SequenceContainerTools.h.
References bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::addSequence(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().
|
inline static |
Convert a SequenceContainer to a new alphabet.
seqCont | The container to convert. |
outputCont | A container (most likely empty) with an alphabet into which the container will be converted. |
Definition at line 453 of file SequenceContainerTools.h.
References bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::addSequence(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().
|
inline static |
Generic function which creates a new container from another one, by specifying the class of sequence to be stored.
Compared to several copy constructors, this function allows changing the class of sequence used internally for storing sequences. The function uses the addSequence method, so it can also be used to concatenate containers.
input | The container to copy. |
output | The container where new sequences will be appended. |
Definition at line 125 of file SequenceContainerTools.h.
|
inline static |
Create a container with void sequences.
A new VectorSequenceContainer with the specified alphabet is created. The destruction of this new container is up to the user. Sequences are named "0", "1", ..., "n-1" and have no content or comments.
alphabet | The alphabet to use in the container. |
size | The number of sequences in the container. |
Definition at line 78 of file SequenceContainerTools.h.
References bpp::TextTools::toString().
|
inline static |
Create a container with specified names.
A new VectorSequenceContainer with the specified alphabet is created. The destruction of this new container is up to the user. Sequences have the specified names and no content or comments.
alphabet | The alphabet to use in the container. |
seqNames | The names of the sequences. |
Exception | If the sequence names are not unique. |
Definition at line 102 of file SequenceContainerTools.h.
|
inline static |
Extract a certain position (1, 2 or 3) from a container of codon sequences and return the resulting nucleotide container.
sequences | The input sequence container, with codon alphabet. |
pos | The codon position to retrieve. |
AlphabetException | If input sequences are not registered with a codon alphabet. |
Definition at line 478 of file SequenceContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().
|
inline static |
Compute base counts.
Example of usage: getting the GC count from a sequence container. map<int, unsigned int> counts; SequenceContainerTools::getCounts(myContainer, counts); //My container is previously defined. int GCcontent = counts[1] + counts[2] ;
States are stored as their int code.
Definition at line 272 of file SequenceContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence(), and bpp::AbstractTemplateSymbolList< T >::size().
|
inline static |
Compute base frequencies of a ProbabilisticSequenceContainer.
Example of usage: getting the GC content from a sequence container. map<int, double> freqs; SequenceContainerTools::getFrequencies(myContainer, freqs); //My container is previously defined. double GCcontent = (freqs[1] + freqs[2]) / (freqs[0] + freqs[1] + freqs[2] + freqs[3]);
States are stored as their int code.
Definition at line 339 of file SequenceContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::SymbolListTools::getCounts(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence(), and bpp::ProbabilisticSymbolList::size().
|
inline static |
Compute base frequencies of a BasicSequenceContainer.
Example of usage: getting the GC content from a sequence container. map<int, double> freqs; SequenceContainerTools::getFrequencies(myContainer, freqs); //My container is previously defined. double GCcontent = (freqs[1] + freqs[2]) / (freqs[0] + freqs[1] + freqs[2] + freqs[3]);
States are stored as their int code.
Definition at line 297 of file SequenceContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::SymbolListTools::getCounts(), bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence(), and bpp::AbstractTemplateSymbolList< T >::size().
Referenced by getFrequencies().
|
inline static |
Compute base frequencies of an object implementing the SequenceDataInterface.
This method will try to cast the input data and call the corresponding method, if any. An exception will be thrown if the cast fails.
Definition at line 375 of file SequenceContainerTools.h.
References getFrequencies().
|
inline static |
Add a specified set of sequences from a container to another.
Sequences are specified by their position, beginning at 0. Name verification will be performed, only if the output container is not empty, based on the assumption that the container passed as argument is a correct one. Redundant selection is not checked, so be careful with what you're doing!
sequences | The container from which sequences are to be taken. |
selection | The positions of all sequences to retrieve. |
outputCont | A container where the selection should be added. |
Exception | In case of bad sequence name, alphabet mismatch, etc. |
Definition at line 150 of file SequenceContainerTools.h.
References bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::addSequence(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().
Referenced by bpp::SiteContainerTools::merge().
|
inline static |
Add a specified set of sequences from a container to another.
Sequences are specified by their names. Name verification will be performed, only if the output container is not empty, based on the assumption that the container passed as argument is a correct one. Redundant selection is not checked, so be careful with what you're doing!
sequences | The container from which sequences are to be taken. |
selection | The names of all sequences to retrieve. |
outputCont | A container where the selection should be added. |
strict | If true, trying to select a sequence that is not present will raise an exception. If false, only the available sequences will be added. |
Exception | In case of bad sequence name, alphabet mismatch, etc. |
Definition at line 181 of file SequenceContainerTools.h.
References bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::addSequence(), bpp::TemplateSequenceDataInterface< HashType >::hasSequence(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().
|
inline static |
Tells whether a sequence with the given name is present in the container.
sc | The sequence container to check. |
name | The query sequence name. |
Definition at line 51 of file SequenceContainerTools.h.
References bpp::TemplateSequenceDataInterface< HashType >::getNumberOfSequences(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().
|
inline static |
Remove all sequences that are not in a given selection from a given container.
A new VectorSequenceContainer is created with specified sequences. The destruction of the container is up to the user. Sequences are specified by their position, beginning at 0. Redundant selection is not checked, so be careful with what you're doing!
sequences | The container from which sequences are to be taken. |
selection | The positions of all sequences to retrieve. |
Definition at line 219 of file SequenceContainerTools.h.
References bpp::VectorTools::diff(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::removeSequence().
|
inline static |
Concatenate the sequences from two containers.
seqCont1 | First container. |
seqCont2 | Second container. This container must contain sequences with the same keys as in seqCont1. Additional sequences will be ignored. |
outputCont | Output sequence container to which concatenated sequences will be added. |
AlphabetMismatchException | If the alphabets of the 3 containers do not match. |
Definition at line 429 of file SequenceContainerTools.h.
References bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::addSequence(), bpp::TemplateSequenceDataInterface< HashType >::getAlphabet(), bpp::TemplateSequenceDataInterface< HashType >::getSequenceKeys(), and bpp::TemplateSequenceContainerInterface< SequenceType, HashType >::sequence().
|
inline static |
Check if all sequences in a SequenceContainer have the same length.
sc | The container to check. |
Definition at line 243 of file SequenceContainerTools.h.