bpp-seq3
3.0.0
|
SequenceTools static class. More...
#include <Bpp/Seq/SequenceTools.h>
Public Member Functions | |
SequenceTools () | |
virtual | ~SequenceTools () |
Static Public Member Functions | |
static bool | areSequencesIdentical (const SequenceInterface &seq1, const SequenceInterface &seq2) |
static void | subseq (const SequenceInterface &sequence, size_t begin, size_t end, SequenceInterface &output) |
Get a sub-sequence. More... | |
template<class SequenceTypeOut > | |
static std::unique_ptr< SequenceTypeOut > | subseq (const SequenceInterface &sequence, size_t begin, size_t end) |
Get a sub-sequence. More... | |
template<class SequenceTypeOut > | |
static std::unique_ptr< SequenceTypeOut > | concatenate (const SequenceInterface &seq1, const SequenceInterface &seq2) |
Concatenate two sequences. More... | |
static void | complement (SequenceInterface &seq) |
Complement the nucleotide sequence itself. More... | |
static std::unique_ptr< Sequence > | getComplement (const SequenceInterface &sequence) |
Get the complementary sequence of a nucleotide sequence. More... | |
static std::unique_ptr< Sequence > | transcript (const Sequence &sequence) |
Get the transcription sequence of a DNA sequence. More... | |
static std::unique_ptr< Sequence > | reverseTranscript (const Sequence &sequence) |
Get the reverse-transcription sequence of an RNA sequence. More... | |
static void | invert (SequenceInterface &seq) |
Invert a sequence from 5'->3' to 3'->5' and vice versa. More... | |
static std::unique_ptr< SequenceInterface > | getInvert (const SequenceInterface &sequence) |
Invert a sequence from 5'->3' to 3'->5' and vice versa. More... | |
static void | invertComplement (SequenceInterface &seq) |
Invert and complement a sequence. More... | |
static double | getPercentIdentity (const SequenceInterface &seq1, const SequenceInterface &seq2, bool ignoreGaps=false) |
static size_t | getNumberOfSites (const SequenceInterface &seq) |
static size_t | getNumberOfCompleteSites (const SequenceInterface &seq) |
static std::unique_ptr< SequenceInterface > | getSequenceWithCompleteSites (const SequenceInterface &seq) |
Keep only complete sites in a sequence. More... | |
static size_t | getNumberOfUnresolvedSites (const SequenceInterface &seq) |
static void | removeGaps (SequenceInterface &seq) |
Remove gaps from a sequence. More... | |
static std::unique_ptr< SequenceInterface > | getSequenceWithoutGaps (const SequenceInterface &seq) |
Get a copy of the sequence without gaps. More... | |
static void | removeStops (SequenceInterface &seq, const GeneticCode &gCode) |
Remove stops from a codon sequence. More... | |
static std::unique_ptr< SequenceInterface > | getSequenceWithoutStops (const SequenceInterface &seq, const GeneticCode &gCode) |
Get a copy of the codon sequence without stops. More... | |
static void | replaceStopsWithGaps (SequenceInterface &seq, const GeneticCode &gCode) |
Replace stop codons by gaps. More... | |
static std::unique_ptr< BowkerTest > | bowkerTest (const SequenceInterface &seq1, const SequenceInterface &seq2) |
Bowker's test for homogeneity. More... | |
static void | getPutativeHaplotypes (const SequenceInterface &seq, std::vector< std::unique_ptr< SequenceInterface >> &hap, unsigned int level=2) |
Get all putative haplotypes from a heterozygous sequence. More... | |
static std::unique_ptr< Sequence > | combineSequences (const SequenceInterface &s1, const SequenceInterface &s2) |
Combine two sequences. More... | |
static std::unique_ptr< Sequence > | subtractHaplotype (const SequenceInterface &s, const SequenceInterface &h, std::string name="", unsigned int level=1) |
Subtract a haplotype from a heterozygous sequence. More... | |
static std::unique_ptr< Sequence > | RNYslice (const SequenceInterface &sequence, int ph) |
Get the RNY decomposition of a DNA sequence. More... | |
static std::unique_ptr< Sequence > | RNYslice (const SequenceInterface &sequence) |
Get the RNY decomposition of a DNA sequence. More... | |
static void | getCDS (SequenceInterface &sequence, const GeneticCode &gCode, bool checkInit, bool checkStop, bool includeInit=true, bool includeStop=true) |
Extract CDS part from a codon sequence. Optionally check for initiator and stop codons, or both. More... | |
static size_t | findFirstOf (const SequenceInterface &seq, const SequenceInterface &motif, bool strict=true) |
Find the position of a motif in a sequence. More... | |
static std::unique_ptr< Sequence > | getRandomSequence (std::shared_ptr< const Alphabet > &alphabet, size_t length) |
Get a random sequence of given size and alphabet, with all states having equal probability. More... | |
static bool | hasGap (const IntSymbolListInterface &site) |
static bool | hasGap (const ProbabilisticSymbolListInterface &site) |
static bool | hasGap (const CruxSymbolListInterface &site) |
static bool | hasUnresolved (const IntSymbolListInterface &site) |
static bool | isGapOnly (const IntSymbolListInterface &site) |
static bool | isGapOnly (const ProbabilisticSymbolListInterface &site) |
static bool | isGapOnly (const CruxSymbolListInterface &site) |
static size_t | numberOfGaps (const IntSymbolListInterface &site) |
static size_t | numberOfGaps (const ProbabilisticSymbolListInterface &site) |
static size_t | numberOfGaps (const CruxSymbolListInterface &site) |
static bool | isGapOrUnresolvedOnly (const IntSymbolListInterface &site) |
static bool | isGapOrUnresolvedOnly (const ProbabilisticSymbolListInterface &site) |
static bool | isGapOrUnresolvedOnly (const CruxSymbolListInterface &site) |
static size_t | numberOfUnresolved (const IntSymbolListInterface &site) |
static size_t | numberOfUnresolved (const ProbabilisticSymbolListInterface &site) |
static size_t | numberOfUnresolved (const CruxSymbolListInterface &site) |
static bool | hasUnknown (const IntSymbolListInterface &site) |
static bool | hasUnknown (const ProbabilisticSymbolListInterface &site) |
static bool | hasUnknown (const CruxSymbolListInterface &site) |
static bool | isComplete (const IntSymbolListInterface &site) |
static bool | isComplete (const ProbabilisticSymbolListInterface &site) |
static bool | isComplete (const CruxSymbolListInterface &site) |
static bool | isConstant (const IntSymbolListInterface &site, bool ignoreUnknown=false, bool unresolvedRaisesException=true) |
Tell if a site is constant, that is displaying the same state in all sequences that do not present a gap. More... | |
static bool | isConstant (const ProbabilisticSymbolListInterface &site, bool unresolvedRaisesException=true) |
static bool | isConstant (const CruxSymbolListInterface &site, bool ignoreUnknown=false, bool unresolvedRaisesException=true) |
static bool | areSymbolListsIdentical (const IntSymbolListInterface &list1, const IntSymbolListInterface &list2) |
static bool | areSymbolListsIdentical (const ProbabilisticSymbolListInterface &list1, const ProbabilisticSymbolListInterface &list2) |
static bool | areSymbolListsIdentical (const CruxSymbolListInterface &l1, const CruxSymbolListInterface &l2) |
template<class count_type > | |
static void | getCounts (const IntSymbolListInterface &list, std::map< int, count_type > &counts) |
Count all states in the list. More... | |
static void | getCounts (const ProbabilisticSymbolListInterface &list, std::map< int, double_t > &counts) |
Sum all states in the list. More... | |
static void | getCounts (const CruxSymbolListInterface &list, std::map< int, double > &counts, bool resolveUnknowns=false) |
Count all states in the list, optionally resolving unknown characters. More... | |
template<class count_type > | |
static void | getCounts (const IntSymbolListInterface &list1, const IntSymbolListInterface &list2, std::map< int, std::map< int, count_type >> &counts) |
Count all pairs of states for two lists of the same size. More... | |
static void | getCounts (const ProbabilisticSymbolListInterface &list1, const ProbabilisticSymbolListInterface &list2, std::map< int, std::map< int, double >> &counts) |
Sum along the lists the joint probabilities for all pairs of states for two lists of the same size. More... | |
static void | getCounts (const CruxSymbolListInterface &list1, const CruxSymbolListInterface &list2, std::map< int, std::map< int, double >> &counts, bool resolveUnknowns) |
Count all pairs of states for two lists of the same size, optionally resolving unknown characters. More... | |
static void | getCountsResolveUnknowns (const IntSymbolListInterface &list, std::map< int, double > &counts) |
Count all states in the list normalizing unknown characters. More... | |
static void | getCountsResolveUnknowns (const ProbabilisticSymbolListInterface &list, std::map< int, double > &counts) |
Count all states in the list normalizing unknown characters. More... | |
static void | getCountsResolveUnknowns (const IntSymbolListInterface &list1, const IntSymbolListInterface &list2, std::map< int, std::map< int, double >> &counts) |
Count all pairs of states for two lists of the same size resolving unknown characters. More... | |
static void | getCountsResolveUnknowns (const ProbabilisticSymbolListInterface &list1, const ProbabilisticSymbolListInterface &list2, std::map< int, std::map< int, double >> &counts) |
Count all pairs of states for two lists of the same size resolving unknown characters. More... | |
static void | getFrequencies (const CruxSymbolListInterface &list, std::map< int, double > &frequencies, bool resolveUnknowns=false) |
Get all states frequencies in the list. More... | |
static void | getFrequencies (const CruxSymbolListInterface &list1, const CruxSymbolListInterface &list2, std::map< int, std::map< int, double >> &frequencies, bool resolveUnknowns=false) |
Get all state pairs frequencies for two lists of the same size. More... | |
static double | getGCContent (const IntSymbolListInterface &list, bool ignoreUnresolved=true, bool ignoreGap=true) |
Get the GC content of a symbol list. More... | |
static double | getGCContent (const ProbabilisticSymbolListInterface &list, bool ignoreUnresolved=true, bool ignoreGap=true) |
static double | getGCContent (const CruxSymbolListInterface &list, bool ignoreUnresolved=true, bool ignoreGap=true) |
static size_t | getNumberOfDistinctPositions (const IntSymbolListInterface &l1, const IntSymbolListInterface &l2) |
Get the number of distinct positions. More... | |
static size_t | getNumberOfDistinctPositions (const ProbabilisticSymbolListInterface &l1, const ProbabilisticSymbolListInterface &l2) |
static size_t | getNumberOfDistinctPositions (const CruxSymbolListInterface &l1, const CruxSymbolListInterface &l2) |
static size_t | getNumberOfPositionsWithoutGap (const IntSymbolListInterface &l1, const IntSymbolListInterface &l2) |
Get the number of positions without gap (or without null column). More... | |
static size_t | getNumberOfPositionsWithoutGap (const ProbabilisticSymbolListInterface &l1, const ProbabilisticSymbolListInterface &l2) |
static size_t | getNumberOfPositionsWithoutGap (const CruxSymbolListInterface &l1, const CruxSymbolListInterface &l2) |
static void | changeGapsToUnknownCharacters (IntSymbolListInterface &l) |
Change all gap elements to unknown characters (or columns of 1). More... | |
static void | changeGapsToUnknownCharacters (ProbabilisticSymbolListInterface &l) |
static void | changeGapsToUnknownCharacters (CruxSymbolListInterface &l) |
static void | changeUnresolvedCharactersToGaps (IntSymbolListInterface &l) |
Change all unknown characters to gap elements (or columns of 0). More... | |
static void | changeUnresolvedCharactersToGaps (ProbabilisticSymbolListInterface &l) |
static void | changeUnresolvedCharactersToGaps (CruxSymbolListInterface &l) |
static double | variabilityShannon (const CruxSymbolListInterface &list, bool resolveUnknowns) |
Compute the Shannon entropy index of a SymbolList. More... | |
static double | variabilityFactorial (const IntSymbolListInterface &list) |
Compute the factorial diversity index of a site. More... | |
static double | mutualInformation (const CruxSymbolListInterface &list1, const CruxSymbolListInterface &list2, bool resolveUnknowns) |
Compute the mutual information between two lists. More... | |
static double | entropy (const CruxSymbolListInterface &list, bool resolveUnknowns) |
Compute the entropy of a site. This is an alias of method variabilityShannon. More... | |
static double | jointEntropy (const CruxSymbolListInterface &list1, const CruxSymbolListInterface &list2, bool resolveUnknowns) |
Compute the joint entropy between two lists. More... | |
static double | heterozygosity (const CruxSymbolListInterface &list) |
Compute the heterozygosity index of a list. More... | |
static size_t | getNumberOfDistinctCharacters (const IntSymbolListInterface &list) |
Give the number of distinct characters at a list. More... | |
static size_t | getMajorAlleleFrequency (const IntSymbolListInterface &list) |
Return the number of occurrences of the most common allele. More... | |
static int | getMajorAllele (const CruxSymbolListInterface &list) |
Return the state corresponding to the most common allele. More... | |
static size_t | getMinorAlleleFrequency (const IntSymbolListInterface &list) |
Return the number of occurrences of the least common allele. More... | |
static int | getMinorAllele (const CruxSymbolListInterface &list) |
Return the state corresponding to the least common allele. More... | |
static bool | hasSingleton (const IntSymbolListInterface &list) |
Tell if a list has singletons. More... | |
static bool | isParsimonyInformativeSite (const IntSymbolListInterface &site) |
Tell if a site is a parsimony informative site. More... | |
static bool | isTriplet (const IntSymbolListInterface &list) |
Tell if a list has more than 2 distinct characters. More... | |
static bool | isDoubleton (const IntSymbolListInterface &list) |
Tell if a list has exactly 2 distinct characters. More... | |
Static Private Attributes | |
static std::shared_ptr< RNY > | RNY_ |
static NucleicAcidsReplication | DNARep_ |
static NucleicAcidsReplication | RNARep_ |
static NucleicAcidsReplication | transc_ |
SequenceTools static class.
Implement methods to manipulate sequences
Definition at line 62 of file SequenceTools.h.
|
inline |
Definition at line 72 of file SequenceTools.h.
|
inlinevirtual |
Definition at line 73 of file SequenceTools.h.
|
static |
seq1 | The first sequence. |
seq2 | The second sequence. |
Definition at line 29 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 239 of file SymbolListTools.h.
References bpp::SymbolListTools::areSymbolListsIdentical().
|
staticinherited |
list1 | The first site. |
list2 | The second site. |
Definition at line 216 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::areSymbolListsIdentical().
|
staticinherited |
Definition at line 236 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
static |
Bowker's test for homogeneity.
Computes the contingency table of occurrence of all pairs of states and test its symmetry using Bowker's (1948) test.
Reference:
seq1 | The first sequence. |
seq2 | The second sequence. |
SequenceNotAlignedException | If the two sequences do not have the same length. |
Definition at line 353 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), bpp::RandomTools::pChisq(), and bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 709 of file SymbolListTools.h.
References bpp::SymbolListTools::changeGapsToUnknownCharacters().
|
staticinherited |
Change all gap elements to unknown characters (or columns of 1).
l | The input list of characters. |
Definition at line 501 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::changeGapsToUnknownCharacters().
|
staticinherited |
Definition at line 614 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 736 of file SymbolListTools.h.
References bpp::SymbolListTools::changeUnresolvedCharactersToGaps().
|
staticinherited |
Change all unknown characters to gap elements (or columns of 0).
l | The input list of characters. |
Definition at line 511 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::changeUnresolvedCharactersToGaps().
|
staticinherited |
Definition at line 623 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
static |
Combine two sequences.
Definition at line 462 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::Alphabet::getAlphabetType(), bpp::CoreSequenceInterface::getName(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), bpp::NumTools::max(), and bpp::CruxSymbolListInterface::size().
|
static |
Complement the nucleotide sequence itself.
seq | The sequence to be complemented. |
AlphabetException | if the sequence is not a nucleotide sequence. |
Definition at line 49 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), bpp::AlphabetTools::isDNAAlphabet(), bpp::AlphabetTools::isRNAAlphabet(), bpp::IntSymbolListInterface::setElement(), bpp::CruxSymbolListInterface::size(), and bpp::NucleicAcidsReplication::translate().
|
inlinestatic |
Concatenate two sequences.
Sequences must have the same name and alphabets. Only first sequence's commentaries are kept.
seq1 | The first sequence. |
seq2 | The second sequence. |
AlphabetMismatchException | If the two alphabets do not match. |
Exception | If the sequence names do not match. |
Definition at line 137 of file SequenceTools.h.
References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::Alphabet::getAlphabetType(), bpp::CoreSequenceInterface::getName(), and bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Compute the entropy of a site. This is an alias of method variabilityShannon.
\[ I = -\sum_x f_x \cdot \ln(f_x) \]
where \( f_x \) is the frequency of state \( x \).
list | A list. |
resolveUnknowns | Tell if unknown characters must be resolved. |
Definition at line 817 of file SymbolListTools.h.
References bpp::SymbolListTools::variabilityShannon().
|
static |
Find the position of a motif in a sequence.
seq | The reference sequence |
motif | The motif to find |
strict | If true (default), find exactly the motif. If false, find a compatible match. |
Definition at line 642 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), bpp::AlphabetTools::match(), and bpp::CruxSymbolListInterface::size().
|
static |
Extract CDS part from a codon sequence. Optionally check for initiator and stop codons, or both.
sequence | The sequence to be reduced to CDS part. |
gCode | The genetic code according to which start and stop codons are specified. |
checkInit | If true, then everything before the initiator codon will be removed, together with the initiator codon if includeInit is false. |
checkStop | If true, then everything after the first stop codon will be removed, together with the stop codon if includeStop is false. |
includeInit | Tell if initiator codon should be kept or removed. No effect if checkInit is false. |
includeStop | Tell if stop codon should be kept or removed. No effect if checkStop is false. |
Definition at line 607 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::deleteElement(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::GeneticCode::isStart(), bpp::GeneticCode::isStop(), and bpp::CruxSymbolListInterface::size().
|
static |
Get the complementary sequence of a nucleotide sequence.
sequence | The sequence to complement. |
AlphabetException | If the sequence is not a nucleotide sequence. |
Definition at line 73 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::AlphabetTools::isDNAAlphabet(), bpp::AlphabetTools::isRNAAlphabet(), and bpp::NucleicAcidsReplication::translate().
|
inlinestaticinherited |
Count all states in the list, optionally resolving unknown characters.
For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
list | The list. |
counts | The output map to store the counts (existing counts will be incremented). |
resolveUnknowns | Tell if unknown characters must be resolved. |
Definition at line 360 of file SymbolListTools.h.
References bpp::SymbolListTools::getCounts(), and bpp::SymbolListTools::getCountsResolveUnknowns().
|
inlinestaticinherited |
Count all pairs of states for two lists of the same size, optionally resolving unknown characters.
For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.
list1 | The first list. |
list2 | The second list. |
counts | The output map to store the counts (existing counts will be incremented). |
resolveUnknowns | Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4. |
Definition at line 514 of file SymbolListTools.h.
References bpp::SymbolListTools::getCounts(), and bpp::SymbolListTools::getCountsResolveUnknowns().
|
inlinestaticinherited |
Count all states in the list.
list | The list. |
counts | The output map to store the counts (existing counts will be incremented). |
Definition at line 265 of file SymbolListTools.h.
References bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::getCounts(), bpp::SequenceContainerTools::getFrequencies(), bpp::CodonSiteTools::isSynonymousPolymorphic(), and bpp::CodonSiteTools::numberOfNonSynonymousSubstitutions().
|
inlinestaticinherited |
Count all pairs of states for two lists of the same size.
NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.
list1 | The first list. |
list2 | The second list. |
counts | The output map to store the counts (existing counts will be incremented). |
Definition at line 412 of file SymbolListTools.h.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Sum all states in the list.
list | The list. |
counts | The output map to store the sum for all states (existing counts will be summed). |
Definition at line 282 of file SymbolListTools.h.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Sum along the lists the joint probabilities for all pairs of states for two lists of the same size.
NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.
list1 | The first list. |
list2 | The second list. |
counts | The output map to store the counts (existing counts will be summed). |
Definition at line 437 of file SymbolListTools.h.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Count all states in the list normalizing unknown characters.
For instance, (1,1,1,1) will be counted as (1/4,1/4,1/4,1/4).
list | The list. |
counts | The output map to store the counts (existing counts will be incremented). |
Definition at line 306 of file SymbolListTools.h.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::getCounts().
|
staticinherited |
Count all pairs of states for two lists of the same size resolving unknown characters.
For instance, (1,1,1,1) will be counted as (1/4,1/4,1/4,1/4).
NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.
list1 | The first list. |
list2 | The second list. |
counts | The output map to store the counts (existing counts will be incremented). |
Definition at line 357 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Count all states in the list normalizing unknown characters.
For instance, (1,1,1,1) will be counted as (1/4,1/4,1/4,1/4).
list | The list. |
counts | The output map to store the counts (existing counts will be incremented). |
Definition at line 331 of file SymbolListTools.h.
References bpp::CruxSymbolListInterface::size(), and bpp::VectorTools::sum().
|
staticinherited |
Count all pairs of states for two lists of the same size resolving unknown characters.
For instance, (1,1,1,1) will be counted as (1/4,1/4,1/4,1/4).
NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.
list1 | The first list. |
list2 | The second list. |
counts | The output map to store the counts (existing counts will be incremented). |
Definition at line 522 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Get all states frequencies in the list.
list | The list. |
resolveUnknowns | Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4. |
frequencies | The output map with all states and corresponding frequencies. Existing frequencies will be erased if any. |
Definition at line 380 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
Referenced by bpp::CodonSiteTools::generateCodonSiteWithoutRareVariant(), bpp::SiteContainerTools::getConsensus(), bpp::CodonSiteTools::meanNumberOfSynonymousPositions(), bpp::CodonSiteTools::piNonSynonymous(), bpp::CodonSiteTools::piSynonymous(), and bpp::SiteContainerTools::removeGapSites().
|
staticinherited |
Get all state pairs frequencies for two lists of the same size.
list1 | The first list. |
list2 | The second list. |
resolveUnknowns | Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4. For ProbabilisticSymbolList, (1,1,1,1) states will be counted as (1/4,1/4,1/4,1/4). |
frequencies | The output map with all state pairs and corresponding frequencies. Existing frequencies will be erased if any. |
Definition at line 396 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 609 of file SymbolListTools.h.
References bpp::SymbolListTools::getGCContent().
|
staticinherited |
Get the GC content of a symbol list.
list | The list. |
ignoreUnresolved | Do not count unresolved states (or columns that sum > 1). Otherwise, weight by each state probability in case of ambiguity (e.g. the R state counts for 0.5) (or columns are normalized). |
ignoreGap | Do not count gaps (or null columns) in total |
AlphabetException | If the list is not made of nucleotide states. |
Definition at line 415 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::getGCContent().
|
staticinherited |
Definition at line 544 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), and bpp::CruxSymbolListInterface::size().
|
static |
Invert a sequence from 5'->3' to 3'->5' and vice versa.
ABCDEF becomes FEDCBA, and the sense attribute is changed (may be inhibited).
sequence | The sequence to invert. |
Definition at line 137 of file SequenceTools.cpp.
References bpp::SequenceInterface::clone().
|
staticinherited |
Return the state corresponding to the most common allele.
list | A list |
Definition at line 817 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Return the number of occurrences of the most common allele.
list | A list |
Definition at line 795 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Return the state corresponding to the least common allele.
list | A list |
Definition at line 866 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Return the number of occurrences of the least common allele.
list | A list |
Definition at line 844 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
static |
seq | The sequence to analyse. |
Definition at line 234 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), count(), and bpp::CruxSymbolListInterface::size().
|
staticinherited |
Give the number of distinct characters at a list.
list | A list |
Definition at line 774 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
Referenced by bpp::CodonSiteTools::numberOfSubstitutions().
|
inlinestaticinherited |
Definition at line 646 of file SymbolListTools.h.
References bpp::SymbolListTools::getNumberOfDistinctPositions().
|
staticinherited |
Get the number of distinct positions.
The comparison is performed from position 0 up to the smaller of the two list sizes.
l1 | SymbolList 1. |
l2 | SymbolList 2. |
AlphabetMismatchException | If the two lists do not have the same alphabet type. |
Definition at line 469 of file SymbolListTools.cpp.
References count(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::getNumberOfDistinctPositions().
|
staticinherited |
Definition at line 581 of file SymbolListTools.cpp.
References count(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 682 of file SymbolListTools.h.
References bpp::SymbolListTools::getNumberOfPositionsWithoutGap().
|
staticinherited |
Get the number of positions without gap (or without null column).
The comparison is performed from position 0 up to the smaller of the two list sizes.
l1 | SymbolList 1. |
l2 | SymbolList 2. |
AlphabetMismatchException | If the two lists do not have the same alphabet type. |
Definition at line 485 of file SymbolListTools.cpp.
References count(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::getNumberOfPositionsWithoutGap().
|
staticinherited |
Definition at line 598 of file SymbolListTools.cpp.
References count(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
static |
seq | The sequence to analyse. |
Definition at line 220 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), count(), and bpp::CruxSymbolListInterface::size().
|
static |
seq | The sequence to analyse. |
Definition at line 264 of file SequenceTools.cpp.
References count(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
static |
seq1 | The first sequence. |
seq2 | The second sequence. |
ignoreGaps | If true, only positions without gaps will be used for the counting. |
AlphabetMismatchException | If the two sequences do not have the same alphabet. |
SequenceNotAlignedException | If the two sequences do not have the same length. |
Definition at line 183 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::Alphabet::getAlphabetType(), bpp::Alphabet::getGapCharacterCode(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), and bpp::CruxSymbolListInterface::size().
|
static |
Get all putative haplotypes from a heterozygous sequence.
seq | The sequence to resolve |
hap | The vector to fill with the new sequences |
level | The maximum number of states that a generic char must code (if this number is higher than level, the state will not be resolved). For instance if level = 3 and Alphabet is DNA, all generic char will be resolved but N. |
Definition at line 405 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), bpp::CoreSequenceInterface::getName(), bpp::CruxSymbolListInterface::size(), and bpp::TextTools::toString().
|
static |
Get a random sequence of given size and alphabet, with all states having equal probability.
alphabet | The alphabet to use. |
length | The length of the sequence to generate. |
Definition at line 677 of file SequenceTools.cpp.
References bpp::RandomTools::giveIntRandomNumberBetweenZeroAndEntry().
|
static |
Keep only complete sites in a sequence.
The deleteElement method of the Sequence object will be used where appropriate.
seq | The sequence to analyse. |
Definition at line 248 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), bpp::SequenceInterface::clone(), and bpp::CruxSymbolListInterface::size().
|
static |
Get a copy of the sequence without gaps.
A whole new sequence will be created by adding all non-gap positions. The original sequence will be cloned to serve as a template.
seq | The sequence to analyse. |
Definition at line 278 of file SequenceTools.cpp.
References bpp::SequenceInterface::clone(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
static |
Get a copy of the codon sequence without stops.
A whole new sequence will be created by adding all non-stop positions. The original sequence will be cloned to serve as a template.
seq | The sequence to analyse. |
gCode | The genetic code according to which stop codons are specified. |
Exception | if the input sequence does not have a codon alphabet. |
Definition at line 306 of file SequenceTools.cpp.
References bpp::SequenceInterface::clone(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::GeneticCode::isStop(), and bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 36 of file SymbolListTools.h.
References bpp::SymbolListTools::hasGap().
|
staticinherited |
site | A site. |
Definition at line 22 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SiteContainerTools::changeGapsToUnknownCharacters(), bpp::SiteContainerTools::getSitesWithoutGaps(), bpp::SymbolListTools::hasGap(), bpp::NoGapTemplateSiteContainerIterator< SiteType, SequenceType, HashType >::nextSiteWithoutGapPosition(), bpp::CodonSiteTools::numberOfNonSynonymousSubstitutions(), bpp::CodonSiteTools::numberOfSubstitutions(), and bpp::NoGapTemplateSiteContainerIterator< SiteType, SequenceType, HashType >::previousSiteWithoutGapPosition().
|
staticinherited |
Definition at line 33 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Tell if a list has singletons.
list | A list. |
Definition at line 892 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 152 of file SymbolListTools.h.
References bpp::SymbolListTools::hasUnknown().
|
staticinherited |
site | A site. |
Definition at line 109 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::hasUnknown().
|
staticinherited |
Definition at line 120 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
site | A site. |
Definition at line 46 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SiteContainerTools::changeUnresolvedCharactersToGaps().
|
staticinherited |
Compute the heterozygosity index of a list.
where $f_x$ is the frequency of state $x$.
list | A list. |
Definition at line 760 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
static |
Invert a sequence from 5'->3' to 3'->5' and vice versa.
ABCDEF becomes FEDCBA, and the sense attribute is changed (may be inhibited).
seq | The sequence to invert. |
Definition at line 121 of file SequenceTools.cpp.
References bpp::TemplateCoreSymbolListInterface< T >::getValue(), bpp::IntSymbolListInterface::setElement(), and bpp::CruxSymbolListInterface::size().
|
static |
Invert and complement a sequence.
This method is more accurate than calling invert and complement separately.
seq | The sequence to invert and complement. |
Definition at line 146 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), bpp::AlphabetTools::isDNAAlphabet(), bpp::AlphabetTools::isRNAAlphabet(), bpp::IntSymbolListInterface::setElement(), bpp::CruxSymbolListInterface::size(), and bpp::NucleicAcidsReplication::translate().
|
inlinestaticinherited |
Definition at line 174 of file SymbolListTools.h.
References bpp::SymbolListTools::isComplete().
|
staticinherited |
site | A site. |
Definition at line 134 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SiteContainerTools::getCompleteSites(), bpp::SymbolListTools::isComplete(), bpp::CompleteTemplateSiteContainerIterator< SiteType, SequenceType, HashType >::nextCompleteSitePosition(), and bpp::CompleteTemplateSiteContainerIterator< SiteType, SequenceType, HashType >::previousCompleteSitePosition().
|
staticinherited |
Definition at line 145 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 208 of file SymbolListTools.h.
References bpp::SymbolListTools::isConstant().
|
staticinherited |
Tell if a site is constant, that is displaying the same state in all sequences that do not present a gap.
site | A site. |
ignoreUnknown | If true, positions with unknown characters will be ignored. Otherwise, a site with one single state + any uncertain state will not be considered as constant. |
unresolvedRaisesException | In ambiguous cases (a gap-only site, for instance), throw an exception. Otherwise return false. |
Definition at line 258 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::CodonSiteTools::fixedDifferences(), bpp::CodonSiteTools::generateCodonSiteWithoutRareVariant(), bpp::SymbolListTools::isConstant(), bpp::CodonSiteTools::isFourFoldDegenerated(), bpp::CodonSiteTools::isMonoSitePolymorphic(), bpp::CodonSiteTools::isSynonymousPolymorphic(), bpp::CodonSiteTools::numberOfNonSynonymousSubstitutions(), bpp::CodonSiteTools::numberOfSubstitutions(), bpp::CodonSiteTools::piNonSynonymous(), and bpp::CodonSiteTools::piSynonymous().
|
staticinherited |
Definition at line 320 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Tell if a list has exactly 2 distinct characters.
list | A list. |
Definition at line 946 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 64 of file SymbolListTools.h.
References bpp::SymbolListTools::isGapOnly().
|
staticinherited |
site | A site. |
Definition at line 59 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::isGapOnly(), bpp::SiteContainerTools::removeGapOnlySites(), and bpp::SiteContainerTools::removeGapOrUnresolvedOnlySites().
|
staticinherited |
Definition at line 71 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 108 of file SymbolListTools.h.
References bpp::SymbolListTools::isGapOrUnresolvedOnly().
|
staticinherited |
site | A site. |
Definition at line 84 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::isGapOrUnresolvedOnly(), and bpp::SiteContainerTools::removeGapOrUnresolvedOnlySites().
|
staticinherited |
Definition at line 95 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Tell if a site is a parsimony informative site.
At least two distinct characters must be present.
site | a Site. |
Definition at line 912 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Tell if a list has more than 2 distinct characters.
list | A list. |
Definition at line 935 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Compute the joint entropy between two lists.
where $p_{x,y}$ is the frequency of the pair $(x,y)$.
list1 | First list |
list2 | Second list |
resolveUnknowns | Tell if unknown characters must be resolved. |
Definition at line 706 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
staticinherited |
Compute the mutual information between two lists.
where $p_x$ and $p_y$ are the frequencies of states $x$ and $y$, and $p_{x,y}$ is the frequency of the pair $(x,y)$.
list1 | First list |
list2 | Second list |
resolveUnknowns | Tell if unknown characters must be resolved. |
Definition at line 656 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 86 of file SymbolListTools.h.
References bpp::SymbolListTools::numberOfGaps().
|
staticinherited |
site | A site. |
Definition at line 159 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SequenceApplicationTools::getSitesToAnalyse(), and bpp::SymbolListTools::numberOfGaps().
|
staticinherited |
Definition at line 172 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
inlinestaticinherited |
Definition at line 130 of file SymbolListTools.h.
References bpp::SymbolListTools::numberOfUnresolved().
|
staticinherited |
site | A site. |
Definition at line 187 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SequenceApplicationTools::getSitesToAnalyse(), and bpp::SymbolListTools::numberOfUnresolved().
|
staticinherited |
Definition at line 200 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
static |
Remove gaps from a sequence.
The deleteElement method of the Sequence object will be used where appropriate.
seq | The sequence to analyse. |
Definition at line 294 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::deleteElement(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SiteContainerTools::alignNW().
|
static |
Remove stops from a codon sequence.
The deleteElement method of the Sequence object will be used where appropriate.
seq | The sequence to analyse. |
gCode | The genetic code according to which stop codons are specified. |
Exception | if the input sequence does not have a codon alphabet. |
Definition at line 324 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::deleteElement(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::GeneticCode::isStop(), and bpp::CruxSymbolListInterface::size().
|
static |
Replace stop codons by gaps.
The setElement method of the Sequence object will be used where appropriate.
seq | The sequence to analyse. |
gCode | The genetic code according to which stop codons are specified. |
Exception | if the input sequence does not have a codon alphabet. |
Definition at line 338 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), bpp::GeneticCode::isStop(), bpp::IntSymbolListInterface::setElement(), and bpp::CruxSymbolListInterface::size().
Get the reverse-transcription sequence of an RNA sequence.
Translate RNA sequence into DNA sequence.
sequence | The sequence to reverse-transcribe. |
AlphabetException | If the sequence is not an RNA sequence. |
Definition at line 108 of file SequenceTools.cpp.
References bpp::AbstractTemplateSymbolList< T >::alphabet(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), and bpp::AlphabetTools::isRNAAlphabet().
|
static |
Get the RNY decomposition of a DNA sequence.
This function gives the alternative succession in phases 1, 2 and 3.
sequence | The sequence to decompose. |
AlphabetException | If the sequence is not a DNA sequence. |
Definition at line 569 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::Commentable::getComments(), bpp::CoreSequenceInterface::getName(), bpp::AlphabetTools::isDNAAlphabet(), and bpp::CruxSymbolListInterface::size().
|
static |
Get the RNY decomposition of a DNA sequence.
This function gives the decomposition in the given phase. In phase 1, the first triplet is centered on the first character.
sequence | The sequence to decompose. |
ph | The phase to use (1,2 or 3). |
AlphabetException | If the sequence is not a DNA sequence. |
Definition at line 527 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::Commentable::getComments(), bpp::CoreSequenceInterface::getName(), bpp::AlphabetTools::isDNAAlphabet(), bpp::CruxSymbolListInterface::size(), and bpp::TextTools::toString().
Referenced by bpp::SequenceApplicationTools::getProbabilisticSiteContainer(), and bpp::SequenceApplicationTools::getSiteContainer().
|
inlinestatic |
Get a sub-sequence.
sequence | The sequence to truncate. |
begin | The first position of the subsequence. |
end | The last position of the subsequence. |
Definition at line 112 of file SequenceTools.h.
References bpp::CruxSymbolListInterface::getAlphabet(), bpp::Commentable::getComments(), bpp::CoreSequenceInterface::getName(), and subseq().
|
inlinestatic |
Get a sub-sequence.
sequence | The sequence to truncate. |
begin | The first position of the subsequence. |
end | The last position of the subsequence (included). |
output | A sequence object to be appended with the given subsequence. |
Definition at line 91 of file SequenceTools.h.
References bpp::SequenceInterface::append(), bpp::CruxSymbolListInterface::size(), and bpp::TextTools::toString().
Referenced by subseq().
|
static |
Subtract a haplotype from a heterozygous sequence.
Subtract a haplotype (i.e. a fully resolved sequence) from a heterozygous sequence to get the other haplotype. The new haplotype could be an unresolved sequence if unresolved characters in the sequence code for more than 2 states.
For example:
s | The heterozygous sequence. |
h | The haplotype to subtract. |
name | The name of the new computed haplotype. |
level | The number of states from which the site is set to fully unresolved. |
SequenceNotAlignedException | if s and h don't have the same size. |
Definition at line 489 of file SequenceTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), bpp::CoreSequenceInterface::getName(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), and bpp::CruxSymbolListInterface::size().
Get the transcription sequence of a DNA sequence.
Translate DNA sequence into RNA sequence.
sequence | The sequence to transcribe. |
AlphabetException | If the sequence is not a DNA sequence. |
Definition at line 95 of file SequenceTools.cpp.
References bpp::AbstractTemplateSymbolList< T >::alphabet(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), and bpp::AlphabetTools::isDNAAlphabet().
|
staticinherited |
Compute the factorial diversity index of a site.
where $p_x$ is the number of times state $x$ is observed in the site.
list | A list. |
Definition at line 744 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::size().
|
staticinherited |
Compute the Shannon entropy index of a SymbolList.
where $f_x$ is the frequency of state $x$.
list | A list. |
resolveUnknowns | Tell if unknown characters must be resolved. |
Definition at line 632 of file SymbolListTools.cpp.
References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().
Referenced by bpp::SymbolListTools::entropy().
|
staticprivate |
Definition at line 67 of file SequenceTools.h.
|
staticprivate |
Definition at line 68 of file SequenceTools.h.
|
staticprivate |
Definition at line 66 of file SequenceTools.h.
|
staticprivate |
Definition at line 69 of file SequenceTools.h.