bpp-seq3  3.0.0
bpp::SequenceTools Class Reference

SequenceTools static class. More...

#include <Bpp/Seq/SequenceTools.h>

+ Inheritance diagram for bpp::SequenceTools:
+ Collaboration diagram for bpp::SequenceTools:

Public Member Functions

 SequenceTools ()
 
virtual ~SequenceTools ()
 

Static Public Member Functions

static bool areSequencesIdentical (const SequenceInterface &seq1, const SequenceInterface &seq2)
 
static void subseq (const SequenceInterface &sequence, size_t begin, size_t end, SequenceInterface &output)
 Get a sub-sequence. More...
 
template<class SequenceTypeOut >
static std::unique_ptr< SequenceTypeOut > subseq (const SequenceInterface &sequence, size_t begin, size_t end)
 Get a sub-sequence. More...
 
template<class SequenceTypeOut >
static std::unique_ptr< SequenceTypeOut > concatenate (const SequenceInterface &seq1, const SequenceInterface &seq2)
 Concatenate two sequences. More...
 
static void complement (SequenceInterface &seq)
 Complement the nucleotide sequence itself. More...
 
static std::unique_ptr< Sequence > getComplement (const SequenceInterface &sequence)
 Get the complementary sequence of a nucleotide sequence. More...
 
static std::unique_ptr< Sequence > transcript (const Sequence &sequence)
 Get the transcription sequence of a DNA sequence. More...
 
static std::unique_ptr< Sequence > reverseTranscript (const Sequence &sequence)
 Get the reverse-transcription sequence of a RNA sequence. More...
 
static void invert (SequenceInterface &seq)
 Inverse a sequence from 5'->3' to 3'->5' and vice-versa. More...
 
static std::unique_ptr< SequenceInterface > getInvert (const SequenceInterface &sequence)
 Inverse a sequence from 5'->3' to 3'->5' and vice-versa. More...
 
static void invertComplement (SequenceInterface &seq)
 Inverse and complement a sequence. More...
 
static double getPercentIdentity (const SequenceInterface &seq1, const SequenceInterface &seq2, bool ignoreGaps=false)
 
static size_t getNumberOfSites (const SequenceInterface &seq)
 
static size_t getNumberOfCompleteSites (const SequenceInterface &seq)
 
static std::unique_ptr< SequenceInterface > getSequenceWithCompleteSites (const SequenceInterface &seq)
 keep only complete sites in a sequence. More...
 
static size_t getNumberOfUnresolvedSites (const SequenceInterface &seq)
 
static void removeGaps (SequenceInterface &seq)
 Remove gaps from a sequence. More...
 
static std::unique_ptr< SequenceInterface > getSequenceWithoutGaps (const SequenceInterface &seq)
 Get a copy of the sequence without gaps. More...
 
static void removeStops (SequenceInterface &seq, const GeneticCode &gCode)
 Remove stops from a codon sequence. More...
 
static std::unique_ptr< SequenceInterface > getSequenceWithoutStops (const SequenceInterface &seq, const GeneticCode &gCode)
 Get a copy of the codon sequence without stops. More...
 
static void replaceStopsWithGaps (SequenceInterface &seq, const GeneticCode &gCode)
 Replace stop codons by gaps. More...
 
static std::unique_ptr< BowkerTest > bowkerTest (const SequenceInterface &seq1, const SequenceInterface &seq2)
 Bowker's test for homogeneity. More...
 
static void getPutativeHaplotypes (const SequenceInterface &seq, std::vector< std::unique_ptr< SequenceInterface >> &hap, unsigned int level=2)
 Get all putative haplotypes from a heterozygous sequence. More...
 
static std::unique_ptr< Sequence > combineSequences (const SequenceInterface &s1, const SequenceInterface &s2)
 Combine two sequences. More...
 
static std::unique_ptr< Sequence > subtractHaplotype (const SequenceInterface &s, const SequenceInterface &h, std::string name="", unsigned int level=1)
 Subtract haplotype from a heterozygous sequence. More...
 
static std::unique_ptr< Sequence > RNYslice (const SequenceInterface &sequence, int ph)
 Get the RNY decomposition of a DNA sequence. More...
 
static std::unique_ptr< Sequence > RNYslice (const SequenceInterface &sequence)
 Get the RNY decomposition of a DNA sequence. More...
 
static void getCDS (SequenceInterface &sequence, const GeneticCode &gCode, bool checkInit, bool checkStop, bool includeInit=true, bool includeStop=true)
 Extract CDS part from a codon sequence. Optionally check for initiator and stop codons, or both. More...
 
static size_t findFirstOf (const SequenceInterface &seq, const SequenceInterface &motif, bool strict=true)
 Find the position of a motif in a sequence. More...
 
static std::unique_ptr< Sequence > getRandomSequence (std::shared_ptr< const Alphabet > &alphabet, size_t length)
 Get a random sequence of given size and alphabet, with all states having equal probability. More...
 
static bool hasGap (const IntSymbolListInterface &site)
 
static bool hasGap (const ProbabilisticSymbolListInterface &site)
 
static bool hasGap (const CruxSymbolListInterface &site)
 
static bool hasUnresolved (const IntSymbolListInterface &site)
 
static bool isGapOnly (const IntSymbolListInterface &site)
 
static bool isGapOnly (const ProbabilisticSymbolListInterface &site)
 
static bool isGapOnly (const CruxSymbolListInterface &site)
 
static size_t numberOfGaps (const IntSymbolListInterface &site)
 
static size_t numberOfGaps (const ProbabilisticSymbolListInterface &site)
 
static size_t numberOfGaps (const CruxSymbolListInterface &site)
 
static bool isGapOrUnresolvedOnly (const IntSymbolListInterface &site)
 
static bool isGapOrUnresolvedOnly (const ProbabilisticSymbolListInterface &site)
 
static bool isGapOrUnresolvedOnly (const CruxSymbolListInterface &site)
 
static size_t numberOfUnresolved (const IntSymbolListInterface &site)
 
static size_t numberOfUnresolved (const ProbabilisticSymbolListInterface &site)
 
static size_t numberOfUnresolved (const CruxSymbolListInterface &site)
 
static bool hasUnknown (const IntSymbolListInterface &site)
 
static bool hasUnknown (const ProbabilisticSymbolListInterface &site)
 
static bool hasUnknown (const CruxSymbolListInterface &site)
 
static bool isComplete (const IntSymbolListInterface &site)
 
static bool isComplete (const ProbabilisticSymbolListInterface &site)
 
static bool isComplete (const CruxSymbolListInterface &site)
 
static bool isConstant (const IntSymbolListInterface &site, bool ignoreUnknown=false, bool unresolvedRaisesException=true)
 Tell if a site is constant, that is displaying the same state in all sequences that do not present a gap. More...
 
static bool isConstant (const ProbabilisticSymbolListInterface &site, bool unresolvedRaisesException=true)
 
static bool isConstant (const CruxSymbolListInterface &site, bool ignoreUnknown=false, bool unresolvedRaisesException=true)
 
static bool areSymbolListsIdentical (const IntSymbolListInterface &list1, const IntSymbolListInterface &list2)
 
static bool areSymbolListsIdentical (const ProbabilisticSymbolListInterface &list1, const ProbabilisticSymbolListInterface &list2)
 
static bool areSymbolListsIdentical (const CruxSymbolListInterface &l1, const CruxSymbolListInterface &l2)
 
template<class count_type >
static void getCounts (const IntSymbolListInterface &list, std::map< int, count_type > &counts)
 Count all states in the list. More...
 
static void getCounts (const ProbabilisticSymbolListInterface &list, std::map< int, double_t > &counts)
 Sum all states in the list. More...
 
static void getCounts (const CruxSymbolListInterface &list, std::map< int, double > &counts, bool resolveUnknowns=false)
 Count all states in the list, optionally resolving unknown characters. More...
 
template<class count_type >
static void getCounts (const IntSymbolListInterface &list1, const IntSymbolListInterface &list2, std::map< int, std::map< int, count_type >> &counts)
 Count all pairs of states for two lists of the same size. More...
 
static void getCounts (const ProbabilisticSymbolListInterface &list1, const ProbabilisticSymbolListInterface &list2, std::map< int, std::map< int, double >> &counts)
 Sum along the lists the joint probabilities for all pairs of states for two lists of the same size. More...
 
static void getCounts (const CruxSymbolListInterface &list1, const CruxSymbolListInterface &list2, std::map< int, std::map< int, double >> &counts, bool resolveUnknowns)
 Count all pairs of states for two lists of the same size, optionally resolving unknown characters. More...
 
static void getCountsResolveUnknowns (const IntSymbolListInterface &list, std::map< int, double > &counts)
 Count all states in the list normalizing unknown characters. More...
 
static void getCountsResolveUnknowns (const ProbabilisticSymbolListInterface &list, std::map< int, double > &counts)
 Count all states in the list normalizing unknown characters. More...
 
static void getCountsResolveUnknowns (const IntSymbolListInterface &list1, const IntSymbolListInterface &list2, std::map< int, std::map< int, double >> &counts)
 Count all pairs of states for two lists of the same size resolving unknown characters. More...
 
static void getCountsResolveUnknowns (const ProbabilisticSymbolListInterface &list1, const ProbabilisticSymbolListInterface &list2, std::map< int, std::map< int, double >> &counts)
 Count all pairs of states for two lists of the same size resolving unknown characters. More...
 
static void getFrequencies (const CruxSymbolListInterface &list, std::map< int, double > &frequencies, bool resolveUnknowns=false)
 Get all states frequencies in the list. More...
 
static void getFrequencies (const CruxSymbolListInterface &list1, const CruxSymbolListInterface &list2, std::map< int, std::map< int, double >> &frequencies, bool resolveUnknowns=false)
 Get all state pairs frequencies for two lists of the same size. More...
 
static double getGCContent (const IntSymbolListInterface &list, bool ignoreUnresolved=true, bool ignoreGap=true)
 Get the GC content of a symbol list. More...
 
static double getGCContent (const ProbabilisticSymbolListInterface &list, bool ignoreUnresolved=true, bool ignoreGap=true)
 
static double getGCContent (const CruxSymbolListInterface &list, bool ignoreUnresolved=true, bool ignoreGap=true)
 
static size_t getNumberOfDistinctPositions (const IntSymbolListInterface &l1, const IntSymbolListInterface &l2)
 Get the number of distinct positions. More...
 
static size_t getNumberOfDistinctPositions (const ProbabilisticSymbolListInterface &l1, const ProbabilisticSymbolListInterface &l2)
 
static size_t getNumberOfDistinctPositions (const CruxSymbolListInterface &l1, const CruxSymbolListInterface &l2)
 
static size_t getNumberOfPositionsWithoutGap (const IntSymbolListInterface &l1, const IntSymbolListInterface &l2)
 Get the number of positions without gap (or without null column). More...
 
static size_t getNumberOfPositionsWithoutGap (const ProbabilisticSymbolListInterface &l1, const ProbabilisticSymbolListInterface &l2)
 
static size_t getNumberOfPositionsWithoutGap (const CruxSymbolListInterface &l1, const CruxSymbolListInterface &l2)
 
static void changeGapsToUnknownCharacters (IntSymbolListInterface &l)
 Change all gap elements to unknown characters (or columns of 1). More...
 
static void changeGapsToUnknownCharacters (ProbabilisticSymbolListInterface &l)
 
static void changeGapsToUnknownCharacters (CruxSymbolListInterface &l)
 
static void changeUnresolvedCharactersToGaps (IntSymbolListInterface &l)
 Change all unknown characters to gap elements (or columns of 0). More...
 
static void changeUnresolvedCharactersToGaps (ProbabilisticSymbolListInterface &l)
 
static void changeUnresolvedCharactersToGaps (CruxSymbolListInterface &l)
 
static double variabilityShannon (const CruxSymbolListInterface &list, bool resolveUnknowns)
 Compute the Shannon entropy index of a SymbolList. More...
 
static double variabilityFactorial (const IntSymbolListInterface &list)
 Compute the factorial diversity index of a site. More...
 
static double mutualInformation (const CruxSymbolListInterface &list1, const CruxSymbolListInterface &list2, bool resolveUnknowns)
 Compute the mutual information between two lists. More...
 
static double entropy (const CruxSymbolListInterface &list, bool resolveUnknowns)
 Compute the entropy of a site. This is an alias of method variabilityShannon. More...
 
static double jointEntropy (const CruxSymbolListInterface &list1, const CruxSymbolListInterface &list2, bool resolveUnknowns)
 Compute the joint entropy between two lists. More...
 
static double heterozygosity (const CruxSymbolListInterface &list)
 Compute the heterozygosity index of a list. More...
 
static size_t getNumberOfDistinctCharacters (const IntSymbolListInterface &list)
 Give the number of distinct characters at a list. More...
 
static size_t getMajorAlleleFrequency (const IntSymbolListInterface &list)
 return the number of occurrences of the most common allele. More...
 
static int getMajorAllele (const CruxSymbolListInterface &list)
 return the state corresponding to the most common allele. More...
 
static size_t getMinorAlleleFrequency (const IntSymbolListInterface &list)
 return the number of occurrences of the least common allele. More...
 
static int getMinorAllele (const CruxSymbolListInterface &list)
 return the state corresponding to the least common allele. More...
 
static bool hasSingleton (const IntSymbolListInterface &list)
 Tell if a list has singletons. More...
 
static bool isParsimonyInformativeSite (const IntSymbolListInterface &site)
 Tell if a site is a parsimony informative site. More...
 
static bool isTriplet (const IntSymbolListInterface &list)
 Tell if a list has more than 2 distinct characters. More...
 
static bool isDoubleton (const IntSymbolListInterface &list)
 Tell if a list has exactly 2 distinct characters. More...
 

Static Private Attributes

static std::shared_ptr< RNY > RNY_
 
static NucleicAcidsReplication DNARep_
 
static NucleicAcidsReplication RNARep_
 
static NucleicAcidsReplication transc_
 

Detailed Description

SequenceTools static class.

Implement methods to manipulate sequences

Definition at line 62 of file SequenceTools.h.

Constructor & Destructor Documentation

◆ SequenceTools()

bpp::SequenceTools::SequenceTools ( )
inline

Definition at line 72 of file SequenceTools.h.

◆ ~SequenceTools()

virtual bpp::SequenceTools::~SequenceTools ( )
inline virtual

Definition at line 73 of file SequenceTools.h.

Member Function Documentation

◆ areSequencesIdentical()

bool SequenceTools::areSequencesIdentical ( const SequenceInterface seq1,
const SequenceInterface seq2 
)
static
Parameters
seq1 The first sequence.
seq2 The second sequence.
Returns
True if the two sequences have the same content (and, of course, alphabet).

Definition at line 29 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

◆ areSymbolListsIdentical() [1/3]

static bool bpp::SymbolListTools::areSymbolListsIdentical ( const CruxSymbolListInterface l1,
const CruxSymbolListInterface l2 
)
inlinestaticinherited

◆ areSymbolListsIdentical() [2/3]

bool SymbolListTools::areSymbolListsIdentical ( const IntSymbolListInterface list1,
const IntSymbolListInterface list2 
)
staticinherited
Parameters
list1 The first site.
list2 The second site.
Returns
True if the two sites have the same content (and, of course, alphabet).

Definition at line 216 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::areSymbolListsIdentical().

◆ areSymbolListsIdentical() [3/3]

bool SymbolListTools::areSymbolListsIdentical ( const ProbabilisticSymbolListInterface list1,
const ProbabilisticSymbolListInterface list2 
)
staticinherited

◆ bowkerTest()

unique_ptr< BowkerTest > SequenceTools::bowkerTest ( const SequenceInterface seq1,
const SequenceInterface seq2 
)
static

Bowker's test for homogeneity.

Computes the contingency table of occurrence of all pairs of states and test its symmetry using Bowker's (1948) test.

Reference:

Ababneh F. Bioinformatics 2006 22(10) 1225-1231
Parameters
seq1 The first sequence.
seq2 The second sequence.
Returns
A BowkerTest object with the computed statistic and p-value (computed from a chi-square distribution).
Exceptions
SequenceNotAlignedException If the two sequences do not have the same length.

Definition at line 353 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), bpp::RandomTools::pChisq(), and bpp::CruxSymbolListInterface::size().

◆ changeGapsToUnknownCharacters() [1/3]

static void bpp::SymbolListTools::changeGapsToUnknownCharacters ( CruxSymbolListInterface l)
inlinestaticinherited

◆ changeGapsToUnknownCharacters() [2/3]

void SymbolListTools::changeGapsToUnknownCharacters ( IntSymbolListInterface l)
staticinherited

Change all gap elements to unknown characters (or columns of 1).

Parameters
l The input list of characters.

Definition at line 501 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::changeGapsToUnknownCharacters().

◆ changeGapsToUnknownCharacters() [3/3]

void SymbolListTools::changeGapsToUnknownCharacters ( ProbabilisticSymbolListInterface l)
staticinherited

Definition at line 614 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ changeUnresolvedCharactersToGaps() [1/3]

static void bpp::SymbolListTools::changeUnresolvedCharactersToGaps ( CruxSymbolListInterface l)
inlinestaticinherited

◆ changeUnresolvedCharactersToGaps() [2/3]

void SymbolListTools::changeUnresolvedCharactersToGaps ( IntSymbolListInterface l)
staticinherited

Change all unknown characters to gap elements (or columns of 0).

Parameters
l The input list of characters.

Definition at line 511 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::changeUnresolvedCharactersToGaps().

◆ changeUnresolvedCharactersToGaps() [3/3]

void SymbolListTools::changeUnresolvedCharactersToGaps ( ProbabilisticSymbolListInterface l)
staticinherited

Definition at line 623 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ combineSequences()

◆ complement()

void SequenceTools::complement ( SequenceInterface seq)
static

◆ concatenate()

template<class SequenceTypeOut >
static std::unique_ptr<SequenceTypeOut> bpp::SequenceTools::concatenate ( const SequenceInterface seq1,
const SequenceInterface seq2 
)
inlinestatic

Concatenate two sequences.

Sequences must have the same name and alphabets. Only first sequence's commentaries are kept.

Parameters
seq1 The first sequence.
seq2 The second sequence.
Returns
A new sequence object with the concatenation of the two sequences. The type of the output sequence is the same as the one of seq1, as it is created using the clone method.
Exceptions
AlphabetMismatchException If the two alphabets do not match.
Exception If the sequence names do not match.

Definition at line 137 of file SequenceTools.h.

References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::Alphabet::getAlphabetType(), bpp::CoreSequenceInterface::getName(), and bpp::CruxSymbolListInterface::size().

◆ entropy()

static double bpp::SymbolListTools::entropy ( const CruxSymbolListInterface list,
bool  resolveUnknowns 
)
inlinestaticinherited

Compute the entropy of a site. This is an alias of method variabilityShannon.

\[ I = - \sum_x f_x\cdot \ln(f_x) \]

where $f_x$ is the frequency of state $x$.

Author
J. Dutheil
Parameters
list A list.
resolveUnknowns Tell if unknown characters must be resolved.
Returns
The Shannon entropy index of this list.

Definition at line 817 of file SymbolListTools.h.

References bpp::SymbolListTools::variabilityShannon().

◆ findFirstOf()

size_t SequenceTools::findFirstOf ( const SequenceInterface seq,
const SequenceInterface motif,
bool  strict = true 
)
static

Find the position of a motif in a sequence.

Parameters
seq The reference sequence.
motif The motif to find.
strict If true (default), find exactly the motif. If false, find a compatible match.
Returns
The position of the first occurrence of the motif or the seq length.

Definition at line 642 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), bpp::AlphabetTools::match(), and bpp::CruxSymbolListInterface::size().

◆ getCDS()

void SequenceTools::getCDS ( SequenceInterface sequence,
const GeneticCode gCode,
bool  checkInit,
bool  checkStop,
bool  includeInit = true,
bool  includeStop = true 
)
static

Extract CDS part from a codon sequence. Optionally check for initiator and stop codons, or both.

Parameters
sequence The sequence to be reduced to CDS part.
gCode The genetic code according to which start and stop codons are specified.
checkInit If true, then everything before the initiator codon will be removed, together with the initiator codon if includeInit is false.
checkStop If true, then everything after the first stop codon will be removed, together with the stop codon if includeStop is false.
includeInit Tell if initiator codon should be kept or removed. No effect if checkInit is false.
includeStop Tell if stop codon should be kept or removed. No effect if checkStop is false.

Definition at line 607 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::deleteElement(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::GeneticCode::isStart(), bpp::GeneticCode::isStop(), and bpp::CruxSymbolListInterface::size().

◆ getComplement()

unique_ptr< Sequence > SequenceTools::getComplement ( const SequenceInterface sequence)
static

Get the complementary sequence of a nucleotide sequence.

See also
DNAReplication
Returns
A new sequence object with the complementary sequence.
Parameters
sequence The sequence to complement.
Exceptions
AlphabetException If the sequence is not a nucleotide sequence.

Definition at line 73 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::AlphabetTools::isDNAAlphabet(), bpp::AlphabetTools::isRNAAlphabet(), and bpp::NucleicAcidsReplication::translate().

◆ getCounts() [1/6]

static void bpp::SymbolListTools::getCounts ( const CruxSymbolListInterface list,
std::map< int, double > &  counts,
bool  resolveUnknowns = false 
)
inlinestaticinherited

Count all states in the list, optionally resolving unknown characters.

For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.

Author
J. Dutheil
Parameters
list The list.
counts The output map to store the counts (existing counts will be incremented).
resolveUnknowns Tell if unknown characters must be resolved.
Returns
A map with all states and corresponding counts.

Definition at line 360 of file SymbolListTools.h.

References bpp::SymbolListTools::getCounts(), and bpp::SymbolListTools::getCountsResolveUnknowns().

◆ getCounts() [2/6]

static void bpp::SymbolListTools::getCounts ( const CruxSymbolListInterface list1,
const CruxSymbolListInterface list2,
std::map< int, std::map< int, double >> &  counts,
bool  resolveUnknowns 
)
inlinestaticinherited

Count all pairs of states for two lists of the same size, optionally resolving unknown characters.

For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.

NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.

Author
J. Dutheil
Parameters
list1 The first list.
list2 The second list.
counts The output map to store the counts (existing counts will be incremented).
resolveUnknowns Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
Returns
A map with all states and corresponding counts.

Definition at line 514 of file SymbolListTools.h.

References bpp::SymbolListTools::getCounts(), and bpp::SymbolListTools::getCountsResolveUnknowns().

◆ getCounts() [3/6]

template<class count_type >
static void bpp::SymbolListTools::getCounts ( const IntSymbolListInterface list,
std::map< int, count_type > &  counts 
)
inlinestaticinherited

Count all states in the list.

Author
J. Dutheil
Parameters
list The list.
counts The output map to store the counts (existing counts will be incremented).

Definition at line 265 of file SymbolListTools.h.

References bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::getCounts(), bpp::SequenceContainerTools::getFrequencies(), bpp::CodonSiteTools::isSynonymousPolymorphic(), and bpp::CodonSiteTools::numberOfNonSynonymousSubstitutions().

◆ getCounts() [4/6]

template<class count_type >
static void bpp::SymbolListTools::getCounts ( const IntSymbolListInterface list1,
const IntSymbolListInterface list2,
std::map< int, std::map< int, count_type >> &  counts 
)
inlinestaticinherited

Count all pairs of states for two lists of the same size.

NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.

Author
J. Dutheil
Parameters
list1 The first list.
list2 The second list.
counts The output map to store the counts (existing counts will be incremented).

Definition at line 412 of file SymbolListTools.h.

References bpp::CruxSymbolListInterface::size().

◆ getCounts() [5/6]

static void bpp::SymbolListTools::getCounts ( const ProbabilisticSymbolListInterface list,
std::map< int, double_t > &  counts 
)
inlinestaticinherited

Sum all states in the list.

Parameters
list The list.
counts The output map to store the sum for all states (existing counts will be summed).

Definition at line 282 of file SymbolListTools.h.

References bpp::CruxSymbolListInterface::size().

◆ getCounts() [6/6]

static void bpp::SymbolListTools::getCounts ( const ProbabilisticSymbolListInterface list1,
const ProbabilisticSymbolListInterface list2,
std::map< int, std::map< int, double >> &  counts 
)
inlinestaticinherited

Sum along the lists the joint probabilities for all pairs of states for two lists of the same size.

NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.

Author
J. Dutheil
Parameters
list1 The first list.
list2 The second list.
counts The output map to store the counts (existing counts will be summed).

Definition at line 437 of file SymbolListTools.h.

References bpp::CruxSymbolListInterface::size().

◆ getCountsResolveUnknowns() [1/4]

static void bpp::SymbolListTools::getCountsResolveUnknowns ( const IntSymbolListInterface list,
std::map< int, double > &  counts 
)
inlinestaticinherited

Count all states in the list normalizing unknown characters.

For instance, (1,1,1,1) will be counted as (1/4,1/4,1/4,1/4).

Author
J. Dutheil
Parameters
list The list.
counts The output map to store the counts (existing counts will be incremented).
Returns
A map with all states and corresponding counts.

Definition at line 306 of file SymbolListTools.h.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::getCounts().

◆ getCountsResolveUnknowns() [2/4]

void SymbolListTools::getCountsResolveUnknowns ( const IntSymbolListInterface list1,
const IntSymbolListInterface list2,
std::map< int, std::map< int, double >> &  counts 
)
staticinherited

Count all pairs of states for two lists of the same size resolving unknown characters.

For instance, (1,1,1,1) will be counted as (1/4,1/4,1/4,1/4).

NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.

Author
J. Dutheil
Parameters
list1 The first list.
list2 The second list.
counts The output map to store the counts (existing counts will be incremented).
Returns
A map with all states and corresponding counts.

Definition at line 357 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

◆ getCountsResolveUnknowns() [3/4]

static void bpp::SymbolListTools::getCountsResolveUnknowns ( const ProbabilisticSymbolListInterface list,
std::map< int, double > &  counts 
)
inlinestaticinherited

Count all states in the list normalizing unknown characters.

For instance, (1,1,1,1) will be counted as (1/4,1/4,1/4,1/4).

Author
J. Dutheil
Parameters
list The list.
counts The output map to store the counts (existing counts will be incremented).
Returns
A map with all states and corresponding counts.

Definition at line 331 of file SymbolListTools.h.

References bpp::CruxSymbolListInterface::size(), and bpp::VectorTools::sum().

◆ getCountsResolveUnknowns() [4/4]

void SymbolListTools::getCountsResolveUnknowns ( const ProbabilisticSymbolListInterface list1,
const ProbabilisticSymbolListInterface list2,
std::map< int, std::map< int, double >> &  counts 
)
staticinherited

Count all pairs of states for two lists of the same size resolving unknown characters.

For instance, (1,1,1,1) will be counted as (1/4,1/4,1/4,1/4).

NB: The two lists do not need to share the same alphabet! The states of the first list will be used as the first index in the output, and the ones from the second list as the second index.

Author
J. Dutheil
Parameters
list1 The first list.
list2 The second list.
counts The output map to store the counts (existing counts will be incremented).
Returns
A map with all states and corresponding counts.

Definition at line 522 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ getFrequencies() [1/2]

void SymbolListTools::getFrequencies ( const CruxSymbolListInterface list,
std::map< int, double > &  frequencies,
bool  resolveUnknowns = false 
)
staticinherited

Get all states frequencies in the list.

Author
J. Dutheil
Parameters
list The list.
resolveUnknowns Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4.
frequencies The output map with all states and corresponding frequencies. Existing frequencies will be erased if any.

Definition at line 380 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

Referenced by bpp::CodonSiteTools::generateCodonSiteWithoutRareVariant(), bpp::SiteContainerTools::getConsensus(), bpp::CodonSiteTools::meanNumberOfSynonymousPositions(), bpp::CodonSiteTools::piNonSynonymous(), bpp::CodonSiteTools::piSynonymous(), and bpp::SiteContainerTools::removeGapSites().

◆ getFrequencies() [2/2]

void SymbolListTools::getFrequencies ( const CruxSymbolListInterface list1,
const CruxSymbolListInterface list2,
std::map< int, std::map< int, double >> &  frequencies,
bool  resolveUnknowns = false 
)
staticinherited

Get all state pairs frequencies for two lists of the same size.

Author
J. Dutheil
Parameters
list1 The first list.
list2 The second list.
resolveUnknowns Tell if unknown characters must be resolved. For instance, in DNA, N will be counted as A=1/4,T=1/4,C=1/4,G=1/4. For ProbabilisticSymbolList, (1,1,1,1) states will be counted as (1/4,1/4,1/4,1/4).
frequencies The output map with all state pairs and corresponding frequencies. Existing frequencies will be erased if any.

Definition at line 396 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ getGCContent() [1/3]

static double bpp::SymbolListTools::getGCContent ( const CruxSymbolListInterface list,
bool  ignoreUnresolved = true,
bool  ignoreGap = true 
)
inlinestaticinherited

Definition at line 609 of file SymbolListTools.h.

References bpp::SymbolListTools::getGCContent().

◆ getGCContent() [2/3]

double SymbolListTools::getGCContent ( const IntSymbolListInterface list,
bool  ignoreUnresolved = true,
bool  ignoreGap = true 
)
staticinherited

Get the GC content of a symbol list.

Parameters
listThe list.
Returns
The proportion of G and C states in the list.
Parameters
ignoreUnresolvedDo not count unresolved states (or columns that sum > 1). Otherwise, weight by each state probability in case of ambiguity (e.g. the R state counts for 0.5) (or columns are normalized).
ignoreGapDo not count gaps (or null columns) in total
Exceptions
AlphabetExceptionIf the list is not made of nucleotide states.

Definition at line 415 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::getGCContent().

◆ getGCContent() [3/3]

double SymbolListTools::getGCContent ( const ProbabilisticSymbolListInterface list,
bool  ignoreUnresolved = true,
bool  ignoreGap = true 
)
staticinherited

◆ getInvert()

unique_ptr< SequenceInterface > SequenceTools::getInvert ( const SequenceInterface sequence)
static

Invert a sequence from 5'->3' to 3'->5' and vice-versa.

ABCDEF becomes FEDCBA, and the sense attribute is changed (may be inhibited).

Parameters
sequenceThe sequence to invert.
Returns
A new sequence object containing the inverted sequence, of the same type as the input sequence (via the clone method).
Author
Sylvain Gaillard

Definition at line 137 of file SequenceTools.cpp.

References bpp::SequenceInterface::clone().

◆ getMajorAllele()

int SymbolListTools::getMajorAllele ( const CruxSymbolListInterface list)
staticinherited

return the state corresponding to the most common allele.

Parameters
listA list
Returns
The most frequent state.

Definition at line 817 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ getMajorAlleleFrequency()

size_t SymbolListTools::getMajorAlleleFrequency ( const IntSymbolListInterface list)
staticinherited

return the number of occurrences of the most common allele.

Parameters
listA list
Returns
The frequency (number of sequences) displaying the most frequent state.

Definition at line 795 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ getMinorAllele()

int SymbolListTools::getMinorAllele ( const CruxSymbolListInterface list)
staticinherited

return the state corresponding to the least common allele.

Parameters
listA list
Returns
The least frequent state.

Definition at line 866 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ getMinorAlleleFrequency()

size_t SymbolListTools::getMinorAlleleFrequency ( const IntSymbolListInterface list)
staticinherited

return the number of occurrences of the least common allele.

Parameters
listA list
Returns
The frequency (number of sequences) displaying the least frequent state.

Definition at line 844 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ getNumberOfCompleteSites()

size_t SequenceTools::getNumberOfCompleteSites ( const SequenceInterface seq)
static
Returns
The number of complete sites in the sequences, i.e. all positions without gaps and unresolved states (generic characters).
Parameters
seqThe sequence to analyse.

Definition at line 234 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::alphabet(), count(), and bpp::CruxSymbolListInterface::size().

◆ getNumberOfDistinctCharacters()

size_t SymbolListTools::getNumberOfDistinctCharacters ( const IntSymbolListInterface list)
staticinherited

Give the number of distinct characters in a list.

Parameters
listA list
Returns
The number of distinct characters in the given list.

Definition at line 774 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

Referenced by bpp::CodonSiteTools::numberOfSubstitutions().

◆ getNumberOfDistinctPositions() [1/3]

static size_t bpp::SymbolListTools::getNumberOfDistinctPositions ( const CruxSymbolListInterface l1,
const CruxSymbolListInterface l2 
)
inlinestaticinherited

◆ getNumberOfDistinctPositions() [2/3]

size_t SymbolListTools::getNumberOfDistinctPositions ( const IntSymbolListInterface l1,
const IntSymbolListInterface l2 
)
staticinherited

Get the number of distinct positions.

The comparison is achieved from position 0 to the minimum size of the two vectors.

Parameters
l1SymbolList 1.
l2SymbolList 2.
Returns
The number of distinct positions.
Exceptions
AlphabetMismatchExceptionif the two lists do not have the same alphabet type.

Definition at line 469 of file SymbolListTools.cpp.

References count(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::getNumberOfDistinctPositions().

◆ getNumberOfDistinctPositions() [3/3]

size_t SymbolListTools::getNumberOfDistinctPositions ( const ProbabilisticSymbolListInterface l1,
const ProbabilisticSymbolListInterface l2 
)
staticinherited

◆ getNumberOfPositionsWithoutGap() [1/3]

static size_t bpp::SymbolListTools::getNumberOfPositionsWithoutGap ( const CruxSymbolListInterface l1,
const CruxSymbolListInterface l2 
)
inlinestaticinherited

◆ getNumberOfPositionsWithoutGap() [2/3]

size_t SymbolListTools::getNumberOfPositionsWithoutGap ( const IntSymbolListInterface l1,
const IntSymbolListInterface l2 
)
staticinherited

Get the number of positions without gap (or without null column).

The comparison is achieved from position 0 to the minimum size of the two vectors.

Parameters
l1SymbolList 1.
l2SymbolList 2.
Returns
The number of positions without gap (or columns with at least a non zero value)
Exceptions
AlphabetMismatchExceptionif the two lists do not have the same alphabet type.

Definition at line 485 of file SymbolListTools.cpp.

References count(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::getNumberOfPositionsWithoutGap().

◆ getNumberOfPositionsWithoutGap() [3/3]

size_t SymbolListTools::getNumberOfPositionsWithoutGap ( const ProbabilisticSymbolListInterface l1,
const ProbabilisticSymbolListInterface l2 
)
staticinherited

◆ getNumberOfSites()

size_t SequenceTools::getNumberOfSites ( const SequenceInterface seq)
static
Returns
The number of sites in the sequences, i.e. all positions without gaps.
Parameters
seqThe sequence to analyse.

Definition at line 220 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::alphabet(), count(), and bpp::CruxSymbolListInterface::size().

◆ getNumberOfUnresolvedSites()

size_t SequenceTools::getNumberOfUnresolvedSites ( const SequenceInterface seq)
static
Returns
The number of unresolved sites in the sequence.
Parameters
seqThe sequence to analyse.
Author
Sylvain Gaillard

Definition at line 264 of file SequenceTools.cpp.

References count(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

◆ getPercentIdentity()

double SequenceTools::getPercentIdentity ( const SequenceInterface seq1,
const SequenceInterface seq2,
bool  ignoreGaps = false 
)
static
Returns
The percent identity of two sequences. One match is counted if the two sequences have identical states.
Parameters
seq1The first sequence.
seq2The second sequence.
ignoreGapsIf true, only positions without gaps will be used for the counting.
Exceptions
AlphabetMismatchExceptionIf the two sequences do not have the same alphabet.
SequenceNotAlignedExceptionIf the two sequences do not have the same length.

Definition at line 183 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::Alphabet::getAlphabetType(), bpp::Alphabet::getGapCharacterCode(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), and bpp::CruxSymbolListInterface::size().

◆ getPutativeHaplotypes()

void SequenceTools::getPutativeHaplotypes ( const SequenceInterface seq,
std::vector< std::unique_ptr< SequenceInterface >> &  hap,
unsigned int  level = 2 
)
static

Get all putative haplotypes from a heterozygous sequence.

Parameters
seqThe sequence to resolve
hapThe vector to fill with the new sequences
levelThe maximum number of states that a generic char must code (if this number is higher than level, the state will not be resolved). For instance if level = 3 and Alphabet is DNA, all generic char will be resolved but N.
Author
Sylvain Gaillard

Definition at line 405 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), bpp::CoreSequenceInterface::getName(), bpp::CruxSymbolListInterface::size(), and bpp::TextTools::toString().

◆ getRandomSequence()

unique_ptr< Sequence > SequenceTools::getRandomSequence ( std::shared_ptr< const Alphabet > &  alphabet,
size_t  length 
)
static

Get a random sequence of given size and alphabet, with all states having equal probability.

Parameters
alphabetThe alphabet to use.
lengthThe length of the sequence to generate.
Returns
A pointer toward a new Sequence object.

Definition at line 677 of file SequenceTools.cpp.

References bpp::RandomTools::giveIntRandomNumberBetweenZeroAndEntry().

◆ getSequenceWithCompleteSites()

unique_ptr< SequenceInterface > SequenceTools::getSequenceWithCompleteSites ( const SequenceInterface seq)
static

keep only complete sites in a sequence.

The deleteElement method of the Sequence object will be used where appropriate.

Parameters
seqThe sequence to analyse.

Definition at line 248 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::alphabet(), bpp::SequenceInterface::clone(), and bpp::CruxSymbolListInterface::size().

◆ getSequenceWithoutGaps()

unique_ptr< SequenceInterface > SequenceTools::getSequenceWithoutGaps ( const SequenceInterface seq)
static

Get a copy of the sequence without gaps.

A whole new sequence will be created by adding all non-gap positions. The original sequence will be cloned to serve as a template.

Parameters
seqThe sequence to analyse.
Returns
A new sequence object without gaps.

Definition at line 278 of file SequenceTools.cpp.

References bpp::SequenceInterface::clone(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

◆ getSequenceWithoutStops()

unique_ptr< SequenceInterface > SequenceTools::getSequenceWithoutStops ( const SequenceInterface seq,
const GeneticCode gCode 
)
static

Get a copy of the codon sequence without stops.

A whole new sequence will be created by adding all non-stop positions. The original sequence will be cloned to serve as a template.

Parameters
seqThe sequence to analyse.
gCodeThe genetic code according to which stop codons are specified.
Returns
A new sequence object without stops.
Exceptions
Exceptionif the input sequence does not have a codon alphabet.

Definition at line 306 of file SequenceTools.cpp.

References bpp::SequenceInterface::clone(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::GeneticCode::isStop(), and bpp::CruxSymbolListInterface::size().

◆ hasGap() [1/3]

static bool bpp::SymbolListTools::hasGap ( const CruxSymbolListInterface site)
inlinestaticinherited

Definition at line 36 of file SymbolListTools.h.

References bpp::SymbolListTools::hasGap().

◆ hasGap() [2/3]

◆ hasGap() [3/3]

bool SymbolListTools::hasGap ( const ProbabilisticSymbolListInterface site)
staticinherited

Definition at line 33 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ hasSingleton()

bool SymbolListTools::hasSingleton ( const IntSymbolListInterface list)
staticinherited

Tell if a list has singletons.

Parameters
listA list.
Returns
True if the list has singletons.

Definition at line 892 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ hasUnknown() [1/3]

static bool bpp::SymbolListTools::hasUnknown ( const CruxSymbolListInterface site)
inlinestaticinherited

Definition at line 152 of file SymbolListTools.h.

References bpp::SymbolListTools::hasUnknown().

◆ hasUnknown() [2/3]

bool SymbolListTools::hasUnknown ( const IntSymbolListInterface site)
staticinherited
Parameters
siteA site.
Returns
True if the site contains one or several unknown characters.

Definition at line 109 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::hasUnknown().

◆ hasUnknown() [3/3]

bool SymbolListTools::hasUnknown ( const ProbabilisticSymbolListInterface site)
staticinherited

Definition at line 120 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ hasUnresolved()

bool SymbolListTools::hasUnresolved ( const IntSymbolListInterface site)
staticinherited
Parameters
siteA site.
Returns
True if the site contains one or several unresolved states.

Definition at line 46 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SiteContainerTools::changeUnresolvedCharactersToGaps().

◆ heterozygosity()

double SymbolListTools::heterozygosity ( const CruxSymbolListInterface list)
staticinherited

Compute the heterozygosity index of a list.

\[ H = 1 - \sum_x f_x^2 \]

where $f_x$ is the frequency of state $x$.

Parameters
listA list.
Returns
The heterozygosity index of this list.

Definition at line 760 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ invert()

void SequenceTools::invert ( SequenceInterface seq)
static

Invert a sequence from 5'->3' to 3'->5' and vice-versa.

ABCDEF becomes FEDCBA, and the sense attribute is changed (may be inhibited).

Parameters
seqThe sequence to invert.
Author
Sylvain Gaillard

Definition at line 121 of file SequenceTools.cpp.

References bpp::TemplateCoreSymbolListInterface< T >::getValue(), bpp::IntSymbolListInterface::setElement(), and bpp::CruxSymbolListInterface::size().

◆ invertComplement()

void SequenceTools::invertComplement ( SequenceInterface seq)
static

Invert and complement a sequence.

This method is more accurate than calling invert and complement separately.

Parameters
seqThe sequence to invert and complement.
Author
Sylvain Gaillard

Definition at line 146 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), bpp::AlphabetTools::isDNAAlphabet(), bpp::AlphabetTools::isRNAAlphabet(), bpp::IntSymbolListInterface::setElement(), bpp::CruxSymbolListInterface::size(), and bpp::NucleicAcidsReplication::translate().

◆ isComplete() [1/3]

static bool bpp::SymbolListTools::isComplete ( const CruxSymbolListInterface site)
inlinestaticinherited

Definition at line 174 of file SymbolListTools.h.

References bpp::SymbolListTools::isComplete().

◆ isComplete() [2/3]

◆ isComplete() [3/3]

bool SymbolListTools::isComplete ( const ProbabilisticSymbolListInterface site)
staticinherited

Definition at line 145 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ isConstant() [1/3]

static bool bpp::SymbolListTools::isConstant ( const CruxSymbolListInterface site,
bool  ignoreUnknown = false,
bool  unresolvedRaisesException = true 
)
inlinestaticinherited

Definition at line 208 of file SymbolListTools.h.

References bpp::SymbolListTools::isConstant().

◆ isConstant() [2/3]

bool SymbolListTools::isConstant ( const IntSymbolListInterface site,
bool  ignoreUnknown = false,
bool  unresolvedRaisesException = true 
)
staticinherited

Tell if a site is constant, that is displaying the same state in all sequences that do not present a gap.

Parameters
siteA site.
ignoreUnknownIf true, positions with unknown characters will be ignored. Otherwise, a site with one single state plus any uncertain state will not be considered constant.
unresolvedRaisesExceptionIn ambiguous cases (a gap-only site, for instance), throw an exception. Otherwise, return false.
Returns
True if the site is made of only one state.

Definition at line 258 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::CodonSiteTools::fixedDifferences(), bpp::CodonSiteTools::generateCodonSiteWithoutRareVariant(), bpp::SymbolListTools::isConstant(), bpp::CodonSiteTools::isFourFoldDegenerated(), bpp::CodonSiteTools::isMonoSitePolymorphic(), bpp::CodonSiteTools::isSynonymousPolymorphic(), bpp::CodonSiteTools::numberOfNonSynonymousSubstitutions(), bpp::CodonSiteTools::numberOfSubstitutions(), bpp::CodonSiteTools::piNonSynonymous(), and bpp::CodonSiteTools::piSynonymous().

◆ isConstant() [3/3]

bool SymbolListTools::isConstant ( const ProbabilisticSymbolListInterface site,
bool  unresolvedRaisesException = true 
)
staticinherited

Definition at line 320 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ isDoubleton()

bool SymbolListTools::isDoubleton ( const IntSymbolListInterface list)
staticinherited

Tell if a list has exactly 2 distinct characters.

Parameters
listA list.
Returns
True if the site has exactly 2 distinct characters

Definition at line 946 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ isGapOnly() [1/3]

static bool bpp::SymbolListTools::isGapOnly ( const CruxSymbolListInterface site)
inlinestaticinherited

Definition at line 64 of file SymbolListTools.h.

References bpp::SymbolListTools::isGapOnly().

◆ isGapOnly() [2/3]

bool SymbolListTools::isGapOnly ( const IntSymbolListInterface site)
staticinherited

◆ isGapOnly() [3/3]

bool SymbolListTools::isGapOnly ( const ProbabilisticSymbolListInterface site)
staticinherited

Definition at line 71 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ isGapOrUnresolvedOnly() [1/3]

static bool bpp::SymbolListTools::isGapOrUnresolvedOnly ( const CruxSymbolListInterface site)
inlinestaticinherited

Definition at line 108 of file SymbolListTools.h.

References bpp::SymbolListTools::isGapOrUnresolvedOnly().

◆ isGapOrUnresolvedOnly() [2/3]

bool SymbolListTools::isGapOrUnresolvedOnly ( const IntSymbolListInterface site)
staticinherited
Parameters
siteA site.
Returns
True if the site contains only gaps or unresolved characters.

Definition at line 84 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::isGapOrUnresolvedOnly(), and bpp::SiteContainerTools::removeGapOrUnresolvedOnlySites().

◆ isGapOrUnresolvedOnly() [3/3]

bool SymbolListTools::isGapOrUnresolvedOnly ( const ProbabilisticSymbolListInterface site)
staticinherited

Definition at line 95 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ isParsimonyInformativeSite()

bool SymbolListTools::isParsimonyInformativeSite ( const IntSymbolListInterface site)
staticinherited

Tell if a site is a parsimony informative site.

At least two distinct characters must be present.

Parameters
sitea Site.
Returns
True if the site is parsimony informative.

Definition at line 912 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ isTriplet()

bool SymbolListTools::isTriplet ( const IntSymbolListInterface list)
staticinherited

Tell if a list has more than 2 distinct characters.

Parameters
listA list.
Returns
True if the list has more than 2 distinct characters

Definition at line 935 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ jointEntropy()

double SymbolListTools::jointEntropy ( const CruxSymbolListInterface list1,
const CruxSymbolListInterface list2,
bool  resolveUnknowns 
)
staticinherited

Compute the joint entropy between two lists.

\[ H_{i,j} = - \sum_x \sum_y p_{x,y}\ln\left(p_{x,y}\right) \]

where $p_{x,y}$ is the frequency of the pair $(x,y)$.

Author
J. Dutheil
Parameters
list1First list
list2Second list
resolveUnknownsTell if unknown characters must be resolved.
Returns
The joint entropy for the pair of lists.

Definition at line 706 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

◆ mutualInformation()

double SymbolListTools::mutualInformation ( const CruxSymbolListInterface list1,
const CruxSymbolListInterface list2,
bool  resolveUnknowns 
)
staticinherited

Compute the mutual information between two lists.

\[ MI = \sum_x \sum_y p_{x,y}\ln\left(\frac{p_{x,y}}{p_x \cdot p_y}\right) \]

where $p_x$ and $p_y$ are the frequencies of states $x$ and $y$, and $p_{x,y}$ is the frequency of the pair $(x,y)$.

Author
J. Dutheil
Parameters
list1First list
list2Second list
resolveUnknownsTell if unknown characters must be resolved.
Returns
The mutual information for the pair of lists.

Definition at line 656 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

◆ numberOfGaps() [1/3]

static size_t bpp::SymbolListTools::numberOfGaps ( const CruxSymbolListInterface site)
inlinestaticinherited

Definition at line 86 of file SymbolListTools.h.

References bpp::SymbolListTools::numberOfGaps().

◆ numberOfGaps() [2/3]

size_t SymbolListTools::numberOfGaps ( const IntSymbolListInterface site)
staticinherited

◆ numberOfGaps() [3/3]

size_t SymbolListTools::numberOfGaps ( const ProbabilisticSymbolListInterface site)
staticinherited

Definition at line 172 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ numberOfUnresolved() [1/3]

static size_t bpp::SymbolListTools::numberOfUnresolved ( const CruxSymbolListInterface site)
inlinestaticinherited

Definition at line 130 of file SymbolListTools.h.

References bpp::SymbolListTools::numberOfUnresolved().

◆ numberOfUnresolved() [2/3]

size_t SymbolListTools::numberOfUnresolved ( const IntSymbolListInterface site)
staticinherited

◆ numberOfUnresolved() [3/3]

size_t SymbolListTools::numberOfUnresolved ( const ProbabilisticSymbolListInterface site)
staticinherited

Definition at line 200 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ removeGaps()

void SequenceTools::removeGaps ( SequenceInterface seq)
static

Remove gaps from a sequence.

The deleteElement method of the Sequence object will be used where appropriate.

Parameters
seqThe sequence to analyse.

Definition at line 294 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::deleteElement(), bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SiteContainerTools::alignNW().

◆ removeStops()

void SequenceTools::removeStops ( SequenceInterface seq,
const GeneticCode gCode 
)
static

Remove stops from a codon sequence.

The deleteElement method of the Sequence object will be used where appropriate.

Parameters
seqThe sequence to analyse.
gCodeThe genetic code according to which stop codons are specified.
Exceptions
Exceptionif the input sequence does not have a codon alphabet.

Definition at line 324 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::deleteElement(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::GeneticCode::isStop(), and bpp::CruxSymbolListInterface::size().

◆ replaceStopsWithGaps()

void SequenceTools::replaceStopsWithGaps ( SequenceInterface seq,
const GeneticCode gCode 
)
static

Replace stop codons by gaps.

The setElement method of the Sequence object will be used where appropriate.

Parameters
seqThe sequence to analyse.
gCodeThe genetic code according to which stop codons are specified.
Exceptions
Exceptionif the input sequence does not have a codon alphabet.

Definition at line 338 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), bpp::GeneticCode::isStop(), bpp::IntSymbolListInterface::setElement(), and bpp::CruxSymbolListInterface::size().

◆ reverseTranscript()

unique_ptr< Sequence > SequenceTools::reverseTranscript ( const Sequence sequence)
static

Get the reverse-transcription sequence of a RNA sequence.

Translate RNA sequence into DNA sequence.

See also
DNAReplication
Returns
sequence A new sequence object with the reverse-transcription sequence.
Parameters
sequenceThe sequence to reverse-transcribe.
Exceptions
AlphabetExceptionIf the sequence is not a RNA sequence.

Definition at line 108 of file SequenceTools.cpp.

References bpp::AbstractTemplateSymbolList< T >::alphabet(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), and bpp::AlphabetTools::isRNAAlphabet().

◆ RNYslice() [1/2]

unique_ptr< Sequence > SequenceTools::RNYslice ( const SequenceInterface sequence)
static

Get the RNY decomposition of a DNA sequence.

This function gives the alternative succession in phases 1, 2 and 3.

Returns
A new sequence object with the RNY decomposition of the input sequence.
Parameters
sequenceThe sequence to decompose.
Exceptions
AlphabetExceptionIf the sequence is not a DNA sequence.
Author
Laurent Guéguen

Definition at line 569 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::Commentable::getComments(), bpp::CoreSequenceInterface::getName(), bpp::AlphabetTools::isDNAAlphabet(), and bpp::CruxSymbolListInterface::size().

◆ RNYslice() [2/2]

unique_ptr< Sequence > SequenceTools::RNYslice ( const SequenceInterface sequence,
int  ph 
)
static

Get the RNY decomposition of a DNA sequence.

This function gives the decomposition in the given phase. In phase 1, the first triplet is centered on the first character.

Returns
A new sequence object with the RNY decomposition of the input sequence in the given phase.
Parameters
sequenceThe sequence to decompose.
phThe phase to use (1,2 or 3).
Exceptions
AlphabetExceptionIf the sequence is not a DNA sequence.
Author
Laurent Guéguen

Definition at line 527 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::alphabet(), bpp::CruxSymbolListInterface::getAlphabet(), bpp::Commentable::getComments(), bpp::CoreSequenceInterface::getName(), bpp::AlphabetTools::isDNAAlphabet(), bpp::CruxSymbolListInterface::size(), and bpp::TextTools::toString().

Referenced by bpp::SequenceApplicationTools::getProbabilisticSiteContainer(), and bpp::SequenceApplicationTools::getSiteContainer().

◆ subseq() [1/2]

template<class SequenceTypeOut >
static std::unique_ptr<SequenceTypeOut> bpp::SequenceTools::subseq ( const SequenceInterface sequence,
size_t  begin,
size_t  end 
)
inlinestatic

Get a sub-sequence.

Parameters
sequenceThe sequence to truncate.
beginThe first position of the subsequence.
endThe last position of the subsequence.
Returns
A new sequence object with the given subsequence.

Definition at line 112 of file SequenceTools.h.

References bpp::CruxSymbolListInterface::getAlphabet(), bpp::Commentable::getComments(), bpp::CoreSequenceInterface::getName(), and subseq().

◆ subseq() [2/2]

static void bpp::SequenceTools::subseq ( const SequenceInterface sequence,
size_t  begin,
size_t  end,
SequenceInterface output 
)
inlinestatic

Get a sub-sequence.

Parameters
sequenceThe sequence to truncate.
beginThe first position of the subsequence.
endThe last position of the subsequence (included).
outputA sequence object to be appended with the given subsequence.

Definition at line 91 of file SequenceTools.h.

References bpp::SequenceInterface::append(), bpp::CruxSymbolListInterface::size(), and bpp::TextTools::toString().

Referenced by subseq().

◆ subtractHaplotype()

unique_ptr< Sequence > SequenceTools::subtractHaplotype ( const SequenceInterface s,
const SequenceInterface h,
std::string  name = "",
unsigned int  level = 1 
)
static

Subtract a haplotype from a heterozygous sequence.

Subtract a haplotype (i.e. a fully resolved sequence) from a heterozygous sequence to get the other haplotype. The new haplotype could be an unresolved sequence if unresolved characters in the sequence code for more than 2 states.

For example:

>heterozygous sequence
ATTCGGGKWTATRYRM
>haplotype
ATTCGGGTATATGCAA
>subtracted haplotype
ATTCGGGGTTATATGC
Parameters
sThe heterozygous sequence.
hThe haplotype to subtract.
nameThe name of the new computed haplotype.
levelThe number of states from which the site is set to fully unresolved.
Exceptions
SequenceNotAlignedExceptionif s and h don't have the same size.
Author
Sylvain Gaillard

Definition at line 489 of file SequenceTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), bpp::CoreSequenceInterface::getName(), bpp::TemplateCoreSymbolListInterface< T >::getValue(), and bpp::CruxSymbolListInterface::size().

◆ transcript()

unique_ptr< Sequence > SequenceTools::transcript ( const Sequence sequence)
static

Get the transcription sequence of a DNA sequence.

Translate DNA sequence into RNA sequence.

See also
DNAReplication
Returns
sequence A new sequence object with the transcription sequence.
Parameters
sequenceThe sequence to transcribe.
Exceptions
AlphabetExceptionIf the sequence is not a DNA sequence.

Definition at line 95 of file SequenceTools.cpp.

References bpp::AbstractTemplateSymbolList< T >::alphabet(), bpp::AbstractTemplateSymbolList< T >::getAlphabet(), and bpp::AlphabetTools::isDNAAlphabet().

◆ variabilityFactorial()

double SymbolListTools::variabilityFactorial ( const IntSymbolListInterface list)
staticinherited

Compute the factorial diversity index of a site.

\[ F = \frac{\log\left(\left(\sum_x p_x\right)!\right)}{\sum_x \log(p_x)!} \]

where $p_x$ is the number of times state $x$ is observed in the site.

Author
J. Dutheil
Parameters
listA list.
Returns
The factorial diversity index of this list.

Definition at line 744 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::size().

◆ variabilityShannon()

double SymbolListTools::variabilityShannon ( const CruxSymbolListInterface list,
bool  resolveUnknowns 
)
staticinherited

Compute the Shannon entropy index of a SymbolList.

\[ I = - \sum_x f_x\cdot \ln(f_x) \]

where $f_x$ is the frequency of state $x$.

Author
J. Dutheil
Parameters
listA list.
resolveUnknownsTell if unknown characters must be resolved.
Returns
The Shannon entropy index of this list.

Definition at line 632 of file SymbolListTools.cpp.

References bpp::CruxSymbolListInterface::getAlphabet(), and bpp::CruxSymbolListInterface::size().

Referenced by bpp::SymbolListTools::entropy().

Member Data Documentation

◆ DNARep_

NucleicAcidsReplication SequenceTools::DNARep_
staticprivate

Definition at line 67 of file SequenceTools.h.

◆ RNARep_

NucleicAcidsReplication SequenceTools::RNARep_
staticprivate

Definition at line 68 of file SequenceTools.h.

◆ RNY_

shared_ptr< RNY > SequenceTools::RNY_
staticprivate

Definition at line 66 of file SequenceTools.h.

◆ transc_

NucleicAcidsReplication SequenceTools::transc_
staticprivate

Definition at line 69 of file SequenceTools.h.


The documentation for this class was generated from the following files: