5 #ifndef BPP_SEQ_CONTAINER_SEQUENCECONTAINERTOOLS_H
6 #define BPP_SEQ_CONTAINER_SEQUENCECONTAINERTOOLS_H
16 #include "../SymbolListTools.h"
19 #include "../Alphabet/CodonAlphabet.h"
50 template<
class SequenceType,
class HashType>
54 for (
size_t i = 0; i < nbSeq; ++i)
56 if (sc.
sequence(i).getName() == name)
77 template<
class SequenceType,
class HashType>
78 static std::unique_ptr< TemplateSequenceContainerInterface<SequenceType, HashType>>
createContainerOfSpecifiedSize(std::shared_ptr<const Alphabet>& alphabet,
size_t size)
80 auto vsc = std::make_unique< TemplateVectorSequenceContainer<SequenceType>>(alphabet);
81 for (
size_t i = 0; i < size; ++i)
101 template<
class SequenceType,
class HashType>
103 std::shared_ptr<const Alphabet>& alphabet,
104 const std::vector<std::string>& seqNames)
106 auto sc = createContainerOfSpecifiedSize<SequenceType, HashType>(alphabet, seqNames.size());
107 sc->setSequenceNames(seqNames,
true);
124 template<
class ContFrom,
class ContTo,
class SequenceType>
127 for (
size_t i = 0; i < input.getNumberOfSequences(); ++i)
129 auto seq = std::make_unique<SequenceType>(input.sequence(i));
130 output.addSequence(seq->getName(), seq);
149 template<
class SequenceType,
class HashType>
155 for (
size_t position : selection)
157 auto seq = std::make_unique<SequenceType>(sequences.
sequence(position));
180 template<
class SequenceType,
class HashType>
183 const std::vector<std::string>& selection,
187 for (
const std::string& key : selection)
191 auto seq = std::make_unique<SequenceType>(sequences.
sequence(key));
198 auto seq = std::make_unique<SequenceType>(sequences.
sequence(key));
218 template<
class SequenceType,
class HashType>
224 std::vector<std::string> selectedKeys = VectorTools::extract<std::string>(keys, selection);
225 std::vector<std::string> keysToRemove;
227 for (
const std::string& key : keysToRemove)
242 template<
class SequenceType,
class HashType>
247 size_t ns = sc.getNumberOfSequences();
250 size_t length = sc.sequence(0).size();
251 for (
size_t i = 1; i < ns; ++i)
253 if (sc.sequence(i).size() != length)
277 for (
size_t j = 0; j < seq.
size(); ++j)
299 std::map<int, double>& f,
300 double pseudoCount = 0)
307 n +=
static_cast<double>(seq.
size());
310 if (pseudoCount != 0)
312 std::shared_ptr<const Alphabet> pA = sc.
getAlphabet();
313 for (
int i = 0; i < static_cast<int>(pA->getSize()); ++i)
317 n += pseudoCount *
static_cast<double>(pA->getSize());
322 i.second = i.second / n;
341 std::map<int, double>& f,
342 double pseudoCount = 0)
349 n +=
static_cast<double>(seq.
size());
352 if (pseudoCount != 0)
354 std::shared_ptr<const Alphabet> pA = sc.
getAlphabet();
355 for (
int i = 0; i < static_cast<int>(pA->getSize()); ++i)
359 n += pseudoCount *
static_cast<double>(pA->getSize());
364 i.second = i.second / n;
377 std::map<int, double>& f,
378 double pseudoCount = 0)
385 catch (std::bad_cast&) {}
390 catch (std::bad_cast&)
392 throw Exception(
"SequenceContainerTools::getFrequencies : unsupported SequenceDataInterface implementation.");
402 template<
class SequenceType,
class HashType>
410 auto tm = std::unique_ptr<SequenceType>(seqCont2.
sequence(i).clone());
428 template<
class SequenceType,
class HashType>
439 auto tmp = std::unique_ptr<SequenceType>(seqCont1.
sequence(key).clone());
440 tmp->append(seqCont2.
sequence(key));
451 template<
class SequenceType,
class HashType>
460 std::string seqName = seqCont.
sequence(i).getName();
461 std::string seqKey = sequenceKeys[i];
463 auto seq = std::unique_ptr<SequenceType>(
new SequenceType(seqName, seqCont.
sequence(i).toString(), alpha));
476 template<
class SequenceType>
477 static std::unique_ptr< TemplateSequenceContainerInterface<SequenceType>>
482 auto calpha = std::dynamic_pointer_cast<const CodonAlphabet>(sequences.
getAlphabet());
485 auto newcont = std::make_unique< TemplateVectorSequenceContainer<SequenceType>>(calpha->getNucleicAlphabet());
488 const SequenceType& seq = sequences.
sequence(i);
489 std::vector<int> newseq(seq.size());
490 for (
size_t j = 0; j < seq.size(); ++j)
492 newseq[i] = calpha->getNPosition(seq[i], pos);
494 std::shared_ptr<const bpp::Alphabet> na = calpha->getNucleicAlphabet();
495 auto s = std::make_unique<SequenceType>(seq.getName(), newseq, seq.getComments(), na);
size_t size() const override
Get the number of elements in the list.
The alphabet exception base class.
Exception thrown when two alphabets do not match.
A basic implementation of the ProbabilisticSequence interface.
size_t size() const override
Get the number of elements in the list.
A basic implementation of the Sequence interface.
virtual void addSequence(const HashType &sequenceKey, std::unique_ptr< SequenceType > &sequencePtr)=0
Add a sequence to the container.
virtual std::unique_ptr< SequenceType > removeSequence(const HashType &sequenceKey)=0
Remove a sequence from the container.
virtual const SequenceType & sequence(const HashType &sequenceKey) const override=0
Retrieve a sequence object from the container.
virtual bool hasSequence(const HashType &sequenceKey) const =0
Check if a certain key is associated to a sequence in the container.
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
virtual std::vector< HashType > getSequenceKeys() const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get a pointer toward the container's alphabet.
std::string toString(T t)
This alphabet is used to deal NumericAlphabet.
std::vector< size_t > SiteSelection
std::vector< size_t > SequenceSelection