bpp-seq3  3.0.0
AlignedSequenceContainer.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_SEQ_CONTAINER_ALIGNEDSEQUENCECONTAINER_H
6 #define BPP_SEQ_CONTAINER_ALIGNEDSEQUENCECONTAINER_H
7 
8 #include <Bpp/Exceptions.h>
9 
10 #include "../Site.h"
11 #include "../SiteTools.h"
12 #include "../ProbabilisticSite.h"
13 #include "../Sequence.h"
14 #include "../ProbabilisticSequence.h"
15 #include "SiteContainer.h"
18 
19 // From the STL:
20 #include <string>
21 
22 namespace bpp
23 {
38 template<class SequenceType, class SiteType>
40  public TemplateVectorSequenceContainer<SequenceType>,
41  public virtual TemplateSiteContainerInterface<SiteType, SequenceType, std::string>
42 {
43 private:
45 
46  // std::vector that contains the site coordinates
47  std::vector<int> coordinates_;
48 
49  size_t length_; // Number of sites for verifications before sequence's insertion in sequence container
50 
51 public:
61  std::shared_ptr<const Alphabet> alphabet,
62  std::vector< std::unique_ptr<Sequence>> vs) :
63  TemplateVectorSequenceContainer<SequenceType>(alphabet, vs),
64  VectorPositionedContainer<SiteType>(),
65  coordinates_(),
66  length_(0)
67  {
68  if (vs.size() == 0)
69  return;
70 
71  length_ = vs[0]->size();
72  for (const auto& seq: vs)
73  {
74  if (!checkSize_(*seq))
75  {
76  throw BadSizeException("AlignedSequenceContainer: sequences of different sizes in aligned construction", length_, seq->size());
77  }
78  }
80  reindexSites();
81  }
82 
88  TemplateAlignedSequenceContainer(std::shared_ptr<const Alphabet> alphabet) :
89  // AbstractSequenceContainer<SequenceType>(alphabet),
91  siteVector_(),
92  coordinates_(),
93  length_(0)
94  {
95  reindexSites();
96  }
97 
104  TemplateVectorSequenceContainer<SequenceType>(asc),
105  siteVector_(asc.getNumberOfSites()), // Cache is not copied
108  {}
109 
116  TemplateVectorSequenceContainer<SequenceType>(sc),
117  siteVector_(sc.getNumberOfSites()), // Cache is not copied
120  {}
121 
131  TemplateVectorSequenceContainer<SequenceType>(sc.getAlphabet()),
132  siteVector_(),
133  coordinates_(),
134  length_()
135  {
136  // Initializing
137  for (size_t i = 0; i < sc.getNumberOfSequences(); ++i)
138  {
139  addSequence(sc.getSequence(i), true);
140  }
141  if (sc.getNumberOfSequences() > 0)
142  {
143  length_ = sequence(0).size();
144  }
145  else
146  {
147  length_ = 0;
148  }
149  reindexSites();
151  setComments(sc.getComments());
152  }
153 
155  {
157 
158  // Initializing
159  length_ = asc.getNumberOfSites();
161  siteVector_.setSize(length_); // Reset the cache
163 
164  return *this;
165  }
166 
168  {
170 
171  // Initializing
172  length_ = sc.getNumberOfSites();
174  siteVector_.setSize(length_); // Reset the cache
176 
177  return *this;
178  }
179 
181  {
183 
184  // Initializing
185  length_ = sc.getNumberOfSequences() == 0 ? 0 : sc.sequence(0).getSize();
186  reindexSites();
187  siteVector_.setSize(length_); // Reset the cache
189 
190  return *this;
191  }
192 
194 
195 public:
202  {
204  }
212  const SiteType& site(size_t sitePosition) const override
213  {
214  if (siteVector_.hasObjectWithPosition(sitePosition))
215  {
216  return *siteVector_.getObject(sitePosition);
217  }
218 
219  if (sitePosition >= length_)
220  throw IndexOutOfBoundsException("AlignedSequenceContainer::getSite", sitePosition, 0, getNumberOfSites());
221 
222  // Main loop: for all sequences
223  size_t n = getNumberOfSequences();
224  auto alphaPtr = getAlphabet();
225  auto site = std::shared_ptr<SiteType>(
226  new SiteType(alphaPtr, coordinates_[sitePosition]),
228 
229  for (size_t j = 0; j < n; j++)
230  {
231  site->addElement(sequence(j)[sitePosition]);
232  }
233 
234  siteVector_.addObject_(site, sitePosition, true);
235  return *siteVector_.getObject(sitePosition);
236  }
237 
238  void setSite(size_t sitePosition, std::unique_ptr<SiteType>& site, bool checkCoordinate = true) override
239  {
240  // New site's alphabet and site container's alphabet matching verification
241  if (sitePosition >= getNumberOfSites())
242  throw IndexOutOfBoundsException("AlignedSequenceContainer::setSite", sitePosition, 0, getNumberOfSites());
243  if (site->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
244  throw AlphabetMismatchException("AlignedSequenceContainer::setSite", getAlphabet(), site->getAlphabet());
245 
246  // Check size:
247  if (site->size() != getNumberOfSequences())
248  throw SiteException("AlignedSequenceContainer::setSite, site does not have the appropriate length", site.get());
249 
250  // Check coordinate:
251  if (checkCoordinate)
252  {
253  int coordinate = site->getCoordinate();
254  // For all coordinates in vector: throw exception if position already exists
255  for (size_t i = 0; i < getNumberOfSites(); ++i)
256  {
257  if (i != sitePosition && coordinates_[i] == coordinate)
258  throw SiteException("AlignedSequenceContainer::setSite: Site position already exists in container", site.get());
259  }
260  }
261 
262  // For all sequences
263  for (size_t j = 0; j < getNumberOfSequences(); ++j)
264  {
265  sequence_(j).setElement(sitePosition, (*site)[j]);
266  }
267 
268  // Reset site buffer for this position:
269  siteVector_.addObject(nullptr, sitePosition);
270 
271  coordinates_[sitePosition] = site->getCoordinate();
272  }
273 
274 
275  std::unique_ptr<SiteType> removeSite(size_t sitePosition) override
276  {
277  if (sitePosition >= getNumberOfSites())
278  throw IndexOutOfBoundsException("AlignedSequenceContainer::removeSite", sitePosition, 0, getNumberOfSites());
279 
280  // Get old site
281  site(sitePosition); // Creates the site if it does not exist!
282 
283  // For all sequences
284  for (size_t j = 0; j < getNumberOfSequences(); ++j)
285  {
286  sequence_(j).deleteElement(sitePosition);
287  }
288 
289  // Delete site's position
290  coordinates_.erase(coordinates_.begin() + static_cast<ptrdiff_t>(sitePosition));
291  length_--;
292 
293  // Actualizes the 'sites' vector:
294  auto sitePtr = siteVector_.removeObject(sitePosition);
295  std::get_deleter< SwitchDeleter<SiteType>>(sitePtr)->off();
296  return std::unique_ptr<SiteType>(sitePtr.get());
297  }
298 
299 
300  void deleteSite(size_t sitePosition) override
301  {
302  if (sitePosition >= getNumberOfSites())
303  throw IndexOutOfBoundsException("AlignedSequenceContainer::deleteSite", sitePosition, 0, getNumberOfSites());
304 
305  // For all sequences
306  for (size_t j = 0; j < getNumberOfSequences(); ++j)
307  {
308  sequence_(j).deleteElement(sitePosition);
309  }
310 
311  // Delete site's position
312  coordinates_.erase(coordinates_.begin() + static_cast<ptrdiff_t>(sitePosition));
313  length_--;
314 
315  // Actualizes the 'sites' vector:
316  siteVector_.deleteObject(sitePosition);
317  }
318 
319 
320  void deleteSites(size_t sitePosition, size_t length) override
321  {
322  if (sitePosition + length > getNumberOfSites())
323  throw IndexOutOfBoundsException("AlignedSequenceContainer::deleteSites", sitePosition + length, 0, getNumberOfSites());
324 
325  // For all sequences
326  for (size_t j = 0; j < getNumberOfSequences(); ++j)
327  {
328  sequence_(j).deleteElements(sitePosition, length);
329  }
330 
331  // Delete site's sitePositionition
332  coordinates_.erase(coordinates_.begin() + static_cast<ptrdiff_t>(sitePosition),
333  coordinates_.begin() + static_cast<ptrdiff_t>(sitePosition + length));
334  length_ -= length;
335 
336  // Actualizes the 'sites' vector:
337  siteVector_.deleteObjects(sitePosition, length);
338  }
339 
340 
341  void addSite(std::unique_ptr<SiteType>& site, bool checkCoordinate = true) override
342  {
343  // New site's alphabet and site container's alphabet matching verification
344  if (site->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
345  throw AlphabetMismatchException("AlignedSequenceContainer::addSite", site->getAlphabet(), getAlphabet());
346 
347  // Check size:
348  if (site->size() != getNumberOfSequences())
349  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", site.get());
350 
351  // Check position:
352  int coordinate = site->getCoordinate();
353  if (checkCoordinate)
354  {
355  // For all positions in vector : throw exception if position already exists
356  for (size_t i = 0; i < coordinates_.size(); ++i)
357  {
358  if (coordinates_[i] == coordinate)
359  throw SiteException("AlignedSequenceContainer::addSite: Site coordinate already exists in container", site.get());
360  }
361  }
362 
363  // For all sequences
364  for (size_t j = 0; j < getNumberOfSequences(); ++j)
365  {
366  sequence_(j).addElement((*site)[j]);
367  }
368 
369  length_++;
370  coordinates_.push_back(coordinate);
371 
372  // Actualizes the 'sites' vector:
373  siteVector_.appendObject(nullptr);
374  }
375 
376  void addSite(std::unique_ptr<SiteType>& site, size_t sitePosition, bool checkCoordinate = true) override
377  {
378  if (sitePosition >= getNumberOfSites())
379  throw IndexOutOfBoundsException("AlignedSequenceContainer::addSite", sitePosition, 0, getNumberOfSites());
380 
381  // New site's alphabet and site container's alphabet matching verification
382  if (site->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
383  throw AlphabetMismatchException("AlignedSequenceContainer::addSite", getAlphabet(), site->getAlphabet());
384 
385  // Check size:
386  if (site->size() != getNumberOfSequences())
387  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", site.get());
388 
389  // Check position:
390  int coordinate = site->getCoordinate();
391  if (checkCoordinate)
392  {
393  // For all positions in vector : throw exception if position already exists
394  for (size_t i = 0; i < coordinates_.size(); ++i)
395  {
396  if (coordinates_[i] == coordinate)
397  throw SiteException("AlignedSequenceContainer::addSite: Site coordinate already exists in container", site.get());
398  }
399  }
400 
401  // For all sequences
402  for (size_t j = 0; j < getNumberOfSequences(); ++j)
403  {
404  sequence_(j).addElement(sitePosition, (*site)[j]);
405  }
406 
407  length_++;
408  coordinates_.insert(coordinates_.begin() + static_cast<ptrdiff_t>(sitePosition), coordinate);
409 
410  // Actualizes the 'sites' vector:
411  siteVector_.insertObject(nullptr, sitePosition);
412  }
413 
414  void clear() override
415  {
416  length_ = 0;
418  siteVector_.clear();
419  }
420 
421 
423  {
424  auto alphaPtr = getAlphabet();
425  auto asc = new TemplateAlignedSequenceContainer(alphaPtr);
426  asc->setComments(getComments());
427  return asc;
428  }
429 
430 
431  size_t getNumberOfSites() const override { return length_; }
432 
433  Vint getSiteCoordinates() const override { return coordinates_; }
434 
435  void setSiteCoordinates(const Vint& vCoordinates) override
436  {
437  if (vCoordinates.size() != getNumberOfSites())
438  throw BadSizeException("AlignedSequenceContainer::setSiteCoordinates bad size of positions vector", vCoordinates.size(), getNumberOfSites());
439 
440  for (size_t i = 0; i < vCoordinates.size(); ++i)
441  {
442  coordinates_[i] = vCoordinates[i];
443  }
444  }
445 
446 
447  void reindexSites() override
448  {
449  coordinates_.resize(length_);
450  for (size_t i = 0; i < length_; ++i)
451  {
452  coordinates_[i] = static_cast<int>(i + 1); // starts with 1.
453  }
454  }
455 
463  void setSequence(const std::string& sequenceKey, std::unique_ptr<SequenceType>& sequencePtr) override
464  {
465  // if container has only one sequence
466  if (getNumberOfSequences() == 1)
467  length_ = sequencePtr->size();
468  if (checkSize_(*sequencePtr))
470  else
471  throw SequenceNotAlignedException("AlignedSequenceContainer::setSequence", sequencePtr.get());
472  }
473 
474 
475  void addSequence(const std::string& sequenceKey, std::unique_ptr<SequenceType>& sequencePtr) override
476  {
477  // if container has only one sequence
478  if (length_ == 0)
479  {
480  length_ = sequencePtr->size();
482  reindexSites();
483  }
484  if (checkSize_(*sequencePtr))
486  else
487  throw SequenceNotAlignedException("AlignedSequenceContainer::addSequence", sequencePtr.get());
488  }
489 
490 
491  void setSequence(size_t sequencePosition, std::unique_ptr<SequenceType>& sequencePtr) override
492  {
493  if (sequencePosition >= getNumberOfSequences())
494  throw IndexOutOfBoundsException("AlignedSequenceContainer::setSequence", sequencePosition, 0, getNumberOfSequences());
495  // if container has only one sequence
496  if (getNumberOfSequences() == 1)
497  length_ = sequencePtr->size();
498  if (checkSize_(*sequencePtr))
499  TemplateVectorSequenceContainer<SequenceType>::setSequence(sequencePosition, sequencePtr);
500  else
501  throw SequenceNotAlignedException("AlignedSequenceContainer::setSequence", sequencePtr.get());
502 
503  // Destroys all sites (but keep Site Container at same size)
505  }
506 
507 
508  void setSequence(size_t sequencePosition, std::unique_ptr<SequenceType>& sequencePtr, const std::string& sequenceKey) override
509  {
510  if (sequencePosition >= getNumberOfSequences())
511  throw IndexOutOfBoundsException("AlignedSequenceContainer::setSequence", sequencePosition, 0, getNumberOfSequences());
512  // if container has only one sequence
513  if (getNumberOfSequences() == 1)
514  length_ = sequencePtr->size();
515  if (checkSize_(*sequencePtr))
517  else
518  throw SequenceNotAlignedException("AlignedSequenceContainer::setSequence", sequencePtr.get());
519 
520  // Destroys all sites (but keep Site Container at same size)
522  }
523 
524 
525  void insertSequence(size_t sequencePosition, std::unique_ptr<SequenceType>& sequencePtr, const std::string& sequenceKey) override
526  {
527  if (sequencePosition >= getNumberOfSequences())
528  throw IndexOutOfBoundsException("AlignedSequenceContainer::insertSequence", sequencePosition, 0, getNumberOfSequences());
529  // if container has only one sequence
530  if (getNumberOfSequences() == 1)
531  length_ = sequencePtr->size();
532  if (checkSize_(*sequencePtr))
534  else
535  throw SequenceNotAlignedException("AlignedSequenceContainer::insertSequence", sequencePtr.get());
536 
537  // Destroys all sites (but keep Site Container at same size)
539  }
540 
543  // Needed because of the template class
550 
551 
552  typename SequenceType::ElementType& valueAt(const std::string& sequenceKey, size_t sitePosition) override
553  {
554  // Reset site buffer for this position:
555  siteVector_.addObject(nullptr, sitePosition);
556  return sequence_(sequenceKey)[sitePosition];
557  }
558 
559  typename SequenceType::ElementType& valueAt(const size_t sequencePosition, size_t sitePosition) override
560  {
561  // Reset site buffer for this position:
562  siteVector_.addObject(nullptr, sitePosition);
563  return sequence_(sequencePosition)[sitePosition];
564  }
565 
566 protected:
573  bool checkSize_(const Sequence& sequenceRef) { return sequenceRef.size() == length_; }
574 };
575 
576 // Aliases:
579 } // end of namespace bpp.
580 #endif // BPP_SEQ_CONTAINER_ALIGNEDSEQUENCECONTAINER_H
std::shared_ptr< const Alphabet > getAlphabet() const override
size_t size() const override
Get the number of elements in the list.
Definition: SymbolList.h:124
Exception thrown when two alphabets do not match.
virtual const Comments & getComments() const =0
Get the comments.
Exception thrown when a sequence is not aligned with the others.
A basic implementation of the Sequence interface.
Definition: Sequence.h:117
const Comments & getComments() const override
Get the comments.
Definition: Commentable.h:79
void setComments(const Comments &comments) override
Set the comments.
Definition: Commentable.h:86
The site exception base class.
The AlignedSequenceContainer class.
TemplateAlignedSequenceContainer(std::shared_ptr< const Alphabet > alphabet)
Build a new empty container with the specified alphabet.
void insertSequence(size_t sequencePosition, std::unique_ptr< SequenceType > &sequencePtr, const std::string &sequenceKey) override
Insert a sequence in the container.
Vint getSiteCoordinates() const override
Get all coordinates of sites.
void setSequence(size_t sequencePosition, std::unique_ptr< SequenceType > &sequencePtr, const std::string &sequenceKey) override
Replace a sequence in the container.
std::unique_ptr< SiteType > removeSite(size_t sitePosition) override
Remove a site from the container.
TemplateAlignedSequenceContainer & operator=(const TemplateSequenceContainerInterface< SequenceType, std::string > &sc)
TemplateAlignedSequenceContainer(const TemplateAlignedSequenceContainer< SequenceType, SiteType > &asc)
Copy constructor.
SequenceType::ElementType & valueAt(const size_t sequencePosition, size_t sitePosition) override
Get the content of the dataset at a specific position (sequence position, site position).
void clear() override
Delete all data in the container.
size_t getNumberOfSites() const override
Get the number of aligned positions in the container.
void deleteSites(size_t sitePosition, size_t length) override
Remove a continuous range of sites in the container.
void setSequence(size_t sequencePosition, std::unique_ptr< SequenceType > &sequencePtr) override
Replace a sequence in the container.
TemplateAlignedSequenceContainer< SequenceType, SiteType > & operator=(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sc)
VectorPositionedContainer< SiteType > siteVector_
TemplateAlignedSequenceContainer(std::shared_ptr< const Alphabet > alphabet, std::vector< std::unique_ptr< Sequence >> vs)
Build a container with pointers to sequence objects.
void deleteSite(size_t sitePosition) override
Delete a site from the container.
bool checkSize_(const Sequence &sequenceRef)
Check sequence's size before insertion in sequence container.
TemplateAlignedSequenceContainer< SequenceType, SiteType > * clone() const override
void setSite(size_t sitePosition, std::unique_ptr< SiteType > &site, bool checkCoordinate=true) override
Set a site in the container.
SequenceType::ElementType & valueAt(const std::string &sequenceKey, size_t sitePosition) override
Get the content of the dataset at a specific position (sequence key, site position).
TemplateAlignedSequenceContainer & operator=(const TemplateAlignedSequenceContainer< SequenceType, SiteType > &asc)
TemplateAlignedSequenceContainer(const TemplateSiteContainerInterface< SiteType, SequenceType, std::string > &sc)
Convert any SiteContainer object into a AlignedSequenceContainer object.
TemplateAlignedSequenceContainer(const TemplateSequenceContainerInterface< SequenceType, std::string > &sc)
Try to coerce a SequenceContainer object into an AlignedSequenceContainer object.
void setSiteCoordinates(const Vint &vCoordinates) override
Set all coordinates of sites.
void addSite(std::unique_ptr< SiteType > &site, bool checkCoordinate=true) override
Add a site in the container.
void addSite(std::unique_ptr< SiteType > &site, size_t sitePosition, bool checkCoordinate=true) override
Add a site in the container.
void addSequence(const std::string &sequenceKey, std::unique_ptr< SequenceType > &sequencePtr) override
Add a sequence to the container.
TemplateAlignedSequenceContainer * createEmptyContainer() const override
Return a copy of this container, but with no data inside.
const SiteType & site(size_t sitePosition) const override
Get a site from the container.
void reindexSites() override
Set all coordinate attributes.
void setSequence(const std::string &sequenceKey, std::unique_ptr< SequenceType > &sequencePtr) override
Replace a sequence in the container.
virtual Vint getSiteCoordinates() const =0
Get all coordinates of sites.
virtual const SequenceType & sequence(const HashType &sequenceKey) const override=0
Retrieve a sequence object from the container.
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
The SiteContainer interface.
Definition: SiteContainer.h:29
virtual size_t getNumberOfSites() const override=0
Get the number of aligned positions in the container.
The VectorSequenceContainer class.
void addSequence(const std::string &sequenceKey, std::unique_ptr< SequenceType > &sequencePtr) override
Add a sequence to the container.
void clear() override
Delete all data in the container.
TemplateVectorSequenceContainer & operator=(const TemplateVectorSequenceContainer &vsc)
Assign from a VectorSequenceContainer.
size_t getNumberOfSequences() const override
Get the number of sequences in the container.
void setSequence(const std::string &sequenceKey, std::unique_ptr< SequenceType > &sequencePtr) override
Replace a sequence in the container.
const std::string & sequenceKey(size_t sequencePosition) const override
Get the key associated to a given sequence.
void insertSequence(size_t sequencePosition, std::unique_ptr< SequenceType > &sequencePtr, const std::string &sequenceKey) override
Insert a sequence in the container.
const SequenceType & sequence(const std::string &sequenceKey) const override
Retrieve a sequence object from the container.
virtual SequenceType & sequence_(size_t sequencePosition)
std::shared_ptr< T > removeObject(size_t objectIndex) override
Extract and remove a object from the container.
const std::shared_ptr< T > getObject(size_t objectIndex) const override
Retrieve an object from the container.
void deleteObject(size_t objectIndex) override
Delete an object from the container.
void appendObject(std::shared_ptr< T > object)
bool hasObjectWithPosition(size_t objectIndex) const
void addObject(std::shared_ptr< T > object, size_t objectIndex, bool checkPosition=false)
Add an object.
void insertObject(std::shared_ptr< T > object, size_t objectIndex)
Insert an object.
void addObject_(std::shared_ptr< T > object, size_t objectIndex, bool checkPosition=false) const
virtual void nullify()
Nullify all elements.
void deleteObjects(size_t objectIndex, size_t length)
void clear() override
Destroys the vector.
This alphabet is used to deal with NumericAlphabet.
std::vector< int > Vint