bpp-seq3  3.0.0
AlignedSequenceContainer.cpp
Go to the documentation of this file.
1 //
2 // File: AlignedSequenceContainer.cpp
3 // Authors:
4 // Guillaume Deuchst
5 // Julien Dutheil
6 // Created: 2003-08-22 00:00:00
7 //
8 
9 /*
10  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
11 
12  This software is a computer program whose purpose is to provide classes
13  for sequences analysis.
14 
15  This software is governed by the CeCILL license under French law and
16  abiding by the rules of distribution of free software. You can use,
17  modify and/ or redistribute the software under the terms of the CeCILL
18  license as circulated by CEA, CNRS and INRIA at the following URL
19  "http://www.cecill.info".
20 
21  As a counterpart to the access to the source code and rights to copy,
22  modify and redistribute granted by the license, users are provided only
23  with a limited warranty and the software's author, the holder of the
24  economic rights, and the successive licensors have only limited
25  liability.
26 
27  In this respect, the user's attention is drawn to the risks associated
28  with loading, using, modifying and/or developing or reproducing the
29  software by the user in light of its specific status of free software,
30  that may mean that it is complicated to manipulate, and that also
31  therefore means that it is reserved for developers and experienced
32  professionals having in-depth computer knowledge. Users are therefore
33  encouraged to load and test the software's suitability as regards their
34  requirements in conditions enabling the security of their systems and/or
35  data to be ensured and, more generally, to use and operate it in the
36  same conditions as regards security.
37 
38  The fact that you are presently reading this means that you have had
39  knowledge of the CeCILL license and that you accept its terms.
40 */
41 
42 #include <Bpp/Text/TextTools.h>
43 
45 
46 using namespace bpp;
47 
48 // From the STL:
49 #include <iostream>
50 
51 using namespace std;
52 
53 /***************************************************************************/
54 
// Builds an aligned container from a vector of shared sequences and an alphabet.
// The alignment length is taken from the first sequence; every following
// sequence is validated with checkSize_() and a BadSizeException is thrown on
// the first one that fails. An empty vector yields an empty container with
// length_ == 0 (site positions are not indexed in that case, see the early return).
// NOTE(review): embedded lines 56, 58 and 70 are absent from this listing, so
// some initializers/statements may be missing here — confirm against the original file.
55 AlignedSequenceContainer::AlignedSequenceContainer(std::vector<std::shared_ptr<Sequence>> vseq, const Alphabet* alpha):
57  VectorSequenceContainer(vseq, alpha),
59  positions_(),
60  length_(0)
61 {
    // Nothing to align: keep length_ at 0 and leave positions_ empty.
62  if (vseq.size()==0)
63  return;
64 
    // The first sequence defines the reference length; the loop starts at 1
    // because sequence 0 trivially matches itself.
65  length_ = vseq[0]->size();
66  for (size_t ns=1; ns < vseq.size(); ns++)
67  if (!checkSize_(*vseq[ns]))
68  throw BadSizeException("Sequences of different sizes in aligned construction",length_,vseq[ns]->size());
69 
    // Assign default site positions now that length_ is known.
71  reindexSites();
72 }
73 
75  AbstractSequenceContainer(osc.getAlphabet()),
76  VectorSequenceContainer(osc.getAlphabet()),
78  // We can't call the copy constructor because we want to use the overloaded addSequence method !!!
79  positions_(),
80  length_()
81 {
82  // Initializing
83  for (unsigned int i = 0; i < osc.getNumberOfSequences(); i++)
84  {
85  addSequence(osc.getSequence(i), true);
86  }
87 
88  if (osc.getNumberOfSequences() > 0)
89  length_ = getSequence(0).size(); // the overloaded
90  else
91  length_ = 0;
92 
93  reindexSites();
96 }
97 
98 /***************************************************************************/
99 
101 {
104 
105  // Initializing
106  length_ = asc.getNumberOfSites();
108 
109  return *this;
110 }
111 
112 /***************************************************************************/
113 
115 {
117 
118  // Initializing
119  length_ = sc.getNumberOfSites();
122 
123  return *this;
124 }
125 
126 /***************************************************************************/
127 
129 {
131 
132  // Initializing
133  length_ = 0;
134  reindexSites();
136 
137  return *this;
138 }
139 
140 /***************************************************************************/
141 
143 {
146 
147  if (i >= length_)
148  throw IndexOutOfBoundsException("AlignedSequenceContainer::getSite", i, 0, getNumberOfSites());
149 
150  // Main loop : for all sequences
151  size_t n = getNumberOfSequences();
152  std::shared_ptr<Site> site(new Site(getAlphabet(), (int)(i+1)));
153  for (size_t j = 0; j < n; j++)
154  site->addElement(getSequence(j)[i]);
155 
158 }
159 
161 {
164 
165  if (i >= length_)
166  throw IndexOutOfBoundsException("AlignedSequenceContainer::getSite", i, 0, getNumberOfSites());
167 
168  // Main loop : for all sequences
169  size_t n = getNumberOfSequences();
170  std::shared_ptr<Site> site(new Site(getAlphabet(), (int)(i+1)));
171  for (size_t j = 0; j < n; j++)
172  {
173  site->addElement(getSequence(j)[i]);
174  }
175 
178 }
179 
180 /******************************************************************************/
181 
182 void AlignedSequenceContainer::setSite(size_t pos, const Site& site, bool checkPositions)
183 {
184  // New site's alphabet and site container's alphabet matching verification
185  if (pos >= getNumberOfSites())
186  throw IndexOutOfBoundsException("AlignedSequenceContainer::setSite", pos, 0, getNumberOfSites());
187  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
188  throw AlphabetMismatchException("AlignedSequenceContainer::setSite", getAlphabet(), site.getAlphabet());
189 
190  // Check size:
191  if (site.size() != getNumberOfSequences())
192  throw SiteException("AlignedSequenceContainer::setSite, site does not have the appropriate length", &site);
193 
194  // Check position:
195  if (checkPositions)
196  {
197  int position = site.getPosition();
198  for (auto poss : positions_)
199  {
200  if (poss == position)
201  throw SiteException("AlignedSequenceContainer::setSite: Site position already exists in container", &site);
202  }
203  }
204 
205  // For all sequences
206  for (size_t j = 0; j < getNumberOfSequences(); j++)
207  getSequence_(j).setElement(pos, site[j]);
208 
209  positions_[pos] = site.getPosition();
210 }
211 
212 /******************************************************************************/
213 
// Removes the alignment column at index 'pos': the element at 'pos' is deleted
// from every sequence, the matching entry of positions_ is erased and length_
// is decremented. Throws IndexOutOfBoundsException when pos >= getNumberOfSites().
// NOTE(review): the function is declared to return std::shared_ptr<Site>, but no
// return statement is visible in this listing (embedded line 233 is absent) —
// confirm against the original file before touching this code.
214 std::shared_ptr<Site> AlignedSequenceContainer::removeSite(size_t pos)
215 {
216  if (pos >= getNumberOfSites())
217  throw IndexOutOfBoundsException("AlignedSequenceContainer::removeSite", pos, 0, getNumberOfSites());
218 
219  // Get old site
     // The result of getSite() is discarded here; the call is made before the
     // column is deleted from the sequences.
220  getSite(pos); // Creates the site!
221 
222  // For all sequences
223  for (size_t j = 0; j < getNumberOfSequences(); j++)
224  {
225  getSequence_(j).deleteElement(pos);
226  }
227 
228  // Delete site's position
229  positions_.erase(positions_.begin() + static_cast<ptrdiff_t>(pos));
230  length_--;
231 
232  // Actualizes the 'sites' vector:
234 }
235 
236 /******************************************************************************/
237 
239 {
240  if (pos >= getNumberOfSites())
241  throw IndexOutOfBoundsException("AlignedSequenceContainer::deleteSite", pos, 0, getNumberOfSites());
242 
243  // For all sequences
244  for (size_t j = 0; j < getNumberOfSequences(); j++)
245  {
246  getSequence_(j).deleteElement(pos);
247  }
248 
249  // Delete site's position
250  positions_.erase(positions_.begin() + static_cast<ptrdiff_t>(pos));
251  length_--;
252 
253  // Actualizes the 'sites' vector:
255 }
256 
257 /******************************************************************************/
258 
259 void AlignedSequenceContainer::deleteSites(size_t siteIndex, size_t length)
260 {
261  if (siteIndex + length > getNumberOfSites())
262  throw IndexOutOfBoundsException("AlignedSequenceContainer::deleteSites", siteIndex + length, 0, getNumberOfSites());
263 
264  // For all sequences
265  for (size_t j = 0; j < getNumberOfSequences(); j++)
266  {
267  getSequence_(j).deleteElements(siteIndex, length);
268  }
269 
270  // Delete site's siteIndexition
271  positions_.erase(positions_.begin() + static_cast<ptrdiff_t>(siteIndex),
272  positions_.begin() + static_cast<ptrdiff_t>(siteIndex + length));
273  length_ -= length;
274 
275  // Actualizes the 'sites' vector:
277 }
278 
279 /******************************************************************************/
280 
281 void AlignedSequenceContainer::addSite(const Site& site, bool checkPositions)
282 {
283  // New site's alphabet and site container's alphabet matching verification
284  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
285  throw AlphabetMismatchException("AlignedSequenceContainer::addSite");
286 
287  // Check size:
288  if (site.size() != getNumberOfSequences())
289  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site);
290 
291  // Check position:
292  int position = site.getPosition();
293  if (checkPositions)
294  {
295  // For all positions in vector : throw exception if position already exists
296  for (size_t i = 0; i < positions_.size(); i++)
297  {
298  if (positions_[i] == position)
299  throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site);
300  }
301  }
302 
303  // For all sequences
304  for (size_t j = 0; j < getNumberOfSequences(); j++)
305  getSequence_(j).addElement(site[j]);
306 
307  length_++;
308  positions_.push_back(position);
309 
310  // Actualizes the 'sites' vector:
312 }
313 
314 /******************************************************************************/
315 
316 void AlignedSequenceContainer::addSite(const Site& site, int position, bool checkPositions)
317 {
318  // New site's alphabet and site container's alphabet matching verification
319  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
320  throw AlphabetMismatchException("AlignedSequenceContainer::addSite");
321 
322  // Check size:
323  if (site.size() != getNumberOfSequences())
324  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site);
325 
326  // Check position:
327  if (checkPositions)
328  {
329  // For all positions in vector : throw exception if position already exists
330  for (size_t i = 0; i < positions_.size(); i++)
331  {
332  if (positions_[i] == position)
333  throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site);
334  }
335  }
336 
337  // For all sequences
338  for (size_t j = 0; j < getNumberOfSequences(); j++)
339  {
340  getSequence_(j).addElement(site[j]);
341  }
342 
343  length_++;
344  positions_.push_back(position);
345 
346  // Actualizes the 'sites' vector:
348 }
349 
350 /******************************************************************************/
351 
352 void AlignedSequenceContainer::addSite(const Site& site, size_t siteIndex, bool checkPositions)
353 {
354  if (siteIndex >= getNumberOfSites())
355  throw IndexOutOfBoundsException("AlignedSequenceContainer::addSite", siteIndex, 0, getNumberOfSites());
356 
357  // New site's alphabet and site container's alphabet matching verification
358  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
359  throw AlphabetMismatchException("AlignedSequenceContainer::addSite", getAlphabet(), site.getAlphabet());
360 
361  // Check size:
362  if (site.size() != getNumberOfSequences())
363  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site);
364 
365  // Check position:
366  int position = site.getPosition();
367  if (checkPositions)
368  {
369  // For all positions in vector : throw exception if position already exists
370  for (size_t i = 0; i < positions_.size(); i++)
371  {
372  if (positions_[i] == position)
373  throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site);
374  }
375  }
376 
377  // For all sequences
378  for (size_t j = 0; j < getNumberOfSequences(); j++)
379  getSequence_(j).addElement(siteIndex, site[j]);
380 
381  length_++;
382  positions_.insert(positions_.begin() + static_cast<ptrdiff_t>(siteIndex), position);
383 
384  // Actualizes the 'sites' vector:
386 }
387 
388 /******************************************************************************/
389 
390 void AlignedSequenceContainer::addSite(const Site& site, size_t siteIndex, int position, bool checkPositions)
391 {
392  if (siteIndex >= getNumberOfSites())
393  throw IndexOutOfBoundsException("AlignedSequenceContainer::addSite", siteIndex, 0, getNumberOfSites() - 1);
394 
395  // New site's alphabet and site container's alphabet matching verification
396  if (site.getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
397  throw AlphabetMismatchException("AlignedSequenceContainer::addSite", getAlphabet(), site.getAlphabet());
398 
399  // Check size:
400  if (site.size() != getNumberOfSequences())
401  throw SiteException("AlignedSequenceContainer::addSite, site does not have the appropriate length", &site);
402 
403  // Check position:
404  if (checkPositions)
405  {
406  // For all positions in vector : throw exception if position already exists
407  for (size_t i = 0; i < positions_.size(); i++)
408  {
409  if (positions_[i] == position)
410  throw SiteException("AlignedSequenceContainer::addSite: Site position already exists in container", &site);
411  }
412  }
413 
414  // For all sequences
415  for (size_t j = 0; j < getNumberOfSequences(); j++)
416  getSequence_(j).addElement(siteIndex, site[j]);
417 
418  length_++;
419  positions_.insert(positions_.begin() + static_cast<ptrdiff_t>(siteIndex), position);
420 
421  // Actualizes the 'sites' vector:
423 }
424 
425 /******************************************************************************/
426 
428 {
429  positions_.resize(length_);
430  for (size_t i = 0; i < length_; i++)
431  {
432  positions_[i] = static_cast<int>(i + 1); // start with 1.
433  }
434 }
435 
437 {
438  if (vPositions.size() != getNumberOfSites())
439  throw BadSizeException("AlignedSequenceContainer::setSitePositions bad size of positions vector", vPositions.size(), getNumberOfSites());
440 
441  for (size_t i = 0; i < vPositions.size(); i++)
442  {
443  positions_[i] = vPositions[i];
444  }
445 }
446 
447 
448 /******************************************************************************/
449 
450 void AlignedSequenceContainer::setSequence(size_t i, const Sequence& sequence, bool checkName)
451 {
452  if (i >= getNumberOfSequences())
453  throw IndexOutOfBoundsException("AlignedSequenceContainer::setSequence", i, 0, getNumberOfSequences());
454  // if container has only one sequence
455  if (getNumberOfSequences() == 1)
456  length_ = sequence.size();
457  if (checkSize_(sequence))
458  VectorSequenceContainer::setSequence(i, sequence, checkName);
459  else
460  throw SequenceNotAlignedException("AlignedSequenceContainer::setSequence", &sequence);
461 
462  // Detroys all sites (but keep Site Container at same size)
464 }
465 
466 /******************************************************************************/
467 
468 void AlignedSequenceContainer::setSequence(const string& name, const Sequence& sequence, bool checkName)
469 {
470  // if container has only one sequence
471  if (getNumberOfSequences() == 1)
472  length_ = sequence.size();
473  if (checkSize_(sequence))
474  VectorSequenceContainer::setSequence(name, sequence, checkName);
475  else
476  throw SequenceNotAlignedException("AlignedSequenceContainer::setSequence", &sequence);
477 }
478 
479 /******************************************************************************/
480 
// Appends 'sequence' at the end of the container.
// If no alignment length has been recorded yet (length_ == 0), the new sequence
// defines it and the site positions are re-indexed; otherwise the sequence must
// pass checkSize_(), else a SequenceNotAlignedException is thrown.
// NOTE(review): embedded line 487 is absent from this listing, so one statement
// of the first branch may be missing here — confirm against the original file.
481 void AlignedSequenceContainer::addSequence(const Sequence& sequence, bool checkName)
482 {
483 // no alignment length recorded yet: this sequence defines it
484  if (length_ == 0)
485  {
486  length_ = sequence.size();
488  reindexSites();
489  }
490  if (checkSize_(sequence))
491  VectorSequenceContainer::addSequence(sequence, checkName);
492  else
493  throw SequenceNotAlignedException("AlignedSequenceContainer::addSequence", &sequence);
494 }
495 
496 /******************************************************************************/
497 
// Inserts 'sequence' at sequence index 'i'.
// Throws IndexOutOfBoundsException when i >= getNumberOfSequences() and
// SequenceNotAlignedException when the sequence fails checkSize_().
// NOTE(review): embedded lines 506 and 509 are absent from this listing. As
// shown, the 'else' below has no statement of its own and would bind to the
// following 'if (checkSize_(...))' — confirm against the original file before
// touching this code.
498 void AlignedSequenceContainer::addSequence(const Sequence& sequence, size_t i, bool checkName)
499 {
500  if (i >= getNumberOfSequences())
501  throw IndexOutOfBoundsException("AlignedSequenceContainer::addSequence", i, 0, getNumberOfSequences());
502  // no alignment length recorded yet: this sequence defines it
503  if (length_ == 0)
504  {
505  length_ = sequence.size();
507  }
508  else
510 
511  if (checkSize_(sequence))
512  VectorSequenceContainer::addSequence(sequence, i, checkName);
513  else
514  throw SequenceNotAlignedException("AlignedSequenceContainer::addSequence", &sequence);
515 }
516 
517 /******************************************************************************/
518 
520 {
521  length_ = 0;
524 }
525 
526 /******************************************************************************/
527 
529 {
532  return asc;
533 }
534 
535 /******************************************************************************/
virtual int getPosition() const
Get the position of this site.
Definition: CoreSite.h:183
Partial implementation of the OrderedSequenceContainer interface.
void setGeneralComments(const Comments &comments)
Set the comments of this container.
const Alphabet * getAlphabet() const
Get container's alphabet.
const Comments & getGeneralComments() const
Get the comments of this container.
Aligned sequences container.
size_t getNumberOfSites() const
Get the number of aligned positions in the container.
virtual const Site & getSite(size_t siteIndex) const
Get a site from the container.
void deleteSite(size_t siteIndex)
Delete a site from the container.
Vint getSitePositions() const
Get all position attributes of sites.
void reindexSites()
Set all positions attributes.
virtual void setSite(size_t siteIndex, const Site &site, bool checkPosition=true)
Set a site in the container.
void addSequence(const Sequence &sequence, bool checkName=true)
Add a sequence at the end of the container.
std::shared_ptr< Site > removeSite(size_t siteIndex)
Remove a site from the container.
void addSite(const Site &site, bool checkPosition=true)
Add a site in the container.
void deleteSites(size_t siteIndex, size_t length)
Remove a continuous range of sites in the container.
AlignedSequenceContainer * createEmptyContainer() const
Return a copy of this container, but with no data inside.
bool checkSize_(const Sequence &sequence)
Check sequence's size before insertion in sequence container.
void setSitePositions(Vint vPositions)
Set all position attributes of sites.
AlignedSequenceContainer(std::vector< std::shared_ptr< Sequence >> vseq, const Alphabet *alpha)
Build a container with the specified alphabet, with shared Sequences.
AlignedSequenceContainer & operator=(const AlignedSequenceContainer &asc)
virtual Vint getSitePositions() const =0
Get all position attributes of sites.
virtual size_t getNumberOfSites() const =0
Get the number of aligned positions in the container.
Exception thrown when two alphabets do not match.
The Alphabet interface.
Definition: Alphabet.h:133
virtual std::string getAlphabetType() const =0
Identification method.
virtual void addElement(const T &c)=0
Add a character to the end of the list.
virtual void setElement(size_t pos, const T &c)=0
Set the element at position 'pos' to character 'c'.
virtual void deleteElement(size_t pos)=0
Remove the element at position 'pos'.
virtual const Alphabet * getAlphabet() const =0
Get the alphabet associated to the list.
virtual void deleteElements(size_t pos, size_t len)=0
Remove the elements at position 'pos'.
virtual size_t size() const =0
Get the number of elements in the list.
The OrderedSequenceContainer interface.
virtual const Sequence & getSequence(const std::string &name) const=0
Retrieve a sequence object from the container.
virtual const Sequence & getSequence(size_t sequenceIndex) const =0
Retrieve a sequence object from the container.
virtual void setSequence(const std::string &name, const Sequence &sequence, bool checkName)=0
Replace a sequence in the container.
Exception thrown when a sequence is not aligned with the others.
The sequence interface.
Definition: Sequence.h:71
virtual size_t getNumberOfSequences() const =0
Get the number of sequences in the container.
virtual const Comments & getGeneralComments() const =0
Get the comments of this container.
The SiteContainer interface.
Definition: SiteContainer.h:65
The site exception base class.
The Site class.
Definition: Site.h:68
The template VectorPositionedContainer class.
void deleteObject(size_t objectIndex)
Delete an object from the container.
void appendObject(std::shared_ptr< T > object)
void nullify()
Nullify all elements.
VectorPositionedContainer< T > & operator=(const VectorPositionedContainer< T > &vsc)
copy where shared_ptr elements are shared
void addObject(std::shared_ptr< T > object, size_t objectIndex, bool checkPosition=false)
Add an object.
void insertObject(std::shared_ptr< T > object, size_t objectIndex)
Insert an object.
std::shared_ptr< T > removeObject(size_t objectIndex)
Extract and remove a object from the container.
void addObject_(std::shared_ptr< T > object, size_t objectIndex, bool checkPosition=false) const
const std::shared_ptr< T > getObject(size_t objectIndex) const
Retrieve an object from the container. Set as protected since they will be public under T specific na...
void deleteObjects(size_t objectIndex, size_t length)
The VectorSequenceContainer class.
virtual void addSequence(const Sequence &sequence, bool checkName=true)
Add a sequence at the end of the container.
size_t getNumberOfSequences() const
Get the number of sequences in the container.
VectorSequenceContainer & operator=(const VectorSequenceContainer &vsc)
Assign from a VectorSequenceContainer.
void clear()
Delete all objects in the container.
This alphabet is used to deal with NumericAlphabet.
std::vector< int > Vint