bpp-seq3  3.0.0
CompressedVectorSiteContainer.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #include <Bpp/Text/TextTools.h>
6 #include <iostream>
7 
9 
10 using namespace std;
11 
12 using namespace bpp;
13 
// Build a container from a vector of sites.
// Throws Exception when `vs` is empty. Otherwise one placeholder entry per element of
// the first site is registered (name "Seq_<i>", null sequence pointer), and every site
// of `vs` is then passed to addSite().
// NOTE(review): `alphabet` is not used in the lines visible here; the initializer that
// presumably forwards it to the base class seems to be missing from this listing - confirm.
// NOTE(review): sequenceNames_ receives nbSeq entries but sequenceComments_ stays empty,
// while sequence() reads sequenceComments_[sequencePosition] - verify this is safe.
16 CompressedVectorSiteContainer::CompressedVectorSiteContainer(
17  std::vector< unique_ptr<Site>>& vs,
18  std::shared_ptr<const Alphabet>& alphabet) :
20  siteContainer_(),
21  sequenceContainer_(),
22  sequenceNames_(),
23  sequenceComments_(),
24  index_(0)
25 {
26  if (vs.size() == 0)
27  throw Exception("CompressedVectorSiteContainer::CompressedVectorSiteContainer. Empty site set.");
28  // Seq names and comments:
29  size_t nbSeq = vs[0]->size();
30  for (size_t i = 0; i < nbSeq; ++i)
31  {
32  sequenceNames_.push_back("Seq_" + TextTools::toString(i));
33  sequenceContainer_.appendObject(nullptr, "Seq_" + TextTools::toString(i));
34  }
35 
36  // Now try to add each site:
37  for (auto& site : vs)
38  {
39  addSite(site);
40  }
41 }
42 
43 /******************************************************************************/
44 
// Constructor taking a number of sequences (`size`) and an alphabet; the line carrying
// the function name is not part of this listing.
// Registers `size` placeholder entries named "Seq_0" ... "Seq_<size-1>" with a null
// sequence pointer; no site is added.
// NOTE(review): sequenceComments_ is left empty here although sequenceNames_ is filled - confirm.
46  size_t size,
47  std::shared_ptr<const Alphabet>& alphabet) :
49  siteContainer_(),
50  sequenceContainer_(),
51  sequenceNames_(),
52  sequenceComments_(),
53  index_(0)
54 {
55  // Seq names and comments:
56  for (size_t i = 0; i < size; ++i)
57  {
58  sequenceNames_.push_back("Seq_" + TextTools::toString(i));
59  sequenceContainer_.appendObject(nullptr, "Seq_" + TextTools::toString(i));
60  }
61 }
62 
63 /******************************************************************************/
64 
// Constructor taking a list of sequence keys and an alphabet; the line carrying the
// function name is not part of this listing.
// For each key, a placeholder (null sequence pointer) is registered under that key and
// a generated name "Seq_<i>" is appended to sequenceNames_.
// NOTE(review): `i` is incremented before use, so generated names start at "Seq_1" here
// whereas the other constructors start at "Seq_0" - confirm this is intended.
// NOTE(review): the generated name differs from the key used in sequenceContainer_ - confirm.
66  const std::vector<std::string>& sequenceKeys,
67  std::shared_ptr<const Alphabet>& alphabet) :
69  siteContainer_(),
70  sequenceContainer_(),
71  sequenceNames_(),
72  sequenceComments_(),
73  index_(0)
74 {
75  unsigned int i = 0;
76  for (auto key : sequenceKeys) // each key is copied into `key` (iteration by value)
77  {
78  ++i;
79  sequenceNames_.push_back("Seq_" + TextTools::toString(i));
80  sequenceContainer_.appendObject(nullptr, key);
81  }
82 }
83 
84 /******************************************************************************/
85 
// Build an empty container: all members visible here are default-initialised and the
// body does nothing.
// NOTE(review): `alphabet` is not used in the visible lines; the initializer that
// presumably forwards it to the base class seems to be missing from this listing - confirm.
86 CompressedVectorSiteContainer::CompressedVectorSiteContainer(std::shared_ptr<const Alphabet>& alphabet) :
88  siteContainer_(),
89  sequenceContainer_(),
90  sequenceNames_(),
91  sequenceComments_(),
92  index_(0)
93 {}
94 
95 /******************************************************************************/
96 
// Initializer list and body of a constructor copying from `vsc`; the header lines are
// not part of this listing.
// Names, comments and the position-to-pattern index are copied; sequence slots are
// registered as null placeholders keyed by the sequence names.
99  siteContainer_(),
100  sequenceContainer_(),
101  sequenceNames_(vsc.sequenceNames_),
102  sequenceComments_(vsc.sequenceComments_),
103  index_(vsc.index_)
104 {
105  for (const auto& name: vsc.sequenceNames_)
106  {
107  sequenceContainer_.appendObject(nullptr, name);
108  }
109 
110  // Copy the compressed data:
111  for (size_t i = 0; i < vsc.siteContainer_.getSize(); ++i)
112  {
    // Each stored pattern is cloned, so the copy does not share Site objects with `vsc`.
    // NOTE(review): the shared_ptr is built without the SwitchDeleter that addSite()
    // installs - confirm code that queries that deleter copes with its absence.
113  auto sitePtr = std::shared_ptr<Site>(vsc.siteContainer_.getObject(i)->clone());
114  siteContainer_.appendObject(sitePtr);
115  }
116 }
117 
118 /******************************************************************************/
119 
// Initializer list and body of a constructor building from another container `sc`;
// the header lines are not part of this listing.
// Sequence names and comments are taken from `sc`, one null placeholder is registered
// per sequence name, and each site of `sc` is cloned and passed to addSite().
122  siteContainer_(),
123  sequenceContainer_(),
124  sequenceNames_(sc.getSequenceNames()),
125  sequenceComments_(sc.getSequenceComments()),
126  index_(0)
127 {
128  for (const auto& name: sc.getSequenceNames())
129  {
130  sequenceContainer_.appendObject(nullptr, name);
131  }
132 
133  // Now try to add each site:
134  for (size_t i = 0; i < sc.getNumberOfSites(); ++i)
135  {
    // addSite() takes ownership of the clone only when the pattern is new; otherwise
    // the clone is destroyed with `sitePtr` at the end of the iteration.
136  auto sitePtr = std::unique_ptr<Site>(sc.site(i).clone());
137  addSite(sitePtr, false);
138  }
139 }
140 
141 /******************************************************************************/
142 
// Body of an assignment from `vsc` (returns *this); the header and a few interior
// lines are not part of this listing.
// The container is cleared, then the sequence placeholders, the position-to-pattern
// index and a clone of every stored pattern are rebuilt from `vsc`.
// NOTE(review): clear() runs before `vsc` is read; no self-assignment guard is visible
// in these lines - confirm behaviour for `x = x`.
144 {
145  clear();
147 
148  for (const auto& name: vsc.getSequenceNames())
149  {
150  sequenceContainer_.appendObject(nullptr, name);
151  }
152 
153  // Copy the compressed data:
154  index_ = vsc.index_;
155  for (size_t i = 0; i < vsc.siteContainer_.getSize(); ++i)
156  {
    // Patterns are cloned into shared_ptrs created without a SwitchDeleter.
157  auto sitePtr = std::shared_ptr<Site>(vsc.siteContainer_.getObject(i)->clone());
158  siteContainer_.appendObject(sitePtr);
159  }
160 
163 
164  return *this;
165 }
166 
167 /******************************************************************************/
168 
// Body of an assignment from another container `sc` (returns *this); the header and a
// few interior lines are not part of this listing.
// The container is cleared, then every site of `sc` is cloned and passed to addSite().
// NOTE(review): clear() runs before `sc` is read; no self-assignment guard is visible
// in these lines - confirm behaviour when `sc` is this object.
170 {
171  clear();
173 
174  // Now try to add each site:
175  for (size_t i = 0; i < sc.getNumberOfSites(); ++i)
176  {
177  auto sitePtr = std::unique_ptr<Site>(sc.site(i).clone());
178  addSite(sitePtr, false);
179  }
180 
183 
184  return *this;
185 }
186 
187 /******************************************************************************/
188 
189 void CompressedVectorSiteContainer::setSite(size_t sitePosition, unique_ptr<Site>& site, bool checkCoordinate)
190 {
191  if (sitePosition >= getNumberOfSites())
192  throw IndexOutOfBoundsException("CompressedVectorSiteContainer::setSite.", sitePosition, 0, getNumberOfSites() - 1);
193 
194  // Check size:
195  if (site->size() != getNumberOfSequences())
196  throw SiteException("AlignedSequenceContainer::setSite. Site does not have the appropriate length", site.get());
197 
198  // New site's alphabet and site container's alphabet matching verification
199  if (site->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
200  throw AlphabetMismatchException("CompressedVectorSiteContainer::setSite", getAlphabet(), site->getAlphabet());
201 
202  size_t current = index_[sitePosition];
203  size_t siteIndex = getSiteIndex_(*site);
204  if (siteIndex == current)
205  {
206  // Nothing to do here, this is the same site.
207  }
208  else if (siteIndex < getNumberOfUniqueSites())
209  {
210  // The new site is already in the list, si we just update the index:
211  index_[sitePosition] = siteIndex;
212 
213  // We have to check if the previous pattern was unique, and if so, remove it and update indices:
214  bool test = true;
215  for (size_t i = 0; test && i < index_.size(); ++i)
216  {
217  if (index_[i] == current)
218  {
219  // There is another site, so nothing to do...
220  test = false;
221  }
222  }
223  if (test)
224  {
225  // There was no other site pointing toward this pattern, so we remove it.
226  siteContainer_.deleteObject(current);
227  // Now we have to correct all indices:
228  for (size_t i = 0; i < index_.size(); ++i)
229  {
230  if (index_[i] > current)
231  index_[i]--;
232  }
233  }
234  }
235  else
236  {
237  // This is a new pattern, and we have to add it to the list.
238  // Now we have to check if the previous pattern was unique, and if so,
239  // replace it with the new one. Otherwise, add the new site at the end of the list.
240  bool test = true;
241  for (size_t i = 0; test && i < index_.size(); ++i)
242  {
243  if (i != sitePosition && index_[i] == current)
244  {
245  // There is another site
246  test = false;
247  }
248  }
249  if (test)
250  {
251  // we relace the site
252  siteContainer_.addObject(std::move(site), current, false);
253  }
254  else
255  {
256  // We add the site at the end:
257  siteContainer_.appendObject(std::move(site));
258  index_[sitePosition] = siteIndex;
259  }
260  }
261 
262  // Clean Sequence Container cache
263  sequenceContainer_.clear();
264 }
265 
266 /******************************************************************************/
267 
268 std::unique_ptr<Site> CompressedVectorSiteContainer::removeSite(size_t siteIndex)
269 {
270  if (siteIndex >= getNumberOfSites())
271  throw IndexOutOfBoundsException("CompressedVectorSiteContainer::removeSite.", siteIndex, 0, getNumberOfSites() - 1);
272  // Here we need to check whether the pattern corresponding to this site is unique:
273 
274  auto sitePtr = siteContainer_.getObject(index_[siteIndex]);
275  std::get_deleter< SwitchDeleter<Site>>(sitePtr)->off();
276 
277  size_t current = index_[siteIndex];
278  bool test = true;
279  for (size_t j = 0; test && j < index_.size(); ++j)
280  {
281  if (j != siteIndex && index_[j] == current)
282  {
283  // There is another site, so nothing to erase
284  test = false;
285  }
286  }
287  if (test)
288  {
289  // There was no other site pointing toward this pattern, so we remove it.
290  siteContainer_.removeObject(index_[siteIndex]);
291 
292  // Now we have to correct all indices:
293  for (size_t j = 0; j < index_.size(); ++j)
294  {
295  if (index_[j] > current)
296  index_[j]--;
297  }
298  }
299  index_.erase(index_.begin() + static_cast<ptrdiff_t>(siteIndex));
300 
301  // Clean Sequence Container cache
302  sequenceContainer_.clear();
303 
304  return std::unique_ptr<Site>(sitePtr.get());
305 }
306 
307 /******************************************************************************/
308 
// Body of the single-site deletion routine (the error message names it
// CompressedVectorSiteContainer::deleteSite); the header line is not part of this listing.
// Throws IndexOutOfBoundsException when siteIndex is not a valid position, then
// delegates to removeSite() and discards what it returns.
310 {
311  if (siteIndex >= getNumberOfSites())
312  throw IndexOutOfBoundsException("CompressedVectorSiteContainer::deleteSite.", siteIndex, 0, getNumberOfSites() - 1);
313  // The pattern bookkeeping (shared vs. unique pattern) is done inside removeSite().
314 
315  removeSite(siteIndex); // The returned unique_ptr is a temporary: the site it owns is destroyed at the end of this statement.
316 }
317 
318 /******************************************************************************/
319 
320 void CompressedVectorSiteContainer::deleteSites(size_t siteIndex, size_t length)
321 {
322  // This may be optimized later:
323  for (size_t i = 0; i < length; ++i)
324  {
325  deleteSite(siteIndex + i);
326  }
327 }
328 
329 /***************************************************************************/
330 
331 void CompressedVectorSiteContainer::addSite(std::unique_ptr<Site>& site, bool checkCoordinate)
332 {
333  // Check size:
334  if (getNumberOfSequences() != 0 && site->size() != getNumberOfSequences())
335  throw SiteException("CompressedVectorSiteContainer::addSite. Site does not have the appropriate length", site.get());
336 
337  // New site's alphabet and site container's alphabet matching verification
338  if (site->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
339  {
340  throw AlphabetMismatchException("CompressedVectorSiteContainer::addSite", getAlphabet(), site->getAlphabet());
341  }
342 
343  size_t n = site->size();
344 
345  size_t siteIndex = getSiteIndex_(*site);
346  if (siteIndex == getNumberOfUniqueSites())
347  {
348  // This is a new pattern:
349  std::shared_ptr<Site> sitePtr(site.release(), SwitchDeleter<Site>());
350  siteContainer_.appendObject(sitePtr);
351  }
352 
353  index_.push_back(siteIndex);
354 
355  // Clean Sequence Container cache
356  if (getNumberOfSequences() == 0)
357  {
358  sequenceNames_.resize(n);
359  sequenceComments_.resize(n);
360  for (size_t i = 0; i < n; ++i)
361  {
362  sequenceNames_[i] = "Seq_" + TextTools::toString(i);
363  sequenceContainer_.appendObject(nullptr, sequenceNames_[i]);
364  }
365  }
366  else
367  {
368  sequenceContainer_.nullify();
369  }
370 }
371 
372 /******************************************************************************/
373 
374 void CompressedVectorSiteContainer::addSite(std::unique_ptr<Site>& site, size_t siteIndex, bool checkCoordinates)
375 {
376  if (siteIndex >= getNumberOfSites())
377  throw IndexOutOfBoundsException("CompressedVectorSiteContainer::addSite", siteIndex, 0, getNumberOfSites() - 1);
378 
379  // Check size:
380  if (site->size() != getNumberOfSequences())
381  throw SiteException("CompressedVectorSiteContainer::addSite. Site does not have the appropriate length", site.get());
382 
383  // New site's alphabet and site container's alphabet matching verification
384  if (site->getAlphabet()->getAlphabetType() != getAlphabet()->getAlphabetType())
385  {
386  throw AlphabetMismatchException("CompressedVectorSiteContainer::addSite", getAlphabet(), site->getAlphabet());
387  }
388 
389  size_t n = site->size();
390 
391  size_t index = getSiteIndex_(*site);
392  if (index == getNumberOfUniqueSites())
393  {
394  // This is a new pattern:
395  std::shared_ptr<Site> sitePtr(site.release(), SwitchDeleter<Site>());
396  siteContainer_.appendObject(sitePtr);
397  }
398 
399  index_.insert(index_.begin() + static_cast<ptrdiff_t>(siteIndex), index);
400 
401  // Clean Sequence Container cache
402  if (getNumberOfSequences() == 0)
403  {
404  sequenceNames_.resize(n);
405  sequenceComments_.resize(n);
406  for (size_t i = 0; i < n; ++i)
407  {
408  sequenceNames_[i] = "Seq_" + TextTools::toString(i);
409  sequenceContainer_.appendObject(nullptr, sequenceNames_[i]);
410  }
411  }
412  else
413  {
414  sequenceContainer_.nullify();
415  }
416 }
417 
418 /******************************************************************************/
419 
// Body of a routine that renumbers site coordinates (presumably reindexSites(); the
// header line is not part of this listing).
// For i in [0, siteContainer_.getSize()), the site returned by getSite_(i) receives
// coordinate i + 1.
// NOTE(review): the loop bound is the size of siteContainer_, not getNumberOfSites() -
// confirm against getSite_(), which is not visible here.
421 {
422  for (size_t i = 0; i < siteContainer_.getSize(); ++i)
423  {
424  getSite_(i).setCoordinate(static_cast<int>(i) + 1);
425  }
426 }
427 
428 /******************************************************************************/
429 
// Body of a routine assigning one coordinate per site from `vCoordinates` (presumably
// setSiteCoordinates(); the header line is not part of this listing).
// Throws BadSizeException when the vector size differs from getNumberOfSites().
// Note that the exception text says "setSitePositions".
431 {
432  if (vCoordinates.size() != getNumberOfSites())
433  throw BadSizeException("CompressedVectorSiteContainer::setSitePositions bad size of positions vector", vCoordinates.size(), getNumberOfSites());
434 
435  for (size_t i = 0; i < vCoordinates.size(); ++i)
436  {
    // NOTE(review): if getSite_(i) resolves positions sharing a pattern to the same
    // Site object, later coordinates overwrite earlier ones - confirm.
437  getSite_(i).setCoordinate(vCoordinates[i]);
438  }
439 }
440 
441 /******************************************************************************/
442 
// Body of a routine returning the coordinates of all sites (presumably
// getSiteCoordinates(); the header line is not part of this listing).
// The result has one entry per alignment position; each entry is the coordinate of
// the stored pattern that index_ maps the position to.
444 {
445  size_t n = getNumberOfSites();
446  Vint coordinates(n);
447  for (size_t i = 0; i < n; i++)
448  {
449  coordinates[i] = siteContainer_.getObject(index_[i])->getCoordinate();
450  }
451  return coordinates;
452 }
453 
454 /******************************************************************************/
455 
456 const Sequence& CompressedVectorSiteContainer::sequence(size_t sequencePosition) const
457 {
458  if (sequencePosition >= getNumberOfSequences())
459  throw IndexOutOfBoundsException("CompressedVectorSiteContainer::getSequence.", sequencePosition, 0, getNumberOfSequences() - 1);
460 
461  // If Sequence already exists
462  auto name = sequenceContainer_.getObjectName(sequencePosition);
463  if (!sequenceContainer_.isAvailableName(name))
464  return *sequenceContainer_.getObject(sequencePosition);
465 
466  // Main loop : for all sites
467  size_t n = getNumberOfSites();
468  vector<int> sequence(n);
469  for (size_t j = 0; j < n; ++j)
470  {
471  sequence[j] = (*siteContainer_.getObject(index_[j]))[sequencePosition];
472  }
473 
474  auto alphaPtr = getAlphabet();
475  auto ns = std::make_shared<Sequence>(
476  sequenceNames_[sequencePosition],
477  sequence,
478  sequenceComments_[sequencePosition],
479  alphaPtr);
480 
481  sequenceContainer_.addObject_(ns, sequencePosition, sequenceKey(sequencePosition), false);
482 
483  return *ns;
484 }
485 
486 /******************************************************************************/
487 
// Body of the pattern lookup used by setSite()/addSite() as getSiteIndex_(*site); the
// header line is not part of this listing.
// Linear search over the stored patterns: returns the index of the first pattern whose
// elements all equal those of `site`, or getNumberOfUniqueSites() when none matches.
489 {
    // Default result: "not found".
490  size_t pos = getNumberOfUniqueSites();
491  bool test;
492  for (size_t i = 0; i < getNumberOfUniqueSites(); ++i)
493  {
494  test = true;
495  const Site& siteI = *siteContainer_.getObject(i);
496 
    // Element-wise comparison, stopping at the first difference.
497  for (size_t j = 0; test && j < site.size(); ++j) // site is supposed to have the correct size, that is the same as all the ones in the container.
498  {
499  if (site[j] != siteI[j])
500  test = false;
501  }
502 
503  if (test)
504  {
505  pos = i;
506  break;
507  }
508  }
509  return pos;
510 }
511 
512 /******************************************************************************/
void setCoordinate(int coordinate) override
Set the position of this site.
Definition: CoreSite.h:146
Partial implementation of the SequenceContainer interface.
AbstractTemplateSequenceContainer & operator=(const AbstractTemplateSequenceContainer< SequenceType, HashType > &sc)
std::shared_ptr< const Alphabet > getAlphabet() const override
std::shared_ptr< const Alphabet > getAlphabet() const override
Get the alphabet associated to the list.
Definition: SymbolList.h:120
size_t size() const override
Get the number of elements in the list.
Definition: SymbolList.h:124
Exception thrown when two alphabets do not match.
A low memory, yet restricted, version of the VectorSiteContainer class.
size_t getNumberOfSites() const override
Get the number of aligned positions in the container.
std::unique_ptr< Site > removeSite(size_t sitePosition) override
Remove a site from the container.
size_t getNumberOfSequences() const override
Get the number of sequences in the container.
const std::string & sequenceKey(size_t sequencePosition) const override
Get the key associated to a given sequence.
const Site & site(size_t sitePosition) const override
Get a site from the container.
void setSiteCoordinates(const Vint &coordinates) override
Set all coordinates of sites.
void reindexSites() override
Set all coordinate attributes.
VectorMappedContainer< Sequence > sequenceContainer_
const Sequence & sequence(size_t sequenceIndex) const override
Retrieve a sequence object from the container.
CompressedVectorSiteContainer(std::vector< std::unique_ptr< Site >> &vs, std::shared_ptr< const Alphabet > &alphabet)
Build a new container from a set of sites.
Vint getSiteCoordinates() const override
Get all coordinates of sites.
void clear() override
Delete all data in the container.
VectorPositionedContainer< Site > siteContainer_
std::vector< std::string > getSequenceNames() const override
CompressedVectorSiteContainer & operator=(const CompressedVectorSiteContainer &vsc)
void addSite(std::unique_ptr< Site > &site, bool checkCoordinate=false) override
Add a site in the container.
void deleteSites(size_t sitePosition, size_t length) override
Remove a continuous range of sites in the container.
void deleteSite(size_t sitePosition) override
Delete a site from the container.
void setSite(size_t sitePosition, std::unique_ptr< Site > &site, bool checkCoordinate=true) override
Set a site in the container.
A basic implementation of the Sequence interface.
Definition: Sequence.h:117
The site exception base class.
The Site class.
Definition: Site.h:73
Site * clone() const
Definition: Site.h:184
virtual std::vector< Comments > getSequenceComments() const =0
virtual std::vector< std::string > getSequenceNames() const =0
virtual const SiteType & site(size_t sitePosition) const override=0
Get a site from the container.
virtual size_t getNumberOfSites() const override=0
Get the number of aligned positions in the container.
std::string toString(T t)
This alphabet is used to deal NumericAlphabet.
std::vector< int > Vint