bpp-popgen  3.0.0
PolymorphismSequenceContainer.cpp
Go to the documentation of this file.
1 //
2 // File: PolymorphismSequenceContainer.cpp
3 // Created by: Eric Bazin
4 // Sylvain Gaillard
5 // Created on: Wednesday August 04 2004
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11 
12  This software is a computer program whose purpose is to provide classes
13  for population genetics analysis.
14 
15  This software is governed by the CeCILL license under French law and
16  abiding by the rules of distribution of free software. You can use,
17  modify and/ or redistribute the software under the terms of the CeCILL
18  license as circulated by CEA, CNRS and INRIA at the following URL
19  "http://www.cecill.info".
20 
21  As a counterpart to the access to the source code and rights to copy,
22  modify and redistribute granted by the license, users are provided only
23  with a limited warranty and the software's author, the holder of the
24  economic rights, and the successive licensors have only limited
25  liability.
26 
27  In this respect, the user's attention is drawn to the risks associated
28  with loading, using, modifying and/or developing or reproducing the
29  software by the user in light of its specific status of free software,
30  that may mean that it is complicated to manipulate, and that also
31  therefore means that it is reserved for developers and experienced
32  professionals having in-depth computer knowledge. Users are therefore
33  encouraged to load and test the software's suitability as regards their
34  requirements in conditions enabling the security of their systems and/or
35  data to be ensured and, more generally, to use and operate it in the
36  same conditions as regards security.
37 
38  The fact that you are presently reading this means that you have had
39  knowledge of the CeCILL license and that you accept its terms.
40  */
41 
43 
44 #include <Bpp/Seq/SequenceTools.h>
45 
46 using namespace bpp;
47 using namespace std;
48 
49 /******************************************************************************/
50 
52  VectorSiteContainer(alpha),
53  ingroup_(vector<bool>()),
54  count_(0),
55  group_(0) {}
56 
57 /******************************************************************************/
58 
60  VectorSiteContainer(size, alpha),
61  ingroup_(size),
62  count_(size),
63  group_(size) {}
64 
65 /******************************************************************************/
66 
67 PolymorphismSequenceContainer::PolymorphismSequenceContainer(const vector<string>& names, const Alphabet* alpha) :
68  VectorSiteContainer(names, alpha),
69  ingroup_(names.size()),
70  count_(names.size()),
71  group_(names.size()) {}
72 
73 /******************************************************************************/
74 
77  ingroup_(sc.getNumberOfSequences(), true),
78  count_(sc.getNumberOfSequences(), 1),
79  group_(sc.getNumberOfSequences(), 1) {}
80 
81 /******************************************************************************/
82 
84  VectorSiteContainer(sc.getAlphabet()),
85  ingroup_(),
86  count_(),
87  group_()
88 {
89  if (sc.getNumberOfSequences() == 0) return; //done.
90 
91  // Add first sequence:
93  for (size_t i = 1; i < sc.getNumberOfSequences(); ++i) {
94  const Sequence& seq = sc.getSequence(i);
95  //Check if this sequence already exists in this container:
96  bool exists = false;
97  for (size_t j = 0; !exists && j < getNumberOfSequences(); ++j) {
99  incrementSequenceCount(j); //We increase frequency, meaning that we discard this sequence name.
100  exists = true;
101  }
102  }
103  if (!exists) {
104  addSequenceWithFrequency(seq, 1);
105  }
106  }
107  ingroup_.resize(getNumberOfSequences(), true);
108  group_.resize(getNumberOfSequences());
109 }
110 
111 /******************************************************************************/
112 
115  ingroup_(sc.getNumberOfSequences(), true),
116  count_(sc.getNumberOfSequences(), 1),
117  group_(sc.getNumberOfSequences(), 1) {}
118 
119 /******************************************************************************/
120 
122  VectorSiteContainer(sc.getAlphabet()),
123  ingroup_(),
124  count_(),
125  group_()
126 {
127  if (sc.getNumberOfSequences() == 0) return; //done.
128 
129  // Add first sequence:
131  for (size_t i = 1; i < sc.getNumberOfSequences(); ++i) {
132  const Sequence& seq = sc.getSequence(i);
133  //Check if this sequence already exists in this container:
134  bool exists = false;
135  for (size_t j = 0; !exists && j < getNumberOfSequences(); ++j) {
137  incrementSequenceCount(j); //We increase frequency, meaning that we discard this sequence name.
138  exists = true;
139  }
140  }
141  if (!exists) {
142  addSequenceWithFrequency(seq, 1);
143  }
144  }
145  ingroup_.resize(getNumberOfSequences(), true);
146  group_.resize(getNumberOfSequences());
147 }
148 
149 /******************************************************************************/
150 
152  VectorSiteContainer(psc),
153  ingroup_(psc.getNumberOfSequences()),
154  count_(psc.getNumberOfSequences()),
155  group_(psc.getNumberOfSequences())
156 {
157  for (size_t i = 0; i < psc.getNumberOfSequences(); i++)
158  {
159  count_[i] = psc.getSequenceCount(i);
160  ingroup_[i] = psc.isIngroupMember(i);
161  group_[i] = psc.getGroupId(i);
162  }
163 }
164 
165 /******************************************************************************/
166 
168 {
170  // Setting up the sequences comments, numbers and ingroup state
171  size_t nbSeq = psc.getNumberOfSequences();
172  count_.resize(nbSeq);
173  ingroup_.resize(nbSeq);
174  group_.resize(nbSeq);
175  for (size_t i = 0; i < nbSeq; i++)
176  {
177  count_[i] = psc.getSequenceCount(i);
178  ingroup_[i] = psc.isIngroupMember(i);
179  group_[i] = psc.getGroupId(i);
180  }
181  return *this;
182 }
183 
184 /******************************************************************************/
185 
186 // ** Class destructor: *******************************************************/
187 
189 {
190  clear();
191 }
192 
193 /******************************************************************************/
194 
195 // ** Other methods: **********************************************************/
196 
198 {
199  if (index >= getNumberOfSequences())
200  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::removeSequence: index out of bounds.", index, 0, getNumberOfSequences());
201  count_.erase(count_.begin() + static_cast<ptrdiff_t>(index));
202  ingroup_.erase(ingroup_.begin() + static_cast<ptrdiff_t>(index));
203  group_.erase(group_.begin() + static_cast<ptrdiff_t>(index));
205 }
206 
207 /******************************************************************************/
208 
210 {
211  try
212  {
213  return removeSequence(getSequencePosition(name));
214  }
215  catch (SequenceNotFoundException& snfe)
216  {
217  throw SequenceNotFoundException("PolymorphismSequenceContainer::removeSequence.", name);
218  }
219 }
220 
221 /******************************************************************************/
222 
224 {
225  try
226  {
227  delete removeSequence(index);
228  }
229  catch (IndexOutOfBoundsException& ioobe)
230  {
231  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::deleteSequence.", index, 0, getNumberOfSequences());
232  }
233 }
234 
235 /******************************************************************************/
236 
238 {
239  try
240  {
241  delete removeSequence(name);
242  }
243  catch (SequenceNotFoundException& snfe)
244  {
245  throw SequenceNotFoundException("PolymorphismSequenceContainer::deleteSequence.", name);
246  }
247 }
248 
249 /******************************************************************************/
250 
251 void PolymorphismSequenceContainer::addSequenceWithFrequency(const Sequence& sequence, unsigned int frequency, bool checkName)
252 {
253  try
254  {
255  VectorSiteContainer::addSequence(sequence, checkName);
256  }
257  catch (Exception& e)
258  {
259  throw e;
260  }
261  count_.push_back(frequency);
262  ingroup_.push_back(true);
263  group_.push_back(0);
264 }
265 
266 /******************************************************************************/
267 
268 void PolymorphismSequenceContainer::addSequenceWithFrequency(const Sequence& sequence, size_t sequenceIndex, unsigned int frequency, bool checkName)
269 {
270  try
271  {
272  VectorSiteContainer::addSequence(sequence, sequenceIndex, checkName);
273  }
274  catch (Exception& e)
275  {
276  throw e;
277  }
278  count_.insert(count_.begin() + static_cast<ptrdiff_t>(sequenceIndex), frequency);
279  ingroup_.insert(ingroup_.begin() + static_cast<ptrdiff_t>(sequenceIndex), true);
280  group_.insert(group_.begin() + static_cast<ptrdiff_t>(sequenceIndex), 0);
281 }
282 
283 /******************************************************************************/
284 
286 {
288  count_.clear();
289  ingroup_.clear();
290  group_.clear();
291 }
292 
293 /******************************************************************************/
294 
296 {
297  if (index >= getNumberOfSequences())
298  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::getGroupId: index out of bounds.", index, 0, getNumberOfSequences());
299  return group_[index];
300 }
301 
302 /******************************************************************************/
303 
304 size_t PolymorphismSequenceContainer::getGroupId(const std::string& name) const
305 {
306  try
307  {
308  return group_[getSequencePosition(name)];
309  }
310  catch (SequenceNotFoundException& snfe)
311  {
312  throw SequenceNotFoundException("PolymorphismSequenceContainer::getGroupId.", name);
313  }
314 }
315 
316 /******************************************************************************/
317 
319 {
320  set<size_t> grp_ids;
321  for (size_t i = 0; i < group_.size(); i++)
322  {
323  grp_ids.insert(group_[i]);
324  }
325  return grp_ids;
326 }
327 
328 /******************************************************************************/
329 
330 void PolymorphismSequenceContainer::setGroupId(size_t index, size_t group_id)
331 {
332  if (index >= getNumberOfSequences())
333  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setGroupId: index out of bounds.", index, 0, getNumberOfSequences());
334  group_[index] = group_id;
335 }
336 
337 /******************************************************************************/
338 
339 void PolymorphismSequenceContainer::setGroupId(const std::string& name, size_t group_id)
340 {
341  try
342  {
343  group_[getSequencePosition(name)] = group_id;
344  }
345  catch (SequenceNotFoundException& snfe)
346  {
347  throw SequenceNotFoundException("PolymorphismSequenceContainer::setGroupId.", name);
348  }
349 }
350 
351 /******************************************************************************/
352 
354 {
355  return getAllGroupsIds().size();
356 }
357 
358 /******************************************************************************/
359 
361 {
362  for (auto i: ingroup_) {
363  if (!i) return true;
364  }
365  return false;
366 }
367 
368 /******************************************************************************/
369 
371 {
372  if (index >= getNumberOfSequences())
373  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::isIngroupMember: index out of bounds.", index, 0, getNumberOfSequences());
374  return ingroup_[index];
375 }
376 
377 /******************************************************************************/
378 
379 bool PolymorphismSequenceContainer::isIngroupMember(const std::string& name) const
380 {
381  try
382  {
383  return ingroup_[getSequencePosition(name)];
384  }
385  catch (SequenceNotFoundException& snfe)
386  {
387  throw SequenceNotFoundException("PolymorphismSequenceContainer::isIngroupMember.", name);
388  }
389 }
390 
391 /******************************************************************************/
392 
394 {
395  if (index >= getNumberOfSequences())
396  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setAsIngroupMember.", index, 0, getNumberOfSequences());
397  ingroup_[index] = true;
398 }
399 
400 /******************************************************************************/
401 
403 {
404  try
405  {
406  size_t seqPos = getSequencePosition(name);
407  ingroup_[seqPos] = true;
408  }
409  catch (SequenceNotFoundException& snfe)
410  {
411  throw SequenceNotFoundException("PolymorphismSequenceContainer::setAsIngroupMember.", name);
412  }
413 }
414 
415 /******************************************************************************/
416 
418 {
419  if (index >= getNumberOfSequences())
420  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setAsOutgroupMember.", index, 0, getNumberOfSequences());
421  ingroup_[index] = false;
422 }
423 
424 /******************************************************************************/
425 
427 {
428  try
429  {
430  size_t seqPos = getSequencePosition(name);
431  ingroup_[seqPos] = false;
432  }
433  catch (SequenceNotFoundException& snfe)
434  {
435  throw SequenceNotFoundException("PolymorphismSequenceContainer::setAsOutgroupMember.", name);
436  }
437 }
438 
439 /******************************************************************************/
440 
441 void PolymorphismSequenceContainer::setSequenceCount(size_t index, unsigned int count)
442 {
443  if (index >= getNumberOfSequences())
444  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::setSequenceCount.", index, 0, getNumberOfSequences());
445  if (count < 1)
446  throw BadIntegerException("PolymorphismSequenceContainer::setSequenceCount: count can't be < 1.", static_cast<int>(count));
447  count_[index] = count;
448 }
449 
450 /******************************************************************************/
451 
452 void PolymorphismSequenceContainer::setSequenceCount(const std::string& name, unsigned int count)
453 {
454  try
455  {
457  }
458  catch (BadIntegerException& bie)
459  {
460  throw bie;
461  }
462  catch (SequenceNotFoundException& snfe)
463  {
464  throw SequenceNotFoundException("PolymorphismSequenceContainer::setSequenceCount.", name);
465  }
466 }
467 
468 /******************************************************************************/
469 
471 {
472  if (index >= getNumberOfSequences())
473  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::incrementSequenceCount.", index, 0, getNumberOfSequences());
474  count_[index]++;
475 }
476 
477 /******************************************************************************/
478 
480 {
481  try
482  {
484  }
485  catch (SequenceNotFoundException& snfe)
486  {
487  throw SequenceNotFoundException("PolymorphismSequenceContainer::incrementSequenceCount.", name);
488  }
489 }
490 
491 /******************************************************************************/
492 
494 {
495  if (index >= getNumberOfSequences())
496  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::decrementSequenceCount.", index, 0, getNumberOfSequences());
497  if (count_[index] - 1 < 1)
498  throw BadIntegerException("PolymorphismSequenceContainer::decrementSequenceCount: count can't be < 1.", static_cast<int>(count_[index] - 1));
499  count_[index]--;
500 }
501 
502 /******************************************************************************/
503 
505 {
506  try
507  {
509  }
510  catch (BadIntegerException& bie)
511  {
512  throw bie;
513  }
514  catch (SequenceNotFoundException& snfe)
515  {
516  throw SequenceNotFoundException("PolymorphismSequenceContainer::decrementSequenceCount.", name);
517  }
518 }
519 
520 /******************************************************************************/
521 
522 unsigned int PolymorphismSequenceContainer::getSequenceCount(size_t index) const
523 {
524  if (index >= getNumberOfSequences())
525  throw IndexOutOfBoundsException("PolymorphismSequenceContainer::getSequenceCount.", index, 0, getNumberOfSequences());
526  return count_[index];
527 }
528 
529 /******************************************************************************/
530 
531 unsigned int PolymorphismSequenceContainer::getSequenceCount(const std::string& name) const
532 {
533  try
534  {
536  }
537  catch (SequenceNotFoundException& snfe)
538  {
539  throw SequenceNotFoundException("PolymorphismSequenceContainer::getSequenceCount.", name);
540  }
541 }
542 
543 /******************************************************************************/
544 
546  unique_ptr<VectorSiteContainer> sites(new VectorSiteContainer(getAlphabet()));
547  for (size_t i = 0; i < getNumberOfSequences(); ++i) {
548  const Sequence& seq = getSequence(i);
549  unsigned int freq = getSequenceCount(i);
550  if (freq > 1) {
551  for (unsigned int j = 0; j < freq; ++j) {
552  unique_ptr<Sequence> seqdup(seq.clone());
553  seqdup->setName(seq.getName() + "_" + TextTools::toString(j + 1));
554  sites->addSequence(*seqdup);
555  }
556  } else {
557  sites->addSequence(seq);
558  }
559  }
560  return sites.release();
561 }
562 
563 /******************************************************************************/
564 
virtual const Sequence & getSequence(size_t sequenceIndex) const=0
virtual size_t getNumberOfSequences() const=0
The PolymorphismSequenceContainer class.
void clear()
Clear the container of all its sequences.
void setAsIngroupMember(size_t index)
Set a sequence as ingroup member by index.
void setAsOutgroupMember(size_t index)
Set a sequence as outgroup member by index.
virtual ~PolymorphismSequenceContainer()
Destroy a PolymorphismSequenceContainer.
bool isIngroupMember(size_t index) const
Tell if the sequence is ingroup by index.
unsigned int getSequenceCount(size_t index) const
Get the count of a sequence by index.
SiteContainer * toSiteContainer() const
Convert the container to a site container, with sequences duplicated according to their respective fre...
void addSequenceWithFrequency(const Sequence &sequence, unsigned int frequency, bool checkName=true)
Add a sequence to the container.
PolymorphismSequenceContainer & operator=(const PolymorphismSequenceContainer &psc)
Operator= : copy operator.
void incrementSequenceCount(size_t index)
Add 1 to the sequence count.
void decrementSequenceCount(size_t index)
Remove 1 from the sequence count.
std::set< size_t > getAllGroupsIds() const
Get all the groups identifiers.
void setSequenceCount(size_t index, unsigned int count)
Set the count of a sequence by index.
size_t getNumberOfGroups() const
Get the number of groups.
PolymorphismSequenceContainer(const Alphabet *alpha)
Build a new empty PolymorphismSequenceContainer.
void deleteSequence(size_t index)
Delete a sequence by index.
Sequence * removeSequence(size_t index)
Remove a sequence by index and return a pointer to this removed sequence.
void setGroupId(size_t index, size_t group_id)
Set the group identifier of a sequence.
size_t getGroupId(size_t index) const
Get the group identifier of the sequence.
static bool areSequencesIdentical(const Sequence &seq1, const Sequence &seq2)
Sequence * clone() const=0
virtual const std::string & getName() const=0
virtual const Alphabet * getAlphabet() const=0
const Sequence & getSequence(size_t sequenceIndex) const
void addSequence(const Sequence &sequence, bool checkName=true)
VectorSiteContainer & operator=(const VectorSiteContainer &vsc)
VectorSiteContainer(const std::vector< const CruxSymbolListSite * > &vs, const Alphabet *alpha, bool checkPositions=true)
Sequence * removeSequence(size_t sequenceIndex)
size_t getSequencePosition(const std::string &name) const
size_t getNumberOfSequences() const
std::string toString(T t)
std::size_t count(const std::string &s, const std::string &pattern)