1 //
2 // File: SequenceFeature.h
3 // Created by: Julien Dutheil
4 // Created on: Mon Nov 21 2011
5 //
7 /*
8 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 This software is a computer program whose purpose is to provide classes
11 for sequences analysis.
13 This software is governed by the CeCILL license under French law and
14 abiding by the rules of distribution of free software. You can use,
15 modify and/ or redistribute the software under the terms of the CeCILL
16 license as circulated by CEA, CNRS and INRIA at the following URL
17 "http://www.cecill.info".
19 As a counterpart to the access to the source code and rights to copy,
20 modify and redistribute granted by the license, users are provided only
21 with a limited warranty and the software's author, the holder of the
22 economic rights, and the successive licensors have only limited
23 liability.
25 In this respect, the user's attention is drawn to the risks associated
26 with loading, using, modifying and/or developing or reproducing the
27 software by the user in light of its specific status of free software,
28 that may mean that it is complicated to manipulate, and that also
29 therefore means that it is reserved for developers and experienced
30 professionals having in-depth computer knowledge. Users are therefore
31 encouraged to load and test the software's suitability as regards their
32 requirements in conditions enabling the security of their systems and/or
33 data to be ensured and, more generally, to use and operate it in the
34 same conditions as regards security.
36 The fact that you are presently reading this means that you have had
37 knowledge of the CeCILL license and that you accept its terms.
38 */
43 //From the STL:
44 #include <string>
45 #include <map>
46 #include <set>
47 #include <algorithm>
49 //From bpp-core:
50 #include <Bpp/Clonable.h>
51 #include <Bpp/Numeric/Range.h>
67 namespace bpp
68 {
75 class SeqRange:
76  public Range<size_t>
77 {
78  private:
79  char strand_;
81  public:
87  SeqRange(size_t a, size_t b, char strand = '.'):
88  Range<size_t>(a, b), strand_(strand) {
89  if (strand != '+' && strand != '-' && strand != '?' && strand != '.')
90  strand_ = '.';
91  }
97  SeqRange(const Range<size_t> range, char strand = '.'):
98  Range<size_t>(range), strand_(strand) {
99  if (strand != '+' && strand != '-' && strand != '?' && strand != '.')
100  strand_ = '.';
101  }
103  SeqRange* clone() const { return new SeqRange(*this); }
105  public:
106  virtual char getStrand() const { return strand_; }
108  virtual bool isNegativeStrand() const { return strand_ == '-'; }
109  virtual bool isStranded() const { return strand_ == '+' || strand_ == '-'; }
110  virtual void invert() {
111  if (isStranded()) {
112  if (isNegativeStrand()) {
113  strand_ = '+';
114  } else {
115  strand_ = '-';
116  }
117  }
118  }
120 };
137  public virtual Clonable
138 {
139  public:
// Shared string that an implementation can return by reference when a
// requested attribute does not exist (BasicSequenceFeature's const
// getAttribute() does so). Declared here only; as a non-inline static
// member it needs a definition elsewhere.
140  static const std::string NO_ATTRIBUTE_SET;
142  public:
// Polymorphic copy. SequenceFeatureSet stores the pointers returned by
// clone() and deletes them itself, so the result is a heap object owned
// by whoever receives it.
143  virtual SequenceFeature* clone() const = 0;
145  public:
// Identifier of the feature (read access by const reference).
149  virtual const std::string& getId() const = 0;
// Replace the identifier of the feature.
153  virtual void setId(const std::string& id) = 0;
// Identifier of the sequence this feature is attached to.
// BasicSequenceFeature::overlap(const SequenceFeature&) compares this
// value before comparing ranges.
158  virtual const std::string& getSequenceId() const = 0;
// Replace the identifier of the associated sequence.
162  virtual void setSequenceId(const std::string& id) = 0;
// Source string of the feature.
167  virtual const std::string& getSource() const = 0;
// Replace the source string.
171  virtual void setSource(const std::string& source) = 0;
// Type string of the feature; SequenceFeatureSet filters on it in
// getSubsetForType() / getSubsetForTypes().
176  virtual const std::string& getType() const = 0;
// Replace the type string.
180  virtual void setType(const std::string& type) = 0;
// Start coordinate. NOTE(review): the top-level const on this by-value
// return has no effect for callers, but it is part of the signature that
// every override has to repeat.
185  virtual const size_t getStart() const = 0;
// End coordinate. NOTE(review): whether the end position is inclusive is
// decided by Range<size_t>, which is not visible here - confirm.
190  virtual const size_t getEnd() const = 0;
195  virtual const size_t size() const {
196  return getEnd() - getStart();
197  };
// Strand queries and strand inversion. BasicSequenceFeature forwards all
// three to its SeqRange member.
202  virtual bool isStranded() const = 0;
207  virtual bool isNegativeStrand() const = 0;
212  virtual void invert() = 0;
// Coordinates and strand of the feature, returned by value.
217  virtual SeqRange getRange() const = 0;
// True when size() is 0.
222  virtual bool isEmpty() const { return size() == 0; }
// True when size() is exactly 1.
227  virtual bool isPoint() const { return size() == 1; }
// Overlap test against another feature.
232  virtual bool overlap(const SequenceFeature& feat) const = 0;
// Overlap test against a bare range (no sequence identifier is available
// on a SeqRange, so only coordinates can be compared).
237  virtual bool overlap(const SeqRange& range) const = 0;
// Does this feature contain the given range?
242  virtual bool includes(const SeqRange& range) const = 0;
// Is this feature contained in the given range?
247  virtual bool isIncludedIn(const SeqRange& range) const = 0;
// Score of the feature, returned by const reference.
252  virtual const double& getScore() const = 0;
// Replace the score.
256  virtual void setScore(double score) = 0;
// Read-only attribute lookup. BasicSequenceFeature returns
// NO_ATTRIBUTE_SET when the attribute is absent.
262  virtual const std::string& getAttribute(const std::string& attribute) const = 0;
// Mutable attribute access. BasicSequenceFeature implements this with
// std::map::operator[], which creates an empty entry for an unknown name.
268  virtual std::string& getAttribute(const std::string& attribute) = 0;
// Names of all attributes currently set, returned by value.
273  virtual std::set< std::string > getAttributeList() const = 0;
// Set (create or overwrite) the value of an attribute.
281  virtual void setAttribute(const std::string& attribute, const std::string& value) = 0;
// Remove an attribute.
286  virtual void removeAttribute(const std::string& attribute) = 0;
288 };
296  public SequenceFeature
297 {
298  protected:
// Backing storage for the string properties exposed through
// getId() / getSequenceId() / getSource() / getType().
299  std::string id_;
300  std::string sequenceId_;
301  std::string source_;
302  std::string type_;
// Score returned by getScore(); both constructors default it to -1.
304  double score_;
// Attribute name -> value. Declared mutable: the const getAttribute() and
// getAttributeList() obtain non-const iterators on it.
305  mutable std::map<std::string, std::string> attributes_;
306  //SequenceFeatureSet subFeatures_;
308  public:
// Default constructor: empty strings, range (0, 0, '.'), score -1 and no
// attribute. NOTE(review): range_ is initialised here but its declaration
// is not visible in this excerpt.
309  BasicSequenceFeature(): id_(""), sequenceId_(""), source_(""), type_(""), range_(0, 0, '.'), score_(-1), attributes_() {}
312  const std::string& id,
313  const std::string& seqId,
314  const std::string& source,
315  const std::string& type,
316  size_t start,
317  size_t end,
318  char strand,
319  double score = -1):
320  id_(id), sequenceId_(seqId), source_(source),
321  type_(type), range_(start, end, strand), score_(score),
322  attributes_()
323  //attributes_(), subFeatures_()
324  {}
// Returns a heap-allocated copy of this feature made with the copy
// constructor.
326  virtual BasicSequenceFeature* clone() const { return new BasicSequenceFeature(*this); }
328  public:
// Plain accessors over the stored strings.
329  const std::string& getId() const { return id_; }
330  void setId(const std::string& id) { id_ = id; }
331  const std::string& getSequenceId() const { return sequenceId_; }
332  void setSequenceId(const std::string& sid) { sequenceId_ = sid; }
333  const std::string& getSource() const { return source_; }
334  void setSource(const std::string& source) { source_ = source; }
335  const std::string& getType() const { return type_; }
336  void setType(const std::string& type) { type_ = type; }
// Coordinates are read from the underlying range: begin() and end().
337  const size_t getStart() const { return range_.begin(); }
338  const size_t getEnd() const { return range_.end(); }
// Strand information is delegated to the underlying SeqRange.
339  bool isStranded() const { return range_.isStranded(); }
340  bool isNegativeStrand() const { return range_.isNegativeStrand(); }
// Flips the strand of the underlying range (SeqRange::invert() leaves
// unstranded ranges untouched).
341  void invert() {
342  range_.invert();
343  }
// Score accessors; the getter hands out a reference to the member.
344  const double& getScore() const { return score_; }
345  void setScore(double score) { score_ = score; }
347  const std::string& getAttribute(const std::string& attribute) const {
348  std::map<std::string, std::string>::iterator it = attributes_.find(attribute);
349  if (it != attributes_.end())
350  return it->second;
351  else
352  return NO_ATTRIBUTE_SET;
353  }
355  std::string& getAttribute(const std::string& attribute) {
// std::map::operator[] inserts an empty string when the name is unknown,
// so this overload never fails but may grow the attribute map.
356  return attributes_[attribute];
357  }
359  void setAttribute(const std::string& attribute, const std::string& value) {
// Creates the attribute if needed, otherwise overwrites its value.
360  attributes_[attribute] = value;
361  }
363  std::set< std::string > getAttributeList() const {
364  std::set< std::string > d;
365  for (std::map<std::string, std::string>::iterator it = attributes_.begin() ; it != attributes_.end() ; it++) {
366  d.insert(it->first);
367  }
368  return d;
369  }
371  void removeAttribute(const std::string& attribute) {
372  std::map<std::string, std::string>::iterator it = attributes_.find(attribute);
373  if (it != attributes_.end()) {
374  attributes_.erase(it);
375  }
376  }
378  SeqRange getRange() const {
// Hands out a copy, so callers cannot modify the stored range through it.
379  return SeqRange(range_);
380  }
382  bool overlap(const SequenceFeature& feat) const {
383  if (feat.getSequenceId() == sequenceId_) {
384  return range_.overlap(feat.getRange());
385  }
386  return false;
387  }
389  bool overlap(const SeqRange& range) const {
// Coordinates only: no sequence identifier is compared here.
390  return range_.overlap(range);
391  }
393  virtual bool includes(const SeqRange& range) const {
// True when the stored range contains the argument.
394  return range_.contains(range);
395  }
397  virtual bool isIncludedIn(const SeqRange& range) const {
// Mirror of includes(): the argument must contain the stored range.
398  return range.contains(range_);
399  }
401  //const SequenceFeatureSet& getSubFeatures() const { return subFeatures; }
402  //SequenceFeatureSet& getSubFeatures() { return subFeatures; }
404 };
418 {
419  private:
// Owning pointers: addFeature() stores clones and clear() deletes every
// element.
420  std::vector<SequenceFeature*> features_;
422  public:
// Releases all stored features through clear().
425  virtual ~SequenceFeatureSet() { clear(); }
428  features_()
429  {
430  for (std::vector<SequenceFeature*>::const_iterator it = sfs.features_.begin();
431  it != sfs.features_.end();
432  ++it) {
433  features_.push_back((**it).clone());
434  }
435  }
437  {
438  clear();
439  for (std::vector<SequenceFeature*>::const_iterator it = sfs.features_.begin();
440  it != sfs.features_.end();
441  ++it) {
442  features_.push_back((**it).clone());
443  }
444  return *this;
445  }
447  public:
451  void clear()
452  {
453  for (std::vector<SequenceFeature*>::iterator it = features_.begin();
454  it != features_.end();
455  ++it) {
456  delete *it;
457  }
458  features_.clear();
459  }
465  const SequenceFeature& getFeature(size_t i) const {
// No bounds check: i must be smaller than getNumberOfFeatures().
466  return *features_[i];
467  }
474  const SequenceFeature& operator[](size_t i) const {
// Same as getFeature(i), also without bounds check.
475  return *features_[i];
476  }
// Number of features currently stored.
481  size_t getNumberOfFeatures() const { return features_.size(); }
// True when no feature is stored.
486  bool isEmpty() const { return features_.size() == 0; }
493  void addFeature(const SequenceFeature& feature) {
// A clone is stored, so the set never takes ownership of the argument.
494  features_.push_back(feature.clone());
495  }
500  std::set<std::string> getSequences() const {
501  std::set<std::string> seqIds;
502  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
503  it != features_.end();
504  ++it) {
505  seqIds.insert((**it).getSequenceId());
506  }
507  return seqIds;
508  }
513  std::set<std::string> getTypes() const {
514  std::set<std::string> types;
515  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
516  it != features_.end();
517  ++it) {
518  types.insert((**it).getType());
519  }
520  return types;
521  }
529  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
530  it != features_.end();
531  ++it) {
532  coords.addRange((**it).getRange());
533  }
534  }
542  void fillRangeCollectionForSequence(const std::string& seqId, RangeCollection<size_t>& coords) const {
543  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
544  it != features_.end();
545  ++it) {
546  if ((**it).getSequenceId() == seqId) {
547  coords.addRange((**it).getRange());
548  }
549  }
550  }
556  SequenceFeatureSet* getSubsetForType(const std::string& type) const {
557  SequenceFeatureSet* subset = new SequenceFeatureSet();
558  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
559  it != features_.end();
560  ++it) {
561  if ((**it).getType() == type) {
562  subset->addFeature(**it);
563  }
564  }
565  return subset;
566  }
572  SequenceFeatureSet* getSubsetForTypes(const std::vector<std::string>& types) const {
573  SequenceFeatureSet* subset = new SequenceFeatureSet();
574  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
575  it != features_.end();
576  ++it) {
577  if (std::find(types.begin(), types.end(), (**it).getType()) != types.end()) {
578  subset->addFeature(**it);
579  }
580  }
581  return subset;
582  }
588  SequenceFeatureSet* getSubsetForSequence(const std::string& id) const {
589  SequenceFeatureSet* subset = new SequenceFeatureSet();
590  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
591  it != features_.end();
592  ++it) {
593  if ((**it).getSequenceId() == id) {
594  subset->addFeature(**it);
595  }
596  }
597  return subset;
598  }
604  SequenceFeatureSet* getSubsetForSequences(const std::vector<std::string>& ids) const {
605  SequenceFeatureSet* subset = new SequenceFeatureSet();
606  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
607  it != features_.end();
608  ++it) {
609  if (std::find(ids.begin(), ids.end(), (**it).getSequenceId()) != ids.end()) {
610  subset->addFeature(**it);
611  }
612  }
613  return subset;
614  }
622  SequenceFeatureSet* getSubsetForRange(const SeqRange& range, bool complete) const {
623  SequenceFeatureSet* subset = new SequenceFeatureSet();
624  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
625  it != features_.end();
626  ++it) {
627  if (complete) {
628  if ((**it).isIncludedIn(range))
629  subset->addFeature(**it);
630  } else {
631  if ((**it).overlap(range))
632  subset->addFeature(**it);
633  }
634  }
635  return subset;
636  }
638 };
640 } //end of namespace bpp
642 #endif //_SEQUENCEFEATURE_H_
