bpp-seq-omics  2.4.1
SequenceFeature.h
Go to the documentation of this file.
1 //
2 // File: SequenceFeature.h
3 // Created by: Julien Dutheil
4 // Created on: Mon Nov 21 2011
5 //
6 
7 /*
8 Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
9 
10 This software is a computer program whose purpose is to provide classes
11 for sequences analysis.
12 
13 This software is governed by the CeCILL license under French law and
14 abiding by the rules of distribution of free software. You can use,
15 modify and/ or redistribute the software under the terms of the CeCILL
16 license as circulated by CEA, CNRS and INRIA at the following URL
17 "http://www.cecill.info".
18 
19 As a counterpart to the access to the source code and rights to copy,
20 modify and redistribute granted by the license, users are provided only
21 with a limited warranty and the software's author, the holder of the
22 economic rights, and the successive licensors have only limited
23 liability.
24 
25 In this respect, the user's attention is drawn to the risks associated
26 with loading, using, modifying and/or developing or reproducing the
27 software by the user in light of its specific status of free software,
28 that may mean that it is complicated to manipulate, and that also
29 therefore means that it is reserved for developers and experienced
30 professionals having in-depth computer knowledge. Users are therefore
31 encouraged to load and test the software's suitability as regards their
32 requirements in conditions enabling the security of their systems and/or
33 data to be ensured and, more generally, to use and operate it in the
34 same conditions as regards security.
35 
36 The fact that you are presently reading this means that you have had
37 knowledge of the CeCILL license and that you accept its terms.
38 */
39 
40 #ifndef _SEQUENCEFEATURE_H_
41 #define _SEQUENCEFEATURE_H_
42 
43 //From the STL:
44 #include <string>
45 #include <map>
46 #include <set>
47 #include <algorithm>
48 
49 //From bpp-core:
50 #include <Bpp/Clonable.h>
51 #include <Bpp/Numeric/Range.h>
52 
67 namespace bpp
68 {
69 
75 class SeqRange:
76  public Range<size_t>
77 {
78  private:
79  char strand_;
80 
81  public:
87  SeqRange(size_t a, size_t b, char strand = '.'):
88  Range<size_t>(a, b), strand_(strand) {
89  if (strand != '+' && strand != '-' && strand != '?' && strand != '.')
90  strand_ = '.';
91  }
92 
97  SeqRange(const Range<size_t> range, char strand = '.'):
98  Range<size_t>(range), strand_(strand) {
99  if (strand != '+' && strand != '-' && strand != '?' && strand != '.')
100  strand_ = '.';
101  }
102 
103  SeqRange* clone() const { return new SeqRange(*this); }
104 
105  public:
106  virtual char getStrand() const { return strand_; }
107 
108  virtual bool isNegativeStrand() const { return strand_ == '-'; }
109  virtual bool isStranded() const { return strand_ == '+' || strand_ == '-'; }
110  virtual void invert() {
111  if (isStranded()) {
112  if (isNegativeStrand()) {
113  strand_ = '+';
114  } else {
115  strand_ = '-';
116  }
117  }
118  }
119 
120 };
121 
137  public virtual Clonable
138 {
139  public:
/**
 * String constant returned by BasicSequenceFeature::getAttribute() const
 * when the requested attribute is not present. Its value is defined
 * outside this header.
 */
140  static const std::string NO_ATTRIBUTE_SET;
141 
142  public:
/**
 * @return A pointer to a copy of this feature. BasicSequenceFeature
 *         allocates the copy with new.
 */
143  virtual SequenceFeature* clone() const = 0;
144 
145  public:
/**
 * @return The id of this feature.
 */
149  virtual const std::string& getId() const = 0;
/**
 * @param id The new id of this feature.
 */
153  virtual void setId(const std::string& id) = 0;
154 
/**
 * @return The sequence id of this feature. BasicSequenceFeature::overlap()
 *         only compares the ranges of two features when their sequence ids
 *         are equal.
 */
158  virtual const std::string& getSequenceId() const = 0;
/**
 * @param id The new sequence id.
 */
162  virtual void setSequenceId(const std::string& id) = 0;
163 
/**
 * @return The source string of this feature. NOTE(review): its meaning is
 *         not defined in this header -- presumably the program or database
 *         that produced the feature; confirm.
 */
167  virtual const std::string& getSource() const = 0;
/**
 * @param source The new source string.
 */
171  virtual void setSource(const std::string& source) = 0;
172 
/**
 * @return The type string of this feature. SequenceFeatureSet selects
 *         features by comparing this string for equality.
 */
176  virtual const std::string& getType() const = 0;
/**
 * @param type The new type string.
 */
180  virtual void setType(const std::string& type) = 0;
181 
/**
 * @return The start coordinate. size() is computed as getEnd() - getStart().
 */
185  virtual const size_t getStart() const = 0;
186 
/**
 * @return The end coordinate. size() is computed as getEnd() - getStart().
 */
190  virtual const size_t getEnd() const = 0;
191 
195  virtual const size_t size() const {
196  return getEnd() - getStart();
197  };
198 
/**
 * @return Whether the feature carries strand information. For SeqRange this
 *         means a strand tag of '+' or '-'.
 */
202  virtual bool isStranded() const = 0;
203 
/**
 * @return Whether the feature is on the negative strand. For SeqRange this
 *         means a strand tag of '-'.
 */
207  virtual bool isNegativeStrand() const = 0;
208 
/**
 * Invert the strand of the feature. BasicSequenceFeature forwards this call
 * to SeqRange::invert(), which swaps '+' and '-' and leaves unstranded
 * ranges unchanged.
 */
212  virtual void invert() = 0;
213 
/**
 * @return The coordinates and strand of the feature, returned by value.
 */
217  virtual SeqRange getRange() const = 0;
218 
222  virtual bool isEmpty() const { return size() == 0; }
223 
227  virtual bool isPoint() const { return size() == 1; }
228 
/**
 * @param feat Another feature.
 * @return Whether this feature overlaps feat. BasicSequenceFeature returns
 *         false when the two sequence ids differ.
 */
232  virtual bool overlap(const SequenceFeature& feat) const = 0;
233 
/**
 * @param range A range of coordinates.
 * @return Whether this feature overlaps range.
 */
237  virtual bool overlap(const SeqRange& range) const = 0;
238 
/**
 * @param range A range of coordinates.
 * @return Whether this feature contains range.
 */
242  virtual bool includes(const SeqRange& range) const = 0;
243 
/**
 * @param range A range of coordinates.
 * @return Whether range contains this feature.
 */
247  virtual bool isIncludedIn(const SeqRange& range) const = 0;
248 
/**
 * @return The score of this feature. BasicSequenceFeature uses -1 as the
 *         default value.
 */
252  virtual const double& getScore() const = 0;
/**
 * @param score The new score.
 */
256  virtual void setScore(double score) = 0;
257 
/**
 * @param attribute The name of the attribute to read.
 * @return The value of the attribute (read-only). BasicSequenceFeature
 *         returns NO_ATTRIBUTE_SET when the attribute is not present.
 */
262  virtual const std::string& getAttribute(const std::string& attribute) const = 0;
263 
/**
 * @param attribute The name of the attribute to access.
 * @return A modifiable reference to the value of the attribute.
 *         BasicSequenceFeature creates an empty value when the attribute is
 *         not present yet.
 */
268  virtual std::string& getAttribute(const std::string& attribute) = 0;
269 
/**
 * @return The names of all attributes of this feature.
 */
273  virtual std::set< std::string > getAttributeList() const = 0;
274 
/**
 * Set the value of an attribute.
 *
 * @param attribute The name of the attribute.
 * @param value The value to store.
 */
281  virtual void setAttribute(const std::string& attribute, const std::string& value) = 0;
282 
/**
 * @param attribute The name of the attribute to remove.
 */
286  virtual void removeAttribute(const std::string& attribute) = 0;
287 
288 };
289 
296  public SequenceFeature
297 {
298  protected:
299  std::string id_;
300  std::string sequenceId_;
301  std::string source_;
302  std::string type_;
304  double score_;
305  mutable std::map<std::string, std::string> attributes_;
306  //SequenceFeatureSet subFeatures_;
307 
308  public:
309  BasicSequenceFeature(): id_(""), sequenceId_(""), source_(""), type_(""), range_(0, 0, '.'), score_(-1), attributes_() {}
310 
312  const std::string& id,
313  const std::string& seqId,
314  const std::string& source,
315  const std::string& type,
316  size_t start,
317  size_t end,
318  char strand,
319  double score = -1):
320  id_(id), sequenceId_(seqId), source_(source),
321  type_(type), range_(start, end, strand), score_(score),
322  attributes_()
323  //attributes_(), subFeatures_()
324  {}
325 
326  virtual BasicSequenceFeature* clone() const { return new BasicSequenceFeature(*this); }
327 
328  public:
329  const std::string& getId() const { return id_; }
330  void setId(const std::string& id) { id_ = id; }
331  const std::string& getSequenceId() const { return sequenceId_; }
332  void setSequenceId(const std::string& sid) { sequenceId_ = sid; }
333  const std::string& getSource() const { return source_; }
334  void setSource(const std::string& source) { source_ = source; }
335  const std::string& getType() const { return type_; }
336  void setType(const std::string& type) { type_ = type; }
337  const size_t getStart() const { return range_.begin(); }
338  const size_t getEnd() const { return range_.end(); }
339  bool isStranded() const { return range_.isStranded(); }
340  bool isNegativeStrand() const { return range_.isNegativeStrand(); }
341  void invert() {
342  range_.invert();
343  }
344  const double& getScore() const { return score_; }
345  void setScore(double score) { score_ = score; }
346 
347  const std::string& getAttribute(const std::string& attribute) const {
348  std::map<std::string, std::string>::iterator it = attributes_.find(attribute);
349  if (it != attributes_.end())
350  return it->second;
351  else
352  return NO_ATTRIBUTE_SET;
353  }
354 
355  std::string& getAttribute(const std::string& attribute) {
356  return attributes_[attribute];
357  }
358 
359  void setAttribute(const std::string& attribute, const std::string& value) {
360  attributes_[attribute] = value;
361  }
362 
363  std::set< std::string > getAttributeList() const {
364  std::set< std::string > d;
365  for (std::map<std::string, std::string>::iterator it = attributes_.begin() ; it != attributes_.end() ; it++) {
366  d.insert(it->first);
367  }
368  return d;
369  }
370 
371  void removeAttribute(const std::string& attribute) {
372  std::map<std::string, std::string>::iterator it = attributes_.find(attribute);
373  if (it != attributes_.end()) {
374  attributes_.erase(it);
375  }
376  }
377 
378  SeqRange getRange() const {
379  return SeqRange(range_);
380  }
381 
382  bool overlap(const SequenceFeature& feat) const {
383  if (feat.getSequenceId() == sequenceId_) {
384  return range_.overlap(feat.getRange());
385  }
386  return false;
387  }
388 
389  bool overlap(const SeqRange& range) const {
390  return range_.overlap(range);
391  }
392 
393  virtual bool includes(const SeqRange& range) const {
394  return range_.contains(range);
395  }
396 
397  virtual bool isIncludedIn(const SeqRange& range) const {
398  return range.contains(range_);
399  }
400 
401  //const SequenceFeatureSet& getSubFeatures() const { return subFeatures; }
402  //SequenceFeatureSet& getSubFeatures() { return subFeatures; }
403 
404 };
405 
418 {
419  private:
420  std::vector<SequenceFeature*> features_;
421 
422  public:
424 
425  virtual ~SequenceFeatureSet() { clear(); }
426 
428  features_()
429  {
430  for (std::vector<SequenceFeature*>::const_iterator it = sfs.features_.begin();
431  it != sfs.features_.end();
432  ++it) {
433  features_.push_back((**it).clone());
434  }
435  }
437  {
438  clear();
439  for (std::vector<SequenceFeature*>::const_iterator it = sfs.features_.begin();
440  it != sfs.features_.end();
441  ++it) {
442  features_.push_back((**it).clone());
443  }
444  return *this;
445  }
446 
447  public:
451  void clear()
452  {
453  for (std::vector<SequenceFeature*>::iterator it = features_.begin();
454  it != features_.end();
455  ++it) {
456  delete *it;
457  }
458  features_.clear();
459  }
460 
465  const SequenceFeature& getFeature(size_t i) const {
466  return *features_[i];
467  }
468 
474  const SequenceFeature& operator[](size_t i) const {
475  return *features_[i];
476  }
477 
481  size_t getNumberOfFeatures() const { return features_.size(); }
482 
486  bool isEmpty() const { return features_.size() == 0; }
487 
493  void addFeature(const SequenceFeature& feature) {
494  features_.push_back(feature.clone());
495  }
496 
500  std::set<std::string> getSequences() const {
501  std::set<std::string> seqIds;
502  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
503  it != features_.end();
504  ++it) {
505  seqIds.insert((**it).getSequenceId());
506  }
507  return seqIds;
508  }
509 
513  std::set<std::string> getTypes() const {
514  std::set<std::string> types;
515  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
516  it != features_.end();
517  ++it) {
518  types.insert((**it).getType());
519  }
520  return types;
521  }
522 
529  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
530  it != features_.end();
531  ++it) {
532  coords.addRange((**it).getRange());
533  }
534  }
535 
542  void fillRangeCollectionForSequence(const std::string& seqId, RangeCollection<size_t>& coords) const {
543  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
544  it != features_.end();
545  ++it) {
546  if ((**it).getSequenceId() == seqId) {
547  coords.addRange((**it).getRange());
548  }
549  }
550  }
551 
556  SequenceFeatureSet* getSubsetForType(const std::string& type) const {
557  SequenceFeatureSet* subset = new SequenceFeatureSet();
558  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
559  it != features_.end();
560  ++it) {
561  if ((**it).getType() == type) {
562  subset->addFeature(**it);
563  }
564  }
565  return subset;
566  }
567 
572  SequenceFeatureSet* getSubsetForTypes(const std::vector<std::string>& types) const {
573  SequenceFeatureSet* subset = new SequenceFeatureSet();
574  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
575  it != features_.end();
576  ++it) {
577  if (std::find(types.begin(), types.end(), (**it).getType()) != types.end()) {
578  subset->addFeature(**it);
579  }
580  }
581  return subset;
582  }
583 
588  SequenceFeatureSet* getSubsetForSequence(const std::string& id) const {
589  SequenceFeatureSet* subset = new SequenceFeatureSet();
590  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
591  it != features_.end();
592  ++it) {
593  if ((**it).getSequenceId() == id) {
594  subset->addFeature(**it);
595  }
596  }
597  return subset;
598  }
599 
604  SequenceFeatureSet* getSubsetForSequences(const std::vector<std::string>& ids) const {
605  SequenceFeatureSet* subset = new SequenceFeatureSet();
606  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
607  it != features_.end();
608  ++it) {
609  if (std::find(ids.begin(), ids.end(), (**it).getSequenceId()) != ids.end()) {
610  subset->addFeature(**it);
611  }
612  }
613  return subset;
614  }
615 
622  SequenceFeatureSet* getSubsetForRange(const SeqRange& range, bool complete) const {
623  SequenceFeatureSet* subset = new SequenceFeatureSet();
624  for (std::vector<SequenceFeature*>::const_iterator it = features_.begin();
625  it != features_.end();
626  ++it) {
627  if (complete) {
628  if ((**it).isIncludedIn(range))
629  subset->addFeature(**it);
630  } else {
631  if ((**it).overlap(range))
632  subset->addFeature(**it);
633  }
634  }
635  return subset;
636  }
637 
638 };
639 
640 } //end of namespace bpp
641 
642 #endif //_SEQUENCEFEATURE_H_
643 
A very simple implementation of the SequenceFeature class.
const std::string & getSource() const
void setAttribute(const std::string &attribute, const std::string &value)
Set the value of an attribute.
const std::string & getSequenceId() const
const std::string & getAttribute(const std::string &attribute) const
const std::string & getId() const
void removeAttribute(const std::string &attribute)
virtual BasicSequenceFeature * clone() const
const size_t getStart() const
std::string & getAttribute(const std::string &attribute)
std::map< std::string, std::string > attributes_
std::set< std::string > getAttributeList() const
virtual bool isIncludedIn(const SeqRange &range) const
const double & getScore() const
bool overlap(const SeqRange &range) const
void setId(const std::string &id)
void setSequenceId(const std::string &sid)
const std::string & getType() const
BasicSequenceFeature(const std::string &id, const std::string &seqId, const std::string &source, const std::string &type, size_t start, size_t end, char strand, double score=-1)
const size_t getEnd() const
void setSource(const std::string &source)
void setType(const std::string &type)
void setScore(double score)
virtual bool includes(const SeqRange &range) const
bool overlap(const SequenceFeature &feat) const
virtual void addRange(const Range< T > &r)=0
T end() const
bool contains(const Range &r) const
T begin() const
bool overlap(const Range &r) const
A coordinate range on a sequence. Stores coordinates as a Range<size_t> object, but also keeps the strand information.
SeqRange(size_t a, size_t b, char strand='.')
SeqRange * clone() const
virtual bool isStranded() const
SeqRange(const Range< size_t > range, char strand='.')
virtual void invert()
virtual char getStrand() const
virtual bool isNegativeStrand() const
A simple ensemble of sequence features.
void fillRangeCollectionForSequence(const std::string &seqId, RangeCollection< size_t > &coords) const
Get all coordinates of features for a given sequence. All ranges are added to a RangeCollection container...
SequenceFeatureSet * getSubsetForRange(const SeqRange &range, bool complete) const
void addFeature(const SequenceFeature &feature)
Add a feature to the container. The feature will be copied and the copy owned by the container.
void fillRangeCollection(RangeCollection< size_t > &coords) const
Get all coordinates of features. All ranges are added to a RangeCollection container,...
std::vector< SequenceFeature * > features_
SequenceFeatureSet * getSubsetForType(const std::string &type) const
SequenceFeatureSet & operator=(const SequenceFeatureSet &sfs)
SequenceFeatureSet * getSubsetForSequences(const std::vector< std::string > &ids) const
std::set< std::string > getTypes() const
void clear()
Delete all features in this set.
const SequenceFeature & operator[](size_t i) const
SequenceFeatureSet(const SequenceFeatureSet &sfs)
SequenceFeatureSet * getSubsetForTypes(const std::vector< std::string > &types) const
SequenceFeatureSet * getSubsetForSequence(const std::string &id) const
size_t getNumberOfFeatures() const
const SequenceFeature & getFeature(size_t i) const
std::set< std::string > getSequences() const
The base interface for sequence features.
virtual std::set< std::string > getAttributeList() const =0
virtual void setId(const std::string &id)=0
virtual SeqRange getRange() const =0
virtual const std::string & getType() const =0
virtual void invert()=0
virtual bool isPoint() const
virtual bool includes(const SeqRange &range) const =0
virtual SequenceFeature * clone() const =0
virtual void removeAttribute(const std::string &attribute)=0
virtual const std::string & getSource() const =0
virtual const size_t getEnd() const =0
virtual void setSequenceId(const std::string &id)=0
virtual bool isStranded() const =0
virtual bool overlap(const SequenceFeature &feat) const =0
virtual const std::string & getAttribute(const std::string &attribute) const =0
virtual bool isEmpty() const
static const std::string NO_ATTRIBUTE_SET
virtual void setAttribute(const std::string &attribute, const std::string &value)=0
Set the value of an attribute.
virtual void setScore(double score)=0
virtual const std::string & getId() const =0
virtual bool isIncludedIn(const SeqRange &range) const =0
virtual const size_t getStart() const =0
virtual const std::string & getSequenceId() const =0
virtual const double & getScore() const =0
virtual void setSource(const std::string &source)=0
virtual void setType(const std::string &type)=0
virtual const size_t size() const
virtual std::string & getAttribute(const std::string &attribute)=0
virtual bool isNegativeStrand() const =0
virtual bool overlap(const SeqRange &range) const =0