bpp-seq3  3.0.0
Fasta.h
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
5 #ifndef BPP_SEQ_IO_FASTA_H
6 #define BPP_SEQ_IO_FASTA_H
7 
8 
9 #include "../Container/SequenceContainer.h"
10 #include "../Container/VectorSequenceContainer.h"
11 #include "../Sequence.h"
12 #include "AbstractIAlignment.h"
13 #include "AbstractISequence.h"
14 #include "AbstractOSequence.h"
15 #include "ISequenceStream.h"
16 #include "OSequenceStream.h"
17 #include "SequenceFileIndex.h"
18 
19 namespace bpp
20 {
// The fasta sequence file format.
//
// Reader/writer for FASTA data. It combines the abstract sequence-input,
// alignment-input and sequence-output bases listed below with the
// ISequenceStream / OSequenceStream interfaces. Only the members declared
// here are documented; the out-of-line definitions are not part of this header.
26 class Fasta :
27  public AbstractISequence,
28  public AbstractIAlignment,
29  public AbstractOSequence2,
30  public virtual ISequenceStream,
31  public virtual OSequenceStream
32 {
33 protected:
 // The maximum number of chars to be written on a line.
37  unsigned int charsByLine_; // Number of char by line (output only)
38  bool checkNames_; // If names must be checked in container
39  bool extended_; // If using HUPO-PSI extensions
40  bool strictNames_; // If name is between '>' and first space
41 
42 public:
 // Build a new Fasta object.
 //
 // charsByLine         stored in charsByLine_ (default 100).
 // checkSequenceNames  stored in checkNames_ (default true).
 // extended            stored in extended_ (default false).
 // strictSequenceNames stored in strictNames_ (default false).
51  Fasta(unsigned int charsByLine = 100, bool checkSequenceNames = true, bool extended = false, bool strictSequenceNames = false) : charsByLine_(charsByLine), checkNames_(checkSequenceNames), extended_(extended), strictNames_(strictSequenceNames) {}
52 
53  // Class destructor
54  virtual ~Fasta() {}
55 
56 public:
 // Append sequences to a container from a stream.
 // input: stream to read from; sc: container receiving the sequences.
62  void appendSequencesFromStream(std::istream& input, SequenceContainerInterface& sc) const override;
 // Append sequences to a container from a stream (alignment flavour).
 // Simply forwards to appendSequencesFromStream() with the same arguments.
70  void appendAlignmentFromStream(std::istream& input, SequenceContainerInterface& sc) const override
71  {
72  appendSequencesFromStream(input, sc); // This may raise an exception if sequences are not aligned!
73  }
 // Write a container to a stream.
81  void writeSequences(std::ostream& output, const SequenceContainerInterface& sc) const override;
82 
 // Write a container to a file.
 // Forwards path, sc and overwrite (default true) unchanged to the base-class
 // implementation.
 // NOTE(review): the call is qualified with AbstractOSequence while the base
 // list above names AbstractOSequence2 - presumably AbstractOSequence is
 // reachable through it; confirm against AbstractOSequence.h.
83  void writeSequences(const std::string& path, const SequenceContainerInterface& sc, bool overwrite = true) const override
84  {
85  AbstractOSequence::writeSequences(path, sc, overwrite);
86  }
 // Returns the fixed format name string "FASTA file".
94  const std::string getFormatName() const override { return "FASTA file"; }
95 
 // Returns a fixed one-line, human-readable description of the format.
96  const std::string getFormatDescription() const override
97  {
98  return "Sequence name (preceded by >) in one line, sequence content, no comments";
99  }
 // Stream-style reading of a single sequence.
 // NOTE(review): presumably fills seq from input and returns whether a
 // sequence was read - the definition is not in this header; confirm.
107  bool nextSequence(std::istream& input, Sequence& seq) const override;
 // Stream-style writing of a single sequence to output.
 // NOTE(review): the definition is not in this header; confirm exact layout.
115  void writeSequence(std::ostream& output, const Sequence& seq) const override;
 // Returns the current value of checkNames_.
121  bool checkNames() const { return checkNames_; }
122 
 // Tell whether the sequence names should be checked when reading from files.
 // yn: new value assigned to checkNames_.
128  void checkNames(bool yn) { checkNames_ = yn; }
129 
 // Returns the current value of strictNames_.
133  bool strictNames() const { return strictNames_; }
134 
 // Tell whether the sequence name should be restricted to the first
 // non-blank characters.
 // yn: new value assigned to strictNames_.
140  void strictNames(bool yn) { strictNames_ = yn; }
141 
 // The SequenceFileIndex class for Fasta format (nested class body).
 // NOTE(review): the declaration line that introduces this nested class is
 // absent from this listing (the brace below has no visible header); restore
 // it from the original header before compiling.
147  {
148 public:
 // Build the index given a path to the file.
 // Convenience overload: calls build(path, false).
151  void build(const std::string& path)
152  {
153  build(path, false);
154  }
 // Build the index given a path to the file, with an explicit
 // strictSequenceNames flag.
 // NOTE(review): presumably the flag has the same meaning as
 // Fasta::strictNames_; confirm in the definition.
161  void build(const std::string& path, const bool strictSequenceNames);
 // Get the position of a Sequence given its ID.
162  std::streampos getSequencePosition(const std::string& id) const;
 // Get the number of sequences, i.e. the number of entries in index_.
163  size_t getNumberOfSequences() const
164  {
165  return index_.size();
166  }
 // Read the index from a file.
170  void read(const std::string& path);
 // Write the index to a file.
174  void write(const std::string& path);
 // Get a sequence given its ID.
 // seqid: identifier to look up; seq: object receiving the result;
 // path: file to read from (presumably the indexed FASTA file - confirm).
178  void getSequence(const std::string& seqid, Sequence& seq, const std::string& path) const;
 // Same as above with an explicit strictSequenceNames flag.
179  void getSequence(const std::string& seqid, Sequence& seq, const std::string& path, const bool strictSequenceNames) const;
180 
181 private:
 // Maps a string key to a stream position
 // (presumably sequence ID -> offset in the file; confirm in the definition).
182  std::map<std::string, std::streampos> index_;
 // NOTE(review): presumably the size of the indexed file; not used in this header.
183  std::streampos fileSize_;
184  };
185 };
186 } // end of namespace bpp.
187 #endif // BPP_SEQ_IO_FASTA_H
Partial implementation of the IAlignment interface, dedicated to alignment readers.
Partial implementation of the ISequence interface.
Partial implementation of the OAlignment interface.
void writeSequences(std::ostream &output, const SequenceContainerInterface &sc) const override=0
Write a container to a stream.
The SequenceFileIndex class for Fasta format.
Definition: Fasta.h:147
void read(const std::string &path)
Read the index from a file.
Definition: Fasta.cpp:243
void build(const std::string &path)
Build the index given a path to the file.
Definition: Fasta.h:151
size_t getNumberOfSequences() const
Get the number of sequences.
Definition: Fasta.h:163
std::map< std::string, std::streampos > index_
Definition: Fasta.h:182
std::streampos fileSize_
Definition: Fasta.h:183
void write(const std::string &path)
Write the index to a file.
Definition: Fasta.cpp:260
void getSequence(const std::string &seqid, Sequence &seq, const std::string &path) const
Get a sequence given its ID.
Definition: Fasta.cpp:270
std::streampos getSequencePosition(const std::string &id) const
Get the position of a Sequence given its ID.
Definition: Fasta.cpp:233
The fasta sequence file format.
Definition: Fasta.h:32
unsigned int charsByLine_
The maximum number of chars to be written on a line.
Definition: Fasta.h:37
void appendAlignmentFromStream(std::istream &input, SequenceContainerInterface &sc) const override
Append sequences to a container from a stream.
Definition: Fasta.h:70
const std::string getFormatDescription() const override
Definition: Fasta.h:96
bool strictNames() const
Definition: Fasta.h:133
void checkNames(bool yn)
Tell whether the sequence names should be checked when reading from files.
Definition: Fasta.h:128
bool nextSequence(std::istream &input, Sequence &seq) const override
Definition: Fasta.cpp:18
bool extended_
Definition: Fasta.h:39
void appendSequencesFromStream(std::istream &input, SequenceContainerInterface &sc) const override
Append sequences to a container from a stream.
Definition: Fasta.cpp:119
Fasta(unsigned int charsByLine=100, bool checkSequenceNames=true, bool extended=false, bool strictSequenceNames=false)
Build a new Fasta object.
Definition: Fasta.h:51
virtual ~Fasta()
Definition: Fasta.h:54
void writeSequences(const std::string &path, const SequenceContainerInterface &sc, bool overwrite=true) const override
Write a container to a file.
Definition: Fasta.h:83
bool checkNames() const
Definition: Fasta.h:121
bool checkNames_
Definition: Fasta.h:38
bool strictNames_
Definition: Fasta.h:40
void strictNames(bool yn)
Tell whether the sequence name should be restricted to the first non-blank characters.
Definition: Fasta.h:140
void writeSequence(std::ostream &output, const Sequence &seq) const override
Definition: Fasta.cpp:88
void writeSequences(std::ostream &output, const SequenceContainerInterface &sc) const override
Write a container to a stream.
Definition: Fasta.cpp:178
const std::string getFormatName() const override
Definition: Fasta.h:94
Index to retrieve Sequence in a file.
A basic implementation of the Sequence interface.
Definition: Sequence.h:117
The ISequenceStream interface.
The OSequenceStream interface.
The SequenceContainer interface.
This alphabet is used to deal with NumericAlphabet.