10 #include "../StringSequenceTools.h"
21 throw IOException(
"Fasta::nextSequence: can't read from istream input");
26 string linebuffer =
"";
30 c =
static_cast<char>(input.peek());
41 getline(input, linebuffer);
45 seqname = string(linebuffer.begin() + 1, linebuffer.end());
56 bool res = (!input.eof());
58 if (strictNames_ || extended_)
60 size_t pos = seqname.find_first_of(
" \t\n");
62 if (pos != string::npos)
64 seqcmt = seqname.substr(pos + 1);
65 seqname = seqname.substr(0, pos);
77 seqcmts.push_back(seqcmt);
91 throw IOException(
"Fasta::writeSequence: can't write to ostream output");
97 for (
unsigned int i = 0; i < seq.
getComments().size(); i++)
105 for (
size_t i = 0; i < seq.
size(); ++i)
108 if (buffer.size() >= charsByLine_)
110 output << string(buffer.begin(), buffer.begin() + charsByLine_) << endl;
111 buffer.erase(0, charsByLine_);
114 output << string(buffer.begin(), buffer.end()) << endl;
122 throw IOException(
"Fasta::appendFromStream: can't read from istream input");
129 while (!input.eof() && hasSeq)
134 if (extended_ && c ==
'#')
142 if (extended_ && header)
146 line.erase(line.begin());
147 cmts.push_back(line);
160 if (c ==
'>' && last_c ==
'\n')
165 auto tmpseq = make_unique<Sequence>(
"",
"", alphaPtr);
166 hasSeq = nextSequence(input, *tmpseq);
170 if (extended_ && cmts.size())
181 throw IOException(
"Fasta::write: can't write to ostream output");
186 for (
size_t i = 0; i < sc.
getComments().size(); ++i)
195 for (
size_t i = 0; i < names.size(); ++i)
197 writeSequence(output, sc.
sequence(names[i]));
208 std::ifstream f_in(path.c_str());
210 f_in.seekg(0, std::ios::end);
211 fileSize_ = f_in.tellg();
213 f_in.seekg(0, std::ios::beg);
214 streampos pos = f_in.tellg();
216 std::string seq_id =
"";
221 pos =
static_cast<int>(f_in.tellg()) - 1;
222 std::getline(f_in, seq_id);
223 if (strictSequenceNames)
225 seq_id = seq_id.substr(0, seq_id.find_first_of(
" \t\n"));
227 index_[seq_id] = pos;
235 std::map<std::string, streampos>::const_iterator it = index_.find(
id);
236 if (it != index_.end())
240 throw Exception(
"Sequence not found: " +
id);
245 std::ifstream f_in(path.c_str());
246 std::string line_buffer =
"";
249 std::getline(f_in, line_buffer);
262 std::ofstream f_out(path.c_str());
263 for (std::map<std::string, streampos>::const_iterator it = index_.begin(); it != index_.end(); ++it)
272 getSequence(seqid, seq, path,
false);
279 streampos seq_pos = this->getSequencePosition(seqid);
280 std::ifstream fasta(path.c_str());
281 fasta.seekg(seq_pos);
const std::string & getName() const override
Get the name of this sequence.
void setName(const std::string &name) override
Set the name of this sequence.
size_t size() const override
Get the number of elements in the list.
void read(const std::string &path)
Read the index from a file.
void build(const std::string &path)
Build the index given a path to the file.
void write(const std::string &path)
Write the index to a file.
void getSequence(const std::string &seqid, Sequence &seq, const std::string &path) const
Get a sequence given its ID.
std::streampos getSequencePosition(const std::string &id) const
Get the position of a Sequence given its ID.
The fasta sequence file format.
bool nextSequence(std::istream &input, Sequence &seq) const override
void appendSequencesFromStream(std::istream &input, SequenceContainerInterface &sc) const override
Append sequences to a container from a stream.
void writeSequence(std::ostream &output, const Sequence &seq) const override
void writeSequences(std::ostream &output, const SequenceContainerInterface &sc) const override
Write a container to a stream.
A basic implementation of the Sequence interface.
void setContent(const std::string &sequence) override
Set the whole content of the sequence.
std::string getChar(size_t pos) const override
Get the element at position 'pos' as a character.
const std::string & nextToken()
bool hasMoreToken() const
const std::string & getToken(size_t pos) const
The SequenceContainer interface.
virtual void addSequence(const HashType &sequenceKey, std::unique_ptr< SequenceType > &sequencePtr)=0
Add a sequence to the container.
virtual const SequenceType & sequence(const HashType &sequenceKey) const override=0
Retrieve a sequence object from the container.
virtual std::vector< std::string > getSequenceNames() const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get a pointer toward the container's alphabet.
int toInt(const std::string &s, char scientificNotation='e')
std::string removeWhiteSpaces(const std::string &s)
std::string removeSurroundingWhiteSpaces(const std::string &s)
std::string toUpper(const std::string &s)
bool isWhiteSpaceCharacter(char c)
bool isEmpty(const std::string &s)
std::string toString(T t)
This alphabet is used to deal NumericAlphabet.
std::vector< std::string > Comments
Declaration of Comments type.