5 #include "../StringSequenceTools.h"
17 throw IOException (
"Mase::read : fail to open file");
22 string temp, name, sequence =
"";
23 bool comments =
false;
31 getline(input, temp,
'\n');
42 fileComments.push_back(temp);
47 if ((name !=
"") && (sequence !=
""))
51 auto seqPtr = make_unique<Sequence>(name, sequence, seqComments, alphaPtr);
59 temp.erase(temp.begin());
61 seqComments.push_back(temp);
80 if ((name !=
"") && (sequence !=
""))
83 auto seqPtr = make_unique<Sequence>(name, sequence, seqComments, alphaPtr);
98 throw IOException (
"Mase::write : failed to open file");
104 if (comments.size() == 0)
106 output <<
";;" << endl;
108 for (
size_t i = 0; i < comments.size(); i++)
110 output <<
";;" << comments[i] << endl;
113 string seq, temp =
"";
118 comments = sc.
sequence(seqKey).getComments();
122 if (comments.size() == 0)
124 output <<
";" << endl;
128 for (
size_t j = 0; j < comments.size(); j++)
130 output <<
";" << comments[j] << endl;
135 output << sc.
sequence(seqKey).getName() << endl;
138 seq = sc.
sequence(seqKey).toString();
141 if (seq.size() > charsByLine_)
144 temp.erase(temp.begin() +
static_cast<ptrdiff_t
>(charsByLine_), temp.end());
145 output << temp << endl;
146 seq.erase(seq.begin(), seq.begin() +
static_cast<ptrdiff_t
>(charsByLine_));
150 output << seq << endl;
164 if (input.peek() ==
';')
168 if (input.peek() ==
';')
176 string::size_type index = line.find(
"# of");
177 if (index < line.npos)
179 StringTokenizer st(
string(line.begin() +
static_cast<ptrdiff_t
>(index + 4), line.end()),
" \t=;");
181 unsigned int numberOfSegments = TextTools::to<unsigned int>(st.
nextToken());
185 while (siteSelection.
size() < numberOfSegments)
188 if (line[0] !=
';' || line[1] !=
';')
189 throw Exception(
"Mase::readHeader_(): corrupted file, site selection " + name +
" is incomplete. Aborting.");
190 line = line.substr(2);
195 unsigned int begin = TextTools::to<unsigned int>(st3.
nextToken());
196 unsigned int end = TextTools::to<unsigned int>(st3.
nextToken());
200 if (siteSelection.
size() > numberOfSegments)
208 index = line.find(
"@ of");
209 if (index < line.npos)
213 unsigned int numberOfSequences = TextTools::to<unsigned int>(st.
nextToken());
216 vector<size_t> sequenceSelection;
217 while (sequenceSelection.size() < numberOfSequences)
220 if (line[0] !=
';' || line[1] !=
';')
221 throw Exception(
"Mase::readHeader_(): corrupted file, sequence selection " + name +
" is incomplete. Aborting.");
222 line = line.substr(2);
226 unsigned int pos = TextTools::to<unsigned int>(st2.
nextToken());
228 sequenceSelection.push_back(pos);
230 if (sequenceSelection.size() > numberOfSequences)
238 index = line.find(
"$");
239 if (index < line.npos)
247 if (line[0] !=
';' || line[1] !=
';')
248 throw Exception(
"Mase::readHeader_(): corrupted file, tree " + name +
" is incomplete. Aborting.");
274 for (
size_t i = 0; i < treeNames.size(); ++i)
276 output <<
";;$ " + treeNames[i] << endl;
277 output <<
";;" + header.
getTree(treeNames[i]);
283 for (
size_t i = 0; i < siteSelectionNames.size(); ++i)
286 output <<
";;Site selection " << siteSelectionNames[i] <<
" (" << ranges.
totalLength() <<
" sites)" << endl;
287 output <<
";;# of segments=" << ranges.
size() <<
" " << siteSelectionNames[i] << endl;
289 for (
size_t j = 0; j < ranges.
size(); ++j)
292 if ((j + 1) % 10 == 0)
293 output << endl <<
";;";
300 for (
size_t i = 0; i < sequenceSelectionNames.size(); ++i)
303 output <<
";;@ of species=" << set.size() <<
" " << sequenceSelectionNames[i] << endl;
305 for (
unsigned int j = 0; j < set.size(); ++j)
307 output <<
" " << set[j];
308 if ((j + 1) % 10 == 0)
309 output << endl <<
";;";
void appendSequencesFromStream(std::istream &input, SequenceContainerInterface &sc) const override
Append sequences to a container from a stream.
void writeSequences(std::ostream &output, const SequenceContainerInterface &sc) const override
Write a container to a stream.
void writeHeader_(std::ostream &output, const MaseHeader &header) const
void readHeader_(std::istream &input, MaseHeader &header) const
void addRange(const Range< T > &r)
size_t totalLength() const
const Range< T > & getRange(size_t i) const
const std::string & nextToken()
bool hasMoreToken() const
std::string unparseRemainingTokens() const
The SequenceContainer interface.
virtual void addSequence(const HashType &sequenceKey, std::unique_ptr< SequenceType > &sequencePtr)=0
Add a sequence to the container.
virtual const SequenceType & sequence(const HashType &sequenceKey) const override=0
Retrieve a sequence object from the container.
virtual std::vector< HashType > getSequenceKeys() const =0
virtual std::shared_ptr< const Alphabet > getAlphabet() const =0
Get a pointer toward the container's alphabet.
std::string removeSurroundingWhiteSpaces(const std::string &s)
bool endsWith(const std::string &s, const std::string &pattern)
std::string toString(T t)
This alphabet is used to deal NumericAlphabet.
std::vector< std::string > Comments
Declaration of Comments type.