57 unique_ptr<MafSequence> currentSequence;
64 getline(*stream_, line,
'\n');
65 if (TextTools::isEmpty(line))
69 if (currentSequence) {
72 currentSequence.reset();
78 else if (line[0] ==
'a')
80 if (currentSequence) {
83 currentSequence.reset();
90 map<string, string> args;
93 KeyvalTools::multipleKeyvals(line.substr(2), args,
" ");
95 if (args.find(
"score") != args.end())
96 if (args[
"score"] !=
"NA")
97 block->
setScore(TextTools::toDouble(args[
"score"]));
99 if (args.find(
"pass") != args.end())
100 block->
setPass(TextTools::to<unsigned int>(args[
"pass"]));
103 else if (line[0] ==
's')
108 throw IOException(
"Sequence description should include a source field.");
111 throw IOException(
"Sequence description should include a start field.");
112 unsigned int start = TextTools::to<unsigned int>(st.
nextToken());
114 throw IOException(
"Sequence description should include a size field.");
115 unsigned int size = TextTools::to<unsigned int>(st.
nextToken());
117 throw IOException(
"Sequence description should include a strand field.");
120 throw Exception(
"MafAlignmentParser::nextBlock. Strand specification is incorrect, should be only one character long, found " + TextTools::toString(tmp.size()) +
".");
121 char strand = tmp[0];
124 throw IOException(
"Sequence description should include a source size field.");
125 unsigned int srcSize = TextTools::to<unsigned int>(st.
nextToken());
126 if (currentSequence) {
129 currentSequence.reset();
132 throw IOException(
"Sequence description without a sequence.");
134 if (dotOption_ == DOT_ASGAP) {
135 std::replace(seq.begin(), seq.end(),
'.',
'-');
137 if (dotOption_ == DOT_ASUNRES) {
138 std::replace(seq.begin(), seq.end(),
'.',
'N');
140 currentSequence.reset(
new MafSequence(src, seq, start, strand, srcSize));
141 if (currentSequence->getGenomicSize() != size) {
142 if (checkSequenceSize_)
143 throw Exception(
"MafAlignmentParser::nextBlock. Sequence found (" + src +
") does not match specified size: " + TextTools::toString(currentSequence->getGenomicSize()) +
", should be " + TextTools::toString(size) +
".");
146 ApplicationTools::displayWarning(
"MafAlignmentParser::nextBlock. Sequence found (" + src +
") does not match specified size: " + TextTools::toString(currentSequence->getGenomicSize()) +
", should be " + TextTools::toString(size) +
".");
152 vector<bool> mask(currentSequence->size());
153 for (
size_t i = 0; i < mask.size(); ++i) {
154 mask[i] = cmAlphabet_.isMasked(seq[i]);
159 else if (line[0] ==
'q')
161 if (!currentSequence)
162 throw Exception(
"MafAlignmentParser::nextBlock(). Quality scores found, but there is currently no sequence!");
166 if (name != currentSequence->getName())
167 throw Exception(
"MafAlignmentParser::nextBlock(). Quality scores found, but with a different name from the previous sequence: " + name +
", should be " + currentSequence->getName() +
".");
171 for (
size_t i = 0; i < qstr.size(); ++i) {
175 }
else if (c ==
'0' || c ==
'1' || c ==
'2' || c==
'3' || c ==
'4' || c ==
'5' || c ==
'6' || c ==
'7' || c ==
'8' || c ==
'9') {
177 }
else if (c ==
'F' || c ==
'f') {
179 }
else if (c ==
'?' || c ==
'.') {
182 throw Exception(
"MafAlignmentParser::nextBlock(). Unvalid quality score: " + TextTools::toString(c) +
". Should be 0-9, F or '-'.");
185 currentSequence->addAnnotation(seqQual);
191 if (currentSequence) {
194 currentSequence.reset();
A synteny block data structure, the basic unit of a MAF alignment file.
void setScore(double score)
void setPass(unsigned int pass)
void addSequence(const MafSequence &sequence)
A sequence class which is used to store data from MAF files.
void setScore(size_t pos, int score)
const std::string & nextToken()
bool hasMoreToken() const