9 #include "../Container/AlignedSequenceContainer.h"
10 #include "../Container/SequenceContainerTools.h"
11 #include "../Container/VectorSequenceContainer.h"
20 for (
size_t i = 0; i < maseFileHeader.size(); i++)
22 string current = maseFileHeader[i];
23 string::size_type index = current.find(
"# of");
24 if (index < current.npos)
26 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 4), current.end()),
" \t=;");
28 size_t numberOfSegments = TextTools::to<size_t>(st.
nextToken());
36 while (i < maseFileHeader.size())
38 current = maseFileHeader[i++];
44 size_t begin = TextTools::to<size_t>(st3.
nextToken());
45 size_t end = TextTools::to<size_t>(st3.
nextToken());
48 for (
size_t j = begin; j <= end; j++)
50 selection.push_back(j - 1);
53 if (counter == numberOfSegments)
60 if (selection.size() == 0)
62 throw IOException(
"Site set " + setName +
" has not been found in the sequence file.");
72 for (
size_t i = 0; i < maseFileHeader.size(); i++)
74 string current = maseFileHeader[i];
76 string::size_type index = current.find(
"@ of");
77 if (index < current.npos)
79 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 4), current.end()),
" \t=;");
81 size_t numberOfSequences = TextTools::to<size_t>(st.
nextToken());
89 while (i < maseFileHeader.size())
91 current = maseFileHeader[i++];
95 size_t seqIndex = TextTools::to<size_t>(st2.
nextToken());
98 selection.push_back(seqIndex - 1);
100 if (counter == numberOfSequences)
107 if (selection.size() == 0)
109 throw IOException(
"Sequence set " + setName +
" has not been found in the sequence file.");
/**
 * @brief Gather the site selections announced in a Mase header.
 *
 * Every header line is searched for the "# of" marker. For a matching line,
 * the text after the marker is tokenized and its first token is parsed as the
 * number of segments; segment bounds are then read and their lengths summed
 * into a per-selection site count.
 *
 * NOTE(review): this listing skips several source lines (declarations of
 * `name`, `counter`, `nbSites`, `st3`, and the return statement are not
 * visible), so the comments below only describe the statements shown.
 *
 * @param maseHeader Header comment lines to scan.
 * @return Presumably `selections`, filled via `selections[name] = nbSites`
 *         (the return statement itself is not visible here -- confirm).
 */
116 map<string, size_t> MaseTools::getAvailableSiteSelections(
const Comments& maseHeader)
118 map<string, size_t> selections;
// Visit each header line by index.
119 for (
size_t i = 0; i < maseHeader.size(); i++)
121 string current = maseHeader[i];
// Locate the "# of" marker; find() yields npos when it is absent.
123 string::size_type index = current.find(
"# of");
124 if (index < current.npos)
// Tokenize the text starting 4 characters past the marker position,
// splitting on whitespace, '=' and ';'.
126 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 4), current.end()),
" \t\n\f\r=;");
// The first token is parsed as the declared number of segments.
128 size_t numberOfSegments = TextTools::to<size_t>(st.
nextToken());
// Inner loop shares the index i with the enclosing for loop.
// NOTE(review): no increment of i is visible between this test and the read
// of maseHeader[i] below (the listing skips the lines in between) -- confirm
// that the read cannot go past the end of the header.
136 while (i < maseHeader.size())
139 current = maseHeader[i];
// Two consecutive tokens of st3 are parsed as the bounds of one segment.
145 size_t begin = TextTools::to<size_t>(st3.
nextToken());
146 size_t end = TextTools::to<size_t>(st3.
nextToken());
// Bounds are counted inclusively. NOTE(review): both values are unsigned, so
// the subtraction wraps around if end < begin -- confirm input is trusted.
148 nbSites += end - begin + 1;
// When counter equals the declared segment count, store the total under name.
150 if (counter == numberOfSegments)
152 selections[name] = nbSites;
/**
 * @brief Gather the sequence selections announced in a Mase header.
 *
 * Every header line is searched for the "@ of" marker. For a matching line,
 * the text after the marker is tokenized, its first token is parsed as the
 * number of sequences, and that count is stored in the result map.
 *
 * NOTE(review): this listing skips several source lines (the derivation of
 * `name` and the return statement are not visible).
 *
 * @param maseHeader Header comment lines to scan.
 * @return Presumably `selections`, filled via
 *         `selections[name] = numberOfSequences` (return statement not
 *         visible here -- confirm).
 */
163 map<string, size_t> MaseTools::getAvailableSequenceSelections(
const Comments& maseHeader)
165 map<string, size_t> selections;
// Visit each header line by index.
166 for (
size_t i = 0; i < maseHeader.size(); i++)
168 string current = maseHeader[i];
// Locate the "@ of" marker; find() yields npos when it is absent.
170 string::size_type index = current.find(
"@ of");
171 if (index < current.npos)
// Tokenize the text starting 4 characters past the marker position,
// splitting on whitespace, '=' and ';'.
173 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 4), current.end()),
" \t\n\f\r=;");
// The first token is parsed as the declared number of sequences.
// NOTE(review): this call uses TextTools::fromString whereas the site
// selection code in this file uses TextTools::to -- presumably equivalent;
// confirm and consider aligning them.
175 size_t numberOfSequences = TextTools::fromString<size_t>(st.
nextToken());
// Record the declared count under the selection name.
181 selections[name] = numberOfSequences;
/**
 * @brief Look up the phase associated with a site selection in a Mase header.
 *
 * Each header line is checked for two markers: "# of" and "/codon_start".
 * When "/codon_start" is found, the token following it is parsed into `phase`.
 *
 * NOTE(review): this listing skips several source lines (the declaration of
 * `phase`, the use of `setName` and any return statement are not visible), so
 * how the two markers are combined cannot be stated from here.
 *
 * @param maseFileHeader Header comment lines to scan.
 * @param setName Name of the site selection (its use is not visible in this
 *        listing).
 * @return The phase, as a size_t (return statement not visible here).
 * @throws Exception with the message below; presumably raised when the scan
 *         ends without a result -- confirm.
 */
189 size_t MaseTools::getPhase(
const Comments& maseFileHeader,
const string& setName)
// One index variable is reused for both marker searches on each line.
192 string::size_type index = 0;
193 for (
size_t i = 0; i < maseFileHeader.size(); i++)
195 string current = maseFileHeader[i];
// First marker: a site-selection declaration.
197 index = current.find(
"# of");
198 if (index < current.npos)
// NOTE(review): 12 characters are skipped after the start of the
// 4-character "# of" marker, whereas the selection-listing functions in this
// file skip 4 -- presumably to jump past a longer keyword; confirm.
200 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 12), current.end()),
" \t\n\f\r=;");
// Second marker: "/codon_start" is 12 characters long, so index + 12 is the
// first character after it.
215 index = current.find(
"/codon_start");
216 if (index < current.npos)
218 StringTokenizer st(
string(current.begin() +
static_cast<ptrdiff_t
>(index + 12), current.end()),
" \t\n\f\r=;");
// The first token after the marker is parsed as the phase.
219 phase = TextTools::to<size_t>(st.
nextToken());
// NOTE(review): the message names PolymorphismSequenceContainer::getPhase
// although this function is MaseTools::getPhase -- likely a stale name.
222 throw Exception(
"PolymorphismSequenceContainer::getPhase: no /codon_start found, or site selection missing.");
const std::string & nextToken()
bool hasMoreToken() const
std::string unparseRemainingTokens() const
This alphabet is used to deal with NumericAlphabet.
std::vector< size_t > SiteSelection
std::vector< size_t > SequenceSelection
std::vector< std::string > Comments
Declaration of Comments type.