30 throw Exception (
"StringSequenceTools::subseq: Invalid interval");
33 string temp(sequence);
36 temp.erase(temp.begin() +
static_cast<ptrdiff_t
>(end + 1), temp.end());
37 temp.erase(temp.begin(), temp.begin() +
static_cast<ptrdiff_t
>(begin));
60 string result(sequence);
63 for (
unsigned int i = 0; i < result.size(); i++)
65 if (result[i] == chars)
66 result.erase(result.begin() + i);
77 string result(sequence);
80 for (
unsigned int i = 0; i < chars.size(); i++)
83 for (
unsigned int j = 0; j < result.size(); j++)
85 if (result[j] == chars[i])
86 result.erase(result.begin() + j);
98 string* result =
new string;
101 size_t size = sequence.size();
102 for (
size_t i = 0; i < size; i++)
104 *result += sequence[size - i - 1];
116 string* result =
new string;
119 size_t size = sequence.size();
120 for (
unsigned int i = 0; i < size; i++)
124 case 'A': *result +=
'T';
126 case 'C': *result +=
'G';
128 case 'G': *result +=
'C';
130 case 'T': *result +=
'A';
132 case 'M': *result +=
'K';
134 case 'R': *result +=
'Y';
136 case 'Y': *result +=
'R';
138 case 'K': *result +=
'M';
140 case 'V': *result +=
'B';
142 case 'H': *result +=
'D';
144 case 'D': *result +=
'H';
146 case 'B': *result +=
'V';
148 default: *result += sequence[i];
162 map<char, double> counts;
165 if (window < sequence.size())
166 throw BadIntegerException(
"StringSequenceTools::getGCContent : specified window too high",
static_cast<int>(window));
169 if (pos + window > sequence.size())
171 pos = sequence.size() - window;
175 for (
size_t i = pos; i < pos + window; i++)
177 switch (toupper(sequence[i]))
179 case 'A': counts[
'A'] += 1;
181 case 'C': counts[
'C'] += 1;
183 case 'G': counts[
'G'] += 1;
185 case 'T': counts[
'T'] += 1;
187 case 'M': counts[
'A'] += 0.5;
190 case 'R': counts[
'A'] += 0.5;
193 case 'W': counts[
'A'] += 0.5;
196 case 'S': counts[
'C'] += 0.5;
199 case 'Y': counts[
'C'] += 0.5;
202 case 'K': counts[
'G'] += 0.5;
205 case 'V': counts[
'A'] += 0.34;
209 case 'H': counts[
'A'] += 0.34;
213 case 'D': counts[
'A'] += 0.34;
217 case 'B': counts[
'C'] += 0.34;
221 case '-':
throw Exception(
"StringSequenceTools::getGCContent : Gap found in sequence");
224 default: counts[
'A'] += 0.25;
232 return (counts[
'G'] + counts[
'C']) /
static_cast<double>(window);
246 vector<int> code(
static_cast<size_t>(floor(
static_cast<double>(sequence.size()) /
static_cast<double>(size))));
249 while (pos + size <= sequence.size())
251 code[
count] = alphabet->charToInt(sequence.substr(pos, size));
263 for (
auto i : sequence)
265 result += alphabet->intToChar(i);
275 if (sequence.size() == 0)
277 throw Exception(
"Sequence::getAlphabetFromSequence : Empty sequence string");
287 for (
auto i : sequence)
292 case 0: u =
true;
break;
293 case 3: p =
true;
break;
294 case 2: r =
true;
break;
295 case 5: pd =
true;
break;
300 throw Exception(
"Sequence::getAlphabetFromSequence : Unknown character detected in specified sequence");
302 throw Exception(
"Sequence::getAlphabetFromSequence : Both 'T' and 'U' in the same sequence!");
304 throw Exception(
"Sequence::getAlphabetFromSequence : Protein character and 'U' in the same sequence!");
std::string resizeLeft(const std::string &s, std::size_t newSize, char fill)
std::string resizeRight(const std::string &s, std::size_t newSize, char fill)
std::size_t count(const std::string &s, const std::string &pattern)
This alphabet is used to deal with NumericAlphabet.