bpp-core3  3.0.0
NestedStringTokenizer.cpp
Go to the documentation of this file.
1 // SPDX-FileCopyrightText: The Bio++ Development Group
2 //
3 // SPDX-License-Identifier: CECILL-2.1
4 
6 #include "TextTools.h"
7 
8 using namespace bpp;
9 
10 // From the STL:
11 #include <iostream>
12 
13 using namespace std;
14 
15 NestedStringTokenizer::NestedStringTokenizer(const std::string& s, const std::string& open, const std::string& end, const std::string& delimiters, bool solid) :
17 {
18  int blocks = 0;
19  string cache = "";
20  if (!solid)
21  {
22  string::size_type index = s.find_first_not_of(delimiters, 0);
23  while (index != s.npos)
24  {
25  string::size_type newIndex = s.find_first_of(delimiters, index);
26  bool endBlockFound = false;
27  while (!endBlockFound)
28  {
29  if (newIndex != s.npos)
30  {
31  string token = s.substr(index, newIndex - index);
32  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
33 
34  if (blocks == 0)
35  {
36  tokens_.push_back(cache + token);
37  cache = ""; // reset cache.
38  index = s.find_first_not_of(delimiters, newIndex);
39  endBlockFound = true;
40  }
41  else
42  {
43  // Ignore this token untill closing block is found
44  cache += s.substr(index, newIndex - index + 1);
45  index = newIndex + 1;
46  newIndex = s.find_first_of(delimiters, index);
47  }
48  }
49  else
50  {
51  string token = s.substr(index);
52  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
53  if (blocks == 0)
54  {
55  tokens_.push_back(cache + token);
56  cache = ""; // reset cache.
57  index = newIndex;
58  endBlockFound = true;
59  }
60  else
61  throw Exception("NestedStringTokenizer (constructor). Unclosed block.");
62  }
63  }
64  }
65  }
66  else
67  {
68  string::size_type index = 0;
69  while (index != s.npos)
70  {
71  string::size_type newIndex = s.find(delimiters, index);
72  bool endBlockFound = false;
73  while (!endBlockFound)
74  {
75  if (newIndex != s.npos)
76  {
77  string token = s.substr(index, newIndex - index);
78  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
79 
80  if (blocks == 0)
81  {
82  tokens_.push_back(cache + token);
83  cache = ""; // reset cache.
84  index = newIndex + delimiters.size();
85  endBlockFound = true;
86  }
87  else
88  {
89  // Ignore this token untill closing block is found
90  cache += s.substr(index, newIndex - index + 1);
91  index = newIndex + 1;
92  newIndex = s.find(delimiters, index);
93  }
94  }
95  else
96  {
97  string token = s.substr(index);
98  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
99  if (blocks == 0)
100  {
101  tokens_.push_back(cache + token);
102  cache = ""; // reset cache.
103  index = newIndex;
104  endBlockFound = true;
105  }
106  else
107  throw Exception("Unclosed block.");
108  }
109  }
110  }
111  }
112 }
113 
// Function body whose signature line is not present in this listing
// (NOTE(review): presumably NestedStringTokenizer::nextToken() -- confirm
// against the class header).
// Throws Exception when hasMoreToken() returns false; otherwise returns
// tokens_[currentPosition_] and then advances currentPosition_ by one.
115 {
116  if (!hasMoreToken())
117  throw Exception("No more token in nested tokenizer.");
118  return tokens_[currentPosition_++];
119 }
A tokenizer for strings.
bool hasMoreToken() const
Tell if some tokens are still available.
std::size_t count(const std::string &s, const std::string &pattern)
Count the occurrences of a given pattern in a string.
Definition: TextTools.cpp:388
STL namespace.
std::deque< std::string > tokens_
Where the tokens are stored.
const std::string & nextToken()
Get the next available token. If no token is available, throw an Exception.
Exception base class. Overload exception constructor (to control the exceptions mechanism). Destructor is already virtual (from std::exception)
Definition: Exceptions.h:20
NestedStringTokenizer(const std::string &s, const std::string &open, const std::string &end, const std::string &delimiters=" \\", bool solid=false)
Build a new StringTokenizer from a string.
size_t currentPosition_
the current position in the token list.