bpp-core3  3.0.0
NestedStringTokenizer.cpp
Go to the documentation of this file.
1 //
2 // File: NestedStringTokenizer.cpp
3 // Authors:
4 // Julien Dutheil
5 // Last modified: 2006-05-22 10:57:00
6 //
7 
8 /*
9  Copyright or © or Copr. Bio++ Development Team, (November 17, 2004)
10 
11  This software is a computer program whose purpose is to map data onto
12  a sequence or a phylogenetic tree.
13 
14  This software is governed by the CeCILL license under French law and
15  abiding by the rules of distribution of free software. You can use,
16  modify and/ or redistribute the software under the terms of the CeCILL
17  license as circulated by CEA, CNRS and INRIA at the following URL
18  "http://www.cecill.info".
19 
20  As a counterpart to the access to the source code and rights to copy,
21  modify and redistribute granted by the license, users are provided only
22  with a limited warranty and the software's author, the holder of the
23  economic rights, and the successive licensors have only limited
24  liability.
25 
26  In this respect, the user's attention is drawn to the risks associated
27  with loading, using, modifying and/or developing or reproducing the
28  software by the user in light of its specific status of free software,
29  that may mean that it is complicated to manipulate, and that also
30  therefore means that it is reserved for developers and experienced
31  professionals having in-depth computer knowledge. Users are therefore
32  encouraged to load and test the software's suitability as regards their
33  requirements in conditions enabling the security of their systems and/or
34  data to be ensured and, more generally, to use and operate it in the
35  same conditions as regards security.
36 
37  The fact that you are presently reading this means that you have had
38  knowledge of the CeCILL license and that you accept its terms.
39 */
40 
41 
42 #include "NestedStringTokenizer.h"
43 #include "TextTools.h"
44 
45 using namespace bpp;
46 
47 // From the STL:
48 #include <iostream>
49 
50 using namespace std;
51 
/**
 * Build the token list of a nesting-aware tokenizer.
 *
 * The input string is cut at delimiters, but a cut is accepted only when the
 * running balance of block patterns is zero. The balance (blocks) grows by
 * the number of occurrences of open and shrinks by the number of occurrences
 * of end, both counted with TextTools::count on each scanned piece. While the
 * balance is non-zero, the scanned text is buffered in cache and scanning
 * resumes at the next delimiter; once the balance returns to zero the
 * buffered text plus the current piece is pushed to tokens_ as one token.
 *
 * @param s          The string to split.
 * @param open       Pattern that opens a block.
 * @param end        Pattern that closes a block.
 * @param delimiters When solid is false, every character of this string is a
 *                   delimiter (find_first_of / find_first_not_of), so leading
 *                   delimiters and runs of delimiters are skipped. When solid
 *                   is true, the whole string is searched as one delimiter
 *                   (find).
 * @param solid      Selects between the two delimiter interpretations above.
 * @throw Exception  If the end of s is reached while the balance is non-zero.
 *                   The message text differs between the two modes.
 *
 * NOTE(review): the listing jumps from line 52 to line 54, so whatever
 * follows the trailing ':' of the signature (the member-initializer list) is
 * not visible here - confirm against the real source file.
 */
52 NestedStringTokenizer::NestedStringTokenizer(const std::string& s, const std::string& open, const std::string& end, const std::string& delimiters, bool solid) :
54 {
    // blocks: open-minus-end balance accumulated over the pieces scanned so far.
55  int blocks = 0;
    // cache: text of the pieces (and the delimiter characters between them)
    // that belong to a block which is not closed yet.
56  string cache = "";
57  if (!solid)
58  {
    // Character-set mode: start at the first non-delimiter character.
59  string::size_type index = s.find_first_not_of(delimiters, 0);
60  while (index != s.npos)
61  {
62  string::size_type newIndex = s.find_first_of(delimiters, index);
63  bool endBlockFound = false;
    // Keep extending the current token until the balance is back to zero.
64  while (!endBlockFound)
65  {
66  if (newIndex != s.npos)
67  {
68  string token = s.substr(index, newIndex - index);
69  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
70 
71  if (blocks == 0)
72  {
73  tokens_.push_back(cache + token);
74  cache = ""; // reset cache.
    // Skip the whole run of delimiter characters before the next token.
75  index = s.find_first_not_of(delimiters, newIndex);
76  endBlockFound = true;
77  }
78  else
79  {
80  // Block still open: buffer this piece until the closing pattern is found.
    // The "+ 1" keeps the delimiter character itself inside the buffered text.
81  cache += s.substr(index, newIndex - index + 1);
82  index = newIndex + 1;
83  newIndex = s.find_first_of(delimiters, index);
84  }
85  }
86  else
87  {
    // No further delimiter: the rest of the string is the last piece.
88  string token = s.substr(index);
89  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
90  if (blocks == 0)
91  {
92  tokens_.push_back(cache + token);
93  cache = ""; // reset cache.
    // newIndex is npos here, which terminates the outer loop.
94  index = newIndex;
95  endBlockFound = true;
96  }
97  else
98  throw Exception("NestedStringTokenizer (constructor). Unclosed block.");
99  }
100  }
101  }
102  }
103  else
104  {
    // Solid mode: scanning starts at position 0, so an empty piece before or
    // between delimiters is pushed as an empty token.
105  string::size_type index = 0;
106  while (index != s.npos)
107  {
108  string::size_type newIndex = s.find(delimiters, index);
109  bool endBlockFound = false;
110  while (!endBlockFound)
111  {
112  if (newIndex != s.npos)
113  {
114  string token = s.substr(index, newIndex - index);
115  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
116 
117  if (blocks == 0)
118  {
119  tokens_.push_back(cache + token);
120  cache = ""; // reset cache.
    // Jump over the complete delimiter string.
121  index = newIndex + delimiters.size();
122  endBlockFound = true;
123  }
124  else
125  {
126  // Block still open: buffer this piece until the closing pattern is found.
    // Only the first character of the delimiter is buffered and index moves
    // by one, so the remaining delimiter characters start the next piece and
    // the concatenation reproduces the original text.
127  cache += s.substr(index, newIndex - index + 1);
128  index = newIndex + 1;
129  newIndex = s.find(delimiters, index);
130  }
131  }
132  else
133  {
    // No further delimiter: the rest of the string is the last piece.
134  string token = s.substr(index);
135  blocks += static_cast<int>(TextTools::count(token, open)) - static_cast<int>(TextTools::count(token, end));
136  if (blocks == 0)
137  {
138  tokens_.push_back(cache + token);
139  cache = ""; // reset cache.
    // newIndex is npos here, which terminates the outer loop.
140  index = newIndex;
141  endBlockFound = true;
142  }
143  else
144  throw Exception("Unclosed block.");
145  }
146  }
147  }
148  }
149 }
150 
// Return the token stored at currentPosition_ and advance the position.
// Throws an Exception when hasMoreToken() reports that nothing is left.
// NOTE(review): the signature line (listing line 151) is not visible in this
// listing; presumably this is the body of nextToken() - confirm.
152 {
153  if (!hasMoreToken())
154  throw Exception("No more token in nested tokenizer.");
    // Post-increment: the current token is returned, then the position moves on.
155  return tokens_[currentPosition_++];
156 }
Exception base class. Overload exception constructor (to control the exceptions mechanism)....
Definition: Exceptions.h:59
NestedStringTokenizer(const std::string &s, const std::string &open, const std::string &end, const std::string &delimiters=" \t\n\f\r", bool solid=false)
Build a new StringTokenizer from a string.
const std::string & nextToken()
Get the next available token. If no token is available, throw an Exception.
A tokenizer for strings.
size_t currentPosition_
the current position in the token list.
bool hasMoreToken() const
Tell if some tokens are still available.
std::deque< std::string > tokens_
Where the tokens are stored.
std::size_t count(const std::string &s, const std::string &pattern)
Count the occurrences of a given pattern in a string.
Definition: TextTools.cpp:426