libstdc++
regex_executor.h
Go to the documentation of this file.
1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library. This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  * @file bits/regex_executor.h
27  * This is an internal header file, included by other library headers.
28  * Do not attempt to use it directly. @headername{regex}
29  */
30 
31 // FIXME convert comments to doxygen format.
32 
33 namespace std _GLIBCXX_VISIBILITY(default)
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37 namespace __detail
38 {
39  /**
40  * @addtogroup regex-detail
41  * @{
42  */
43 
44  /**
45  * @brief Takes a regex and an input string and does the matching.
46  *
47  * The %_Executor class has two modes: DFS mode and BFS mode, controlled
48  * by the template parameter %__dfs_mode.
49  */
50  template<typename _BiIter, typename _Alloc, typename _TraitsT,
51  bool __dfs_mode>
52  class _Executor
53  {
55  using __dfs = true_type;
56  using __bfs = false_type;
57 
58  enum class _Match_mode : unsigned char { _Exact, _Prefix };
59 
60  public:
61  typedef typename iterator_traits<_BiIter>::value_type _CharT;
65  typedef typename _TraitsT::char_class_type _ClassT;
66  typedef _NFA<_TraitsT> _NFAT;
67 
68  public:
69  _Executor(_BiIter __begin,
70  _BiIter __end,
71  _ResultsVec& __results,
72  const _RegexT& __re,
73  _FlagT __flags)
74  : _M_begin(__begin),
75  _M_end(__end),
76  _M_re(__re),
77  _M_nfa(*__re._M_automaton),
78  _M_results(__results),
79  _M_rep_count(_M_nfa.size()),
80  _M_states(_M_nfa._M_start(), _M_nfa.size()),
81  _M_flags(__flags)
82  {
83  using namespace regex_constants;
84  if (__flags & match_prev_avail) // ignore not_bol and not_bow
85  _M_flags &= ~(match_not_bol | match_not_bow);
86  }
87 
88  // Set matched when string exactly matches the pattern.
89  bool
90  _M_match()
91  {
92  _M_current = _M_begin;
93  return _M_main(_Match_mode::_Exact);
94  }
95 
96  // Set matched when some prefix of the string matches the pattern.
97  bool
98  _M_search_from_first()
99  {
100  _M_current = _M_begin;
101  return _M_main(_Match_mode::_Prefix);
102  }
103 
104  bool
105  _M_search();
106 
107  private:
108  void
109  _M_rep_once_more(_Match_mode __match_mode, _StateIdT);
110 
111  void
112  _M_handle_repeat(_Match_mode, _StateIdT);
113 
114  void
115  _M_handle_subexpr_begin(_Match_mode, _StateIdT);
116 
117  void
118  _M_handle_subexpr_end(_Match_mode, _StateIdT);
119 
120  void
121  _M_handle_line_begin_assertion(_Match_mode, _StateIdT);
122 
123  void
124  _M_handle_line_end_assertion(_Match_mode, _StateIdT);
125 
126  void
127  _M_handle_word_boundary(_Match_mode, _StateIdT);
128 
129  void
130  _M_handle_subexpr_lookahead(_Match_mode, _StateIdT);
131 
132  void
133  _M_handle_match(_Match_mode, _StateIdT);
134 
135  void
136  _M_handle_backref(_Match_mode, _StateIdT);
137 
138  void
139  _M_handle_accept(_Match_mode, _StateIdT);
140 
141  void
142  _M_handle_alternative(_Match_mode, _StateIdT);
143 
144  void
145  _M_dfs(_Match_mode __match_mode, _StateIdT __start);
146 
147  bool
148  _M_main(_Match_mode __match_mode)
149  { return _M_main_dispatch(__match_mode, __search_mode{}); }
150 
151  bool
152  _M_main_dispatch(_Match_mode __match_mode, __dfs);
153 
154  bool
155  _M_main_dispatch(_Match_mode __match_mode, __bfs);
156 
157  bool
158  _M_is_word(_CharT __ch) const
159  {
160  static const _CharT __s[2] = { 'w' };
161  return _M_re._M_automaton->_M_traits.isctype
162  (__ch, _M_re._M_automaton->_M_traits.lookup_classname(__s, __s+1));
163  }
164 
165  bool
166  _M_at_begin() const
167  {
168  if (_M_current == _M_begin)
169  {
170  // match_not_bol means ^ does not match [_M_begin,_M_begin)
171  if (_M_flags & regex_constants::match_not_bol)
172  return false;
173  // match_prev_avail means _M_begin is not the start of the input.
174  if (_M_flags & regex_constants::match_prev_avail)
175  {
176  // For ECMAScript multiline matches, check if the previous
177  // character is a line terminator.
178  if (_M_match_multiline())
179  return _M_is_line_terminator(*std::prev(_M_current));
180  else
181  return false;
182  }
183  else // ^ matches at _M_begin
184  return true;
185  }
186  else if (_M_match_multiline())
187  return _M_is_line_terminator(*std::prev(_M_current));
188  else
189  return false;
190  }
191 
192  bool
193  _M_at_end() const
194  {
195  if (_M_current == _M_end)
196  return !(_M_flags & regex_constants::match_not_eol);
197  else if (_M_match_multiline())
198  return _M_is_line_terminator(*_M_current);
199  else
200  return false;
201  }
202 
203  bool
204  _M_word_boundary() const;
205 
206  bool
207  _M_lookahead(_StateIdT __next);
208 
209  bool
210  _M_is_line_terminator(_CharT __c) const
211  {
212  const auto& __traits = _M_re._M_automaton->_M_traits;
213  const auto& __ct = use_facet<ctype<_CharT>>(__traits.getloc());
214  const char __n{ __ct.narrow(__c, ' ') };
215  if (__n == '\n')
216  return true;
217  if (_M_re._M_automaton->_M_options() & regex_constants::ECMAScript)
218  {
219  if (__n == '\r')
220  return true;
221  // FIXME: U+2028 (line separator) and U+2029 (paragraph separator)
222  }
223  return false;
224  }
225 
226  bool
227  _M_match_multiline() const noexcept
228  {
229  constexpr auto __m
231  return (_M_re._M_automaton->_M_options() & __m) == __m;
232  }
233 
234  // Holds additional information used in BFS-mode.
235  template<typename _SearchMode, typename _ResultsVec>
236  struct _State_info;
237 
238  template<typename _ResultsVec>
239  struct _State_info<__bfs, _ResultsVec>
240  {
241  explicit
242  _State_info(_StateIdT __start, size_t __n)
243  : _M_visited_states(new bool[__n]()), _M_start(__start)
244  { }
245 
246  bool _M_visited(_StateIdT __i)
247  {
248  if (_M_visited_states[__i])
249  return true;
250  _M_visited_states[__i] = true;
251  return false;
252  }
253 
254  void _M_queue(_StateIdT __i, const _ResultsVec& __res)
255  { _M_match_queue.emplace_back(__i, __res); }
256 
257  // Dummy implementations for BFS mode.
258  _BiIter* _M_get_sol_pos() { return nullptr; }
259 
260  // Saves states that need to be considered for the next character.
261  vector<pair<_StateIdT, _ResultsVec>> _M_match_queue;
262  // Indicates which states are already visited.
263  unique_ptr<bool[]> _M_visited_states;
264  // To record current solution.
265  _StateIdT _M_start;
266  };
267 
268  template<typename _ResultsVec>
269  struct _State_info<__dfs, _ResultsVec>
270  {
271  explicit
272  _State_info(_StateIdT __start, size_t) : _M_start(__start)
273  { }
274 
275  // Dummy implementations for DFS mode.
276  bool _M_visited(_StateIdT) const { return false; }
277  void _M_queue(_StateIdT, const _ResultsVec&) { }
278 
279  _BiIter* _M_get_sol_pos() { return &_M_sol_pos; }
280 
281  // To record current solution.
282  _StateIdT _M_start;
283  _BiIter _M_sol_pos;
284  };
285 
286  public:
287  _ResultsVec _M_cur_results;
288  _BiIter _M_current;
289  _BiIter _M_begin;
290  const _BiIter _M_end;
291  const _RegexT& _M_re;
292  const _NFAT& _M_nfa;
293  _ResultsVec& _M_results;
294  vector<pair<_BiIter, int>> _M_rep_count;
295  _State_info<__search_mode, _ResultsVec> _M_states;
296  _FlagT _M_flags;
297  // Do we have a solution so far?
298  bool _M_has_sol;
299  };
300 
301  ///@} regex-detail
302 } // namespace __detail
303 _GLIBCXX_END_NAMESPACE_VERSION
304 } // namespace std
305 
306 #include <bits/regex_executor.tcc>
integral_constant< bool, true > true_type
The type used as a compile-time boolean with true value.
Definition: type_traits:83
integral_constant< bool, false > false_type
The type used as a compile-time boolean with false value.
Definition: type_traits:86
ISO C++ entities toplevel namespace is std.
constexpr match_flag_type match_not_bow
constexpr match_flag_type match_not_bol
constexpr syntax_option_type ECMAScript
constexpr syntax_option_type __multiline
Extension: Equivalent to regex_constants::multiline for C++11 and C++14.
constexpr match_flag_type match_not_eol
match_flag_type
This is a bitmask type indicating regex matching rules.
constexpr match_flag_type match_prev_avail
integral_constant
Definition: type_traits:66
Traits class for iterators.
Takes a regex and an input string and does the matching.
A standard container which offers fixed time access to individual elements in any order.
Definition: stl_vector.h:390