libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2010-2013 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex_nfa.h 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 namespace std _GLIBCXX_VISIBILITY(default) 00032 { 00033 namespace __detail 00034 { 00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00036 00037 /** 00038 * @addtogroup regex-detail 00039 * @{ 00040 */ 00041 00042 /// Base class for, um, automata. Could be an NFA or a DFA. Your choice. 00043 class _Automaton 00044 { 00045 public: 00046 typedef unsigned int _SizeT; 00047 00048 public: 00049 virtual 00050 ~_Automaton() { } 00051 00052 virtual _SizeT 00053 _M_sub_count() const = 0; 00054 00055 #ifdef _GLIBCXX_DEBUG 00056 virtual std::ostream& 00057 _M_dot(std::ostream& __ostr) const = 0; 00058 #endif 00059 }; 00060 00061 /// Generic shared pointer to an automaton. 00062 typedef std::shared_ptr<_Automaton> _AutomatonPtr; 00063 00064 /// Operation codes that define the type of transitions within the base NFA 00065 /// that represents the regular expression. 00066 enum _Opcode 00067 { 00068 _S_opcode_unknown = 0, 00069 _S_opcode_alternative = 1, 00070 _S_opcode_subexpr_begin = 4, 00071 _S_opcode_subexpr_end = 5, 00072 _S_opcode_match = 100, 00073 _S_opcode_accept = 255 00074 }; 00075 00076 /// Provides a generic facade for a templated match_results. 00077 struct _Results 00078 { 00079 virtual void _M_set_pos(int __i, int __j, const _PatternCursor& __p) = 0; 00080 virtual void _M_set_matched(int __i, bool __is_matched) = 0; 00081 }; 00082 00083 /// Tags current state (for subexpr begin/end). 00084 typedef std::function<void (const _PatternCursor&, _Results&)> _Tagger; 00085 00086 /// Start state tag. 00087 template<typename _FwdIterT, typename _TraitsT> 00088 struct _StartTagger 00089 { 00090 explicit 00091 _StartTagger(int __i) 00092 : _M_index(__i) 00093 { } 00094 00095 void 00096 operator()(const _PatternCursor& __pc, _Results& __r) 00097 { __r._M_set_pos(_M_index, 0, __pc); } 00098 00099 int _M_index; 00100 }; 00101 00102 /// End state tag. 00103 template<typename _FwdIterT, typename _TraitsT> 00104 struct _EndTagger 00105 { 00106 explicit 00107 _EndTagger(int __i) 00108 : _M_index(__i) 00109 { } 00110 00111 void 00112 operator()(const _PatternCursor& __pc, _Results& __r) 00113 { __r._M_set_pos(_M_index, 1, __pc); } 00114 00115 int _M_index; 00116 _FwdIterT _M_pos; 00117 }; 00118 00119 /// Indicates if current state matches cursor current. 00120 typedef std::function<bool (const _PatternCursor&)> _Matcher; 00121 00122 /// Matches any character 00123 inline bool 00124 _AnyMatcher(const _PatternCursor&) 00125 { return true; } 00126 00127 /// Matches a single character 00128 template<typename _InIterT, typename _TraitsT> 00129 struct _CharMatcher 00130 { 00131 typedef typename _TraitsT::char_type char_type; 00132 00133 explicit 00134 _CharMatcher(char_type __c, const _TraitsT& __t = _TraitsT()) 00135 : _M_traits(__t), _M_c(_M_traits.translate(__c)) 00136 { } 00137 00138 bool 00139 operator()(const _PatternCursor& __pc) const 00140 { 00141 typedef const _SpecializedCursor<_InIterT>& _CursorT; 00142 _CursorT __c = static_cast<_CursorT>(__pc); 00143 return _M_traits.translate(__c._M_current()) == _M_c; 00144 } 00145 00146 const _TraitsT& _M_traits; 00147 char_type _M_c; 00148 }; 00149 00150 /// Matches a character range (bracket expression) 00151 template<typename _InIterT, typename _TraitsT> 00152 struct _RangeMatcher 00153 { 00154 typedef typename _TraitsT::char_type _CharT; 00155 typedef std::basic_string<_CharT> _StringT; 00156 00157 explicit 00158 _RangeMatcher(bool __is_non_matching, const _TraitsT& __t = _TraitsT()) 00159 : _M_traits(__t), _M_is_non_matching(__is_non_matching) 00160 { } 00161 00162 bool 00163 operator()(const _PatternCursor& __pc) const 00164 { 00165 typedef const _SpecializedCursor<_InIterT>& _CursorT; 00166 _CursorT __c = static_cast<_CursorT>(__pc); 00167 return true; 00168 } 00169 00170 void 00171 _M_add_char(_CharT __c) 00172 { } 00173 00174 void 00175 _M_add_collating_element(const _StringT& __s) 00176 { } 00177 00178 void 00179 _M_add_equivalence_class(const _StringT& __s) 00180 { } 00181 00182 void 00183 _M_add_character_class(const _StringT& __s) 00184 { } 00185 00186 void 00187 _M_make_range() 00188 { } 00189 00190 const _TraitsT& _M_traits; 00191 bool _M_is_non_matching; 00192 }; 00193 00194 /// Identifies a state in the NFA. 00195 typedef int _StateIdT; 00196 00197 /// The special case in which a state identifier is not an index. 00198 static const _StateIdT _S_invalid_state_id = -1; 00199 00200 00201 /** 00202 * @brief struct _State 00203 * 00204 * An individual state in an NFA 00205 * 00206 * In this case a "state" is an entry in the NFA definition coupled 00207 * with its outgoing transition(s). All states have a single outgoing 00208 * transition, except for accepting states (which have no outgoing 00209 * transitions) and alt states, which have two outgoing transitions. 00210 */ 00211 struct _State 00212 { 00213 typedef int _OpcodeT; 00214 00215 _OpcodeT _M_opcode; // type of outgoing transition 00216 _StateIdT _M_next; // outgoing transition 00217 _StateIdT _M_alt; // for _S_opcode_alternative 00218 unsigned int _M_subexpr; // for _S_opcode_subexpr_* 00219 _Tagger _M_tagger; // for _S_opcode_subexpr_* 00220 _Matcher _M_matches; // for _S_opcode_match 00221 00222 explicit _State(_OpcodeT __opcode) 00223 : _M_opcode(__opcode), _M_next(_S_invalid_state_id) 00224 { } 00225 00226 _State(const _Matcher& __m) 00227 : _M_opcode(_S_opcode_match), _M_next(_S_invalid_state_id), _M_matches(__m) 00228 { } 00229 00230 _State(_OpcodeT __opcode, unsigned int __s, const _Tagger& __t) 00231 : _M_opcode(__opcode), _M_next(_S_invalid_state_id), _M_subexpr(__s), 00232 _M_tagger(__t) 00233 { } 00234 00235 _State(_StateIdT __next, _StateIdT __alt) 00236 : _M_opcode(_S_opcode_alternative), _M_next(__next), _M_alt(__alt) 00237 { } 00238 00239 #ifdef _GLIBCXX_DEBUG 00240 std::ostream& 00241 _M_print(std::ostream& ostr) const; 00242 00243 // Prints graphviz dot commands for state. 00244 std::ostream& 00245 _M_dot(std::ostream& __ostr, _StateIdT __id) const; 00246 #endif 00247 }; 00248 00249 00250 /// The Grep Matcher works on sets of states. Here are sets of states. 00251 typedef std::set<_StateIdT> _StateSet; 00252 00253 /** 00254 * @brief struct _Nfa 00255 * 00256 * A collection of all states making up an NFA. 00257 * 00258 * An NFA is a 4-tuple M = (K, S, s, F), where 00259 * K is a finite set of states, 00260 * S is the alphabet of the NFA, 00261 * s is the initial state, 00262 * F is a set of final (accepting) states. 00263 * 00264 * This NFA class is templated on S, a type that will hold values of the 00265 * underlying alphabet (without regard to semantics of that alphabet). The 00266 * other elements of the tuple are generated during construction of the NFA 00267 * and are available through accessor member functions. 00268 */ 00269 class _Nfa 00270 : public _Automaton, public std::vector<_State> 00271 { 00272 public: 00273 typedef _State _StateT; 00274 typedef unsigned int _SizeT; 00275 typedef regex_constants::syntax_option_type _FlagT; 00276 00277 _Nfa(_FlagT __f) 00278 : _M_flags(__f), _M_start_state(0), _M_subexpr_count(0) 00279 { } 00280 00281 ~_Nfa() 00282 { } 00283 00284 _FlagT 00285 _M_options() const 00286 { return _M_flags; } 00287 00288 _StateIdT 00289 _M_start() const 00290 { return _M_start_state; } 00291 00292 const _StateSet& 00293 _M_final_states() const 00294 { return _M_accepting_states; } 00295 00296 _SizeT 00297 _M_sub_count() const 00298 { return _M_subexpr_count; } 00299 00300 _StateIdT 00301 _M_insert_accept() 00302 { 00303 this->push_back(_StateT(_S_opcode_accept)); 00304 _M_accepting_states.insert(this->size()-1); 00305 return this->size()-1; 00306 } 00307 00308 _StateIdT 00309 _M_insert_alt(_StateIdT __next, _StateIdT __alt) 00310 { 00311 this->push_back(_StateT(__next, __alt)); 00312 return this->size()-1; 00313 } 00314 00315 _StateIdT 00316 _M_insert_matcher(_Matcher __m) 00317 { 00318 this->push_back(_StateT(__m)); 00319 return this->size()-1; 00320 } 00321 00322 _StateIdT 00323 _M_insert_subexpr_begin(const _Tagger& __t) 00324 { 00325 this->push_back(_StateT(_S_opcode_subexpr_begin, _M_subexpr_count++, 00326 __t)); 00327 return this->size()-1; 00328 } 00329 00330 _StateIdT 00331 _M_insert_subexpr_end(unsigned int __i, const _Tagger& __t) 00332 { 00333 this->push_back(_StateT(_S_opcode_subexpr_end, __i, __t)); 00334 return this->size()-1; 00335 } 00336 00337 #ifdef _GLIBCXX_DEBUG 00338 std::ostream& 00339 _M_dot(std::ostream& __ostr) const; 00340 #endif 00341 00342 private: 00343 _FlagT _M_flags; 00344 _StateIdT _M_start_state; 00345 _StateSet _M_accepting_states; 00346 _SizeT _M_subexpr_count; 00347 }; 00348 00349 /// Describes a sequence of one or more %_State, its current start 00350 /// and end(s). This structure contains fragments of an NFA during 00351 /// construction. 00352 class _StateSeq 00353 { 00354 public: 00355 // Constructs a single-node sequence 00356 _StateSeq(_Nfa& __ss, _StateIdT __s, _StateIdT __e = _S_invalid_state_id) 00357 : _M_nfa(__ss), _M_start(__s), _M_end1(__s), _M_end2(__e) 00358 { } 00359 // Constructs a split sequence from two other sequencces 00360 _StateSeq(const _StateSeq& __e1, const _StateSeq& __e2) 00361 : _M_nfa(__e1._M_nfa), 00362 _M_start(_M_nfa._M_insert_alt(__e1._M_start, __e2._M_start)), 00363 _M_end1(__e1._M_end1), _M_end2(__e2._M_end1) 00364 { } 00365 00366 // Constructs a split sequence from a single sequence 00367 _StateSeq(const _StateSeq& __e, _StateIdT __id) 00368 : _M_nfa(__e._M_nfa), 00369 _M_start(_M_nfa._M_insert_alt(__id, __e._M_start)), 00370 _M_end1(__id), _M_end2(__e._M_end1) 00371 { } 00372 00373 // Constructs a copy of a %_StateSeq 00374 _StateSeq(const _StateSeq& __rhs) 00375 : _M_nfa(__rhs._M_nfa), _M_start(__rhs._M_start), 00376 _M_end1(__rhs._M_end1), _M_end2(__rhs._M_end2) 00377 { } 00378 00379 00380 _StateSeq& operator=(const _StateSeq& __rhs); 00381 00382 _StateIdT 00383 _M_front() const 00384 { return _M_start; } 00385 00386 // Extends a sequence by one. 00387 void 00388 _M_push_back(_StateIdT __id); 00389 00390 // Extends and maybe joins a sequence. 00391 void 00392 _M_append(_StateIdT __id); 00393 00394 void 00395 _M_append(_StateSeq& __rhs); 00396 00397 // Clones an entire sequence. 00398 _StateIdT 00399 _M_clone(); 00400 00401 private: 00402 _Nfa& _M_nfa; 00403 _StateIdT _M_start; 00404 _StateIdT _M_end1; 00405 _StateIdT _M_end2; 00406 00407 }; 00408 00409 //@} regex-detail 00410 _GLIBCXX_END_NAMESPACE_VERSION 00411 } // namespace __detail 00412 } // namespace std 00413 00414 #include <bits/regex_nfa.tcc> 00415