libstdc++
|
00001 // class template regex -*- C++ -*- 00002 00003 // Copyright (C) 2010-2013 Free Software Foundation, Inc. 00004 // 00005 // This file is part of the GNU ISO C++ Library. This library is free 00006 // software; you can redistribute it and/or modify it under the 00007 // terms of the GNU General Public License as published by the 00008 // Free Software Foundation; either version 3, or (at your option) 00009 // any later version. 00010 00011 // This library is distributed in the hope that it will be useful, 00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of 00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00014 // GNU General Public License for more details. 00015 00016 // Under Section 7 of GPL version 3, you are granted additional 00017 // permissions described in the GCC Runtime Library Exception, version 00018 // 3.1, as published by the Free Software Foundation. 00019 00020 // You should have received a copy of the GNU General Public License and 00021 // a copy of the GCC Runtime Library Exception along with this program; 00022 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 00023 // <http://www.gnu.org/licenses/>. 00024 00025 /** 00026 * @file bits/regex_compiler.h 00027 * This is an internal header file, included by other library headers. 00028 * Do not attempt to use it directly. @headername{regex} 00029 */ 00030 00031 namespace std _GLIBCXX_VISIBILITY(default) 00032 { 00033 namespace __detail 00034 { 00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION 00036 00037 /** 00038 * @addtogroup regex-detail 00039 * @{ 00040 */ 00041 00042 /// Base class for scanner. 00043 struct _Scanner_base 00044 { 00045 typedef unsigned int _StateT; 00046 00047 static constexpr _StateT _S_state_at_start = 1 << 0; 00048 static constexpr _StateT _S_state_in_brace = 1 << 2; 00049 static constexpr _StateT _S_state_in_bracket = 1 << 3; 00050 00051 virtual ~_Scanner_base() { }; 00052 }; 00053 00054 /** 00055 * @brief struct _Scanner. Scans an input range for regex tokens. 00056 * 00057 * The %_Scanner class interprets the regular expression pattern in 00058 * the input range passed to its constructor as a sequence of parse 00059 * tokens passed to the regular expression compiler. The sequence 00060 * of tokens provided depends on the flag settings passed to the 00061 * constructor: different regular expression grammars will interpret 00062 * the same input pattern in syntactically different ways. 00063 */ 00064 template<typename _InputIterator> 00065 class _Scanner: public _Scanner_base 00066 { 00067 public: 00068 typedef _InputIterator _IteratorT; 00069 typedef typename std::iterator_traits<_IteratorT>::value_type _CharT; 00070 typedef std::basic_string<_CharT> _StringT; 00071 typedef regex_constants::syntax_option_type _FlagT; 00072 typedef const std::ctype<_CharT> _CtypeT; 00073 00074 /// Token types returned from the scanner. 00075 enum _TokenT 00076 { 00077 _S_token_anychar, 00078 _S_token_backref, 00079 _S_token_bracket_begin, 00080 _S_token_bracket_end, 00081 _S_token_inverse_class, 00082 _S_token_char_class_name, 00083 _S_token_closure0, 00084 _S_token_closure1, 00085 _S_token_collelem_multi, 00086 _S_token_collelem_single, 00087 _S_token_collsymbol, 00088 _S_token_comma, 00089 _S_token_dash, 00090 _S_token_dup_count, 00091 _S_token_eof, 00092 _S_token_equiv_class_name, 00093 _S_token_interval_begin, 00094 _S_token_interval_end, 00095 _S_token_line_begin, 00096 _S_token_line_end, 00097 _S_token_opt, 00098 _S_token_or, 00099 _S_token_ord_char, 00100 _S_token_quoted_char, 00101 _S_token_subexpr_begin, 00102 _S_token_subexpr_end, 00103 _S_token_word_begin, 00104 _S_token_word_end, 00105 _S_token_unknown 00106 }; 00107 00108 _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags, 00109 std::locale __loc) 00110 : _M_current(__begin) , _M_end(__end) , _M_flags(__flags), 00111 _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start) 00112 { _M_advance(); } 00113 00114 void 00115 _M_advance(); 00116 00117 _TokenT 00118 _M_token() const 00119 { return _M_curToken; } 00120 00121 const _StringT& 00122 _M_value() const 00123 { return _M_curValue; } 00124 00125 #ifdef _GLIBCXX_DEBUG 00126 std::ostream& 00127 _M_print(std::ostream&); 00128 #endif 00129 00130 private: 00131 void 00132 _M_eat_escape(); 00133 00134 void 00135 _M_scan_in_brace(); 00136 00137 void 00138 _M_scan_in_bracket(); 00139 00140 void 00141 _M_eat_charclass(); 00142 00143 void 00144 _M_eat_equivclass(); 00145 00146 void 00147 _M_eat_collsymbol(); 00148 00149 _IteratorT _M_current; 00150 _IteratorT _M_end; 00151 _FlagT _M_flags; 00152 _CtypeT& _M_ctype; 00153 _TokenT _M_curToken; 00154 _StringT _M_curValue; 00155 _StateT _M_state; 00156 }; 00157 00158 template<typename _InputIterator> 00159 void 00160 _Scanner<_InputIterator>:: 00161 _M_advance() 00162 { 00163 if (_M_current == _M_end) 00164 { 00165 _M_curToken = _S_token_eof; 00166 return; 00167 } 00168 00169 _CharT __c = *_M_current; 00170 if (_M_state & _S_state_in_bracket) 00171 { 00172 _M_scan_in_bracket(); 00173 return; 00174 } 00175 if (_M_state & _S_state_in_brace) 00176 { 00177 _M_scan_in_brace(); 00178 return; 00179 } 00180 #if 0 00181 // TODO: re-enable line anchors when _M_assertion is implemented. 00182 // See PR libstdc++/47724 00183 else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^')) 00184 { 00185 _M_curToken = _S_token_line_begin; 00186 ++_M_current; 00187 return; 00188 } 00189 else if (__c == _M_ctype.widen('$')) 00190 { 00191 _M_curToken = _S_token_line_end; 00192 ++_M_current; 00193 return; 00194 } 00195 #endif 00196 else if (__c == _M_ctype.widen('.')) 00197 { 00198 _M_curToken = _S_token_anychar; 00199 ++_M_current; 00200 return; 00201 } 00202 else if (__c == _M_ctype.widen('*')) 00203 { 00204 _M_curToken = _S_token_closure0; 00205 ++_M_current; 00206 return; 00207 } 00208 else if (__c == _M_ctype.widen('+')) 00209 { 00210 _M_curToken = _S_token_closure1; 00211 ++_M_current; 00212 return; 00213 } 00214 else if (__c == _M_ctype.widen('|')) 00215 { 00216 _M_curToken = _S_token_or; 00217 ++_M_current; 00218 return; 00219 } 00220 else if (__c == _M_ctype.widen('[')) 00221 { 00222 _M_curToken = _S_token_bracket_begin; 00223 _M_state |= (_S_state_in_bracket | _S_state_at_start); 00224 ++_M_current; 00225 return; 00226 } 00227 else if (__c == _M_ctype.widen('\\')) 00228 { 00229 _M_eat_escape(); 00230 return; 00231 } 00232 else if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00233 { 00234 if (__c == _M_ctype.widen('(')) 00235 { 00236 _M_curToken = _S_token_subexpr_begin; 00237 ++_M_current; 00238 return; 00239 } 00240 else if (__c == _M_ctype.widen(')')) 00241 { 00242 _M_curToken = _S_token_subexpr_end; 00243 ++_M_current; 00244 return; 00245 } 00246 else if (__c == _M_ctype.widen('{')) 00247 { 00248 _M_curToken = _S_token_interval_begin; 00249 _M_state |= _S_state_in_brace; 00250 ++_M_current; 00251 return; 00252 } 00253 } 00254 00255 _M_curToken = _S_token_ord_char; 00256 _M_curValue.assign(1, __c); 00257 ++_M_current; 00258 } 00259 00260 00261 template<typename _InputIterator> 00262 void 00263 _Scanner<_InputIterator>:: 00264 _M_scan_in_brace() 00265 { 00266 if (_M_ctype.is(_CtypeT::digit, *_M_current)) 00267 { 00268 _M_curToken = _S_token_dup_count; 00269 _M_curValue.assign(1, *_M_current); 00270 ++_M_current; 00271 while (_M_current != _M_end 00272 && _M_ctype.is(_CtypeT::digit, *_M_current)) 00273 { 00274 _M_curValue += *_M_current; 00275 ++_M_current; 00276 } 00277 return; 00278 } 00279 else if (*_M_current == _M_ctype.widen(',')) 00280 { 00281 _M_curToken = _S_token_comma; 00282 ++_M_current; 00283 return; 00284 } 00285 if (_M_flags & (regex_constants::basic | regex_constants::grep)) 00286 { 00287 if (*_M_current == _M_ctype.widen('\\')) 00288 _M_eat_escape(); 00289 } 00290 else 00291 { 00292 if (*_M_current == _M_ctype.widen('}')) 00293 { 00294 _M_curToken = _S_token_interval_end; 00295 _M_state &= ~_S_state_in_brace; 00296 ++_M_current; 00297 return; 00298 } 00299 } 00300 } 00301 00302 template<typename _InputIterator> 00303 void 00304 _Scanner<_InputIterator>:: 00305 _M_scan_in_bracket() 00306 { 00307 if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^')) 00308 { 00309 _M_curToken = _S_token_inverse_class; 00310 _M_state &= ~_S_state_at_start; 00311 ++_M_current; 00312 return; 00313 } 00314 else if (*_M_current == _M_ctype.widen('[')) 00315 { 00316 ++_M_current; 00317 if (_M_current == _M_end) 00318 { 00319 _M_curToken = _S_token_eof; 00320 return; 00321 } 00322 00323 if (*_M_current == _M_ctype.widen('.')) 00324 { 00325 _M_curToken = _S_token_collsymbol; 00326 _M_eat_collsymbol(); 00327 return; 00328 } 00329 else if (*_M_current == _M_ctype.widen(':')) 00330 { 00331 _M_curToken = _S_token_char_class_name; 00332 _M_eat_charclass(); 00333 return; 00334 } 00335 else if (*_M_current == _M_ctype.widen('=')) 00336 { 00337 _M_curToken = _S_token_equiv_class_name; 00338 _M_eat_equivclass(); 00339 return; 00340 } 00341 } 00342 else if (*_M_current == _M_ctype.widen('-')) 00343 { 00344 _M_curToken = _S_token_dash; 00345 ++_M_current; 00346 return; 00347 } 00348 else if (*_M_current == _M_ctype.widen(']')) 00349 { 00350 if (!(_M_flags & regex_constants::ECMAScript) 00351 || !(_M_state & _S_state_at_start)) 00352 { 00353 // special case: only if _not_ chr first after 00354 // '[' or '[^' and if not ECMAscript 00355 _M_curToken = _S_token_bracket_end; 00356 ++_M_current; 00357 return; 00358 } 00359 } 00360 _M_curToken = _S_token_collelem_single; 00361 _M_curValue.assign(1, *_M_current); 00362 ++_M_current; 00363 } 00364 00365 template<typename _InputIterator> 00366 void 00367 _Scanner<_InputIterator>:: 00368 _M_eat_escape() 00369 { 00370 ++_M_current; 00371 if (_M_current == _M_end) 00372 { 00373 _M_curToken = _S_token_eof; 00374 return; 00375 } 00376 _CharT __c = *_M_current; 00377 ++_M_current; 00378 00379 if (__c == _M_ctype.widen('(')) 00380 { 00381 if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00382 { 00383 _M_curToken = _S_token_ord_char; 00384 _M_curValue.assign(1, __c); 00385 } 00386 else 00387 _M_curToken = _S_token_subexpr_begin; 00388 } 00389 else if (__c == _M_ctype.widen(')')) 00390 { 00391 if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00392 { 00393 _M_curToken = _S_token_ord_char; 00394 _M_curValue.assign(1, __c); 00395 } 00396 else 00397 _M_curToken = _S_token_subexpr_end; 00398 } 00399 else if (__c == _M_ctype.widen('{')) 00400 { 00401 if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00402 { 00403 _M_curToken = _S_token_ord_char; 00404 _M_curValue.assign(1, __c); 00405 } 00406 else 00407 { 00408 _M_curToken = _S_token_interval_begin; 00409 _M_state |= _S_state_in_brace; 00410 } 00411 } 00412 else if (__c == _M_ctype.widen('}')) 00413 { 00414 if (!(_M_flags & (regex_constants::basic | regex_constants::grep))) 00415 { 00416 _M_curToken = _S_token_ord_char; 00417 _M_curValue.assign(1, __c); 00418 } 00419 else 00420 { 00421 if (!(_M_state && _S_state_in_brace)) 00422 __throw_regex_error(regex_constants::error_badbrace); 00423 _M_state &= ~_S_state_in_brace; 00424 _M_curToken = _S_token_interval_end; 00425 } 00426 } 00427 else if (__c == _M_ctype.widen('x')) 00428 { 00429 ++_M_current; 00430 if (_M_current == _M_end) 00431 { 00432 _M_curToken = _S_token_eof; 00433 return; 00434 } 00435 if (_M_ctype.is(_CtypeT::digit, *_M_current)) 00436 { 00437 _M_curValue.assign(1, *_M_current); 00438 ++_M_current; 00439 if (_M_current == _M_end) 00440 { 00441 _M_curToken = _S_token_eof; 00442 return; 00443 } 00444 if (_M_ctype.is(_CtypeT::digit, *_M_current)) 00445 { 00446 _M_curValue += *_M_current; 00447 ++_M_current; 00448 return; 00449 } 00450 } 00451 } 00452 else if (__c == _M_ctype.widen('^') 00453 || __c == _M_ctype.widen('.') 00454 || __c == _M_ctype.widen('*') 00455 || __c == _M_ctype.widen('$') 00456 || __c == _M_ctype.widen('\\')) 00457 { 00458 _M_curToken = _S_token_ord_char; 00459 _M_curValue.assign(1, __c); 00460 } 00461 else if (_M_ctype.is(_CtypeT::digit, __c)) 00462 { 00463 _M_curToken = _S_token_backref; 00464 _M_curValue.assign(1, __c); 00465 } 00466 else 00467 __throw_regex_error(regex_constants::error_escape); 00468 } 00469 00470 00471 // Eats a character class or throwns an exception. 00472 // current point to ':' delimiter on entry, char after ']' on return 00473 template<typename _InputIterator> 00474 void 00475 _Scanner<_InputIterator>:: 00476 _M_eat_charclass() 00477 { 00478 ++_M_current; // skip ':' 00479 if (_M_current == _M_end) 00480 __throw_regex_error(regex_constants::error_ctype); 00481 for (_M_curValue.clear(); 00482 _M_current != _M_end && *_M_current != _M_ctype.widen(':'); 00483 ++_M_current) 00484 _M_curValue += *_M_current; 00485 if (_M_current == _M_end) 00486 __throw_regex_error(regex_constants::error_ctype); 00487 ++_M_current; // skip ':' 00488 if (*_M_current != _M_ctype.widen(']')) 00489 __throw_regex_error(regex_constants::error_ctype); 00490 ++_M_current; // skip ']' 00491 } 00492 00493 00494 template<typename _InputIterator> 00495 void 00496 _Scanner<_InputIterator>:: 00497 _M_eat_equivclass() 00498 { 00499 ++_M_current; // skip '=' 00500 if (_M_current == _M_end) 00501 __throw_regex_error(regex_constants::error_collate); 00502 for (_M_curValue.clear(); 00503 _M_current != _M_end && *_M_current != _M_ctype.widen('='); 00504 ++_M_current) 00505 _M_curValue += *_M_current; 00506 if (_M_current == _M_end) 00507 __throw_regex_error(regex_constants::error_collate); 00508 ++_M_current; // skip '=' 00509 if (*_M_current != _M_ctype.widen(']')) 00510 __throw_regex_error(regex_constants::error_collate); 00511 ++_M_current; // skip ']' 00512 } 00513 00514 00515 template<typename _InputIterator> 00516 void 00517 _Scanner<_InputIterator>:: 00518 _M_eat_collsymbol() 00519 { 00520 ++_M_current; // skip '.' 00521 if (_M_current == _M_end) 00522 __throw_regex_error(regex_constants::error_collate); 00523 for (_M_curValue.clear(); 00524 _M_current != _M_end && *_M_current != _M_ctype.widen('.'); 00525 ++_M_current) 00526 _M_curValue += *_M_current; 00527 if (_M_current == _M_end) 00528 __throw_regex_error(regex_constants::error_collate); 00529 ++_M_current; // skip '.' 00530 if (*_M_current != _M_ctype.widen(']')) 00531 __throw_regex_error(regex_constants::error_collate); 00532 ++_M_current; // skip ']' 00533 } 00534 00535 #ifdef _GLIBCXX_DEBUG 00536 template<typename _InputIterator> 00537 std::ostream& 00538 _Scanner<_InputIterator>:: 00539 _M_print(std::ostream& ostr) 00540 { 00541 switch (_M_curToken) 00542 { 00543 case _S_token_anychar: 00544 ostr << "any-character\n"; 00545 break; 00546 case _S_token_backref: 00547 ostr << "backref\n"; 00548 break; 00549 case _S_token_bracket_begin: 00550 ostr << "bracket-begin\n"; 00551 break; 00552 case _S_token_bracket_end: 00553 ostr << "bracket-end\n"; 00554 break; 00555 case _S_token_char_class_name: 00556 ostr << "char-class-name \"" << _M_curValue << "\"\n"; 00557 break; 00558 case _S_token_closure0: 00559 ostr << "closure0\n"; 00560 break; 00561 case _S_token_closure1: 00562 ostr << "closure1\n"; 00563 break; 00564 case _S_token_collelem_multi: 00565 ostr << "coll-elem-multi \"" << _M_curValue << "\"\n"; 00566 break; 00567 case _S_token_collelem_single: 00568 ostr << "coll-elem-single \"" << _M_curValue << "\"\n"; 00569 break; 00570 case _S_token_collsymbol: 00571 ostr << "collsymbol \"" << _M_curValue << "\"\n"; 00572 break; 00573 case _S_token_comma: 00574 ostr << "comma\n"; 00575 break; 00576 case _S_token_dash: 00577 ostr << "dash\n"; 00578 break; 00579 case _S_token_dup_count: 00580 ostr << "dup count: " << _M_curValue << "\n"; 00581 break; 00582 case _S_token_eof: 00583 ostr << "EOF\n"; 00584 break; 00585 case _S_token_equiv_class_name: 00586 ostr << "equiv-class-name \"" << _M_curValue << "\"\n"; 00587 break; 00588 case _S_token_interval_begin: 00589 ostr << "interval begin\n"; 00590 break; 00591 case _S_token_interval_end: 00592 ostr << "interval end\n"; 00593 break; 00594 case _S_token_line_begin: 00595 ostr << "line begin\n"; 00596 break; 00597 case _S_token_line_end: 00598 ostr << "line end\n"; 00599 break; 00600 case _S_token_opt: 00601 ostr << "opt\n"; 00602 break; 00603 case _S_token_or: 00604 ostr << "or\n"; 00605 break; 00606 case _S_token_ord_char: 00607 ostr << "ordinary character: \"" << _M_value() << "\"\n"; 00608 break; 00609 case _S_token_quoted_char: 00610 ostr << "quoted char\n"; 00611 break; 00612 case _S_token_subexpr_begin: 00613 ostr << "subexpr begin\n"; 00614 break; 00615 case _S_token_subexpr_end: 00616 ostr << "subexpr end\n"; 00617 break; 00618 case _S_token_word_begin: 00619 ostr << "word begin\n"; 00620 break; 00621 case _S_token_word_end: 00622 ostr << "word end\n"; 00623 break; 00624 case _S_token_unknown: 00625 ostr << "-- unknown token --\n"; 00626 break; 00627 } 00628 return ostr; 00629 } 00630 #endif 00631 00632 /// Builds an NFA from an input iterator interval. 00633 template<typename _InIter, typename _TraitsT> 00634 class _Compiler 00635 { 00636 public: 00637 typedef _InIter _IterT; 00638 typedef typename std::iterator_traits<_InIter>::value_type _CharT; 00639 typedef std::basic_string<_CharT> _StringT; 00640 typedef regex_constants::syntax_option_type _FlagT; 00641 00642 _Compiler(const _InIter& __b, const _InIter& __e, 00643 _TraitsT& __traits, _FlagT __flags); 00644 00645 const _Nfa& 00646 _M_nfa() const 00647 { return _M_state_store; } 00648 00649 private: 00650 typedef _Scanner<_InIter> _ScannerT; 00651 typedef typename _ScannerT::_TokenT _TokenT; 00652 typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT; 00653 typedef _RangeMatcher<_InIter, _TraitsT> _RMatcherT; 00654 00655 // accepts a specific token or returns false. 00656 bool 00657 _M_match_token(_TokenT __token); 00658 00659 void 00660 _M_disjunction(); 00661 00662 bool 00663 _M_alternative(); 00664 00665 bool 00666 _M_term(); 00667 00668 bool 00669 _M_assertion(); 00670 00671 bool 00672 _M_quantifier(); 00673 00674 bool 00675 _M_atom(); 00676 00677 bool 00678 _M_bracket_expression(); 00679 00680 bool 00681 _M_bracket_list(_RMatcherT& __matcher); 00682 00683 bool 00684 _M_follow_list(_RMatcherT& __matcher); 00685 00686 bool 00687 _M_follow_list2(_RMatcherT& __matcher); 00688 00689 bool 00690 _M_expression_term(_RMatcherT& __matcher); 00691 00692 bool 00693 _M_range_expression(_RMatcherT& __matcher); 00694 00695 bool 00696 _M_start_range(_RMatcherT& __matcher); 00697 00698 bool 00699 _M_collating_symbol(_RMatcherT& __matcher); 00700 00701 bool 00702 _M_equivalence_class(_RMatcherT& __matcher); 00703 00704 bool 00705 _M_character_class(_RMatcherT& __matcher); 00706 00707 int 00708 _M_cur_int_value(int __radix); 00709 00710 _TraitsT& _M_traits; 00711 _ScannerT _M_scanner; 00712 _StringT _M_cur_value; 00713 _Nfa _M_state_store; 00714 _StackT _M_stack; 00715 }; 00716 00717 template<typename _InIter, typename _TraitsT> 00718 _Compiler<_InIter, _TraitsT>:: 00719 _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits, 00720 _Compiler<_InIter, _TraitsT>::_FlagT __flags) 00721 : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()), 00722 _M_state_store(__flags) 00723 { 00724 typedef _StartTagger<_InIter, _TraitsT> _Start; 00725 typedef _EndTagger<_InIter, _TraitsT> _End; 00726 00727 _StateSeq __r(_M_state_store, 00728 _M_state_store._M_insert_subexpr_begin(_Start(0))); 00729 _M_disjunction(); 00730 if (!_M_stack.empty()) 00731 { 00732 __r._M_append(_M_stack.top()); 00733 _M_stack.pop(); 00734 } 00735 __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0))); 00736 __r._M_append(_M_state_store._M_insert_accept()); 00737 } 00738 00739 template<typename _InIter, typename _TraitsT> 00740 bool 00741 _Compiler<_InIter, _TraitsT>:: 00742 _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token) 00743 { 00744 if (token == _M_scanner._M_token()) 00745 { 00746 _M_cur_value = _M_scanner._M_value(); 00747 _M_scanner._M_advance(); 00748 return true; 00749 } 00750 return false; 00751 } 00752 00753 template<typename _InIter, typename _TraitsT> 00754 void 00755 _Compiler<_InIter, _TraitsT>:: 00756 _M_disjunction() 00757 { 00758 this->_M_alternative(); 00759 if (_M_match_token(_ScannerT::_S_token_or)) 00760 { 00761 _StateSeq __alt1 = _M_stack.top(); _M_stack.pop(); 00762 this->_M_disjunction(); 00763 _StateSeq __alt2 = _M_stack.top(); _M_stack.pop(); 00764 _M_stack.push(_StateSeq(__alt1, __alt2)); 00765 } 00766 } 00767 00768 template<typename _InIter, typename _TraitsT> 00769 bool 00770 _Compiler<_InIter, _TraitsT>:: 00771 _M_alternative() 00772 { 00773 if (this->_M_term()) 00774 { 00775 _StateSeq __re = _M_stack.top(); _M_stack.pop(); 00776 this->_M_alternative(); 00777 if (!_M_stack.empty()) 00778 { 00779 __re._M_append(_M_stack.top()); 00780 _M_stack.pop(); 00781 } 00782 _M_stack.push(__re); 00783 return true; 00784 } 00785 return false; 00786 } 00787 00788 template<typename _InIter, typename _TraitsT> 00789 bool 00790 _Compiler<_InIter, _TraitsT>:: 00791 _M_term() 00792 { 00793 if (this->_M_assertion()) 00794 return true; 00795 if (this->_M_atom()) 00796 { 00797 this->_M_quantifier(); 00798 return true; 00799 } 00800 return false; 00801 } 00802 00803 template<typename _InIter, typename _TraitsT> 00804 bool 00805 _Compiler<_InIter, _TraitsT>:: 00806 _M_assertion() 00807 { 00808 if (_M_match_token(_ScannerT::_S_token_line_begin)) 00809 { 00810 // __m.push(_Matcher::_S_opcode_line_begin); 00811 return true; 00812 } 00813 if (_M_match_token(_ScannerT::_S_token_line_end)) 00814 { 00815 // __m.push(_Matcher::_S_opcode_line_end); 00816 return true; 00817 } 00818 if (_M_match_token(_ScannerT::_S_token_word_begin)) 00819 { 00820 // __m.push(_Matcher::_S_opcode_word_begin); 00821 return true; 00822 } 00823 if (_M_match_token(_ScannerT::_S_token_word_end)) 00824 { 00825 // __m.push(_Matcher::_S_opcode_word_end); 00826 return true; 00827 } 00828 return false; 00829 } 00830 00831 template<typename _InIter, typename _TraitsT> 00832 bool 00833 _Compiler<_InIter, _TraitsT>:: 00834 _M_quantifier() 00835 { 00836 if (_M_match_token(_ScannerT::_S_token_closure0)) 00837 { 00838 if (_M_stack.empty()) 00839 __throw_regex_error(regex_constants::error_badrepeat); 00840 _StateSeq __r(_M_stack.top(), -1); 00841 __r._M_append(__r._M_front()); 00842 _M_stack.pop(); 00843 _M_stack.push(__r); 00844 return true; 00845 } 00846 if (_M_match_token(_ScannerT::_S_token_closure1)) 00847 { 00848 if (_M_stack.empty()) 00849 __throw_regex_error(regex_constants::error_badrepeat); 00850 _StateSeq __r(_M_state_store, 00851 _M_state_store. 00852 _M_insert_alt(_S_invalid_state_id, 00853 _M_stack.top()._M_front())); 00854 _M_stack.top()._M_append(__r); 00855 return true; 00856 } 00857 if (_M_match_token(_ScannerT::_S_token_opt)) 00858 { 00859 if (_M_stack.empty()) 00860 __throw_regex_error(regex_constants::error_badrepeat); 00861 _StateSeq __r(_M_stack.top(), -1); 00862 _M_stack.pop(); 00863 _M_stack.push(__r); 00864 return true; 00865 } 00866 if (_M_match_token(_ScannerT::_S_token_interval_begin)) 00867 { 00868 if (_M_stack.empty()) 00869 __throw_regex_error(regex_constants::error_badrepeat); 00870 if (!_M_match_token(_ScannerT::_S_token_dup_count)) 00871 __throw_regex_error(regex_constants::error_badbrace); 00872 _StateSeq __r(_M_stack.top()); 00873 int __min_rep = _M_cur_int_value(10); 00874 for (int __i = 1; __i < __min_rep; ++__i) 00875 _M_stack.top()._M_append(__r._M_clone()); 00876 if (_M_match_token(_ScannerT::_S_token_comma)) 00877 if (_M_match_token(_ScannerT::_S_token_dup_count)) 00878 { 00879 int __n = _M_cur_int_value(10) - __min_rep; 00880 if (__n < 0) 00881 __throw_regex_error(regex_constants::error_badbrace); 00882 for (int __i = 0; __i < __n; ++__i) 00883 { 00884 _StateSeq __r(_M_state_store, 00885 _M_state_store. 00886 _M_insert_alt(_S_invalid_state_id, 00887 _M_stack.top()._M_front())); 00888 _M_stack.top()._M_append(__r); 00889 } 00890 } 00891 else 00892 { 00893 _StateSeq __r(_M_stack.top(), -1); 00894 __r._M_push_back(__r._M_front()); 00895 _M_stack.pop(); 00896 _M_stack.push(__r); 00897 } 00898 if (!_M_match_token(_ScannerT::_S_token_interval_end)) 00899 __throw_regex_error(regex_constants::error_brace); 00900 return true; 00901 } 00902 return false; 00903 } 00904 00905 template<typename _InIter, typename _TraitsT> 00906 bool 00907 _Compiler<_InIter, _TraitsT>:: 00908 _M_atom() 00909 { 00910 typedef _CharMatcher<_InIter, _TraitsT> _CMatcher; 00911 typedef _StartTagger<_InIter, _TraitsT> _Start; 00912 typedef _EndTagger<_InIter, _TraitsT> _End; 00913 00914 if (_M_match_token(_ScannerT::_S_token_anychar)) 00915 { 00916 _M_stack.push(_StateSeq(_M_state_store, 00917 _M_state_store._M_insert_matcher 00918 (_AnyMatcher))); 00919 return true; 00920 } 00921 if (_M_match_token(_ScannerT::_S_token_ord_char)) 00922 { 00923 _M_stack.push(_StateSeq(_M_state_store, 00924 _M_state_store._M_insert_matcher 00925 (_CMatcher(_M_cur_value[0], _M_traits)))); 00926 return true; 00927 } 00928 if (_M_match_token(_ScannerT::_S_token_quoted_char)) 00929 { 00930 // note that in the ECMA grammar, this case covers backrefs. 00931 _M_stack.push(_StateSeq(_M_state_store, 00932 _M_state_store._M_insert_matcher 00933 (_CMatcher(_M_cur_value[0], _M_traits)))); 00934 return true; 00935 } 00936 if (_M_match_token(_ScannerT::_S_token_backref)) 00937 { 00938 // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value); 00939 return true; 00940 } 00941 if (_M_match_token(_ScannerT::_S_token_subexpr_begin)) 00942 { 00943 int __mark = _M_state_store._M_sub_count(); 00944 _StateSeq __r(_M_state_store, 00945 _M_state_store. 00946 _M_insert_subexpr_begin(_Start(__mark))); 00947 this->_M_disjunction(); 00948 if (!_M_match_token(_ScannerT::_S_token_subexpr_end)) 00949 __throw_regex_error(regex_constants::error_paren); 00950 if (!_M_stack.empty()) 00951 { 00952 __r._M_append(_M_stack.top()); 00953 _M_stack.pop(); 00954 } 00955 __r._M_append(_M_state_store._M_insert_subexpr_end 00956 (__mark, _End(__mark))); 00957 _M_stack.push(__r); 00958 return true; 00959 } 00960 return _M_bracket_expression(); 00961 } 00962 00963 template<typename _InIter, typename _TraitsT> 00964 bool 00965 _Compiler<_InIter, _TraitsT>:: 00966 _M_bracket_expression() 00967 { 00968 if (_M_match_token(_ScannerT::_S_token_bracket_begin)) 00969 { 00970 _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin), 00971 _M_traits); 00972 if (!_M_bracket_list(__matcher) 00973 || !_M_match_token(_ScannerT::_S_token_bracket_end)) 00974 __throw_regex_error(regex_constants::error_brack); 00975 _M_stack.push(_StateSeq(_M_state_store, 00976 _M_state_store._M_insert_matcher(__matcher))); 00977 return true; 00978 } 00979 return false; 00980 } 00981 00982 // If the dash is the last character in the bracket expression, it is not 00983 // special. 00984 template<typename _InIter, typename _TraitsT> 00985 bool 00986 _Compiler<_InIter, _TraitsT>:: 00987 _M_bracket_list(_RMatcherT& __matcher) 00988 { 00989 if (_M_follow_list(__matcher)) 00990 { 00991 if (_M_match_token(_ScannerT::_S_token_dash)) 00992 __matcher._M_add_char(_M_cur_value[0]); 00993 return true; 00994 } 00995 return false; 00996 } 00997 00998 template<typename _InIter, typename _TraitsT> 00999 bool 01000 _Compiler<_InIter, _TraitsT>:: 01001 _M_follow_list(_RMatcherT& __matcher) 01002 { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); } 01003 01004 template<typename _InIter, typename _TraitsT> 01005 bool 01006 _Compiler<_InIter, _TraitsT>:: 01007 _M_follow_list2(_RMatcherT& __matcher) 01008 { 01009 if (_M_expression_term(__matcher)) 01010 return _M_follow_list2(__matcher); 01011 return true; 01012 } 01013 01014 template<typename _InIter, typename _TraitsT> 01015 bool 01016 _Compiler<_InIter, _TraitsT>:: 01017 _M_expression_term(_RMatcherT& __matcher) 01018 { 01019 return (_M_collating_symbol(__matcher) 01020 || _M_character_class(__matcher) 01021 || _M_equivalence_class(__matcher) 01022 || (_M_start_range(__matcher) 01023 && _M_range_expression(__matcher))); 01024 } 01025 01026 template<typename _InIter, typename _TraitsT> 01027 bool 01028 _Compiler<_InIter, _TraitsT>:: 01029 _M_range_expression(_RMatcherT& __matcher) 01030 { 01031 if (!_M_collating_symbol(__matcher)) 01032 if (!_M_match_token(_ScannerT::_S_token_dash)) 01033 __throw_regex_error(regex_constants::error_range); 01034 __matcher._M_make_range(); 01035 return true; 01036 } 01037 01038 template<typename _InIter, typename _TraitsT> 01039 bool 01040 _Compiler<_InIter, _TraitsT>:: 01041 _M_start_range(_RMatcherT& __matcher) 01042 { return _M_match_token(_ScannerT::_S_token_dash); } 01043 01044 template<typename _InIter, typename _TraitsT> 01045 bool 01046 _Compiler<_InIter, _TraitsT>:: 01047 _M_collating_symbol(_RMatcherT& __matcher) 01048 { 01049 if (_M_match_token(_ScannerT::_S_token_collelem_single)) 01050 { 01051 __matcher._M_add_char(_M_cur_value[0]); 01052 return true; 01053 } 01054 if (_M_match_token(_ScannerT::_S_token_collsymbol)) 01055 { 01056 __matcher._M_add_collating_element(_M_cur_value); 01057 return true; 01058 } 01059 return false; 01060 } 01061 01062 template<typename _InIter, typename _TraitsT> 01063 bool 01064 _Compiler<_InIter, _TraitsT>:: 01065 _M_equivalence_class(_RMatcherT& __matcher) 01066 { 01067 if (_M_match_token(_ScannerT::_S_token_equiv_class_name)) 01068 { 01069 __matcher._M_add_equivalence_class(_M_cur_value); 01070 return true; 01071 } 01072 return false; 01073 } 01074 01075 template<typename _InIter, typename _TraitsT> 01076 bool 01077 _Compiler<_InIter, _TraitsT>:: 01078 _M_character_class(_RMatcherT& __matcher) 01079 { 01080 if (_M_match_token(_ScannerT::_S_token_char_class_name)) 01081 { 01082 __matcher._M_add_character_class(_M_cur_value); 01083 return true; 01084 } 01085 return false; 01086 } 01087 01088 template<typename _InIter, typename _TraitsT> 01089 int 01090 _Compiler<_InIter, _TraitsT>:: 01091 _M_cur_int_value(int __radix) 01092 { 01093 int __v = 0; 01094 for (typename _StringT::size_type __i = 0; 01095 __i < _M_cur_value.length(); ++__i) 01096 __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix); 01097 return __v; 01098 } 01099 01100 template<typename _InIter, typename _TraitsT> 01101 _AutomatonPtr 01102 __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t, 01103 regex_constants::syntax_option_type __f) 01104 { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t, 01105 __f)._M_nfa())); } 01106 01107 //@} regex-detail 01108 _GLIBCXX_END_NAMESPACE_VERSION 01109 } // namespace __detail 01110 } // namespace std