libstdc++
regex_compiler.h
Go to the documentation of this file.
00001 // class template regex -*- C++ -*-
00002 
00003 // Copyright (C) 2010-2013 Free Software Foundation, Inc.
00004 //
00005 // This file is part of the GNU ISO C++ Library.  This library is free
00006 // software; you can redistribute it and/or modify it under the
00007 // terms of the GNU General Public License as published by the
00008 // Free Software Foundation; either version 3, or (at your option)
00009 // any later version.
00010 
00011 // This library is distributed in the hope that it will be useful,
00012 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00013 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00014 // GNU General Public License for more details.
00015 
00016 // Under Section 7 of GPL version 3, you are granted additional
00017 // permissions described in the GCC Runtime Library Exception, version
00018 // 3.1, as published by the Free Software Foundation.
00019 
00020 // You should have received a copy of the GNU General Public License and
00021 // a copy of the GCC Runtime Library Exception along with this program;
00022 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00023 // <http://www.gnu.org/licenses/>.
00024 
00025 /**
00026  *  @file bits/regex_compiler.h
00027  *  This is an internal header file, included by other library headers.
00028  *  Do not attempt to use it directly. @headername{regex}
00029  */
00030 
00031 namespace std _GLIBCXX_VISIBILITY(default)
00032 {
00033 namespace __detail
00034 {
00035 _GLIBCXX_BEGIN_NAMESPACE_VERSION
00036 
00037   /**
00038    * @addtogroup regex-detail
00039    * @{
00040    */
00041 
00042   /// Base class for scanner.
00043   struct _Scanner_base
00044   {
00045     typedef unsigned int _StateT;
00046 
00047     static constexpr _StateT _S_state_at_start    = 1 << 0;
00048     static constexpr _StateT _S_state_in_brace    = 1 << 2;
00049     static constexpr _StateT _S_state_in_bracket  = 1 << 3;
00050 
00051     virtual ~_Scanner_base() { };
00052   };
00053 
00054   /**
00055    * @brief struct _Scanner. Scans an input range for regex tokens.
00056    *
00057    * The %_Scanner class interprets the regular expression pattern in
00058    * the input range passed to its constructor as a sequence of parse
00059    * tokens passed to the regular expression compiler.  The sequence
00060    * of tokens provided depends on the flag settings passed to the
00061    * constructor: different regular expression grammars will interpret
00062    * the same input pattern in syntactically different ways.
00063    */
00064   template<typename _InputIterator>
00065     class _Scanner: public _Scanner_base
00066     {
00067     public:
00068       typedef _InputIterator                                        _IteratorT;
00069       typedef typename std::iterator_traits<_IteratorT>::value_type _CharT;
00070       typedef std::basic_string<_CharT>                             _StringT;
00071       typedef regex_constants::syntax_option_type                   _FlagT;
00072       typedef const std::ctype<_CharT>                              _CtypeT;
00073 
00074       /// Token types returned from the scanner.
00075       enum _TokenT
00076       {
00077     _S_token_anychar,
00078     _S_token_backref,
00079     _S_token_bracket_begin,
00080     _S_token_bracket_end,
00081     _S_token_inverse_class,
00082     _S_token_char_class_name,
00083     _S_token_closure0,
00084     _S_token_closure1,
00085     _S_token_collelem_multi,
00086     _S_token_collelem_single,
00087     _S_token_collsymbol,
00088     _S_token_comma,
00089     _S_token_dash,
00090     _S_token_dup_count,
00091     _S_token_eof,
00092     _S_token_equiv_class_name,
00093     _S_token_interval_begin,
00094     _S_token_interval_end,
00095     _S_token_line_begin,
00096     _S_token_line_end,
00097     _S_token_opt,
00098     _S_token_or,
00099     _S_token_ord_char,
00100     _S_token_quoted_char,
00101     _S_token_subexpr_begin,
00102     _S_token_subexpr_end,
00103     _S_token_word_begin,
00104     _S_token_word_end,
00105     _S_token_unknown
00106       };
00107 
00108       _Scanner(_IteratorT __begin, _IteratorT __end, _FlagT __flags,
00109            std::locale __loc)
00110       : _M_current(__begin) , _M_end(__end) , _M_flags(__flags),
00111         _M_ctype(std::use_facet<_CtypeT>(__loc)), _M_state(_S_state_at_start)
00112       { _M_advance(); }
00113 
00114       void
00115       _M_advance();
00116 
00117       _TokenT
00118       _M_token() const
00119       { return _M_curToken; }
00120 
00121       const _StringT&
00122       _M_value() const
00123       { return _M_curValue; }
00124 
00125 #ifdef _GLIBCXX_DEBUG
00126       std::ostream&
00127       _M_print(std::ostream&);
00128 #endif
00129 
00130     private:
00131       void
00132       _M_eat_escape();
00133 
00134       void
00135       _M_scan_in_brace();
00136 
00137       void
00138       _M_scan_in_bracket();
00139 
00140       void
00141       _M_eat_charclass();
00142 
00143       void
00144       _M_eat_equivclass();
00145 
00146       void
00147       _M_eat_collsymbol();
00148 
00149       _IteratorT  _M_current;
00150       _IteratorT  _M_end;
00151       _FlagT      _M_flags;
00152       _CtypeT&    _M_ctype;
00153       _TokenT     _M_curToken;
00154       _StringT    _M_curValue;
00155       _StateT     _M_state;
00156     };
00157 
00158   template<typename _InputIterator>
00159     void
00160     _Scanner<_InputIterator>::
00161     _M_advance()
00162     {
00163       if (_M_current == _M_end)
00164     {
00165       _M_curToken = _S_token_eof;
00166       return;
00167     }
00168 
00169       _CharT __c = *_M_current;
00170       if (_M_state & _S_state_in_bracket)
00171     {
00172       _M_scan_in_bracket();
00173       return;
00174     }
00175       if (_M_state & _S_state_in_brace)
00176     {
00177       _M_scan_in_brace();
00178       return;
00179     }
00180 #if 0
00181       // TODO: re-enable line anchors when _M_assertion is implemented.
00182       // See PR libstdc++/47724
00183       else if (_M_state & _S_state_at_start && __c == _M_ctype.widen('^'))
00184     {
00185       _M_curToken = _S_token_line_begin;
00186       ++_M_current;
00187       return;
00188     }
00189       else if (__c == _M_ctype.widen('$'))
00190     {
00191       _M_curToken = _S_token_line_end;
00192       ++_M_current;
00193       return;
00194     }
00195 #endif
00196       else if (__c == _M_ctype.widen('.'))
00197     {
00198       _M_curToken = _S_token_anychar;
00199       ++_M_current;
00200       return;
00201     }
00202       else if (__c == _M_ctype.widen('*'))
00203     {
00204       _M_curToken = _S_token_closure0;
00205       ++_M_current;
00206       return;
00207     }
00208       else if (__c == _M_ctype.widen('+'))
00209     {
00210       _M_curToken = _S_token_closure1;
00211       ++_M_current;
00212       return;
00213     }
00214       else if (__c == _M_ctype.widen('|'))
00215     {
00216       _M_curToken = _S_token_or;
00217       ++_M_current;
00218       return;
00219     }
00220       else if (__c == _M_ctype.widen('['))
00221     {
00222       _M_curToken = _S_token_bracket_begin;
00223       _M_state |= (_S_state_in_bracket | _S_state_at_start);
00224       ++_M_current;
00225       return;
00226     }
00227       else if (__c == _M_ctype.widen('\\'))
00228     {
00229       _M_eat_escape();
00230       return;
00231     }
00232       else if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00233     {
00234       if (__c == _M_ctype.widen('('))
00235         {
00236           _M_curToken = _S_token_subexpr_begin;
00237           ++_M_current;
00238           return;
00239         }
00240       else if (__c == _M_ctype.widen(')'))
00241         {
00242           _M_curToken = _S_token_subexpr_end;
00243           ++_M_current;
00244           return;
00245         }
00246       else if (__c == _M_ctype.widen('{'))
00247         {
00248           _M_curToken = _S_token_interval_begin;
00249           _M_state |= _S_state_in_brace;
00250           ++_M_current;
00251           return;
00252         }
00253     }
00254 
00255       _M_curToken = _S_token_ord_char;
00256       _M_curValue.assign(1, __c);
00257       ++_M_current;
00258     }
00259 
00260 
00261   template<typename _InputIterator>
00262     void
00263     _Scanner<_InputIterator>::
00264     _M_scan_in_brace()
00265     {
00266       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00267     {
00268       _M_curToken = _S_token_dup_count;
00269       _M_curValue.assign(1, *_M_current);
00270       ++_M_current;
00271       while (_M_current != _M_end
00272          && _M_ctype.is(_CtypeT::digit, *_M_current))
00273         {
00274           _M_curValue += *_M_current;
00275           ++_M_current;
00276         }
00277       return;
00278     }
00279       else if (*_M_current == _M_ctype.widen(','))
00280     {
00281       _M_curToken = _S_token_comma;
00282       ++_M_current;
00283       return;
00284     }
00285       if (_M_flags & (regex_constants::basic | regex_constants::grep))
00286     {
00287       if (*_M_current == _M_ctype.widen('\\'))
00288         _M_eat_escape();
00289     }
00290       else 
00291     {
00292       if (*_M_current == _M_ctype.widen('}'))
00293         {
00294           _M_curToken = _S_token_interval_end;
00295           _M_state &= ~_S_state_in_brace;
00296           ++_M_current;
00297           return;
00298         }
00299     }
00300     }
00301 
00302   template<typename _InputIterator>
00303     void
00304     _Scanner<_InputIterator>::
00305     _M_scan_in_bracket()
00306     {
00307       if (_M_state & _S_state_at_start && *_M_current == _M_ctype.widen('^'))
00308     {
00309       _M_curToken = _S_token_inverse_class;
00310       _M_state &= ~_S_state_at_start;
00311       ++_M_current;
00312       return;
00313     }
00314       else if (*_M_current == _M_ctype.widen('['))
00315     {
00316       ++_M_current;
00317       if (_M_current == _M_end)
00318         {
00319           _M_curToken = _S_token_eof;
00320           return;
00321         }
00322 
00323       if (*_M_current == _M_ctype.widen('.'))
00324         {
00325           _M_curToken = _S_token_collsymbol;
00326           _M_eat_collsymbol();
00327           return;
00328         }
00329       else if (*_M_current == _M_ctype.widen(':'))
00330         {
00331           _M_curToken = _S_token_char_class_name;
00332           _M_eat_charclass();
00333           return;
00334         }
00335       else if (*_M_current == _M_ctype.widen('='))
00336         {
00337           _M_curToken = _S_token_equiv_class_name;
00338           _M_eat_equivclass();
00339           return;
00340         }
00341     }
00342       else if (*_M_current == _M_ctype.widen('-'))
00343     {
00344       _M_curToken = _S_token_dash;
00345       ++_M_current;
00346       return;
00347     }
00348       else if (*_M_current == _M_ctype.widen(']'))
00349     {
00350       if (!(_M_flags & regex_constants::ECMAScript)
00351           || !(_M_state & _S_state_at_start))
00352         {
00353           // special case: only if  _not_ chr first after
00354           // '[' or '[^' and if not ECMAscript
00355           _M_curToken = _S_token_bracket_end;
00356           ++_M_current;
00357           return;
00358         }
00359     }
00360       _M_curToken = _S_token_collelem_single;
00361       _M_curValue.assign(1, *_M_current);
00362       ++_M_current;
00363     }
00364 
00365   template<typename _InputIterator>
00366     void
00367     _Scanner<_InputIterator>::
00368     _M_eat_escape()
00369     {
00370       ++_M_current;
00371       if (_M_current == _M_end)
00372     {
00373       _M_curToken = _S_token_eof;
00374       return;
00375     }
00376       _CharT __c = *_M_current;
00377       ++_M_current;
00378 
00379       if (__c == _M_ctype.widen('('))
00380     {
00381       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00382         {
00383           _M_curToken = _S_token_ord_char;
00384           _M_curValue.assign(1, __c);
00385         }
00386       else
00387         _M_curToken = _S_token_subexpr_begin;
00388     }
00389       else if (__c == _M_ctype.widen(')'))
00390     {
00391       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00392         {
00393           _M_curToken = _S_token_ord_char;
00394           _M_curValue.assign(1, __c);
00395         }
00396       else
00397         _M_curToken = _S_token_subexpr_end;
00398     }
00399       else if (__c == _M_ctype.widen('{'))
00400     {
00401       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00402         {
00403           _M_curToken = _S_token_ord_char;
00404           _M_curValue.assign(1, __c);
00405         }
00406       else
00407         {
00408           _M_curToken = _S_token_interval_begin;
00409           _M_state |= _S_state_in_brace;
00410         }
00411     }
00412       else if (__c == _M_ctype.widen('}'))
00413     {
00414       if (!(_M_flags & (regex_constants::basic | regex_constants::grep)))
00415         {
00416           _M_curToken = _S_token_ord_char;
00417           _M_curValue.assign(1, __c);
00418         }
00419       else
00420         {
00421           if (!(_M_state && _S_state_in_brace))
00422         __throw_regex_error(regex_constants::error_badbrace);
00423           _M_state &= ~_S_state_in_brace;
00424           _M_curToken = _S_token_interval_end;
00425         }
00426     }
00427       else if (__c == _M_ctype.widen('x'))
00428     {
00429       ++_M_current;
00430       if (_M_current == _M_end)
00431         {
00432           _M_curToken = _S_token_eof;
00433           return;
00434         }
00435       if (_M_ctype.is(_CtypeT::digit, *_M_current))
00436         {
00437           _M_curValue.assign(1, *_M_current);
00438           ++_M_current;
00439           if (_M_current == _M_end)
00440         {
00441           _M_curToken = _S_token_eof;
00442           return;
00443         }
00444           if (_M_ctype.is(_CtypeT::digit, *_M_current))
00445         {
00446           _M_curValue += *_M_current;
00447           ++_M_current;
00448           return;
00449         }
00450         }
00451     }
00452       else if (__c == _M_ctype.widen('^')
00453            || __c == _M_ctype.widen('.')
00454            || __c == _M_ctype.widen('*')
00455            || __c == _M_ctype.widen('$')
00456            || __c == _M_ctype.widen('\\'))
00457     {
00458       _M_curToken = _S_token_ord_char;
00459       _M_curValue.assign(1, __c);
00460     }
00461       else if (_M_ctype.is(_CtypeT::digit, __c))
00462     {
00463       _M_curToken = _S_token_backref;
00464       _M_curValue.assign(1, __c);
00465     }
00466       else
00467     __throw_regex_error(regex_constants::error_escape);
00468     }
00469 
00470 
00471   // Eats a character class or throwns an exception.
00472   // current point to ':' delimiter on entry, char after ']' on return
00473   template<typename _InputIterator>
00474     void
00475     _Scanner<_InputIterator>::
00476     _M_eat_charclass()
00477     {
00478       ++_M_current; // skip ':'
00479       if (_M_current == _M_end)
00480     __throw_regex_error(regex_constants::error_ctype);
00481       for (_M_curValue.clear();
00482        _M_current != _M_end && *_M_current != _M_ctype.widen(':');
00483        ++_M_current)
00484     _M_curValue += *_M_current;
00485       if (_M_current == _M_end)
00486     __throw_regex_error(regex_constants::error_ctype);
00487       ++_M_current; // skip ':'
00488       if (*_M_current != _M_ctype.widen(']'))
00489     __throw_regex_error(regex_constants::error_ctype);
00490       ++_M_current; // skip ']'
00491     }
00492 
00493 
00494   template<typename _InputIterator>
00495     void
00496     _Scanner<_InputIterator>::
00497     _M_eat_equivclass()
00498     {
00499       ++_M_current; // skip '='
00500       if (_M_current == _M_end)
00501     __throw_regex_error(regex_constants::error_collate);
00502       for (_M_curValue.clear();
00503        _M_current != _M_end && *_M_current != _M_ctype.widen('=');
00504        ++_M_current)
00505     _M_curValue += *_M_current;
00506       if (_M_current == _M_end)
00507     __throw_regex_error(regex_constants::error_collate);
00508       ++_M_current; // skip '='
00509       if (*_M_current != _M_ctype.widen(']'))
00510     __throw_regex_error(regex_constants::error_collate);
00511       ++_M_current; // skip ']'
00512     }
00513 
00514 
00515   template<typename _InputIterator>
00516     void
00517     _Scanner<_InputIterator>::
00518     _M_eat_collsymbol()
00519     {
00520       ++_M_current; // skip '.'
00521       if (_M_current == _M_end)
00522     __throw_regex_error(regex_constants::error_collate);
00523       for (_M_curValue.clear();
00524        _M_current != _M_end && *_M_current != _M_ctype.widen('.');
00525        ++_M_current)
00526     _M_curValue += *_M_current;
00527       if (_M_current == _M_end)
00528     __throw_regex_error(regex_constants::error_collate);
00529       ++_M_current; // skip '.'
00530       if (*_M_current != _M_ctype.widen(']'))
00531     __throw_regex_error(regex_constants::error_collate);
00532       ++_M_current; // skip ']'
00533     }
00534 
00535 #ifdef _GLIBCXX_DEBUG
00536   template<typename _InputIterator>
00537     std::ostream&
00538     _Scanner<_InputIterator>::
00539     _M_print(std::ostream& ostr)
00540     {
00541       switch (_M_curToken)
00542       {
00543     case _S_token_anychar:
00544       ostr << "any-character\n";
00545       break;
00546     case _S_token_backref:
00547       ostr << "backref\n";
00548       break;
00549     case _S_token_bracket_begin:
00550       ostr << "bracket-begin\n";
00551       break;
00552     case _S_token_bracket_end:
00553       ostr << "bracket-end\n";
00554       break;
00555     case _S_token_char_class_name:
00556       ostr << "char-class-name \"" << _M_curValue << "\"\n";
00557       break;
00558     case _S_token_closure0:
00559       ostr << "closure0\n";
00560       break;
00561     case _S_token_closure1:
00562       ostr << "closure1\n";
00563       break;
00564     case _S_token_collelem_multi:
00565       ostr << "coll-elem-multi \"" << _M_curValue << "\"\n";
00566       break;
00567     case _S_token_collelem_single:
00568       ostr << "coll-elem-single \"" << _M_curValue << "\"\n";
00569       break;
00570     case _S_token_collsymbol:
00571       ostr << "collsymbol \"" << _M_curValue << "\"\n";
00572       break;
00573     case _S_token_comma:
00574       ostr << "comma\n";
00575       break;
00576     case _S_token_dash:
00577       ostr << "dash\n";
00578       break;
00579     case _S_token_dup_count:
00580       ostr << "dup count: " << _M_curValue << "\n";
00581       break;
00582     case _S_token_eof:
00583       ostr << "EOF\n";
00584       break;
00585     case _S_token_equiv_class_name:
00586       ostr << "equiv-class-name \"" << _M_curValue << "\"\n";
00587       break;
00588     case _S_token_interval_begin:
00589       ostr << "interval begin\n";
00590       break;
00591     case _S_token_interval_end:
00592       ostr << "interval end\n";
00593       break;
00594     case _S_token_line_begin:
00595       ostr << "line begin\n";
00596       break;
00597     case _S_token_line_end:
00598       ostr << "line end\n";
00599       break;
00600     case _S_token_opt:
00601       ostr << "opt\n";
00602       break;
00603     case _S_token_or:
00604       ostr << "or\n";
00605       break;
00606     case _S_token_ord_char:
00607       ostr << "ordinary character: \"" << _M_value() << "\"\n";
00608       break;
00609     case _S_token_quoted_char:
00610       ostr << "quoted char\n";
00611       break;
00612     case _S_token_subexpr_begin:
00613       ostr << "subexpr begin\n";
00614       break;
00615     case _S_token_subexpr_end:
00616       ostr << "subexpr end\n";
00617       break;
00618     case _S_token_word_begin:
00619       ostr << "word begin\n";
00620       break;
00621     case _S_token_word_end:
00622       ostr << "word end\n";
00623       break;
00624     case _S_token_unknown:
00625       ostr << "-- unknown token --\n";
00626       break;
00627       }
00628       return ostr;
00629     }
00630 #endif
00631 
00632   /// Builds an NFA from an input iterator interval.
00633   template<typename _InIter, typename _TraitsT>
00634     class _Compiler
00635     {
00636     public:
00637       typedef _InIter                                            _IterT;
00638       typedef typename std::iterator_traits<_InIter>::value_type _CharT;
00639       typedef std::basic_string<_CharT>                          _StringT;
00640       typedef regex_constants::syntax_option_type                _FlagT;
00641 
00642       _Compiler(const _InIter& __b, const _InIter& __e,
00643         _TraitsT& __traits, _FlagT __flags);
00644 
00645       const _Nfa&
00646       _M_nfa() const
00647       { return _M_state_store; }
00648 
00649     private:
00650       typedef _Scanner<_InIter>                              _ScannerT;
00651       typedef typename _ScannerT::_TokenT                    _TokenT;
00652       typedef std::stack<_StateSeq, std::vector<_StateSeq> > _StackT;
00653       typedef _RangeMatcher<_InIter, _TraitsT>               _RMatcherT;
00654 
00655       // accepts a specific token or returns false.
00656       bool
00657       _M_match_token(_TokenT __token);
00658 
00659       void
00660       _M_disjunction();
00661 
00662       bool
00663       _M_alternative();
00664 
00665       bool
00666       _M_term();
00667 
00668       bool
00669       _M_assertion();
00670 
00671       bool
00672       _M_quantifier();
00673 
00674       bool
00675       _M_atom();
00676 
00677       bool
00678       _M_bracket_expression();
00679 
00680       bool
00681       _M_bracket_list(_RMatcherT& __matcher);
00682 
00683       bool
00684       _M_follow_list(_RMatcherT& __matcher);
00685 
00686       bool
00687       _M_follow_list2(_RMatcherT& __matcher);
00688 
00689       bool
00690       _M_expression_term(_RMatcherT& __matcher);
00691 
00692       bool
00693       _M_range_expression(_RMatcherT& __matcher);
00694 
00695       bool
00696       _M_start_range(_RMatcherT& __matcher);
00697 
00698       bool
00699       _M_collating_symbol(_RMatcherT& __matcher);
00700 
00701       bool
00702       _M_equivalence_class(_RMatcherT& __matcher);
00703 
00704       bool
00705       _M_character_class(_RMatcherT& __matcher);
00706 
00707       int
00708       _M_cur_int_value(int __radix);
00709 
00710       _TraitsT&      _M_traits;
00711       _ScannerT      _M_scanner;
00712       _StringT       _M_cur_value;
00713       _Nfa           _M_state_store;
00714       _StackT        _M_stack;
00715     };
00716 
00717   template<typename _InIter, typename _TraitsT>
00718     _Compiler<_InIter, _TraitsT>::
00719     _Compiler(const _InIter& __b, const _InIter& __e, _TraitsT& __traits,
00720           _Compiler<_InIter, _TraitsT>::_FlagT __flags)
00721     : _M_traits(__traits), _M_scanner(__b, __e, __flags, _M_traits.getloc()),
00722       _M_state_store(__flags)
00723     {
00724       typedef _StartTagger<_InIter, _TraitsT> _Start;
00725       typedef _EndTagger<_InIter, _TraitsT> _End;
00726 
00727       _StateSeq __r(_M_state_store,
00728                 _M_state_store._M_insert_subexpr_begin(_Start(0)));
00729       _M_disjunction();
00730       if (!_M_stack.empty())
00731     {
00732       __r._M_append(_M_stack.top());
00733       _M_stack.pop();
00734     }
00735       __r._M_append(_M_state_store._M_insert_subexpr_end(0, _End(0)));
00736       __r._M_append(_M_state_store._M_insert_accept());
00737     }
00738 
00739   template<typename _InIter, typename _TraitsT>
00740     bool
00741     _Compiler<_InIter, _TraitsT>::
00742     _M_match_token(_Compiler<_InIter, _TraitsT>::_TokenT token)
00743     { 
00744       if (token == _M_scanner._M_token())
00745     {
00746       _M_cur_value = _M_scanner._M_value();
00747       _M_scanner._M_advance();
00748       return true;
00749     }
00750       return false;
00751     }
00752 
00753   template<typename _InIter, typename _TraitsT>
00754     void
00755     _Compiler<_InIter, _TraitsT>::
00756     _M_disjunction()
00757     {
00758       this->_M_alternative();
00759       if (_M_match_token(_ScannerT::_S_token_or))
00760     {
00761       _StateSeq __alt1 = _M_stack.top(); _M_stack.pop();
00762       this->_M_disjunction();
00763       _StateSeq __alt2 = _M_stack.top(); _M_stack.pop();
00764       _M_stack.push(_StateSeq(__alt1, __alt2));
00765     }
00766     }
00767 
00768   template<typename _InIter, typename _TraitsT>
00769     bool
00770     _Compiler<_InIter, _TraitsT>::
00771     _M_alternative()
00772     {
00773       if (this->_M_term())
00774     {
00775       _StateSeq __re = _M_stack.top(); _M_stack.pop();
00776       this->_M_alternative();
00777       if (!_M_stack.empty())
00778         {
00779           __re._M_append(_M_stack.top());
00780           _M_stack.pop();
00781         }
00782       _M_stack.push(__re);
00783       return true;
00784     }
00785       return false;
00786     }
00787 
00788   template<typename _InIter, typename _TraitsT>
00789     bool
00790     _Compiler<_InIter, _TraitsT>::
00791     _M_term()
00792     {
00793       if (this->_M_assertion())
00794     return true;
00795       if (this->_M_atom())
00796     {
00797       this->_M_quantifier();
00798       return true;
00799     }
00800       return false;
00801     }
00802 
00803   template<typename _InIter, typename _TraitsT>
00804     bool
00805     _Compiler<_InIter, _TraitsT>::
00806     _M_assertion()
00807     {
00808       if (_M_match_token(_ScannerT::_S_token_line_begin))
00809     {
00810       // __m.push(_Matcher::_S_opcode_line_begin);
00811       return true;
00812     }
00813       if (_M_match_token(_ScannerT::_S_token_line_end))
00814     {
00815       // __m.push(_Matcher::_S_opcode_line_end);
00816       return true;
00817     }
00818       if (_M_match_token(_ScannerT::_S_token_word_begin))
00819     {
00820       // __m.push(_Matcher::_S_opcode_word_begin);
00821       return true;
00822     }
00823       if (_M_match_token(_ScannerT::_S_token_word_end))
00824     {
00825       // __m.push(_Matcher::_S_opcode_word_end);
00826       return true;
00827     }
00828       return false;
00829     }
00830 
00831   template<typename _InIter, typename _TraitsT>
00832     bool
00833     _Compiler<_InIter, _TraitsT>::
00834     _M_quantifier()
00835     {
00836       if (_M_match_token(_ScannerT::_S_token_closure0))
00837     {
00838       if (_M_stack.empty())
00839         __throw_regex_error(regex_constants::error_badrepeat);
00840       _StateSeq __r(_M_stack.top(), -1);
00841       __r._M_append(__r._M_front());
00842       _M_stack.pop();
00843       _M_stack.push(__r);
00844       return true;
00845     }
00846       if (_M_match_token(_ScannerT::_S_token_closure1))
00847     {
00848       if (_M_stack.empty())
00849         __throw_regex_error(regex_constants::error_badrepeat);
00850       _StateSeq __r(_M_state_store,
00851             _M_state_store.
00852             _M_insert_alt(_S_invalid_state_id,
00853                       _M_stack.top()._M_front()));
00854       _M_stack.top()._M_append(__r);
00855       return true;
00856     }
00857       if (_M_match_token(_ScannerT::_S_token_opt))
00858     {
00859       if (_M_stack.empty())
00860       __throw_regex_error(regex_constants::error_badrepeat);
00861       _StateSeq __r(_M_stack.top(), -1);
00862       _M_stack.pop();
00863       _M_stack.push(__r);
00864       return true;
00865     }
00866       if (_M_match_token(_ScannerT::_S_token_interval_begin))
00867     {
00868       if (_M_stack.empty())
00869         __throw_regex_error(regex_constants::error_badrepeat);
00870       if (!_M_match_token(_ScannerT::_S_token_dup_count))
00871         __throw_regex_error(regex_constants::error_badbrace);
00872       _StateSeq __r(_M_stack.top());
00873       int __min_rep = _M_cur_int_value(10);
00874       for (int __i = 1; __i < __min_rep; ++__i)
00875         _M_stack.top()._M_append(__r._M_clone()); 
00876       if (_M_match_token(_ScannerT::_S_token_comma))
00877         if (_M_match_token(_ScannerT::_S_token_dup_count))
00878           {
00879         int __n = _M_cur_int_value(10) - __min_rep;
00880         if (__n < 0)
00881           __throw_regex_error(regex_constants::error_badbrace);
00882         for (int __i = 0; __i < __n; ++__i)
00883           {
00884             _StateSeq __r(_M_state_store,
00885                   _M_state_store.
00886                   _M_insert_alt(_S_invalid_state_id,
00887                         _M_stack.top()._M_front()));
00888             _M_stack.top()._M_append(__r);
00889           }
00890           }
00891         else
00892           {
00893         _StateSeq __r(_M_stack.top(), -1);
00894         __r._M_push_back(__r._M_front());
00895         _M_stack.pop();
00896         _M_stack.push(__r);
00897           }
00898       if (!_M_match_token(_ScannerT::_S_token_interval_end))
00899         __throw_regex_error(regex_constants::error_brace);
00900       return true;
00901     }
00902       return false;
00903     }
00904 
00905   template<typename _InIter, typename _TraitsT>
00906     bool
00907     _Compiler<_InIter, _TraitsT>::
00908     _M_atom()
00909     {
00910       typedef _CharMatcher<_InIter, _TraitsT> _CMatcher;
00911       typedef _StartTagger<_InIter, _TraitsT> _Start;
00912       typedef _EndTagger<_InIter, _TraitsT> _End;
00913 
00914       if (_M_match_token(_ScannerT::_S_token_anychar))
00915     {
00916       _M_stack.push(_StateSeq(_M_state_store,
00917                                   _M_state_store._M_insert_matcher
00918                                   (_AnyMatcher)));
00919       return true;
00920     }
00921       if (_M_match_token(_ScannerT::_S_token_ord_char))
00922     {
00923       _M_stack.push(_StateSeq(_M_state_store,
00924                                   _M_state_store._M_insert_matcher
00925                                   (_CMatcher(_M_cur_value[0], _M_traits))));
00926       return true;
00927     }
00928       if (_M_match_token(_ScannerT::_S_token_quoted_char))
00929     {
00930       // note that in the ECMA grammar, this case covers backrefs.
00931       _M_stack.push(_StateSeq(_M_state_store,
00932                   _M_state_store._M_insert_matcher
00933                   (_CMatcher(_M_cur_value[0], _M_traits))));
00934       return true;
00935     }
00936       if (_M_match_token(_ScannerT::_S_token_backref))
00937     {
00938       // __m.push(_Matcher::_S_opcode_ordchar, _M_cur_value);
00939       return true;
00940     }
00941       if (_M_match_token(_ScannerT::_S_token_subexpr_begin))
00942     {
00943       int __mark = _M_state_store._M_sub_count();
00944       _StateSeq __r(_M_state_store,
00945             _M_state_store.
00946             _M_insert_subexpr_begin(_Start(__mark)));
00947       this->_M_disjunction();
00948       if (!_M_match_token(_ScannerT::_S_token_subexpr_end))
00949         __throw_regex_error(regex_constants::error_paren);
00950       if (!_M_stack.empty())
00951         {
00952           __r._M_append(_M_stack.top());
00953           _M_stack.pop();
00954         }
00955       __r._M_append(_M_state_store._M_insert_subexpr_end
00956             (__mark, _End(__mark)));
00957       _M_stack.push(__r);
00958       return true;
00959     }
00960       return _M_bracket_expression();
00961     }
00962 
00963   template<typename _InIter, typename _TraitsT>
00964     bool
00965     _Compiler<_InIter, _TraitsT>::
00966     _M_bracket_expression()
00967     {
00968       if (_M_match_token(_ScannerT::_S_token_bracket_begin))
00969     {
00970       _RMatcherT __matcher(_M_match_token(_ScannerT::_S_token_line_begin),
00971                    _M_traits);
00972       if (!_M_bracket_list(__matcher)
00973           || !_M_match_token(_ScannerT::_S_token_bracket_end))
00974         __throw_regex_error(regex_constants::error_brack);
00975       _M_stack.push(_StateSeq(_M_state_store,
00976                   _M_state_store._M_insert_matcher(__matcher)));
00977       return true;
00978     }
00979       return false;
00980     }
00981 
00982   // If the dash is the last character in the bracket expression, it is not
00983   // special.
00984   template<typename _InIter, typename _TraitsT>
00985     bool
00986     _Compiler<_InIter, _TraitsT>::
00987     _M_bracket_list(_RMatcherT& __matcher)
00988     {
00989       if (_M_follow_list(__matcher))
00990     {
00991       if (_M_match_token(_ScannerT::_S_token_dash))
00992         __matcher._M_add_char(_M_cur_value[0]);
00993       return true;
00994     }
00995       return false;
00996     }
00997 
00998   template<typename _InIter, typename _TraitsT>
00999     bool
01000     _Compiler<_InIter, _TraitsT>::
01001     _M_follow_list(_RMatcherT& __matcher)
01002     { return _M_expression_term(__matcher) && _M_follow_list2(__matcher); }
01003 
01004   template<typename _InIter, typename _TraitsT>
01005     bool
01006     _Compiler<_InIter, _TraitsT>::
01007     _M_follow_list2(_RMatcherT& __matcher)
01008     {
01009       if (_M_expression_term(__matcher))
01010     return _M_follow_list2(__matcher);
01011       return true;
01012     }
01013 
01014   template<typename _InIter, typename _TraitsT>
01015     bool
01016     _Compiler<_InIter, _TraitsT>::
01017     _M_expression_term(_RMatcherT& __matcher)
01018     {
01019       return (_M_collating_symbol(__matcher)
01020           || _M_character_class(__matcher)
01021           || _M_equivalence_class(__matcher)
01022           || (_M_start_range(__matcher)
01023           && _M_range_expression(__matcher)));
01024     }
01025 
01026   template<typename _InIter, typename _TraitsT>
01027     bool
01028     _Compiler<_InIter, _TraitsT>::
01029     _M_range_expression(_RMatcherT& __matcher)
01030     {
01031       if (!_M_collating_symbol(__matcher))
01032     if (!_M_match_token(_ScannerT::_S_token_dash))
01033       __throw_regex_error(regex_constants::error_range);
01034       __matcher._M_make_range();
01035       return true;
01036     }
01037 
01038   template<typename _InIter, typename _TraitsT>
01039     bool
01040     _Compiler<_InIter, _TraitsT>::
01041     _M_start_range(_RMatcherT& __matcher)
01042     { return _M_match_token(_ScannerT::_S_token_dash); }
01043 
01044   template<typename _InIter, typename _TraitsT>
01045     bool
01046     _Compiler<_InIter, _TraitsT>::
01047     _M_collating_symbol(_RMatcherT& __matcher)
01048     {
01049       if (_M_match_token(_ScannerT::_S_token_collelem_single))
01050     {
01051       __matcher._M_add_char(_M_cur_value[0]);
01052       return true;
01053     }
01054       if (_M_match_token(_ScannerT::_S_token_collsymbol))
01055     {
01056       __matcher._M_add_collating_element(_M_cur_value);
01057       return true;
01058     }
01059       return false;
01060     }
01061 
01062   template<typename _InIter, typename _TraitsT>
01063     bool
01064     _Compiler<_InIter, _TraitsT>::
01065     _M_equivalence_class(_RMatcherT& __matcher)
01066     {
01067       if (_M_match_token(_ScannerT::_S_token_equiv_class_name))
01068     {
01069       __matcher._M_add_equivalence_class(_M_cur_value);
01070       return true;
01071     }
01072       return false;
01073     }
01074 
01075   template<typename _InIter, typename _TraitsT>
01076     bool
01077     _Compiler<_InIter, _TraitsT>::
01078     _M_character_class(_RMatcherT& __matcher)
01079     {
01080       if (_M_match_token(_ScannerT::_S_token_char_class_name))
01081     {
01082       __matcher._M_add_character_class(_M_cur_value);
01083       return true;
01084     }
01085       return false;
01086     }
01087 
01088   template<typename _InIter, typename _TraitsT>
01089     int
01090     _Compiler<_InIter, _TraitsT>::
01091     _M_cur_int_value(int __radix)
01092     {
01093       int __v = 0;
01094       for (typename _StringT::size_type __i = 0;
01095        __i < _M_cur_value.length(); ++__i)
01096     __v =__v * __radix + _M_traits.value(_M_cur_value[__i], __radix);
01097       return __v;
01098     }
01099 
01100   template<typename _InIter, typename _TraitsT>
01101     _AutomatonPtr
01102     __compile(const _InIter& __b, const _InIter& __e, _TraitsT& __t,
01103           regex_constants::syntax_option_type __f)
01104     { return _AutomatonPtr(new _Nfa(_Compiler<_InIter, _TraitsT>(__b, __e, __t,
01105                                         __f)._M_nfa())); }
01106 
01107  //@} regex-detail
01108 _GLIBCXX_END_NAMESPACE_VERSION
01109 } // namespace __detail
01110 } // namespace std