// File: regex_scanner.h
// class template regex -*- C++ -*-
// Copyright (C) 2013-2018 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the // terms of the GNU General Public License as published by the // Free Software Foundation; either version 3, or (at your option) // any later version.
// This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details.
// Under Section 7 of GPL version 3, you are granted additional // permissions described in the GCC Runtime Library Exception, version // 3.1, as published by the Free Software Foundation.
// You should have received a copy of the GNU General Public License and // a copy of the GCC Runtime Library Exception along with this program; // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // <http://www.gnu.org/licenses/>.
/** * @file bits/regex_scanner.h * This is an internal header file, included by other library headers. * Do not attempt to use it directly. @headername{regex} */
namespace std _GLIBCXX_VISIBILITY(default) { _GLIBCXX_BEGIN_NAMESPACE_VERSION
namespace __detail { /** * @addtogroup regex-detail * @{ */
/**
 * @brief Character-type-independent part of the regex scanner.
 *
 * Holds the grammar flags, the current scanner state, the kind of the most
 * recently produced token, and the escape / special-character tables that
 * are selected once, at construction, from the grammar flags.
 */
struct _ScannerBase
{
public:
  /// Token types returned from the scanner.
  enum _TokenT : unsigned
  {
    _S_token_anychar,
    _S_token_ord_char,
    _S_token_oct_num,
    _S_token_hex_num,
    _S_token_backref,
    _S_token_subexpr_begin,
    _S_token_subexpr_no_group_begin,
    _S_token_subexpr_lookahead_begin, // neg if _M_value[0] == 'n'
    _S_token_subexpr_end,
    _S_token_bracket_begin,
    _S_token_bracket_neg_begin,
    _S_token_bracket_end,
    _S_token_interval_begin,
    _S_token_interval_end,
    _S_token_quoted_class,
    _S_token_char_class_name,
    _S_token_collsymbol,
    _S_token_equiv_class_name,
    _S_token_opt,
    _S_token_or,
    _S_token_closure0,
    _S_token_closure1,
    _S_token_line_begin,
    _S_token_line_end,
    _S_token_word_bound, // neg if _M_value[0] == 'n'
    _S_token_comma,
    _S_token_dup_count,
    _S_token_eof,
    _S_token_bracket_dash,
    _S_token_unknown = -1u
  };

protected:
  typedef regex_constants::syntax_option_type _FlagT;

  /// Scanner states; the constructor always starts in _S_state_normal.
  enum _StateT
  {
    _S_state_normal,
    _S_state_in_brace,
    _S_state_in_bracket,
  };

protected:
  /**
   * @brief Selects the per-grammar tables from @p __flags.
   *
   * The escape table is the ECMAScript one when the ECMAScript flag is set
   * and the awk one otherwise.  The special-character set is chosen by
   * testing the flags in this order: ECMAScript, basic, extended, grep,
   * egrep, awk.  If none of them is set the selection yields a null
   * pointer and the assertion in the constructor body fires.
   *
   * The token is initialised to _S_token_unknown so that the object never
   * carries an indeterminate token value before the first token is stored.
   *
   * The mem-initializers are listed in declaration order.  They rely on
   * the table members (which use default member initializers) being
   * declared before _M_escape_tbl and _M_spec_char, so that the
   * tables are already initialised when their values are read here.
   */
  _ScannerBase(_FlagT __flags)
  : _M_state(_S_state_normal),
    _M_flags(__flags),
    _M_token(_S_token_unknown),
    _M_escape_tbl(_M_is_ecma()
                  ? _M_ecma_escape_tbl
                  : _M_awk_escape_tbl),
    _M_spec_char(_M_is_ecma()
                 ? _M_ecma_spec_char
                 : _M_flags & regex_constants::basic
                 ? _M_basic_spec_char
                 : _M_flags & regex_constants::extended
                 ? _M_extended_spec_char
                 : _M_flags & regex_constants::grep
                 ? ".[\\*^$\n"
                 : _M_flags & regex_constants::egrep
                 ? ".[\\()*+?{|^$\n"
                 : _M_flags & regex_constants::awk
                 ? _M_extended_spec_char
                 : nullptr),
    _M_at_bracket_start(false)
  { __glibcxx_assert(_M_spec_char); }

protected:
  /**
   * @brief Looks up @p __c in the active escape table.
   * @return Pointer to the mapped character, or nullptr when @p __c is not
   *         listed.
   *
   * The table ends with an entry whose key is '\0'; the scan stops there
   * before comparing, so a '\0' argument never matches.
   */
  const char*
  _M_find_escape(char __c) const
  {
    for (auto __it = _M_escape_tbl; __it->first != '\0'; ++__it)
      if (__it->first == __c)
        return &__it->second;
    return nullptr;
  }

  /// True when the ECMAScript flag is set.
  bool
  _M_is_ecma() const
  { return _M_flags & regex_constants::ECMAScript; }

  /// True when either the basic or the grep flag is set.
  bool
  _M_is_basic() const
  { return _M_flags & (regex_constants::basic | regex_constants::grep); }

  /// True when any of the extended, egrep or awk flags is set.
  bool
  _M_is_extended() const
  {
    return _M_flags & (regex_constants::extended
                       | regex_constants::egrep
                       | regex_constants::awk);
  }

  /// True when either the grep or the egrep flag is set.
  bool
  _M_is_grep() const
  { return _M_flags & (regex_constants::grep | regex_constants::egrep); }

  /// True when the awk flag is set.
  bool
  _M_is_awk() const
  { return _M_flags & regex_constants::awk; }

protected:
  // TODO: Make them static in the next abi change.

  /// Single-character operators and their token kinds; '\0' key ends it.
  const std::pair<char, _TokenT> _M_token_tbl[9] =
    {
      {'^', _S_token_line_begin},
      {'$', _S_token_line_end},
      {'.', _S_token_anychar},
      {'*', _S_token_closure0},
      {'+', _S_token_closure1},
      {'?', _S_token_opt},
      {'|', _S_token_or},
      {'\n', _S_token_or}, // grep and egrep
      {'\0', _S_token_or},
    };

  /// Escape letter -> character, ECMAScript grammar; '\0' key ends it.
  const std::pair<char, char> _M_ecma_escape_tbl[8] =
    {
      {'0', '\0'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  /// Escape letter -> character, used for every non-ECMAScript grammar
  /// (see the constructor); '\0' key ends it.
  const std::pair<char, char> _M_awk_escape_tbl[11] =
    {
      {'"', '"'},
      {'/', '/'},
      {'\\', '\\'},
      {'a', '\a'},
      {'b', '\b'},
      {'f', '\f'},
      {'n', '\n'},
      {'r', '\r'},
      {'t', '\t'},
      {'v', '\v'},
      {'\0', '\0'},
    };

  // Special-character sets per grammar.  The grep and egrep sets are the
  // basic and extended ones plus '\n' and are spelled out as literals in
  // the constructor.
  const char* _M_ecma_spec_char = "^$\\.*+?()[]{}|";
  const char* _M_basic_spec_char = ".[\\*^$";
  const char* _M_extended_spec_char = ".[\\()*+?{|^$";

  _StateT                       _M_state;
  _FlagT                        _M_flags;
  _TokenT                       _M_token;            // _S_token_unknown until a token is stored
  const std::pair<char, char>*  _M_escape_tbl;       // points at one of the escape tables above
  const char*                   _M_spec_char;        // never null after construction (asserted)
  bool                          _M_at_bracket_start; // starts out false
};
/**
 * @brief Turns a regular expression pattern into a stream of tokens.
 *
 * A %_Scanner walks the character range handed to its constructor and
 * exposes it, one step at a time, as the parse tokens consumed by the
 * regular expression compiler.  Which tokens come out depends on the
 * syntax flags given at construction: the same pattern text is tokenized
 * differently under different regular expression grammars.
 *
 * Only the two accessors are defined in the class body; every other
 * member function is declared here and defined out of line.
 */
template<typename _CharT>
  class _Scanner
  : public _ScannerBase
  {
  public:
    using _IterT   = const _CharT*;
    using _StringT = std::basic_string<_CharT>;
    using _FlagT   = regex_constants::syntax_option_type;
    using _CtypeT  = const std::ctype<_CharT>;

    /// Builds a scanner over [__begin, __end) for the grammar in __flags.
    _Scanner(_IterT __begin, _IterT __end,
             _FlagT __flags, std::locale __loc);

    /// Moves the scanner on to the next token.
    void
    _M_advance();

    /// Kind of the current token.
    _TokenT
    _M_get_token() const
    { return this->_M_token; }

    /// Text associated with the current token.
    const _StringT&
    _M_get_value() const
    { return this->_M_value; }

#ifdef _GLIBCXX_DEBUG
    std::ostream&
    _M_print(std::ostream&);
#endif

  private:
    // One scanning routine per _StateT value.
    void
    _M_scan_normal();

    void
    _M_scan_in_bracket();

    void
    _M_scan_in_brace();

    // Escape-sequence handlers, one per grammar family; _M_eat_escape
    // below can hold a pointer to any of them.
    void
    _M_eat_escape_ecma();

    void
    _M_eat_escape_posix();

    void
    _M_eat_escape_awk();

    void
    _M_eat_class(char);

    _IterT                _M_current;
    _IterT                _M_end;
    _CtypeT&              _M_ctype;
    _StringT              _M_value;
    void (_Scanner::*     _M_eat_escape)();
  };
//@} regex-detail } // namespace __detail _GLIBCXX_END_NAMESPACE_VERSION } // namespace std
#include <bits/regex_scanner.tcc>
|