// class template regex -*- C++ -*- // Copyright (C) 2013-2015 Free Software Foundation, Inc. // // This file is part of the GNU ISO C++ Library. This library is free // software; you can redistribute it and/or modify it under the // terms of the GNU General Public License as published by the // Free Software Foundation; either version 3, or (at your option) // any later version. // This library is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // Under Section 7 of GPL version 3, you are granted additional // permissions described in the GCC Runtime Library Exception, version // 3.1, as published by the Free Software Foundation. // You should have received a copy of the GNU General Public License and // a copy of the GCC Runtime Library Exception along with this program; // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see // . /** * @file bits/regex.tcc * This is an internal header file, included by other library headers. * Do not attempt to use it directly. @headername{regex} */ // A non-standard switch to let the user pick the matching algorithm. // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA // algorithm will be used. This algorithm is not enabled by default, // and cannot be used if the regex contains back-references, but has better // (polynomial instead of exponential) worst case performance. // See __regex_algo_impl below. namespace std _GLIBCXX_VISIBILITY(default) { namespace __detail { _GLIBCXX_BEGIN_NAMESPACE_VERSION // Result of merging regex_match and regex_search. // // __policy now can be _S_auto (auto dispatch) and _S_alternate (use // the other one if possible, for test purpose). // // That __match_mode is true means regex_match, else regex_search. template bool __regex_algo_impl(_BiIter __s, _BiIter __e, match_results<_BiIter, _Alloc>& __m, const basic_regex<_CharT, _TraitsT>& __re, regex_constants::match_flag_type __flags) { if (__re._M_automaton == nullptr) return false; typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; __m._M_begin = __s; __m._M_resize(__re._M_automaton->_M_sub_count()); for (auto& __it : __res) __it.matched = false; // __policy is used by testsuites so that they can use Thompson NFA // without defining a macro. Users should define // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach. bool __ret; if (!__re._M_automaton->_M_has_backref && !(__re._M_flags & regex_constants::ECMAScript) #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA && __policy == _RegexExecutorPolicy::_S_alternate #endif ) { _Executor<_BiIter, _Alloc, _TraitsT, false> __executor(__s, __e, __m, __re, __flags); if (__match_mode) __ret = __executor._M_match(); else __ret = __executor._M_search(); } else { _Executor<_BiIter, _Alloc, _TraitsT, true> __executor(__s, __e, __m, __re, __flags); if (__match_mode) __ret = __executor._M_match(); else __ret = __executor._M_search(); } if (__ret) { for (auto& __it : __res) if (!__it.matched) __it.first = __it.second = __e; auto& __pre = __m._M_prefix(); auto& __suf = __m._M_suffix(); if (__match_mode) { __pre.matched = false; __pre.first = __s; __pre.second = __s; __suf.matched = false; __suf.first = __e; __suf.second = __e; } else { __pre.first = __s; __pre.second = __res[0].first; __pre.matched = (__pre.first != __pre.second); __suf.first = __res[0].second; __suf.second = __e; __suf.matched = (__suf.first != __suf.second); } } else { __m._M_resize(0); for (auto& __it : __res) { __it.matched = false; __it.first = __it.second = __e; } } return __ret; } _GLIBCXX_END_NAMESPACE_VERSION } _GLIBCXX_BEGIN_NAMESPACE_VERSION template template typename regex_traits<_Ch_type>::string_type regex_traits<_Ch_type>:: lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const { typedef std::ctype __ctype_type; const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); static const char* __collatenames[] = { "NUL", "SOH", "STX", "ETX", "EOT", "ENQ", "ACK", "alert", "backspace", "tab", "newline", "vertical-tab", "form-feed", "carriage-return", "SO", "SI", "DLE", "DC1", "DC2", "DC3", "DC4", "NAK", "SYN", "ETB", "CAN", "EM", "SUB", "ESC", "IS4", "IS3", "IS2", "IS1", "space", "exclamation-mark", "quotation-mark", "number-sign", "dollar-sign", "percent-sign", "ampersand", "apostrophe", "left-parenthesis", "right-parenthesis", "asterisk", "plus-sign", "comma", "hyphen", "period", "slash", "zero", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine", "colon", "semicolon", "less-than-sign", "equals-sign", "greater-than-sign", "question-mark", "commercial-at", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z", "left-square-bracket", "backslash", "right-square-bracket", "circumflex", "underscore", "grave-accent", "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", "s", "t", "u", "v", "w", "x", "y", "z", "left-curly-bracket", "vertical-line", "right-curly-bracket", "tilde", "DEL", }; string __s; for (; __first != __last; ++__first) __s += __fctyp.narrow(*__first, 0); for (const auto& __it : __collatenames) if (__s == __it) return string_type(1, __fctyp.widen( static_cast(&__it - __collatenames))); // TODO Add digraph support: // http://boost.sourceforge.net/libs/regex/doc/collating_names.html return string_type(); } template template typename regex_traits<_Ch_type>::char_class_type regex_traits<_Ch_type>:: lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const { typedef std::ctype __ctype_type; const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); // Mappings from class name to class mask. static const pair __classnames[] = { {"d", ctype_base::digit}, {"w", {ctype_base::alnum, _RegexMask::_S_under}}, {"s", ctype_base::space}, {"alnum", ctype_base::alnum}, {"alpha", ctype_base::alpha}, {"blank", ctype_base::blank}, {"cntrl", ctype_base::cntrl}, {"digit", ctype_base::digit}, {"graph", ctype_base::graph}, {"lower", ctype_base::lower}, {"print", ctype_base::print}, {"punct", ctype_base::punct}, {"space", ctype_base::space}, {"upper", ctype_base::upper}, {"xdigit", ctype_base::xdigit}, }; string __s; for (; __first != __last; ++__first) __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); for (const auto& __it : __classnames) if (__s == __it.first) { if (__icase && ((__it.second & (ctype_base::lower | ctype_base::upper)) != 0)) return ctype_base::alpha; return __it.second; } return 0; } template bool regex_traits<_Ch_type>:: isctype(_Ch_type __c, char_class_type __f) const { typedef std::ctype __ctype_type; const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); return __fctyp.is(__f._M_base, __c) // [[:w:]] || ((__f._M_extended & _RegexMask::_S_under) && __c == __fctyp.widen('_')); } template int regex_traits<_Ch_type>:: value(_Ch_type __ch, int __radix) const { std::basic_istringstream __is(string_type(1, __ch)); long __v; if (__radix == 8) __is >> std::oct; else if (__radix == 16) __is >> std::hex; __is >> __v; return __is.fail() ? -1 : __v; } template template _Out_iter match_results<_Bi_iter, _Alloc>:: format(_Out_iter __out, const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, match_flag_type __flags) const { _GLIBCXX_DEBUG_ASSERT( ready() ); regex_traits __traits; typedef std::ctype __ctype_type; const __ctype_type& __fctyp(use_facet<__ctype_type>(__traits.getloc())); auto __output = [&](size_t __idx) { auto& __sub = (*this)[__idx]; if (__sub.matched) __out = std::copy(__sub.first, __sub.second, __out); }; if (__flags & regex_constants::format_sed) { for (; __fmt_first != __fmt_last;) if (*__fmt_first == '&') { __output(0); ++__fmt_first; } else if (*__fmt_first == '\\') { if (++__fmt_first != __fmt_last && __fctyp.is(__ctype_type::digit, *__fmt_first)) __output(__traits.value(*__fmt_first++, 10)); else *__out++ = '\\'; } else *__out++ = *__fmt_first++; } else { while (1) { auto __next = std::find(__fmt_first, __fmt_last, '$'); if (__next == __fmt_last) break; __out = std::copy(__fmt_first, __next, __out); auto __eat = [&](char __ch) -> bool { if (*__next == __ch) { ++__next; return true; } return false; }; if (++__next == __fmt_last) *__out++ = '$'; else if (__eat('$')) *__out++ = '$'; else if (__eat('&')) __output(0); else if (__eat('`')) { auto& __sub = _M_prefix(); if (__sub.matched) __out = std::copy(__sub.first, __sub.second, __out); } else if (__eat('\'')) { auto& __sub = _M_suffix(); if (__sub.matched) __out = std::copy(__sub.first, __sub.second, __out); } else if (__fctyp.is(__ctype_type::digit, *__next)) { long __num = __traits.value(*__next, 10); if (++__next != __fmt_last && __fctyp.is(__ctype_type::digit, *__next)) { __num *= 10; __num += __traits.value(*__next++, 10); } if (0 <= __num && __num < this->size()) __output(__num); } else *__out++ = '$'; __fmt_first = __next; } __out = std::copy(__fmt_first, __fmt_last, __out); } return __out; } template _Out_iter regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, const basic_regex<_Ch_type, _Rx_traits>& __e, const _Ch_type* __fmt, regex_constants::match_flag_type __flags) { typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; _IterT __i(__first, __last, __e, __flags); _IterT __end; if (__i == __end) { if (!(__flags & regex_constants::format_no_copy)) __out = std::copy(__first, __last, __out); } else { sub_match<_Bi_iter> __last; auto __len = char_traits<_Ch_type>::length(__fmt); for (; __i != __end; ++__i) { if (!(__flags & regex_constants::format_no_copy)) __out = std::copy(__i->prefix().first, __i->prefix().second, __out); __out = __i->format(__out, __fmt, __fmt + __len, __flags); __last = __i->suffix(); if (__flags & regex_constants::format_first_only) break; } if (!(__flags & regex_constants::format_no_copy)) __out = std::copy(__last.first, __last.second, __out); } return __out; } template bool regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator==(const regex_iterator& __rhs) const { return (_M_match.empty() && __rhs._M_match.empty()) || (_M_begin == __rhs._M_begin && _M_end == __rhs._M_end && _M_pregex == __rhs._M_pregex && _M_flags == __rhs._M_flags && _M_match[0] == __rhs._M_match[0]); } template regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator++() { // In all cases in which the call to regex_search returns true, // match.prefix().first shall be equal to the previous value of // match[0].second, and for each index i in the half-open range // [0, match.size()) for which match[i].matched is true, // match[i].position() shall return distance(begin, match[i].first). // [28.12.1.4.5] if (_M_match[0].matched) { auto __start = _M_match[0].second; auto __prefix_first = _M_match[0].second; if (_M_match[0].first == _M_match[0].second) { if (__start == _M_end) { _M_match = value_type(); return *this; } else { if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags | regex_constants::match_not_null | regex_constants::match_continuous)) { _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); auto& __prefix = _M_match._M_prefix(); __prefix.first = __prefix_first; __prefix.matched = __prefix.first != __prefix.second; // [28.12.1.4.5] _M_match._M_begin = _M_begin; return *this; } else ++__start; } } _M_flags |= regex_constants::match_prev_avail; if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) { _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); auto& __prefix = _M_match._M_prefix(); __prefix.first = __prefix_first; __prefix.matched = __prefix.first != __prefix.second; // [28.12.1.4.5] _M_match._M_begin = _M_begin; } else _M_match = value_type(); } return *this; } template regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator=(const regex_token_iterator& __rhs) { _M_position = __rhs._M_position; _M_subs = __rhs._M_subs; _M_n = __rhs._M_n; _M_suffix = __rhs._M_suffix; _M_has_m1 = __rhs._M_has_m1; _M_normalize_result(); return *this; } template bool regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator==(const regex_token_iterator& __rhs) const { if (_M_end_of_seq() && __rhs._M_end_of_seq()) return true; if (_M_suffix.matched && __rhs._M_suffix.matched && _M_suffix == __rhs._M_suffix) return true; if (_M_end_of_seq() || _M_suffix.matched || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) return false; return _M_position == __rhs._M_position && _M_n == __rhs._M_n && _M_subs == __rhs._M_subs; } template regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator++() { _Position __prev = _M_position; if (_M_suffix.matched) *this = regex_token_iterator(); else if (_M_n + 1 < _M_subs.size()) { _M_n++; _M_result = &_M_current_match(); } else { _M_n = 0; ++_M_position; if (_M_position != _Position()) _M_result = &_M_current_match(); else if (_M_has_m1 && __prev->suffix().length() != 0) { _M_suffix.matched = true; _M_suffix.first = __prev->suffix().first; _M_suffix.second = __prev->suffix().second; _M_result = &_M_suffix; } else *this = regex_token_iterator(); } return *this; } template void regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: _M_init(_Bi_iter __a, _Bi_iter __b) { _M_has_m1 = false; for (auto __it : _M_subs) if (__it == -1) { _M_has_m1 = true; break; } if (_M_position != _Position()) _M_result = &_M_current_match(); else if (_M_has_m1) { _M_suffix.matched = true; _M_suffix.first = __a; _M_suffix.second = __b; _M_result = &_M_suffix; } else _M_result = nullptr; } _GLIBCXX_END_NAMESPACE_VERSION } // namespace