123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678 |
- // class template regex -*- C++ -*-
- // Copyright (C) 2013-2015 Free Software Foundation, Inc.
- //
- // This file is part of the GNU ISO C++ Library. This library is free
- // software; you can redistribute it and/or modify it under the
- // terms of the GNU General Public License as published by the
- // Free Software Foundation; either version 3, or (at your option)
- // any later version.
- // This library is distributed in the hope that it will be useful,
- // but WITHOUT ANY WARRANTY; without even the implied warranty of
- // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- // GNU General Public License for more details.
- // Under Section 7 of GPL version 3, you are granted additional
- // permissions described in the GCC Runtime Library Exception, version
- // 3.1, as published by the Free Software Foundation.
- // You should have received a copy of the GNU General Public License and
- // a copy of the GCC Runtime Library Exception along with this program;
- // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- // <http://www.gnu.org/licenses/>.
- /**
- * @file bits/regex.tcc
- * This is an internal header file, included by other library headers.
- * Do not attempt to use it directly. @headername{regex}
- */
- // A non-standard switch that lets the user pick the matching algorithm.
- // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the Thompson NFA
- // algorithm will be used. This algorithm is not enabled by default
- // and cannot be used if the regex contains back-references, but it has
- // better (polynomial instead of exponential) worst-case performance.
- // See __regex_algo_impl below.
- namespace std _GLIBCXX_VISIBILITY(default)
- {
- namespace __detail
- {
- _GLIBCXX_BEGIN_NAMESPACE_VERSION
- // Result of merging regex_match and regex_search.
- //
- // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
- // the other one if possible, for test purpose).
- //
- // That __match_mode is true means regex_match, else regex_search.
- template<typename _BiIter, typename _Alloc,
- typename _CharT, typename _TraitsT,
- _RegexExecutorPolicy __policy,
- bool __match_mode>
- bool
- __regex_algo_impl(_BiIter __s,
- _BiIter __e,
- match_results<_BiIter, _Alloc>& __m,
- const basic_regex<_CharT, _TraitsT>& __re,
- regex_constants::match_flag_type __flags)
- {
- if (__re._M_automaton == nullptr)
- return false;
- typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
- __m._M_begin = __s;
- __m._M_resize(__re._M_automaton->_M_sub_count());
- for (auto& __it : __res)
- __it.matched = false;
- // __policy is used by testsuites so that they can use Thompson NFA
- // without defining a macro. Users should define
- // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
- bool __ret;
- if (!__re._M_automaton->_M_has_backref
- && !(__re._M_flags & regex_constants::ECMAScript)
- #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
- && __policy == _RegexExecutorPolicy::_S_alternate
- #endif
- )
- {
- _Executor<_BiIter, _Alloc, _TraitsT, false>
- __executor(__s, __e, __m, __re, __flags);
- if (__match_mode)
- __ret = __executor._M_match();
- else
- __ret = __executor._M_search();
- }
- else
- {
- _Executor<_BiIter, _Alloc, _TraitsT, true>
- __executor(__s, __e, __m, __re, __flags);
- if (__match_mode)
- __ret = __executor._M_match();
- else
- __ret = __executor._M_search();
- }
- if (__ret)
- {
- for (auto& __it : __res)
- if (!__it.matched)
- __it.first = __it.second = __e;
- auto& __pre = __m._M_prefix();
- auto& __suf = __m._M_suffix();
- if (__match_mode)
- {
- __pre.matched = false;
- __pre.first = __s;
- __pre.second = __s;
- __suf.matched = false;
- __suf.first = __e;
- __suf.second = __e;
- }
- else
- {
- __pre.first = __s;
- __pre.second = __res[0].first;
- __pre.matched = (__pre.first != __pre.second);
- __suf.first = __res[0].second;
- __suf.second = __e;
- __suf.matched = (__suf.first != __suf.second);
- }
- }
- else
- {
- __m._M_resize(0);
- for (auto& __it : __res)
- {
- __it.matched = false;
- __it.first = __it.second = __e;
- }
- }
- return __ret;
- }
- _GLIBCXX_END_NAMESPACE_VERSION
- }
- _GLIBCXX_BEGIN_NAMESPACE_VERSION
- template<typename _Ch_type>
- template<typename _Fwd_iter>
- typename regex_traits<_Ch_type>::string_type
- regex_traits<_Ch_type>::
- lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
- {
- typedef std::ctype<char_type> __ctype_type;
- const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
- static const char* __collatenames[] =
- {
- "NUL",
- "SOH",
- "STX",
- "ETX",
- "EOT",
- "ENQ",
- "ACK",
- "alert",
- "backspace",
- "tab",
- "newline",
- "vertical-tab",
- "form-feed",
- "carriage-return",
- "SO",
- "SI",
- "DLE",
- "DC1",
- "DC2",
- "DC3",
- "DC4",
- "NAK",
- "SYN",
- "ETB",
- "CAN",
- "EM",
- "SUB",
- "ESC",
- "IS4",
- "IS3",
- "IS2",
- "IS1",
- "space",
- "exclamation-mark",
- "quotation-mark",
- "number-sign",
- "dollar-sign",
- "percent-sign",
- "ampersand",
- "apostrophe",
- "left-parenthesis",
- "right-parenthesis",
- "asterisk",
- "plus-sign",
- "comma",
- "hyphen",
- "period",
- "slash",
- "zero",
- "one",
- "two",
- "three",
- "four",
- "five",
- "six",
- "seven",
- "eight",
- "nine",
- "colon",
- "semicolon",
- "less-than-sign",
- "equals-sign",
- "greater-than-sign",
- "question-mark",
- "commercial-at",
- "A",
- "B",
- "C",
- "D",
- "E",
- "F",
- "G",
- "H",
- "I",
- "J",
- "K",
- "L",
- "M",
- "N",
- "O",
- "P",
- "Q",
- "R",
- "S",
- "T",
- "U",
- "V",
- "W",
- "X",
- "Y",
- "Z",
- "left-square-bracket",
- "backslash",
- "right-square-bracket",
- "circumflex",
- "underscore",
- "grave-accent",
- "a",
- "b",
- "c",
- "d",
- "e",
- "f",
- "g",
- "h",
- "i",
- "j",
- "k",
- "l",
- "m",
- "n",
- "o",
- "p",
- "q",
- "r",
- "s",
- "t",
- "u",
- "v",
- "w",
- "x",
- "y",
- "z",
- "left-curly-bracket",
- "vertical-line",
- "right-curly-bracket",
- "tilde",
- "DEL",
- };
- string __s;
- for (; __first != __last; ++__first)
- __s += __fctyp.narrow(*__first, 0);
- for (const auto& __it : __collatenames)
- if (__s == __it)
- return string_type(1, __fctyp.widen(
- static_cast<char>(&__it - __collatenames)));
- // TODO Add digraph support:
- // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
- return string_type();
- }
- template<typename _Ch_type>
- template<typename _Fwd_iter>
- typename regex_traits<_Ch_type>::char_class_type
- regex_traits<_Ch_type>::
- lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
- {
- typedef std::ctype<char_type> __ctype_type;
- const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
- // Mappings from class name to class mask.
- static const pair<const char*, char_class_type> __classnames[] =
- {
- {"d", ctype_base::digit},
- {"w", {ctype_base::alnum, _RegexMask::_S_under}},
- {"s", ctype_base::space},
- {"alnum", ctype_base::alnum},
- {"alpha", ctype_base::alpha},
- {"blank", ctype_base::blank},
- {"cntrl", ctype_base::cntrl},
- {"digit", ctype_base::digit},
- {"graph", ctype_base::graph},
- {"lower", ctype_base::lower},
- {"print", ctype_base::print},
- {"punct", ctype_base::punct},
- {"space", ctype_base::space},
- {"upper", ctype_base::upper},
- {"xdigit", ctype_base::xdigit},
- };
- string __s;
- for (; __first != __last; ++__first)
- __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
- for (const auto& __it : __classnames)
- if (__s == __it.first)
- {
- if (__icase
- && ((__it.second
- & (ctype_base::lower | ctype_base::upper)) != 0))
- return ctype_base::alpha;
- return __it.second;
- }
- return 0;
- }
- template<typename _Ch_type>
- bool
- regex_traits<_Ch_type>::
- isctype(_Ch_type __c, char_class_type __f) const
- {
- typedef std::ctype<char_type> __ctype_type;
- const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
- return __fctyp.is(__f._M_base, __c)
- // [[:w:]]
- || ((__f._M_extended & _RegexMask::_S_under)
- && __c == __fctyp.widen('_'));
- }
- template<typename _Ch_type>
- int
- regex_traits<_Ch_type>::
- value(_Ch_type __ch, int __radix) const
- {
- std::basic_istringstream<char_type> __is(string_type(1, __ch));
- long __v;
- if (__radix == 8)
- __is >> std::oct;
- else if (__radix == 16)
- __is >> std::hex;
- __is >> __v;
- return __is.fail() ? -1 : __v;
- }
- template<typename _Bi_iter, typename _Alloc>
- template<typename _Out_iter>
- _Out_iter match_results<_Bi_iter, _Alloc>::
- format(_Out_iter __out,
- const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
- const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
- match_flag_type __flags) const
- {
- _GLIBCXX_DEBUG_ASSERT( ready() );
- regex_traits<char_type> __traits;
- typedef std::ctype<char_type> __ctype_type;
- const __ctype_type&
- __fctyp(use_facet<__ctype_type>(__traits.getloc()));
- auto __output = [&](size_t __idx)
- {
- auto& __sub = (*this)[__idx];
- if (__sub.matched)
- __out = std::copy(__sub.first, __sub.second, __out);
- };
- if (__flags & regex_constants::format_sed)
- {
- for (; __fmt_first != __fmt_last;)
- if (*__fmt_first == '&')
- {
- __output(0);
- ++__fmt_first;
- }
- else if (*__fmt_first == '\\')
- {
- if (++__fmt_first != __fmt_last
- && __fctyp.is(__ctype_type::digit, *__fmt_first))
- __output(__traits.value(*__fmt_first++, 10));
- else
- *__out++ = '\\';
- }
- else
- *__out++ = *__fmt_first++;
- }
- else
- {
- while (1)
- {
- auto __next = std::find(__fmt_first, __fmt_last, '$');
- if (__next == __fmt_last)
- break;
- __out = std::copy(__fmt_first, __next, __out);
- auto __eat = [&](char __ch) -> bool
- {
- if (*__next == __ch)
- {
- ++__next;
- return true;
- }
- return false;
- };
- if (++__next == __fmt_last)
- *__out++ = '$';
- else if (__eat('$'))
- *__out++ = '$';
- else if (__eat('&'))
- __output(0);
- else if (__eat('`'))
- {
- auto& __sub = _M_prefix();
- if (__sub.matched)
- __out = std::copy(__sub.first, __sub.second, __out);
- }
- else if (__eat('\''))
- {
- auto& __sub = _M_suffix();
- if (__sub.matched)
- __out = std::copy(__sub.first, __sub.second, __out);
- }
- else if (__fctyp.is(__ctype_type::digit, *__next))
- {
- long __num = __traits.value(*__next, 10);
- if (++__next != __fmt_last
- && __fctyp.is(__ctype_type::digit, *__next))
- {
- __num *= 10;
- __num += __traits.value(*__next++, 10);
- }
- if (0 <= __num && __num < this->size())
- __output(__num);
- }
- else
- *__out++ = '$';
- __fmt_first = __next;
- }
- __out = std::copy(__fmt_first, __fmt_last, __out);
- }
- return __out;
- }
- template<typename _Out_iter, typename _Bi_iter,
- typename _Rx_traits, typename _Ch_type>
- _Out_iter
- regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
- const basic_regex<_Ch_type, _Rx_traits>& __e,
- const _Ch_type* __fmt,
- regex_constants::match_flag_type __flags)
- {
- typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
- _IterT __i(__first, __last, __e, __flags);
- _IterT __end;
- if (__i == __end)
- {
- if (!(__flags & regex_constants::format_no_copy))
- __out = std::copy(__first, __last, __out);
- }
- else
- {
- sub_match<_Bi_iter> __last;
- auto __len = char_traits<_Ch_type>::length(__fmt);
- for (; __i != __end; ++__i)
- {
- if (!(__flags & regex_constants::format_no_copy))
- __out = std::copy(__i->prefix().first, __i->prefix().second,
- __out);
- __out = __i->format(__out, __fmt, __fmt + __len, __flags);
- __last = __i->suffix();
- if (__flags & regex_constants::format_first_only)
- break;
- }
- if (!(__flags & regex_constants::format_no_copy))
- __out = std::copy(__last.first, __last.second, __out);
- }
- return __out;
- }
- template<typename _Bi_iter,
- typename _Ch_type,
- typename _Rx_traits>
- bool
- regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
- operator==(const regex_iterator& __rhs) const
- {
- return (_M_match.empty() && __rhs._M_match.empty())
- || (_M_begin == __rhs._M_begin
- && _M_end == __rhs._M_end
- && _M_pregex == __rhs._M_pregex
- && _M_flags == __rhs._M_flags
- && _M_match[0] == __rhs._M_match[0]);
- }
- template<typename _Bi_iter,
- typename _Ch_type,
- typename _Rx_traits>
- regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
- regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
- operator++()
- {
- // In all cases in which the call to regex_search returns true,
- // match.prefix().first shall be equal to the previous value of
- // match[0].second, and for each index i in the half-open range
- // [0, match.size()) for which match[i].matched is true,
- // match[i].position() shall return distance(begin, match[i].first).
- // [28.12.1.4.5]
- if (_M_match[0].matched)
- {
- auto __start = _M_match[0].second;
- auto __prefix_first = _M_match[0].second;
- if (_M_match[0].first == _M_match[0].second)
- {
- if (__start == _M_end)
- {
- _M_match = value_type();
- return *this;
- }
- else
- {
- if (regex_search(__start, _M_end, _M_match, *_M_pregex,
- _M_flags
- | regex_constants::match_not_null
- | regex_constants::match_continuous))
- {
- _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
- auto& __prefix = _M_match._M_prefix();
- __prefix.first = __prefix_first;
- __prefix.matched = __prefix.first != __prefix.second;
- // [28.12.1.4.5]
- _M_match._M_begin = _M_begin;
- return *this;
- }
- else
- ++__start;
- }
- }
- _M_flags |= regex_constants::match_prev_avail;
- if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
- {
- _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
- auto& __prefix = _M_match._M_prefix();
- __prefix.first = __prefix_first;
- __prefix.matched = __prefix.first != __prefix.second;
- // [28.12.1.4.5]
- _M_match._M_begin = _M_begin;
- }
- else
- _M_match = value_type();
- }
- return *this;
- }
- template<typename _Bi_iter,
- typename _Ch_type,
- typename _Rx_traits>
- regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
- regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
- operator=(const regex_token_iterator& __rhs)
- {
- _M_position = __rhs._M_position;
- _M_subs = __rhs._M_subs;
- _M_n = __rhs._M_n;
- _M_suffix = __rhs._M_suffix;
- _M_has_m1 = __rhs._M_has_m1;
- _M_normalize_result();
- return *this;
- }
- template<typename _Bi_iter,
- typename _Ch_type,
- typename _Rx_traits>
- bool
- regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
- operator==(const regex_token_iterator& __rhs) const
- {
- if (_M_end_of_seq() && __rhs._M_end_of_seq())
- return true;
- if (_M_suffix.matched && __rhs._M_suffix.matched
- && _M_suffix == __rhs._M_suffix)
- return true;
- if (_M_end_of_seq() || _M_suffix.matched
- || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
- return false;
- return _M_position == __rhs._M_position
- && _M_n == __rhs._M_n
- && _M_subs == __rhs._M_subs;
- }
- template<typename _Bi_iter,
- typename _Ch_type,
- typename _Rx_traits>
- regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
- regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
- operator++()
- {
- _Position __prev = _M_position;
- if (_M_suffix.matched)
- *this = regex_token_iterator();
- else if (_M_n + 1 < _M_subs.size())
- {
- _M_n++;
- _M_result = &_M_current_match();
- }
- else
- {
- _M_n = 0;
- ++_M_position;
- if (_M_position != _Position())
- _M_result = &_M_current_match();
- else if (_M_has_m1 && __prev->suffix().length() != 0)
- {
- _M_suffix.matched = true;
- _M_suffix.first = __prev->suffix().first;
- _M_suffix.second = __prev->suffix().second;
- _M_result = &_M_suffix;
- }
- else
- *this = regex_token_iterator();
- }
- return *this;
- }
- template<typename _Bi_iter,
- typename _Ch_type,
- typename _Rx_traits>
- void
- regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
- _M_init(_Bi_iter __a, _Bi_iter __b)
- {
- _M_has_m1 = false;
- for (auto __it : _M_subs)
- if (__it == -1)
- {
- _M_has_m1 = true;
- break;
- }
- if (_M_position != _Position())
- _M_result = &_M_current_match();
- else if (_M_has_m1)
- {
- _M_suffix.matched = true;
- _M_suffix.first = __a;
- _M_suffix.second = __b;
- _M_result = &_M_suffix;
- }
- else
- _M_result = nullptr;
- }
- _GLIBCXX_END_NAMESPACE_VERSION
- } // namespace
|