regex.tcc 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678
  1. // class template regex -*- C++ -*-
  2. // Copyright (C) 2013-2015 Free Software Foundation, Inc.
  3. //
  4. // This file is part of the GNU ISO C++ Library. This library is free
  5. // software; you can redistribute it and/or modify it under the
  6. // terms of the GNU General Public License as published by the
  7. // Free Software Foundation; either version 3, or (at your option)
  8. // any later version.
  9. // This library is distributed in the hope that it will be useful,
  10. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  11. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  12. // GNU General Public License for more details.
  13. // Under Section 7 of GPL version 3, you are granted additional
  14. // permissions described in the GCC Runtime Library Exception, version
  15. // 3.1, as published by the Free Software Foundation.
  16. // You should have received a copy of the GNU General Public License and
  17. // a copy of the GCC Runtime Library Exception along with this program;
  18. // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
  19. // <http://www.gnu.org/licenses/>.
  20. /**
  21. * @file bits/regex.tcc
  22. * This is an internal header file, included by other library headers.
  23. * Do not attempt to use it directly. @headername{regex}
  24. */
  25. // A non-standard switch to let the user pick the matching algorithm.
  26. // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA
  27. // algorithm will be used. This algorithm is not enabled by default,
  28. // and cannot be used if the regex contains back-references, but has better
  29. // (polynomial instead of exponential) worst case performance.
  30. // See __regex_algo_impl below.
  31. namespace std _GLIBCXX_VISIBILITY(default)
  32. {
  33. namespace __detail
  34. {
  35. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  36. // Result of merging regex_match and regex_search.
  37. //
  38. // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
  39. // the other one if possible, for test purpose).
  40. //
  41. // That __match_mode is true means regex_match, else regex_search.
  42. template<typename _BiIter, typename _Alloc,
  43. typename _CharT, typename _TraitsT,
  44. _RegexExecutorPolicy __policy,
  45. bool __match_mode>
  46. bool
  47. __regex_algo_impl(_BiIter __s,
  48. _BiIter __e,
  49. match_results<_BiIter, _Alloc>& __m,
  50. const basic_regex<_CharT, _TraitsT>& __re,
  51. regex_constants::match_flag_type __flags)
  52. {
  53. if (__re._M_automaton == nullptr)
  54. return false;
  55. typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
  56. __m._M_begin = __s;
  57. __m._M_resize(__re._M_automaton->_M_sub_count());
  58. for (auto& __it : __res)
  59. __it.matched = false;
  60. // __policy is used by testsuites so that they can use Thompson NFA
  61. // without defining a macro. Users should define
  62. // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach.
  63. bool __ret;
  64. if (!__re._M_automaton->_M_has_backref
  65. && !(__re._M_flags & regex_constants::ECMAScript)
  66. #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA
  67. && __policy == _RegexExecutorPolicy::_S_alternate
  68. #endif
  69. )
  70. {
  71. _Executor<_BiIter, _Alloc, _TraitsT, false>
  72. __executor(__s, __e, __m, __re, __flags);
  73. if (__match_mode)
  74. __ret = __executor._M_match();
  75. else
  76. __ret = __executor._M_search();
  77. }
  78. else
  79. {
  80. _Executor<_BiIter, _Alloc, _TraitsT, true>
  81. __executor(__s, __e, __m, __re, __flags);
  82. if (__match_mode)
  83. __ret = __executor._M_match();
  84. else
  85. __ret = __executor._M_search();
  86. }
  87. if (__ret)
  88. {
  89. for (auto& __it : __res)
  90. if (!__it.matched)
  91. __it.first = __it.second = __e;
  92. auto& __pre = __m._M_prefix();
  93. auto& __suf = __m._M_suffix();
  94. if (__match_mode)
  95. {
  96. __pre.matched = false;
  97. __pre.first = __s;
  98. __pre.second = __s;
  99. __suf.matched = false;
  100. __suf.first = __e;
  101. __suf.second = __e;
  102. }
  103. else
  104. {
  105. __pre.first = __s;
  106. __pre.second = __res[0].first;
  107. __pre.matched = (__pre.first != __pre.second);
  108. __suf.first = __res[0].second;
  109. __suf.second = __e;
  110. __suf.matched = (__suf.first != __suf.second);
  111. }
  112. }
  113. else
  114. {
  115. __m._M_resize(0);
  116. for (auto& __it : __res)
  117. {
  118. __it.matched = false;
  119. __it.first = __it.second = __e;
  120. }
  121. }
  122. return __ret;
  123. }
  124. _GLIBCXX_END_NAMESPACE_VERSION
  125. }
  126. _GLIBCXX_BEGIN_NAMESPACE_VERSION
  127. template<typename _Ch_type>
  128. template<typename _Fwd_iter>
  129. typename regex_traits<_Ch_type>::string_type
  130. regex_traits<_Ch_type>::
  131. lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
  132. {
  133. typedef std::ctype<char_type> __ctype_type;
  134. const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
  135. static const char* __collatenames[] =
  136. {
  137. "NUL",
  138. "SOH",
  139. "STX",
  140. "ETX",
  141. "EOT",
  142. "ENQ",
  143. "ACK",
  144. "alert",
  145. "backspace",
  146. "tab",
  147. "newline",
  148. "vertical-tab",
  149. "form-feed",
  150. "carriage-return",
  151. "SO",
  152. "SI",
  153. "DLE",
  154. "DC1",
  155. "DC2",
  156. "DC3",
  157. "DC4",
  158. "NAK",
  159. "SYN",
  160. "ETB",
  161. "CAN",
  162. "EM",
  163. "SUB",
  164. "ESC",
  165. "IS4",
  166. "IS3",
  167. "IS2",
  168. "IS1",
  169. "space",
  170. "exclamation-mark",
  171. "quotation-mark",
  172. "number-sign",
  173. "dollar-sign",
  174. "percent-sign",
  175. "ampersand",
  176. "apostrophe",
  177. "left-parenthesis",
  178. "right-parenthesis",
  179. "asterisk",
  180. "plus-sign",
  181. "comma",
  182. "hyphen",
  183. "period",
  184. "slash",
  185. "zero",
  186. "one",
  187. "two",
  188. "three",
  189. "four",
  190. "five",
  191. "six",
  192. "seven",
  193. "eight",
  194. "nine",
  195. "colon",
  196. "semicolon",
  197. "less-than-sign",
  198. "equals-sign",
  199. "greater-than-sign",
  200. "question-mark",
  201. "commercial-at",
  202. "A",
  203. "B",
  204. "C",
  205. "D",
  206. "E",
  207. "F",
  208. "G",
  209. "H",
  210. "I",
  211. "J",
  212. "K",
  213. "L",
  214. "M",
  215. "N",
  216. "O",
  217. "P",
  218. "Q",
  219. "R",
  220. "S",
  221. "T",
  222. "U",
  223. "V",
  224. "W",
  225. "X",
  226. "Y",
  227. "Z",
  228. "left-square-bracket",
  229. "backslash",
  230. "right-square-bracket",
  231. "circumflex",
  232. "underscore",
  233. "grave-accent",
  234. "a",
  235. "b",
  236. "c",
  237. "d",
  238. "e",
  239. "f",
  240. "g",
  241. "h",
  242. "i",
  243. "j",
  244. "k",
  245. "l",
  246. "m",
  247. "n",
  248. "o",
  249. "p",
  250. "q",
  251. "r",
  252. "s",
  253. "t",
  254. "u",
  255. "v",
  256. "w",
  257. "x",
  258. "y",
  259. "z",
  260. "left-curly-bracket",
  261. "vertical-line",
  262. "right-curly-bracket",
  263. "tilde",
  264. "DEL",
  265. };
  266. string __s;
  267. for (; __first != __last; ++__first)
  268. __s += __fctyp.narrow(*__first, 0);
  269. for (const auto& __it : __collatenames)
  270. if (__s == __it)
  271. return string_type(1, __fctyp.widen(
  272. static_cast<char>(&__it - __collatenames)));
  273. // TODO Add digraph support:
  274. // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
  275. return string_type();
  276. }
  277. template<typename _Ch_type>
  278. template<typename _Fwd_iter>
  279. typename regex_traits<_Ch_type>::char_class_type
  280. regex_traits<_Ch_type>::
  281. lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
  282. {
  283. typedef std::ctype<char_type> __ctype_type;
  284. const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
  285. // Mappings from class name to class mask.
  286. static const pair<const char*, char_class_type> __classnames[] =
  287. {
  288. {"d", ctype_base::digit},
  289. {"w", {ctype_base::alnum, _RegexMask::_S_under}},
  290. {"s", ctype_base::space},
  291. {"alnum", ctype_base::alnum},
  292. {"alpha", ctype_base::alpha},
  293. {"blank", ctype_base::blank},
  294. {"cntrl", ctype_base::cntrl},
  295. {"digit", ctype_base::digit},
  296. {"graph", ctype_base::graph},
  297. {"lower", ctype_base::lower},
  298. {"print", ctype_base::print},
  299. {"punct", ctype_base::punct},
  300. {"space", ctype_base::space},
  301. {"upper", ctype_base::upper},
  302. {"xdigit", ctype_base::xdigit},
  303. };
  304. string __s;
  305. for (; __first != __last; ++__first)
  306. __s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
  307. for (const auto& __it : __classnames)
  308. if (__s == __it.first)
  309. {
  310. if (__icase
  311. && ((__it.second
  312. & (ctype_base::lower | ctype_base::upper)) != 0))
  313. return ctype_base::alpha;
  314. return __it.second;
  315. }
  316. return 0;
  317. }
  318. template<typename _Ch_type>
  319. bool
  320. regex_traits<_Ch_type>::
  321. isctype(_Ch_type __c, char_class_type __f) const
  322. {
  323. typedef std::ctype<char_type> __ctype_type;
  324. const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
  325. return __fctyp.is(__f._M_base, __c)
  326. // [[:w:]]
  327. || ((__f._M_extended & _RegexMask::_S_under)
  328. && __c == __fctyp.widen('_'));
  329. }
  330. template<typename _Ch_type>
  331. int
  332. regex_traits<_Ch_type>::
  333. value(_Ch_type __ch, int __radix) const
  334. {
  335. std::basic_istringstream<char_type> __is(string_type(1, __ch));
  336. long __v;
  337. if (__radix == 8)
  338. __is >> std::oct;
  339. else if (__radix == 16)
  340. __is >> std::hex;
  341. __is >> __v;
  342. return __is.fail() ? -1 : __v;
  343. }
  344. template<typename _Bi_iter, typename _Alloc>
  345. template<typename _Out_iter>
  346. _Out_iter match_results<_Bi_iter, _Alloc>::
  347. format(_Out_iter __out,
  348. const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
  349. const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
  350. match_flag_type __flags) const
  351. {
  352. _GLIBCXX_DEBUG_ASSERT( ready() );
  353. regex_traits<char_type> __traits;
  354. typedef std::ctype<char_type> __ctype_type;
  355. const __ctype_type&
  356. __fctyp(use_facet<__ctype_type>(__traits.getloc()));
  357. auto __output = [&](size_t __idx)
  358. {
  359. auto& __sub = (*this)[__idx];
  360. if (__sub.matched)
  361. __out = std::copy(__sub.first, __sub.second, __out);
  362. };
  363. if (__flags & regex_constants::format_sed)
  364. {
  365. for (; __fmt_first != __fmt_last;)
  366. if (*__fmt_first == '&')
  367. {
  368. __output(0);
  369. ++__fmt_first;
  370. }
  371. else if (*__fmt_first == '\\')
  372. {
  373. if (++__fmt_first != __fmt_last
  374. && __fctyp.is(__ctype_type::digit, *__fmt_first))
  375. __output(__traits.value(*__fmt_first++, 10));
  376. else
  377. *__out++ = '\\';
  378. }
  379. else
  380. *__out++ = *__fmt_first++;
  381. }
  382. else
  383. {
  384. while (1)
  385. {
  386. auto __next = std::find(__fmt_first, __fmt_last, '$');
  387. if (__next == __fmt_last)
  388. break;
  389. __out = std::copy(__fmt_first, __next, __out);
  390. auto __eat = [&](char __ch) -> bool
  391. {
  392. if (*__next == __ch)
  393. {
  394. ++__next;
  395. return true;
  396. }
  397. return false;
  398. };
  399. if (++__next == __fmt_last)
  400. *__out++ = '$';
  401. else if (__eat('$'))
  402. *__out++ = '$';
  403. else if (__eat('&'))
  404. __output(0);
  405. else if (__eat('`'))
  406. {
  407. auto& __sub = _M_prefix();
  408. if (__sub.matched)
  409. __out = std::copy(__sub.first, __sub.second, __out);
  410. }
  411. else if (__eat('\''))
  412. {
  413. auto& __sub = _M_suffix();
  414. if (__sub.matched)
  415. __out = std::copy(__sub.first, __sub.second, __out);
  416. }
  417. else if (__fctyp.is(__ctype_type::digit, *__next))
  418. {
  419. long __num = __traits.value(*__next, 10);
  420. if (++__next != __fmt_last
  421. && __fctyp.is(__ctype_type::digit, *__next))
  422. {
  423. __num *= 10;
  424. __num += __traits.value(*__next++, 10);
  425. }
  426. if (0 <= __num && __num < this->size())
  427. __output(__num);
  428. }
  429. else
  430. *__out++ = '$';
  431. __fmt_first = __next;
  432. }
  433. __out = std::copy(__fmt_first, __fmt_last, __out);
  434. }
  435. return __out;
  436. }
  437. template<typename _Out_iter, typename _Bi_iter,
  438. typename _Rx_traits, typename _Ch_type>
  439. _Out_iter
  440. regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
  441. const basic_regex<_Ch_type, _Rx_traits>& __e,
  442. const _Ch_type* __fmt,
  443. regex_constants::match_flag_type __flags)
  444. {
  445. typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
  446. _IterT __i(__first, __last, __e, __flags);
  447. _IterT __end;
  448. if (__i == __end)
  449. {
  450. if (!(__flags & regex_constants::format_no_copy))
  451. __out = std::copy(__first, __last, __out);
  452. }
  453. else
  454. {
  455. sub_match<_Bi_iter> __last;
  456. auto __len = char_traits<_Ch_type>::length(__fmt);
  457. for (; __i != __end; ++__i)
  458. {
  459. if (!(__flags & regex_constants::format_no_copy))
  460. __out = std::copy(__i->prefix().first, __i->prefix().second,
  461. __out);
  462. __out = __i->format(__out, __fmt, __fmt + __len, __flags);
  463. __last = __i->suffix();
  464. if (__flags & regex_constants::format_first_only)
  465. break;
  466. }
  467. if (!(__flags & regex_constants::format_no_copy))
  468. __out = std::copy(__last.first, __last.second, __out);
  469. }
  470. return __out;
  471. }
  472. template<typename _Bi_iter,
  473. typename _Ch_type,
  474. typename _Rx_traits>
  475. bool
  476. regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  477. operator==(const regex_iterator& __rhs) const
  478. {
  479. return (_M_match.empty() && __rhs._M_match.empty())
  480. || (_M_begin == __rhs._M_begin
  481. && _M_end == __rhs._M_end
  482. && _M_pregex == __rhs._M_pregex
  483. && _M_flags == __rhs._M_flags
  484. && _M_match[0] == __rhs._M_match[0]);
  485. }
  486. template<typename _Bi_iter,
  487. typename _Ch_type,
  488. typename _Rx_traits>
  489. regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
  490. regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  491. operator++()
  492. {
  493. // In all cases in which the call to regex_search returns true,
  494. // match.prefix().first shall be equal to the previous value of
  495. // match[0].second, and for each index i in the half-open range
  496. // [0, match.size()) for which match[i].matched is true,
  497. // match[i].position() shall return distance(begin, match[i].first).
  498. // [28.12.1.4.5]
  499. if (_M_match[0].matched)
  500. {
  501. auto __start = _M_match[0].second;
  502. auto __prefix_first = _M_match[0].second;
  503. if (_M_match[0].first == _M_match[0].second)
  504. {
  505. if (__start == _M_end)
  506. {
  507. _M_match = value_type();
  508. return *this;
  509. }
  510. else
  511. {
  512. if (regex_search(__start, _M_end, _M_match, *_M_pregex,
  513. _M_flags
  514. | regex_constants::match_not_null
  515. | regex_constants::match_continuous))
  516. {
  517. _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
  518. auto& __prefix = _M_match._M_prefix();
  519. __prefix.first = __prefix_first;
  520. __prefix.matched = __prefix.first != __prefix.second;
  521. // [28.12.1.4.5]
  522. _M_match._M_begin = _M_begin;
  523. return *this;
  524. }
  525. else
  526. ++__start;
  527. }
  528. }
  529. _M_flags |= regex_constants::match_prev_avail;
  530. if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
  531. {
  532. _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched);
  533. auto& __prefix = _M_match._M_prefix();
  534. __prefix.first = __prefix_first;
  535. __prefix.matched = __prefix.first != __prefix.second;
  536. // [28.12.1.4.5]
  537. _M_match._M_begin = _M_begin;
  538. }
  539. else
  540. _M_match = value_type();
  541. }
  542. return *this;
  543. }
  544. template<typename _Bi_iter,
  545. typename _Ch_type,
  546. typename _Rx_traits>
  547. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
  548. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  549. operator=(const regex_token_iterator& __rhs)
  550. {
  551. _M_position = __rhs._M_position;
  552. _M_subs = __rhs._M_subs;
  553. _M_n = __rhs._M_n;
  554. _M_suffix = __rhs._M_suffix;
  555. _M_has_m1 = __rhs._M_has_m1;
  556. _M_normalize_result();
  557. return *this;
  558. }
  559. template<typename _Bi_iter,
  560. typename _Ch_type,
  561. typename _Rx_traits>
  562. bool
  563. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  564. operator==(const regex_token_iterator& __rhs) const
  565. {
  566. if (_M_end_of_seq() && __rhs._M_end_of_seq())
  567. return true;
  568. if (_M_suffix.matched && __rhs._M_suffix.matched
  569. && _M_suffix == __rhs._M_suffix)
  570. return true;
  571. if (_M_end_of_seq() || _M_suffix.matched
  572. || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
  573. return false;
  574. return _M_position == __rhs._M_position
  575. && _M_n == __rhs._M_n
  576. && _M_subs == __rhs._M_subs;
  577. }
  578. template<typename _Bi_iter,
  579. typename _Ch_type,
  580. typename _Rx_traits>
  581. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
  582. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  583. operator++()
  584. {
  585. _Position __prev = _M_position;
  586. if (_M_suffix.matched)
  587. *this = regex_token_iterator();
  588. else if (_M_n + 1 < _M_subs.size())
  589. {
  590. _M_n++;
  591. _M_result = &_M_current_match();
  592. }
  593. else
  594. {
  595. _M_n = 0;
  596. ++_M_position;
  597. if (_M_position != _Position())
  598. _M_result = &_M_current_match();
  599. else if (_M_has_m1 && __prev->suffix().length() != 0)
  600. {
  601. _M_suffix.matched = true;
  602. _M_suffix.first = __prev->suffix().first;
  603. _M_suffix.second = __prev->suffix().second;
  604. _M_result = &_M_suffix;
  605. }
  606. else
  607. *this = regex_token_iterator();
  608. }
  609. return *this;
  610. }
  611. template<typename _Bi_iter,
  612. typename _Ch_type,
  613. typename _Rx_traits>
  614. void
  615. regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
  616. _M_init(_Bi_iter __a, _Bi_iter __b)
  617. {
  618. _M_has_m1 = false;
  619. for (auto __it : _M_subs)
  620. if (__it == -1)
  621. {
  622. _M_has_m1 = true;
  623. break;
  624. }
  625. if (_M_position != _Position())
  626. _M_result = &_M_current_match();
  627. else if (_M_has_m1)
  628. {
  629. _M_suffix.matched = true;
  630. _M_suffix.first = __a;
  631. _M_suffix.second = __b;
  632. _M_result = &_M_suffix;
  633. }
  634. else
  635. _M_result = nullptr;
  636. }
  637. _GLIBCXX_END_NAMESPACE_VERSION
  638. } // namespace