1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2013-2020 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex.tcc 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 namespace std _GLIBCXX_VISIBILITY(default) 32 { 33 _GLIBCXX_BEGIN_NAMESPACE_VERSION 34 35 namespace __detail 36 { 37 /// @cond undocumented 38 39 // Result of merging regex_match and regex_search. 40 // 41 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 42 // the other one if possible, for test purpose). 43 // 44 // That __match_mode is true means regex_match, else regex_search. 45 template<typename _BiIter, typename _Alloc, 46 typename _CharT, typename _TraitsT, 47 _RegexExecutorPolicy __policy, 48 bool __match_mode> 49 bool __regex_algo_impl(_BiIter __s,_BiIter __e,match_results<_BiIter,_Alloc> & __m,const basic_regex<_CharT,_TraitsT> & __re,regex_constants::match_flag_type __flags)50 __regex_algo_impl(_BiIter __s, 51 _BiIter __e, 52 match_results<_BiIter, _Alloc>& __m, 53 const basic_regex<_CharT, _TraitsT>& __re, 54 regex_constants::match_flag_type __flags) 55 { 56 if (__re._M_automaton == nullptr) 57 return false; 58 59 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; 60 __m._M_begin = __s; 61 __m._M_resize(__re._M_automaton->_M_sub_count()); 62 63 bool __ret; 64 if ((__re.flags() & regex_constants::__polynomial) 65 || (__policy == _RegexExecutorPolicy::_S_alternate 66 && !__re._M_automaton->_M_has_backref)) 67 { 68 _Executor<_BiIter, _Alloc, _TraitsT, false> 69 __executor(__s, __e, __m, __re, __flags); 70 if (__match_mode) 71 __ret = __executor._M_match(); 72 else 73 __ret = __executor._M_search(); 74 } 75 else 76 { 77 _Executor<_BiIter, _Alloc, _TraitsT, true> 78 __executor(__s, __e, __m, __re, __flags); 79 if (__match_mode) 80 __ret = __executor._M_match(); 81 else 82 __ret = __executor._M_search(); 83 } 84 if (__ret) 85 { 86 for (auto& __it : __res) 87 if (!__it.matched) 88 __it.first = __it.second = __e; 89 auto& __pre = __m._M_prefix(); 90 auto& __suf = __m._M_suffix(); 91 if (__match_mode) 92 { 93 __pre.matched = false; 94 __pre.first = __s; 95 __pre.second = __s; 96 __suf.matched = false; 97 __suf.first = __e; 98 __suf.second = __e; 99 } 100 else 101 { 102 __pre.first = __s; 103 __pre.second = __res[0].first; 104 __pre.matched = (__pre.first != __pre.second); 105 __suf.first = __res[0].second; 106 __suf.second = __e; 107 __suf.matched = (__suf.first != __suf.second); 108 } 109 } 110 else 111 { 112 __m._M_establish_failed_match(__e); 113 } 114 return __ret; 115 } 116 /// @endcond 117 } // namespace __detail 118 119 /// @cond 120 121 template<typename _Ch_type> 122 template<typename _Fwd_iter> 123 typename regex_traits<_Ch_type>::string_type 124 regex_traits<_Ch_type>:: lookup_collatename(_Fwd_iter __first,_Fwd_iter __last) const125 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 126 { 127 typedef std::ctype<char_type> __ctype_type; 128 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 129 130 static const char* __collatenames[] = 131 { 132 "NUL", 133 "SOH", 134 "STX", 135 "ETX", 136 "EOT", 137 "ENQ", 138 "ACK", 139 "alert", 140 "backspace", 141 "tab", 142 "newline", 143 "vertical-tab", 144 "form-feed", 145 "carriage-return", 146 "SO", 147 "SI", 148 "DLE", 149 "DC1", 150 "DC2", 151 "DC3", 152 "DC4", 153 "NAK", 154 "SYN", 155 "ETB", 156 "CAN", 157 "EM", 158 "SUB", 159 "ESC", 160 "IS4", 161 "IS3", 162 "IS2", 163 "IS1", 164 "space", 165 "exclamation-mark", 166 "quotation-mark", 167 "number-sign", 168 "dollar-sign", 169 "percent-sign", 170 "ampersand", 171 "apostrophe", 172 "left-parenthesis", 173 "right-parenthesis", 174 "asterisk", 175 "plus-sign", 176 "comma", 177 "hyphen", 178 "period", 179 "slash", 180 "zero", 181 "one", 182 "two", 183 "three", 184 "four", 185 "five", 186 "six", 187 "seven", 188 "eight", 189 "nine", 190 "colon", 191 "semicolon", 192 "less-than-sign", 193 "equals-sign", 194 "greater-than-sign", 195 "question-mark", 196 "commercial-at", 197 "A", 198 "B", 199 "C", 200 "D", 201 "E", 202 "F", 203 "G", 204 "H", 205 "I", 206 "J", 207 "K", 208 "L", 209 "M", 210 "N", 211 "O", 212 "P", 213 "Q", 214 "R", 215 "S", 216 "T", 217 "U", 218 "V", 219 "W", 220 "X", 221 "Y", 222 "Z", 223 "left-square-bracket", 224 "backslash", 225 "right-square-bracket", 226 "circumflex", 227 "underscore", 228 "grave-accent", 229 "a", 230 "b", 231 "c", 232 "d", 233 "e", 234 "f", 235 "g", 236 "h", 237 "i", 238 "j", 239 "k", 240 "l", 241 "m", 242 "n", 243 "o", 244 "p", 245 "q", 246 "r", 247 "s", 248 "t", 249 "u", 250 "v", 251 "w", 252 "x", 253 "y", 254 "z", 255 "left-curly-bracket", 256 "vertical-line", 257 "right-curly-bracket", 258 "tilde", 259 "DEL", 260 }; 261 262 string __s; 263 for (; __first != __last; ++__first) 264 __s += __fctyp.narrow(*__first, 0); 265 266 for (const auto& __it : __collatenames) 267 if (__s == __it) 268 return string_type(1, __fctyp.widen( 269 static_cast<char>(&__it - __collatenames))); 270 271 // TODO Add digraph support: 272 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 273 274 return string_type(); 275 } 276 277 template<typename _Ch_type> 278 template<typename _Fwd_iter> 279 typename regex_traits<_Ch_type>::char_class_type 280 regex_traits<_Ch_type>:: lookup_classname(_Fwd_iter __first,_Fwd_iter __last,bool __icase) const281 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 282 { 283 typedef std::ctype<char_type> __ctype_type; 284 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 285 286 // Mappings from class name to class mask. 287 static const pair<const char*, char_class_type> __classnames[] = 288 { 289 {"d", ctype_base::digit}, 290 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 291 {"s", ctype_base::space}, 292 {"alnum", ctype_base::alnum}, 293 {"alpha", ctype_base::alpha}, 294 {"blank", ctype_base::blank}, 295 {"cntrl", ctype_base::cntrl}, 296 {"digit", ctype_base::digit}, 297 {"graph", ctype_base::graph}, 298 {"lower", ctype_base::lower}, 299 {"print", ctype_base::print}, 300 {"punct", ctype_base::punct}, 301 {"space", ctype_base::space}, 302 {"upper", ctype_base::upper}, 303 {"xdigit", ctype_base::xdigit}, 304 }; 305 306 string __s; 307 for (; __first != __last; ++__first) 308 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 309 310 for (const auto& __it : __classnames) 311 if (__s == __it.first) 312 { 313 if (__icase 314 && ((__it.second 315 & (ctype_base::lower | ctype_base::upper)) != 0)) 316 return ctype_base::alpha; 317 return __it.second; 318 } 319 return 0; 320 } 321 322 template<typename _Ch_type> 323 bool 324 regex_traits<_Ch_type>:: isctype(_Ch_type __c,char_class_type __f) const325 isctype(_Ch_type __c, char_class_type __f) const 326 { 327 typedef std::ctype<char_type> __ctype_type; 328 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 329 330 return __fctyp.is(__f._M_base, __c) 331 // [[:w:]] 332 || ((__f._M_extended & _RegexMask::_S_under) 333 && __c == __fctyp.widen('_')); 334 } 335 336 template<typename _Ch_type> 337 int 338 regex_traits<_Ch_type>:: value(_Ch_type __ch,int __radix) const339 value(_Ch_type __ch, int __radix) const 340 { 341 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 342 long __v; 343 if (__radix == 8) 344 __is >> std::oct; 345 else if (__radix == 16) 346 __is >> std::hex; 347 __is >> __v; 348 return __is.fail() ? -1 : __v; 349 } 350 351 template<typename _Bi_iter, typename _Alloc> 352 template<typename _Out_iter> 353 _Out_iter 354 match_results<_Bi_iter, _Alloc>:: format(_Out_iter __out,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_first,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_last,match_flag_type __flags) const355 format(_Out_iter __out, 356 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 357 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 358 match_flag_type __flags) const 359 { 360 __glibcxx_assert( ready() ); 361 regex_traits<char_type> __traits; 362 typedef std::ctype<char_type> __ctype_type; 363 const __ctype_type& 364 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 365 366 auto __output = [&](size_t __idx) 367 { 368 auto& __sub = (*this)[__idx]; 369 if (__sub.matched) 370 __out = std::copy(__sub.first, __sub.second, __out); 371 }; 372 373 if (__flags & regex_constants::format_sed) 374 { 375 bool __escaping = false; 376 for (; __fmt_first != __fmt_last; __fmt_first++) 377 { 378 if (__escaping) 379 { 380 __escaping = false; 381 if (__fctyp.is(__ctype_type::digit, *__fmt_first)) 382 __output(__traits.value(*__fmt_first, 10)); 383 else 384 *__out++ = *__fmt_first; 385 continue; 386 } 387 if (*__fmt_first == '\\') 388 { 389 __escaping = true; 390 continue; 391 } 392 if (*__fmt_first == '&') 393 { 394 __output(0); 395 continue; 396 } 397 *__out++ = *__fmt_first; 398 } 399 if (__escaping) 400 *__out++ = '\\'; 401 } 402 else 403 { 404 while (1) 405 { 406 auto __next = std::find(__fmt_first, __fmt_last, '$'); 407 if (__next == __fmt_last) 408 break; 409 410 __out = std::copy(__fmt_first, __next, __out); 411 412 auto __eat = [&](char __ch) -> bool 413 { 414 if (*__next == __ch) 415 { 416 ++__next; 417 return true; 418 } 419 return false; 420 }; 421 422 if (++__next == __fmt_last) 423 *__out++ = '$'; 424 else if (__eat('$')) 425 *__out++ = '$'; 426 else if (__eat('&')) 427 __output(0); 428 else if (__eat('`')) 429 { 430 auto& __sub = _M_prefix(); 431 if (__sub.matched) 432 __out = std::copy(__sub.first, __sub.second, __out); 433 } 434 else if (__eat('\'')) 435 { 436 auto& __sub = _M_suffix(); 437 if (__sub.matched) 438 __out = std::copy(__sub.first, __sub.second, __out); 439 } 440 else if (__fctyp.is(__ctype_type::digit, *__next)) 441 { 442 long __num = __traits.value(*__next, 10); 443 if (++__next != __fmt_last 444 && __fctyp.is(__ctype_type::digit, *__next)) 445 { 446 __num *= 10; 447 __num += __traits.value(*__next++, 10); 448 } 449 if (0 <= __num && __num < this->size()) 450 __output(__num); 451 } 452 else 453 *__out++ = '$'; 454 __fmt_first = __next; 455 } 456 __out = std::copy(__fmt_first, __fmt_last, __out); 457 } 458 return __out; 459 } 460 461 template<typename _Out_iter, typename _Bi_iter, 462 typename _Rx_traits, typename _Ch_type> 463 _Out_iter 464 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 465 const basic_regex<_Ch_type, _Rx_traits>& __e, 466 const _Ch_type* __fmt, 467 regex_constants::match_flag_type __flags) 468 { 469 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 470 _IterT __i(__first, __last, __e, __flags); 471 _IterT __end; 472 if (__i == __end) 473 { 474 if (!(__flags & regex_constants::format_no_copy)) 475 __out = std::copy(__first, __last, __out); 476 } 477 else 478 { 479 sub_match<_Bi_iter> __last; 480 auto __len = char_traits<_Ch_type>::length(__fmt); 481 for (; __i != __end; ++__i) 482 { 483 if (!(__flags & regex_constants::format_no_copy)) 484 __out = std::copy(__i->prefix().first, __i->prefix().second, 485 __out); 486 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 487 __last = __i->suffix(); 488 if (__flags & regex_constants::format_first_only) 489 break; 490 } 491 if (!(__flags & regex_constants::format_no_copy)) 492 __out = std::copy(__last.first, __last.second, __out); 493 } 494 return __out; 495 } 496 497 template<typename _Bi_iter, 498 typename _Ch_type, 499 typename _Rx_traits> 500 bool 501 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_iterator & __rhs) const502 operator==(const regex_iterator& __rhs) const noexcept 503 { 504 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr) 505 return true; 506 return _M_pregex == __rhs._M_pregex 507 && _M_begin == __rhs._M_begin 508 && _M_end == __rhs._M_end 509 && _M_flags == __rhs._M_flags 510 && _M_match[0] == __rhs._M_match[0]; 511 } 512 513 template<typename _Bi_iter, 514 typename _Ch_type, 515 typename _Rx_traits> 516 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 517 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()518 operator++() 519 { 520 // In all cases in which the call to regex_search returns true, 521 // match.prefix().first shall be equal to the previous value of 522 // match[0].second, and for each index i in the half-open range 523 // [0, match.size()) for which match[i].matched is true, 524 // match[i].position() shall return distance(begin, match[i].first). 525 // [28.12.1.4.5] 526 if (_M_match[0].matched) 527 { 528 auto __start = _M_match[0].second; 529 auto __prefix_first = _M_match[0].second; 530 if (_M_match[0].first == _M_match[0].second) 531 { 532 if (__start == _M_end) 533 { 534 _M_pregex = nullptr; 535 return *this; 536 } 537 else 538 { 539 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 540 _M_flags 541 | regex_constants::match_not_null 542 | regex_constants::match_continuous)) 543 { 544 __glibcxx_assert(_M_match[0].matched); 545 auto& __prefix = _M_match._M_prefix(); 546 __prefix.first = __prefix_first; 547 __prefix.matched = __prefix.first != __prefix.second; 548 // [28.12.1.4.5] 549 _M_match._M_begin = _M_begin; 550 return *this; 551 } 552 else 553 ++__start; 554 } 555 } 556 _M_flags |= regex_constants::match_prev_avail; 557 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 558 { 559 __glibcxx_assert(_M_match[0].matched); 560 auto& __prefix = _M_match._M_prefix(); 561 __prefix.first = __prefix_first; 562 __prefix.matched = __prefix.first != __prefix.second; 563 // [28.12.1.4.5] 564 _M_match._M_begin = _M_begin; 565 } 566 else 567 _M_pregex = nullptr; 568 } 569 return *this; 570 } 571 572 template<typename _Bi_iter, 573 typename _Ch_type, 574 typename _Rx_traits> 575 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 576 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator =(const regex_token_iterator & __rhs)577 operator=(const regex_token_iterator& __rhs) 578 { 579 _M_position = __rhs._M_position; 580 _M_subs = __rhs._M_subs; 581 _M_n = __rhs._M_n; 582 _M_suffix = __rhs._M_suffix; 583 _M_has_m1 = __rhs._M_has_m1; 584 _M_normalize_result(); 585 return *this; 586 } 587 588 template<typename _Bi_iter, 589 typename _Ch_type, 590 typename _Rx_traits> 591 bool 592 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_token_iterator & __rhs) const593 operator==(const regex_token_iterator& __rhs) const 594 { 595 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 596 return true; 597 if (_M_suffix.matched && __rhs._M_suffix.matched 598 && _M_suffix == __rhs._M_suffix) 599 return true; 600 if (_M_end_of_seq() || _M_suffix.matched 601 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 602 return false; 603 return _M_position == __rhs._M_position 604 && _M_n == __rhs._M_n 605 && _M_subs == __rhs._M_subs; 606 } 607 608 template<typename _Bi_iter, 609 typename _Ch_type, 610 typename _Rx_traits> 611 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 612 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()613 operator++() 614 { 615 _Position __prev = _M_position; 616 if (_M_suffix.matched) 617 *this = regex_token_iterator(); 618 else if (_M_n + 1 < _M_subs.size()) 619 { 620 _M_n++; 621 _M_result = &_M_current_match(); 622 } 623 else 624 { 625 _M_n = 0; 626 ++_M_position; 627 if (_M_position != _Position()) 628 _M_result = &_M_current_match(); 629 else if (_M_has_m1 && __prev->suffix().length() != 0) 630 { 631 _M_suffix.matched = true; 632 _M_suffix.first = __prev->suffix().first; 633 _M_suffix.second = __prev->suffix().second; 634 _M_result = &_M_suffix; 635 } 636 else 637 *this = regex_token_iterator(); 638 } 639 return *this; 640 } 641 642 template<typename _Bi_iter, 643 typename _Ch_type, 644 typename _Rx_traits> 645 void 646 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: _M_init(_Bi_iter __a,_Bi_iter __b)647 _M_init(_Bi_iter __a, _Bi_iter __b) 648 { 649 _M_has_m1 = false; 650 for (auto __it : _M_subs) 651 if (__it == -1) 652 { 653 _M_has_m1 = true; 654 break; 655 } 656 if (_M_position != _Position()) 657 _M_result = &_M_current_match(); 658 else if (_M_has_m1) 659 { 660 _M_suffix.matched = true; 661 _M_suffix.first = __a; 662 _M_suffix.second = __b; 663 _M_result = &_M_suffix; 664 } 665 else 666 _M_result = nullptr; 667 } 668 669 /// @endcond 670 671 _GLIBCXX_END_NAMESPACE_VERSION 672 } // namespace 673