1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2013-2015 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex.tcc 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 // A non-standard switch to let the user pick the matching algorithm. 32 // If _GLIBCXX_REGEX_USE_THOMPSON_NFA is defined, the thompson NFA 33 // algorithm will be used. This algorithm is not enabled by default, 34 // and cannot be used if the regex contains back-references, but has better 35 // (polynomial instead of exponential) worst case performance. 36 // See __regex_algo_impl below. 37 38 namespace std _GLIBCXX_VISIBILITY(default) 39 { 40 namespace __detail 41 { 42 _GLIBCXX_BEGIN_NAMESPACE_VERSION 43 44 // Result of merging regex_match and regex_search. 45 // 46 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 47 // the other one if possible, for test purpose). 48 // 49 // That __match_mode is true means regex_match, else regex_search. 50 template<typename _BiIter, typename _Alloc, 51 typename _CharT, typename _TraitsT, 52 _RegexExecutorPolicy __policy, 53 bool __match_mode> 54 bool __regex_algo_impl(_BiIter __s,_BiIter __e,match_results<_BiIter,_Alloc> & __m,const basic_regex<_CharT,_TraitsT> & __re,regex_constants::match_flag_type __flags)55 __regex_algo_impl(_BiIter __s, 56 _BiIter __e, 57 match_results<_BiIter, _Alloc>& __m, 58 const basic_regex<_CharT, _TraitsT>& __re, 59 regex_constants::match_flag_type __flags) 60 { 61 if (__re._M_automaton == nullptr) 62 return false; 63 64 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; 65 __m._M_begin = __s; 66 __m._M_resize(__re._M_automaton->_M_sub_count()); 67 for (auto& __it : __res) 68 __it.matched = false; 69 70 // __policy is used by testsuites so that they can use Thompson NFA 71 // without defining a macro. Users should define 72 // _GLIBCXX_REGEX_USE_THOMPSON_NFA if they need to use this approach. 73 bool __ret; 74 if (!__re._M_automaton->_M_has_backref 75 && !(__re._M_flags & regex_constants::ECMAScript) 76 #ifndef _GLIBCXX_REGEX_USE_THOMPSON_NFA 77 && __policy == _RegexExecutorPolicy::_S_alternate 78 #endif 79 ) 80 { 81 _Executor<_BiIter, _Alloc, _TraitsT, false> 82 __executor(__s, __e, __m, __re, __flags); 83 if (__match_mode) 84 __ret = __executor._M_match(); 85 else 86 __ret = __executor._M_search(); 87 } 88 else 89 { 90 _Executor<_BiIter, _Alloc, _TraitsT, true> 91 __executor(__s, __e, __m, __re, __flags); 92 if (__match_mode) 93 __ret = __executor._M_match(); 94 else 95 __ret = __executor._M_search(); 96 } 97 if (__ret) 98 { 99 for (auto& __it : __res) 100 if (!__it.matched) 101 __it.first = __it.second = __e; 102 auto& __pre = __m._M_prefix(); 103 auto& __suf = __m._M_suffix(); 104 if (__match_mode) 105 { 106 __pre.matched = false; 107 __pre.first = __s; 108 __pre.second = __s; 109 __suf.matched = false; 110 __suf.first = __e; 111 __suf.second = __e; 112 } 113 else 114 { 115 __pre.first = __s; 116 __pre.second = __res[0].first; 117 __pre.matched = (__pre.first != __pre.second); 118 __suf.first = __res[0].second; 119 __suf.second = __e; 120 __suf.matched = (__suf.first != __suf.second); 121 } 122 } 123 else 124 { 125 __m._M_resize(0); 126 for (auto& __it : __res) 127 { 128 __it.matched = false; 129 __it.first = __it.second = __e; 130 } 131 } 132 return __ret; 133 } 134 135 _GLIBCXX_END_NAMESPACE_VERSION 136 } 137 138 _GLIBCXX_BEGIN_NAMESPACE_VERSION 139 140 template<typename _Ch_type> 141 template<typename _Fwd_iter> 142 typename regex_traits<_Ch_type>::string_type 143 regex_traits<_Ch_type>:: lookup_collatename(_Fwd_iter __first,_Fwd_iter __last) const144 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 145 { 146 typedef std::ctype<char_type> __ctype_type; 147 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 148 149 static const char* __collatenames[] = 150 { 151 "NUL", 152 "SOH", 153 "STX", 154 "ETX", 155 "EOT", 156 "ENQ", 157 "ACK", 158 "alert", 159 "backspace", 160 "tab", 161 "newline", 162 "vertical-tab", 163 "form-feed", 164 "carriage-return", 165 "SO", 166 "SI", 167 "DLE", 168 "DC1", 169 "DC2", 170 "DC3", 171 "DC4", 172 "NAK", 173 "SYN", 174 "ETB", 175 "CAN", 176 "EM", 177 "SUB", 178 "ESC", 179 "IS4", 180 "IS3", 181 "IS2", 182 "IS1", 183 "space", 184 "exclamation-mark", 185 "quotation-mark", 186 "number-sign", 187 "dollar-sign", 188 "percent-sign", 189 "ampersand", 190 "apostrophe", 191 "left-parenthesis", 192 "right-parenthesis", 193 "asterisk", 194 "plus-sign", 195 "comma", 196 "hyphen", 197 "period", 198 "slash", 199 "zero", 200 "one", 201 "two", 202 "three", 203 "four", 204 "five", 205 "six", 206 "seven", 207 "eight", 208 "nine", 209 "colon", 210 "semicolon", 211 "less-than-sign", 212 "equals-sign", 213 "greater-than-sign", 214 "question-mark", 215 "commercial-at", 216 "A", 217 "B", 218 "C", 219 "D", 220 "E", 221 "F", 222 "G", 223 "H", 224 "I", 225 "J", 226 "K", 227 "L", 228 "M", 229 "N", 230 "O", 231 "P", 232 "Q", 233 "R", 234 "S", 235 "T", 236 "U", 237 "V", 238 "W", 239 "X", 240 "Y", 241 "Z", 242 "left-square-bracket", 243 "backslash", 244 "right-square-bracket", 245 "circumflex", 246 "underscore", 247 "grave-accent", 248 "a", 249 "b", 250 "c", 251 "d", 252 "e", 253 "f", 254 "g", 255 "h", 256 "i", 257 "j", 258 "k", 259 "l", 260 "m", 261 "n", 262 "o", 263 "p", 264 "q", 265 "r", 266 "s", 267 "t", 268 "u", 269 "v", 270 "w", 271 "x", 272 "y", 273 "z", 274 "left-curly-bracket", 275 "vertical-line", 276 "right-curly-bracket", 277 "tilde", 278 "DEL", 279 }; 280 281 string __s; 282 for (; __first != __last; ++__first) 283 __s += __fctyp.narrow(*__first, 0); 284 285 for (const auto& __it : __collatenames) 286 if (__s == __it) 287 return string_type(1, __fctyp.widen( 288 static_cast<char>(&__it - __collatenames))); 289 290 // TODO Add digraph support: 291 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 292 293 return string_type(); 294 } 295 296 template<typename _Ch_type> 297 template<typename _Fwd_iter> 298 typename regex_traits<_Ch_type>::char_class_type 299 regex_traits<_Ch_type>:: lookup_classname(_Fwd_iter __first,_Fwd_iter __last,bool __icase) const300 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 301 { 302 typedef std::ctype<char_type> __ctype_type; 303 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 304 305 // Mappings from class name to class mask. 306 static const pair<const char*, char_class_type> __classnames[] = 307 { 308 {"d", ctype_base::digit}, 309 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 310 {"s", ctype_base::space}, 311 {"alnum", ctype_base::alnum}, 312 {"alpha", ctype_base::alpha}, 313 {"blank", ctype_base::blank}, 314 {"cntrl", ctype_base::cntrl}, 315 {"digit", ctype_base::digit}, 316 {"graph", ctype_base::graph}, 317 {"lower", ctype_base::lower}, 318 {"print", ctype_base::print}, 319 {"punct", ctype_base::punct}, 320 {"space", ctype_base::space}, 321 {"upper", ctype_base::upper}, 322 {"xdigit", ctype_base::xdigit}, 323 }; 324 325 string __s; 326 for (; __first != __last; ++__first) 327 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 328 329 for (const auto& __it : __classnames) 330 if (__s == __it.first) 331 { 332 if (__icase 333 && ((__it.second 334 & (ctype_base::lower | ctype_base::upper)) != 0)) 335 return ctype_base::alpha; 336 return __it.second; 337 } 338 return 0; 339 } 340 341 template<typename _Ch_type> 342 bool 343 regex_traits<_Ch_type>:: isctype(_Ch_type __c,char_class_type __f) const344 isctype(_Ch_type __c, char_class_type __f) const 345 { 346 typedef std::ctype<char_type> __ctype_type; 347 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 348 349 return __fctyp.is(__f._M_base, __c) 350 // [[:w:]] 351 || ((__f._M_extended & _RegexMask::_S_under) 352 && __c == __fctyp.widen('_')); 353 } 354 355 template<typename _Ch_type> 356 int 357 regex_traits<_Ch_type>:: value(_Ch_type __ch,int __radix) const358 value(_Ch_type __ch, int __radix) const 359 { 360 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 361 long __v; 362 if (__radix == 8) 363 __is >> std::oct; 364 else if (__radix == 16) 365 __is >> std::hex; 366 __is >> __v; 367 return __is.fail() ? -1 : __v; 368 } 369 370 template<typename _Bi_iter, typename _Alloc> 371 template<typename _Out_iter> 372 _Out_iter match_results<_Bi_iter, _Alloc>:: format(_Out_iter __out,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_first,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_last,match_flag_type __flags) const373 format(_Out_iter __out, 374 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 375 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 376 match_flag_type __flags) const 377 { 378 _GLIBCXX_DEBUG_ASSERT( ready() ); 379 regex_traits<char_type> __traits; 380 typedef std::ctype<char_type> __ctype_type; 381 const __ctype_type& 382 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 383 384 auto __output = [&](size_t __idx) 385 { 386 auto& __sub = (*this)[__idx]; 387 if (__sub.matched) 388 __out = std::copy(__sub.first, __sub.second, __out); 389 }; 390 391 if (__flags & regex_constants::format_sed) 392 { 393 for (; __fmt_first != __fmt_last;) 394 if (*__fmt_first == '&') 395 { 396 __output(0); 397 ++__fmt_first; 398 } 399 else if (*__fmt_first == '\\') 400 { 401 if (++__fmt_first != __fmt_last 402 && __fctyp.is(__ctype_type::digit, *__fmt_first)) 403 __output(__traits.value(*__fmt_first++, 10)); 404 else 405 *__out++ = '\\'; 406 } 407 else 408 *__out++ = *__fmt_first++; 409 } 410 else 411 { 412 while (1) 413 { 414 auto __next = std::find(__fmt_first, __fmt_last, '$'); 415 if (__next == __fmt_last) 416 break; 417 418 __out = std::copy(__fmt_first, __next, __out); 419 420 auto __eat = [&](char __ch) -> bool 421 { 422 if (*__next == __ch) 423 { 424 ++__next; 425 return true; 426 } 427 return false; 428 }; 429 430 if (++__next == __fmt_last) 431 *__out++ = '$'; 432 else if (__eat('$')) 433 *__out++ = '$'; 434 else if (__eat('&')) 435 __output(0); 436 else if (__eat('`')) 437 { 438 auto& __sub = _M_prefix(); 439 if (__sub.matched) 440 __out = std::copy(__sub.first, __sub.second, __out); 441 } 442 else if (__eat('\'')) 443 { 444 auto& __sub = _M_suffix(); 445 if (__sub.matched) 446 __out = std::copy(__sub.first, __sub.second, __out); 447 } 448 else if (__fctyp.is(__ctype_type::digit, *__next)) 449 { 450 long __num = __traits.value(*__next, 10); 451 if (++__next != __fmt_last 452 && __fctyp.is(__ctype_type::digit, *__next)) 453 { 454 __num *= 10; 455 __num += __traits.value(*__next++, 10); 456 } 457 if (0 <= __num && __num < this->size()) 458 __output(__num); 459 } 460 else 461 *__out++ = '$'; 462 __fmt_first = __next; 463 } 464 __out = std::copy(__fmt_first, __fmt_last, __out); 465 } 466 return __out; 467 } 468 469 template<typename _Out_iter, typename _Bi_iter, 470 typename _Rx_traits, typename _Ch_type> 471 _Out_iter 472 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 473 const basic_regex<_Ch_type, _Rx_traits>& __e, 474 const _Ch_type* __fmt, 475 regex_constants::match_flag_type __flags) 476 { 477 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 478 _IterT __i(__first, __last, __e, __flags); 479 _IterT __end; 480 if (__i == __end) 481 { 482 if (!(__flags & regex_constants::format_no_copy)) 483 __out = std::copy(__first, __last, __out); 484 } 485 else 486 { 487 sub_match<_Bi_iter> __last; 488 auto __len = char_traits<_Ch_type>::length(__fmt); 489 for (; __i != __end; ++__i) 490 { 491 if (!(__flags & regex_constants::format_no_copy)) 492 __out = std::copy(__i->prefix().first, __i->prefix().second, 493 __out); 494 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 495 __last = __i->suffix(); 496 if (__flags & regex_constants::format_first_only) 497 break; 498 } 499 if (!(__flags & regex_constants::format_no_copy)) 500 __out = std::copy(__last.first, __last.second, __out); 501 } 502 return __out; 503 } 504 505 template<typename _Bi_iter, 506 typename _Ch_type, 507 typename _Rx_traits> 508 bool 509 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_iterator & __rhs) const510 operator==(const regex_iterator& __rhs) const 511 { 512 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr) 513 return true; 514 return _M_pregex == __rhs._M_pregex 515 && _M_begin == __rhs._M_begin 516 && _M_end == __rhs._M_end 517 && _M_flags == __rhs._M_flags 518 && _M_match[0] == __rhs._M_match[0]; 519 } 520 521 template<typename _Bi_iter, 522 typename _Ch_type, 523 typename _Rx_traits> 524 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 525 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()526 operator++() 527 { 528 // In all cases in which the call to regex_search returns true, 529 // match.prefix().first shall be equal to the previous value of 530 // match[0].second, and for each index i in the half-open range 531 // [0, match.size()) for which match[i].matched is true, 532 // match[i].position() shall return distance(begin, match[i].first). 533 // [28.12.1.4.5] 534 if (_M_match[0].matched) 535 { 536 auto __start = _M_match[0].second; 537 auto __prefix_first = _M_match[0].second; 538 if (_M_match[0].first == _M_match[0].second) 539 { 540 if (__start == _M_end) 541 { 542 _M_pregex = nullptr; 543 return *this; 544 } 545 else 546 { 547 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 548 _M_flags 549 | regex_constants::match_not_null 550 | regex_constants::match_continuous)) 551 { 552 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); 553 auto& __prefix = _M_match._M_prefix(); 554 __prefix.first = __prefix_first; 555 __prefix.matched = __prefix.first != __prefix.second; 556 // [28.12.1.4.5] 557 _M_match._M_begin = _M_begin; 558 return *this; 559 } 560 else 561 ++__start; 562 } 563 } 564 _M_flags |= regex_constants::match_prev_avail; 565 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 566 { 567 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); 568 auto& __prefix = _M_match._M_prefix(); 569 __prefix.first = __prefix_first; 570 __prefix.matched = __prefix.first != __prefix.second; 571 // [28.12.1.4.5] 572 _M_match._M_begin = _M_begin; 573 } 574 else 575 _M_pregex = nullptr; 576 } 577 return *this; 578 } 579 580 template<typename _Bi_iter, 581 typename _Ch_type, 582 typename _Rx_traits> 583 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 584 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator =(const regex_token_iterator & __rhs)585 operator=(const regex_token_iterator& __rhs) 586 { 587 _M_position = __rhs._M_position; 588 _M_subs = __rhs._M_subs; 589 _M_n = __rhs._M_n; 590 _M_suffix = __rhs._M_suffix; 591 _M_has_m1 = __rhs._M_has_m1; 592 _M_normalize_result(); 593 return *this; 594 } 595 596 template<typename _Bi_iter, 597 typename _Ch_type, 598 typename _Rx_traits> 599 bool 600 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_token_iterator & __rhs) const601 operator==(const regex_token_iterator& __rhs) const 602 { 603 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 604 return true; 605 if (_M_suffix.matched && __rhs._M_suffix.matched 606 && _M_suffix == __rhs._M_suffix) 607 return true; 608 if (_M_end_of_seq() || _M_suffix.matched 609 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 610 return false; 611 return _M_position == __rhs._M_position 612 && _M_n == __rhs._M_n 613 && _M_subs == __rhs._M_subs; 614 } 615 616 template<typename _Bi_iter, 617 typename _Ch_type, 618 typename _Rx_traits> 619 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 620 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()621 operator++() 622 { 623 _Position __prev = _M_position; 624 if (_M_suffix.matched) 625 *this = regex_token_iterator(); 626 else if (_M_n + 1 < _M_subs.size()) 627 { 628 _M_n++; 629 _M_result = &_M_current_match(); 630 } 631 else 632 { 633 _M_n = 0; 634 ++_M_position; 635 if (_M_position != _Position()) 636 _M_result = &_M_current_match(); 637 else if (_M_has_m1 && __prev->suffix().length() != 0) 638 { 639 _M_suffix.matched = true; 640 _M_suffix.first = __prev->suffix().first; 641 _M_suffix.second = __prev->suffix().second; 642 _M_result = &_M_suffix; 643 } 644 else 645 *this = regex_token_iterator(); 646 } 647 return *this; 648 } 649 650 template<typename _Bi_iter, 651 typename _Ch_type, 652 typename _Rx_traits> 653 void 654 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: _M_init(_Bi_iter __a,_Bi_iter __b)655 _M_init(_Bi_iter __a, _Bi_iter __b) 656 { 657 _M_has_m1 = false; 658 for (auto __it : _M_subs) 659 if (__it == -1) 660 { 661 _M_has_m1 = true; 662 break; 663 } 664 if (_M_position != _Position()) 665 _M_result = &_M_current_match(); 666 else if (_M_has_m1) 667 { 668 _M_suffix.matched = true; 669 _M_suffix.first = __a; 670 _M_suffix.second = __b; 671 _M_result = &_M_suffix; 672 } 673 else 674 _M_result = nullptr; 675 } 676 677 _GLIBCXX_END_NAMESPACE_VERSION 678 } // namespace 679 680