1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2013-2014 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex.tcc 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 // See below __regex_algo_impl to get what this is talking about. The default 32 // value 1 indicated a conservative optimization without giving up worst case 33 // performance. 34 #ifndef _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 35 #define _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 1 36 #endif 37 38 namespace std _GLIBCXX_VISIBILITY(default) 39 { 40 namespace __detail 41 { 42 _GLIBCXX_BEGIN_NAMESPACE_VERSION 43 44 // Result of merging regex_match and regex_search. 45 // 46 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 47 // the other one if possible, for test purpose). 48 // 49 // That __match_mode is true means regex_match, else regex_search. 50 template<typename _BiIter, typename _Alloc, 51 typename _CharT, typename _TraitsT, 52 _RegexExecutorPolicy __policy, 53 bool __match_mode> 54 bool __regex_algo_impl(_BiIter __s,_BiIter __e,match_results<_BiIter,_Alloc> & __m,const basic_regex<_CharT,_TraitsT> & __re,regex_constants::match_flag_type __flags)55 __regex_algo_impl(_BiIter __s, 56 _BiIter __e, 57 match_results<_BiIter, _Alloc>& __m, 58 const basic_regex<_CharT, _TraitsT>& __re, 59 regex_constants::match_flag_type __flags) 60 { 61 if (__re._M_automaton == nullptr) 62 return false; 63 64 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; 65 __m._M_begin = __s; 66 __res.resize(__re._M_automaton->_M_sub_count() + 2); 67 for (auto& __it : __res) 68 __it.matched = false; 69 70 // This function decide which executor to use under given circumstances. 71 // The _S_auto policy now is the following: if a NFA has no 72 // back-references and has more than _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT 73 // quantifiers (*, +, ?), the BFS executor will be used, other wise 74 // DFS executor. This is because DFS executor has a exponential upper 75 // bound, but better best-case performace. Meanwhile, BFS executor can 76 // effectively prevent from exponential-long time matching (which must 77 // contains many quantifiers), but it's slower in average. 78 // 79 // For simple regex, BFS executor could be 2 or more times slower than 80 // DFS executor. 81 // 82 // Of course, BFS executor cannot handle back-references. 83 bool __ret; 84 if (!__re._M_automaton->_M_has_backref 85 && (__policy == _RegexExecutorPolicy::_S_alternate 86 || __re._M_automaton->_M_quant_count 87 > _GLIBCXX_REGEX_DFS_QUANTIFIERS_LIMIT)) 88 { 89 _Executor<_BiIter, _Alloc, _TraitsT, false> 90 __executor(__s, __e, __m, __re, __flags); 91 if (__match_mode) 92 __ret = __executor._M_match(); 93 else 94 __ret = __executor._M_search(); 95 } 96 else 97 { 98 _Executor<_BiIter, _Alloc, _TraitsT, true> 99 __executor(__s, __e, __m, __re, __flags); 100 if (__match_mode) 101 __ret = __executor._M_match(); 102 else 103 __ret = __executor._M_search(); 104 } 105 if (__ret) 106 { 107 for (auto __it : __res) 108 if (!__it.matched) 109 __it.first = __it.second = __e; 110 auto& __pre = __res[__res.size()-2]; 111 auto& __suf = __res[__res.size()-1]; 112 if (__match_mode) 113 { 114 __pre.matched = false; 115 __pre.first = __s; 116 __pre.second = __s; 117 __suf.matched = false; 118 __suf.first = __e; 119 __suf.second = __e; 120 } 121 else 122 { 123 __pre.first = __s; 124 __pre.second = __res[0].first; 125 __pre.matched = (__pre.first != __pre.second); 126 __suf.first = __res[0].second; 127 __suf.second = __e; 128 __suf.matched = (__suf.first != __suf.second); 129 } 130 } 131 return __ret; 132 } 133 134 _GLIBCXX_END_NAMESPACE_VERSION 135 } 136 137 _GLIBCXX_BEGIN_NAMESPACE_VERSION 138 139 template<typename _Ch_type> 140 template<typename _Fwd_iter> 141 typename regex_traits<_Ch_type>::string_type 142 regex_traits<_Ch_type>:: lookup_collatename(_Fwd_iter __first,_Fwd_iter __last) const143 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 144 { 145 typedef std::ctype<char_type> __ctype_type; 146 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 147 148 static const char* __collatenames[] = 149 { 150 "NUL", 151 "SOH", 152 "STX", 153 "ETX", 154 "EOT", 155 "ENQ", 156 "ACK", 157 "alert", 158 "backspace", 159 "tab", 160 "newline", 161 "vertical-tab", 162 "form-feed", 163 "carriage-return", 164 "SO", 165 "SI", 166 "DLE", 167 "DC1", 168 "DC2", 169 "DC3", 170 "DC4", 171 "NAK", 172 "SYN", 173 "ETB", 174 "CAN", 175 "EM", 176 "SUB", 177 "ESC", 178 "IS4", 179 "IS3", 180 "IS2", 181 "IS1", 182 "space", 183 "exclamation-mark", 184 "quotation-mark", 185 "number-sign", 186 "dollar-sign", 187 "percent-sign", 188 "ampersand", 189 "apostrophe", 190 "left-parenthesis", 191 "right-parenthesis", 192 "asterisk", 193 "plus-sign", 194 "comma", 195 "hyphen", 196 "period", 197 "slash", 198 "zero", 199 "one", 200 "two", 201 "three", 202 "four", 203 "five", 204 "six", 205 "seven", 206 "eight", 207 "nine", 208 "colon", 209 "semicolon", 210 "less-than-sign", 211 "equals-sign", 212 "greater-than-sign", 213 "question-mark", 214 "commercial-at", 215 "A", 216 "B", 217 "C", 218 "D", 219 "E", 220 "F", 221 "G", 222 "H", 223 "I", 224 "J", 225 "K", 226 "L", 227 "M", 228 "N", 229 "O", 230 "P", 231 "Q", 232 "R", 233 "S", 234 "T", 235 "U", 236 "V", 237 "W", 238 "X", 239 "Y", 240 "Z", 241 "left-square-bracket", 242 "backslash", 243 "right-square-bracket", 244 "circumflex", 245 "underscore", 246 "grave-accent", 247 "a", 248 "b", 249 "c", 250 "d", 251 "e", 252 "f", 253 "g", 254 "h", 255 "i", 256 "j", 257 "k", 258 "l", 259 "m", 260 "n", 261 "o", 262 "p", 263 "q", 264 "r", 265 "s", 266 "t", 267 "u", 268 "v", 269 "w", 270 "x", 271 "y", 272 "z", 273 "left-curly-bracket", 274 "vertical-line", 275 "right-curly-bracket", 276 "tilde", 277 "DEL", 278 }; 279 280 string __s; 281 for (; __first != __last; ++__first) 282 __s += __fctyp.narrow(*__first, 0); 283 284 for (const auto& __it : __collatenames) 285 if (__s == __it) 286 return string_type(1, __fctyp.widen( 287 static_cast<char>(&__it - __collatenames))); 288 289 // TODO Add digraph support: 290 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 291 292 return string_type(); 293 } 294 295 template<typename _Ch_type> 296 template<typename _Fwd_iter> 297 typename regex_traits<_Ch_type>::char_class_type 298 regex_traits<_Ch_type>:: lookup_classname(_Fwd_iter __first,_Fwd_iter __last,bool __icase) const299 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 300 { 301 typedef std::ctype<char_type> __ctype_type; 302 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 303 304 // Mappings from class name to class mask. 305 static const pair<const char*, char_class_type> __classnames[] = 306 { 307 {"d", ctype_base::digit}, 308 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 309 {"s", ctype_base::space}, 310 {"alnum", ctype_base::alnum}, 311 {"alpha", ctype_base::alpha}, 312 {"blank", {0, _RegexMask::_S_blank}}, 313 {"cntrl", ctype_base::cntrl}, 314 {"digit", ctype_base::digit}, 315 {"graph", ctype_base::graph}, 316 {"lower", ctype_base::lower}, 317 {"print", ctype_base::print}, 318 {"punct", ctype_base::punct}, 319 {"space", ctype_base::space}, 320 {"upper", ctype_base::upper}, 321 {"xdigit", ctype_base::xdigit}, 322 }; 323 324 string __s; 325 for (; __first != __last; ++__first) 326 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 327 328 for (const auto& __it : __classnames) 329 if (__s == __it.first) 330 { 331 if (__icase 332 && ((__it.second 333 & (ctype_base::lower | ctype_base::upper)) != 0)) 334 return ctype_base::alpha; 335 return __it.second; 336 } 337 return 0; 338 } 339 340 template<typename _Ch_type> 341 bool 342 regex_traits<_Ch_type>:: isctype(_Ch_type __c,char_class_type __f) const343 isctype(_Ch_type __c, char_class_type __f) const 344 { 345 typedef std::ctype<char_type> __ctype_type; 346 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 347 348 return __fctyp.is(__f._M_base, __c) 349 // [[:w:]] 350 || ((__f._M_extended & _RegexMask::_S_under) 351 && __c == __fctyp.widen('_')) 352 // [[:blank:]] 353 || ((__f._M_extended & _RegexMask::_S_blank) 354 && (__c == __fctyp.widen(' ') 355 || __c == __fctyp.widen('\t'))); 356 } 357 358 template<typename _Ch_type> 359 int 360 regex_traits<_Ch_type>:: value(_Ch_type __ch,int __radix) const361 value(_Ch_type __ch, int __radix) const 362 { 363 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 364 long __v; 365 if (__radix == 8) 366 __is >> std::oct; 367 else if (__radix == 16) 368 __is >> std::hex; 369 __is >> __v; 370 return __is.fail() ? -1 : __v; 371 } 372 373 template<typename _Bi_iter, typename _Alloc> 374 template<typename _Out_iter> 375 _Out_iter match_results<_Bi_iter, _Alloc>:: format(_Out_iter __out,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_first,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_last,match_flag_type __flags) const376 format(_Out_iter __out, 377 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 378 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 379 match_flag_type __flags) const 380 { 381 _GLIBCXX_DEBUG_ASSERT( ready() ); 382 regex_traits<char_type> __traits; 383 typedef std::ctype<char_type> __ctype_type; 384 const __ctype_type& 385 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 386 387 auto __output = [&](size_t __idx) 388 { 389 auto& __sub = _Base_type::operator[](__idx); 390 if (__sub.matched) 391 __out = std::copy(__sub.first, __sub.second, __out); 392 }; 393 394 if (__flags & regex_constants::format_sed) 395 { 396 for (; __fmt_first != __fmt_last;) 397 if (*__fmt_first == '&') 398 { 399 __output(0); 400 ++__fmt_first; 401 } 402 else if (*__fmt_first == '\\') 403 { 404 if (++__fmt_first != __fmt_last 405 && __fctyp.is(__ctype_type::digit, *__fmt_first)) 406 __output(__traits.value(*__fmt_first++, 10)); 407 else 408 *__out++ = '\\'; 409 } 410 else 411 *__out++ = *__fmt_first++; 412 } 413 else 414 { 415 while (1) 416 { 417 auto __next = std::find(__fmt_first, __fmt_last, '$'); 418 if (__next == __fmt_last) 419 break; 420 421 __out = std::copy(__fmt_first, __next, __out); 422 423 auto __eat = [&](char __ch) -> bool 424 { 425 if (*__next == __ch) 426 { 427 ++__next; 428 return true; 429 } 430 return false; 431 }; 432 433 if (++__next == __fmt_last) 434 *__out++ = '$'; 435 else if (__eat('$')) 436 *__out++ = '$'; 437 else if (__eat('&')) 438 __output(0); 439 else if (__eat('`')) 440 __output(_Base_type::size()-2); 441 else if (__eat('\'')) 442 __output(_Base_type::size()-1); 443 else if (__fctyp.is(__ctype_type::digit, *__next)) 444 { 445 long __num = __traits.value(*__next, 10); 446 if (++__next != __fmt_last 447 && __fctyp.is(__ctype_type::digit, *__next)) 448 { 449 __num *= 10; 450 __num += __traits.value(*__next++, 10); 451 } 452 if (0 <= __num && __num < this->size()) 453 __output(__num); 454 } 455 else 456 *__out++ = '$'; 457 __fmt_first = __next; 458 } 459 __out = std::copy(__fmt_first, __fmt_last, __out); 460 } 461 return __out; 462 } 463 464 template<typename _Out_iter, typename _Bi_iter, 465 typename _Rx_traits, typename _Ch_type> 466 _Out_iter 467 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 468 const basic_regex<_Ch_type, _Rx_traits>& __e, 469 const _Ch_type* __fmt, 470 regex_constants::match_flag_type __flags) 471 { 472 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 473 _IterT __i(__first, __last, __e, __flags); 474 _IterT __end; 475 if (__i == __end) 476 { 477 if (!(__flags & regex_constants::format_no_copy)) 478 __out = std::copy(__first, __last, __out); 479 } 480 else 481 { 482 sub_match<_Bi_iter> __last; 483 auto __len = char_traits<_Ch_type>::length(__fmt); 484 for (; __i != __end; ++__i) 485 { 486 if (!(__flags & regex_constants::format_no_copy)) 487 __out = std::copy(__i->prefix().first, __i->prefix().second, 488 __out); 489 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 490 __last = __i->suffix(); 491 if (__flags & regex_constants::format_first_only) 492 break; 493 } 494 if (!(__flags & regex_constants::format_no_copy)) 495 __out = std::copy(__last.first, __last.second, __out); 496 } 497 return __out; 498 } 499 500 template<typename _Bi_iter, 501 typename _Ch_type, 502 typename _Rx_traits> 503 bool 504 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_iterator & __rhs) const505 operator==(const regex_iterator& __rhs) const 506 { 507 return (_M_match.empty() && __rhs._M_match.empty()) 508 || (_M_begin == __rhs._M_begin 509 && _M_end == __rhs._M_end 510 && _M_pregex == __rhs._M_pregex 511 && _M_flags == __rhs._M_flags 512 && _M_match[0] == __rhs._M_match[0]); 513 } 514 515 template<typename _Bi_iter, 516 typename _Ch_type, 517 typename _Rx_traits> 518 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 519 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()520 operator++() 521 { 522 // In all cases in which the call to regex_search returns true, 523 // match.prefix().first shall be equal to the previous value of 524 // match[0].second, and for each index i in the half-open range 525 // [0, match.size()) for which match[i].matched is true, 526 // match[i].position() shall return distance(begin, match[i].first). 527 // [28.12.1.4.5] 528 if (_M_match[0].matched) 529 { 530 auto __start = _M_match[0].second; 531 auto __prefix_first = _M_match[0].second; 532 if (_M_match[0].first == _M_match[0].second) 533 { 534 if (__start == _M_end) 535 { 536 _M_match = value_type(); 537 return *this; 538 } 539 else 540 { 541 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 542 _M_flags 543 | regex_constants::match_not_null 544 | regex_constants::match_continuous)) 545 { 546 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); 547 auto& __prefix = _M_match.at(_M_match.size()); 548 __prefix.first = __prefix_first; 549 __prefix.matched = __prefix.first != __prefix.second; 550 // [28.12.1.4.5] 551 _M_match._M_begin = _M_begin; 552 return *this; 553 } 554 else 555 ++__start; 556 } 557 } 558 _M_flags |= regex_constants::match_prev_avail; 559 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 560 { 561 _GLIBCXX_DEBUG_ASSERT(_M_match[0].matched); 562 auto& __prefix = _M_match.at(_M_match.size()); 563 __prefix.first = __prefix_first; 564 __prefix.matched = __prefix.first != __prefix.second; 565 // [28.12.1.4.5] 566 _M_match._M_begin = _M_begin; 567 } 568 else 569 _M_match = value_type(); 570 } 571 return *this; 572 } 573 574 template<typename _Bi_iter, 575 typename _Ch_type, 576 typename _Rx_traits> 577 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 578 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator =(const regex_token_iterator & __rhs)579 operator=(const regex_token_iterator& __rhs) 580 { 581 _M_position = __rhs._M_position; 582 _M_subs = __rhs._M_subs; 583 _M_n = __rhs._M_n; 584 _M_suffix = __rhs._M_suffix; 585 _M_has_m1 = __rhs._M_has_m1; 586 _M_normalize_result(); 587 return *this; 588 } 589 590 template<typename _Bi_iter, 591 typename _Ch_type, 592 typename _Rx_traits> 593 bool 594 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_token_iterator & __rhs) const595 operator==(const regex_token_iterator& __rhs) const 596 { 597 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 598 return true; 599 if (_M_suffix.matched && __rhs._M_suffix.matched 600 && _M_suffix == __rhs._M_suffix) 601 return true; 602 if (_M_end_of_seq() || _M_suffix.matched 603 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 604 return false; 605 return _M_position == __rhs._M_position 606 && _M_n == __rhs._M_n 607 && _M_subs == __rhs._M_subs; 608 } 609 610 template<typename _Bi_iter, 611 typename _Ch_type, 612 typename _Rx_traits> 613 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 614 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()615 operator++() 616 { 617 _Position __prev = _M_position; 618 if (_M_suffix.matched) 619 *this = regex_token_iterator(); 620 else if (_M_n + 1 < _M_subs.size()) 621 { 622 _M_n++; 623 _M_result = &_M_current_match(); 624 } 625 else 626 { 627 _M_n = 0; 628 ++_M_position; 629 if (_M_position != _Position()) 630 _M_result = &_M_current_match(); 631 else if (_M_has_m1 && __prev->suffix().length() != 0) 632 { 633 _M_suffix.matched = true; 634 _M_suffix.first = __prev->suffix().first; 635 _M_suffix.second = __prev->suffix().second; 636 _M_result = &_M_suffix; 637 } 638 else 639 *this = regex_token_iterator(); 640 } 641 return *this; 642 } 643 644 template<typename _Bi_iter, 645 typename _Ch_type, 646 typename _Rx_traits> 647 void 648 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: _M_init(_Bi_iter __a,_Bi_iter __b)649 _M_init(_Bi_iter __a, _Bi_iter __b) 650 { 651 _M_has_m1 = false; 652 for (auto __it : _M_subs) 653 if (__it == -1) 654 { 655 _M_has_m1 = true; 656 break; 657 } 658 if (_M_position != _Position()) 659 _M_result = &_M_current_match(); 660 else if (_M_has_m1) 661 { 662 _M_suffix.matched = true; 663 _M_suffix.first = __a; 664 _M_suffix.second = __b; 665 _M_result = &_M_suffix; 666 } 667 else 668 _M_result = nullptr; 669 } 670 671 _GLIBCXX_END_NAMESPACE_VERSION 672 } // namespace 673 674