1 // class template regex -*- C++ -*- 2 3 // Copyright (C) 2013-2016 Free Software Foundation, Inc. 4 // 5 // This file is part of the GNU ISO C++ Library. This library is free 6 // software; you can redistribute it and/or modify it under the 7 // terms of the GNU General Public License as published by the 8 // Free Software Foundation; either version 3, or (at your option) 9 // any later version. 10 11 // This library is distributed in the hope that it will be useful, 12 // but WITHOUT ANY WARRANTY; without even the implied warranty of 13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 // GNU General Public License for more details. 15 16 // Under Section 7 of GPL version 3, you are granted additional 17 // permissions described in the GCC Runtime Library Exception, version 18 // 3.1, as published by the Free Software Foundation. 19 20 // You should have received a copy of the GNU General Public License and 21 // a copy of the GCC Runtime Library Exception along with this program; 22 // see the files COPYING3 and COPYING.RUNTIME respectively. If not, see 23 // <http://www.gnu.org/licenses/>. 24 25 /** 26 * @file bits/regex.tcc 27 * This is an internal header file, included by other library headers. 28 * Do not attempt to use it directly. @headername{regex} 29 */ 30 31 namespace std _GLIBCXX_VISIBILITY(default) 32 { 33 namespace __detail 34 { 35 _GLIBCXX_BEGIN_NAMESPACE_VERSION 36 37 // Result of merging regex_match and regex_search. 38 // 39 // __policy now can be _S_auto (auto dispatch) and _S_alternate (use 40 // the other one if possible, for test purpose). 41 // 42 // That __match_mode is true means regex_match, else regex_search. 43 template<typename _BiIter, typename _Alloc, 44 typename _CharT, typename _TraitsT, 45 _RegexExecutorPolicy __policy, 46 bool __match_mode> 47 bool __regex_algo_impl(_BiIter __s,_BiIter __e,match_results<_BiIter,_Alloc> & __m,const basic_regex<_CharT,_TraitsT> & __re,regex_constants::match_flag_type __flags)48 __regex_algo_impl(_BiIter __s, 49 _BiIter __e, 50 match_results<_BiIter, _Alloc>& __m, 51 const basic_regex<_CharT, _TraitsT>& __re, 52 regex_constants::match_flag_type __flags) 53 { 54 if (__re._M_automaton == nullptr) 55 return false; 56 57 typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m; 58 __m._M_begin = __s; 59 __m._M_resize(__re._M_automaton->_M_sub_count()); 60 for (auto& __it : __res) 61 __it.matched = false; 62 63 bool __ret; 64 if ((__re.flags() & regex_constants::__polynomial) 65 || (__policy == _RegexExecutorPolicy::_S_alternate 66 && !__re._M_automaton->_M_has_backref)) 67 { 68 _Executor<_BiIter, _Alloc, _TraitsT, false> 69 __executor(__s, __e, __m, __re, __flags); 70 if (__match_mode) 71 __ret = __executor._M_match(); 72 else 73 __ret = __executor._M_search(); 74 } 75 else 76 { 77 _Executor<_BiIter, _Alloc, _TraitsT, true> 78 __executor(__s, __e, __m, __re, __flags); 79 if (__match_mode) 80 __ret = __executor._M_match(); 81 else 82 __ret = __executor._M_search(); 83 } 84 if (__ret) 85 { 86 for (auto& __it : __res) 87 if (!__it.matched) 88 __it.first = __it.second = __e; 89 auto& __pre = __m._M_prefix(); 90 auto& __suf = __m._M_suffix(); 91 if (__match_mode) 92 { 93 __pre.matched = false; 94 __pre.first = __s; 95 __pre.second = __s; 96 __suf.matched = false; 97 __suf.first = __e; 98 __suf.second = __e; 99 } 100 else 101 { 102 __pre.first = __s; 103 __pre.second = __res[0].first; 104 __pre.matched = (__pre.first != __pre.second); 105 __suf.first = __res[0].second; 106 __suf.second = __e; 107 __suf.matched = (__suf.first != __suf.second); 108 } 109 } 110 else 111 { 112 __m._M_resize(0); 113 for (auto& __it : __res) 114 { 115 __it.matched = false; 116 __it.first = __it.second = __e; 117 } 118 } 119 return __ret; 120 } 121 122 _GLIBCXX_END_NAMESPACE_VERSION 123 } 124 125 _GLIBCXX_BEGIN_NAMESPACE_VERSION 126 127 template<typename _Ch_type> 128 template<typename _Fwd_iter> 129 typename regex_traits<_Ch_type>::string_type 130 regex_traits<_Ch_type>:: lookup_collatename(_Fwd_iter __first,_Fwd_iter __last) const131 lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const 132 { 133 typedef std::ctype<char_type> __ctype_type; 134 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 135 136 static const char* __collatenames[] = 137 { 138 "NUL", 139 "SOH", 140 "STX", 141 "ETX", 142 "EOT", 143 "ENQ", 144 "ACK", 145 "alert", 146 "backspace", 147 "tab", 148 "newline", 149 "vertical-tab", 150 "form-feed", 151 "carriage-return", 152 "SO", 153 "SI", 154 "DLE", 155 "DC1", 156 "DC2", 157 "DC3", 158 "DC4", 159 "NAK", 160 "SYN", 161 "ETB", 162 "CAN", 163 "EM", 164 "SUB", 165 "ESC", 166 "IS4", 167 "IS3", 168 "IS2", 169 "IS1", 170 "space", 171 "exclamation-mark", 172 "quotation-mark", 173 "number-sign", 174 "dollar-sign", 175 "percent-sign", 176 "ampersand", 177 "apostrophe", 178 "left-parenthesis", 179 "right-parenthesis", 180 "asterisk", 181 "plus-sign", 182 "comma", 183 "hyphen", 184 "period", 185 "slash", 186 "zero", 187 "one", 188 "two", 189 "three", 190 "four", 191 "five", 192 "six", 193 "seven", 194 "eight", 195 "nine", 196 "colon", 197 "semicolon", 198 "less-than-sign", 199 "equals-sign", 200 "greater-than-sign", 201 "question-mark", 202 "commercial-at", 203 "A", 204 "B", 205 "C", 206 "D", 207 "E", 208 "F", 209 "G", 210 "H", 211 "I", 212 "J", 213 "K", 214 "L", 215 "M", 216 "N", 217 "O", 218 "P", 219 "Q", 220 "R", 221 "S", 222 "T", 223 "U", 224 "V", 225 "W", 226 "X", 227 "Y", 228 "Z", 229 "left-square-bracket", 230 "backslash", 231 "right-square-bracket", 232 "circumflex", 233 "underscore", 234 "grave-accent", 235 "a", 236 "b", 237 "c", 238 "d", 239 "e", 240 "f", 241 "g", 242 "h", 243 "i", 244 "j", 245 "k", 246 "l", 247 "m", 248 "n", 249 "o", 250 "p", 251 "q", 252 "r", 253 "s", 254 "t", 255 "u", 256 "v", 257 "w", 258 "x", 259 "y", 260 "z", 261 "left-curly-bracket", 262 "vertical-line", 263 "right-curly-bracket", 264 "tilde", 265 "DEL", 266 }; 267 268 string __s; 269 for (; __first != __last; ++__first) 270 __s += __fctyp.narrow(*__first, 0); 271 272 for (const auto& __it : __collatenames) 273 if (__s == __it) 274 return string_type(1, __fctyp.widen( 275 static_cast<char>(&__it - __collatenames))); 276 277 // TODO Add digraph support: 278 // http://boost.sourceforge.net/libs/regex/doc/collating_names.html 279 280 return string_type(); 281 } 282 283 template<typename _Ch_type> 284 template<typename _Fwd_iter> 285 typename regex_traits<_Ch_type>::char_class_type 286 regex_traits<_Ch_type>:: lookup_classname(_Fwd_iter __first,_Fwd_iter __last,bool __icase) const287 lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const 288 { 289 typedef std::ctype<char_type> __ctype_type; 290 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 291 292 // Mappings from class name to class mask. 293 static const pair<const char*, char_class_type> __classnames[] = 294 { 295 {"d", ctype_base::digit}, 296 {"w", {ctype_base::alnum, _RegexMask::_S_under}}, 297 {"s", ctype_base::space}, 298 {"alnum", ctype_base::alnum}, 299 {"alpha", ctype_base::alpha}, 300 {"blank", ctype_base::blank}, 301 {"cntrl", ctype_base::cntrl}, 302 {"digit", ctype_base::digit}, 303 {"graph", ctype_base::graph}, 304 {"lower", ctype_base::lower}, 305 {"print", ctype_base::print}, 306 {"punct", ctype_base::punct}, 307 {"space", ctype_base::space}, 308 {"upper", ctype_base::upper}, 309 {"xdigit", ctype_base::xdigit}, 310 }; 311 312 string __s; 313 for (; __first != __last; ++__first) 314 __s += __fctyp.narrow(__fctyp.tolower(*__first), 0); 315 316 for (const auto& __it : __classnames) 317 if (__s == __it.first) 318 { 319 if (__icase 320 && ((__it.second 321 & (ctype_base::lower | ctype_base::upper)) != 0)) 322 return ctype_base::alpha; 323 return __it.second; 324 } 325 return 0; 326 } 327 328 template<typename _Ch_type> 329 bool 330 regex_traits<_Ch_type>:: isctype(_Ch_type __c,char_class_type __f) const331 isctype(_Ch_type __c, char_class_type __f) const 332 { 333 typedef std::ctype<char_type> __ctype_type; 334 const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale)); 335 336 return __fctyp.is(__f._M_base, __c) 337 // [[:w:]] 338 || ((__f._M_extended & _RegexMask::_S_under) 339 && __c == __fctyp.widen('_')); 340 } 341 342 template<typename _Ch_type> 343 int 344 regex_traits<_Ch_type>:: value(_Ch_type __ch,int __radix) const345 value(_Ch_type __ch, int __radix) const 346 { 347 std::basic_istringstream<char_type> __is(string_type(1, __ch)); 348 long __v; 349 if (__radix == 8) 350 __is >> std::oct; 351 else if (__radix == 16) 352 __is >> std::hex; 353 __is >> __v; 354 return __is.fail() ? -1 : __v; 355 } 356 357 template<typename _Bi_iter, typename _Alloc> 358 template<typename _Out_iter> 359 _Out_iter match_results<_Bi_iter, _Alloc>:: format(_Out_iter __out,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_first,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_last,match_flag_type __flags) const360 format(_Out_iter __out, 361 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first, 362 const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last, 363 match_flag_type __flags) const 364 { 365 __glibcxx_assert( ready() ); 366 regex_traits<char_type> __traits; 367 typedef std::ctype<char_type> __ctype_type; 368 const __ctype_type& 369 __fctyp(use_facet<__ctype_type>(__traits.getloc())); 370 371 auto __output = [&](size_t __idx) 372 { 373 auto& __sub = (*this)[__idx]; 374 if (__sub.matched) 375 __out = std::copy(__sub.first, __sub.second, __out); 376 }; 377 378 if (__flags & regex_constants::format_sed) 379 { 380 for (; __fmt_first != __fmt_last;) 381 if (*__fmt_first == '&') 382 { 383 __output(0); 384 ++__fmt_first; 385 } 386 else if (*__fmt_first == '\\') 387 { 388 if (++__fmt_first != __fmt_last 389 && __fctyp.is(__ctype_type::digit, *__fmt_first)) 390 __output(__traits.value(*__fmt_first++, 10)); 391 else 392 *__out++ = '\\'; 393 } 394 else 395 *__out++ = *__fmt_first++; 396 } 397 else 398 { 399 while (1) 400 { 401 auto __next = std::find(__fmt_first, __fmt_last, '$'); 402 if (__next == __fmt_last) 403 break; 404 405 __out = std::copy(__fmt_first, __next, __out); 406 407 auto __eat = [&](char __ch) -> bool 408 { 409 if (*__next == __ch) 410 { 411 ++__next; 412 return true; 413 } 414 return false; 415 }; 416 417 if (++__next == __fmt_last) 418 *__out++ = '$'; 419 else if (__eat('$')) 420 *__out++ = '$'; 421 else if (__eat('&')) 422 __output(0); 423 else if (__eat('`')) 424 { 425 auto& __sub = _M_prefix(); 426 if (__sub.matched) 427 __out = std::copy(__sub.first, __sub.second, __out); 428 } 429 else if (__eat('\'')) 430 { 431 auto& __sub = _M_suffix(); 432 if (__sub.matched) 433 __out = std::copy(__sub.first, __sub.second, __out); 434 } 435 else if (__fctyp.is(__ctype_type::digit, *__next)) 436 { 437 long __num = __traits.value(*__next, 10); 438 if (++__next != __fmt_last 439 && __fctyp.is(__ctype_type::digit, *__next)) 440 { 441 __num *= 10; 442 __num += __traits.value(*__next++, 10); 443 } 444 if (0 <= __num && __num < this->size()) 445 __output(__num); 446 } 447 else 448 *__out++ = '$'; 449 __fmt_first = __next; 450 } 451 __out = std::copy(__fmt_first, __fmt_last, __out); 452 } 453 return __out; 454 } 455 456 template<typename _Out_iter, typename _Bi_iter, 457 typename _Rx_traits, typename _Ch_type> 458 _Out_iter 459 regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last, 460 const basic_regex<_Ch_type, _Rx_traits>& __e, 461 const _Ch_type* __fmt, 462 regex_constants::match_flag_type __flags) 463 { 464 typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT; 465 _IterT __i(__first, __last, __e, __flags); 466 _IterT __end; 467 if (__i == __end) 468 { 469 if (!(__flags & regex_constants::format_no_copy)) 470 __out = std::copy(__first, __last, __out); 471 } 472 else 473 { 474 sub_match<_Bi_iter> __last; 475 auto __len = char_traits<_Ch_type>::length(__fmt); 476 for (; __i != __end; ++__i) 477 { 478 if (!(__flags & regex_constants::format_no_copy)) 479 __out = std::copy(__i->prefix().first, __i->prefix().second, 480 __out); 481 __out = __i->format(__out, __fmt, __fmt + __len, __flags); 482 __last = __i->suffix(); 483 if (__flags & regex_constants::format_first_only) 484 break; 485 } 486 if (!(__flags & regex_constants::format_no_copy)) 487 __out = std::copy(__last.first, __last.second, __out); 488 } 489 return __out; 490 } 491 492 template<typename _Bi_iter, 493 typename _Ch_type, 494 typename _Rx_traits> 495 bool 496 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_iterator & __rhs) const497 operator==(const regex_iterator& __rhs) const 498 { 499 if (_M_pregex == nullptr && __rhs._M_pregex == nullptr) 500 return true; 501 return _M_pregex == __rhs._M_pregex 502 && _M_begin == __rhs._M_begin 503 && _M_end == __rhs._M_end 504 && _M_flags == __rhs._M_flags 505 && _M_match[0] == __rhs._M_match[0]; 506 } 507 508 template<typename _Bi_iter, 509 typename _Ch_type, 510 typename _Rx_traits> 511 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 512 regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()513 operator++() 514 { 515 // In all cases in which the call to regex_search returns true, 516 // match.prefix().first shall be equal to the previous value of 517 // match[0].second, and for each index i in the half-open range 518 // [0, match.size()) for which match[i].matched is true, 519 // match[i].position() shall return distance(begin, match[i].first). 520 // [28.12.1.4.5] 521 if (_M_match[0].matched) 522 { 523 auto __start = _M_match[0].second; 524 auto __prefix_first = _M_match[0].second; 525 if (_M_match[0].first == _M_match[0].second) 526 { 527 if (__start == _M_end) 528 { 529 _M_pregex = nullptr; 530 return *this; 531 } 532 else 533 { 534 if (regex_search(__start, _M_end, _M_match, *_M_pregex, 535 _M_flags 536 | regex_constants::match_not_null 537 | regex_constants::match_continuous)) 538 { 539 __glibcxx_assert(_M_match[0].matched); 540 auto& __prefix = _M_match._M_prefix(); 541 __prefix.first = __prefix_first; 542 __prefix.matched = __prefix.first != __prefix.second; 543 // [28.12.1.4.5] 544 _M_match._M_begin = _M_begin; 545 return *this; 546 } 547 else 548 ++__start; 549 } 550 } 551 _M_flags |= regex_constants::match_prev_avail; 552 if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags)) 553 { 554 __glibcxx_assert(_M_match[0].matched); 555 auto& __prefix = _M_match._M_prefix(); 556 __prefix.first = __prefix_first; 557 __prefix.matched = __prefix.first != __prefix.second; 558 // [28.12.1.4.5] 559 _M_match._M_begin = _M_begin; 560 } 561 else 562 _M_pregex = nullptr; 563 } 564 return *this; 565 } 566 567 template<typename _Bi_iter, 568 typename _Ch_type, 569 typename _Rx_traits> 570 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 571 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator =(const regex_token_iterator & __rhs)572 operator=(const regex_token_iterator& __rhs) 573 { 574 _M_position = __rhs._M_position; 575 _M_subs = __rhs._M_subs; 576 _M_n = __rhs._M_n; 577 _M_suffix = __rhs._M_suffix; 578 _M_has_m1 = __rhs._M_has_m1; 579 _M_normalize_result(); 580 return *this; 581 } 582 583 template<typename _Bi_iter, 584 typename _Ch_type, 585 typename _Rx_traits> 586 bool 587 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ==(const regex_token_iterator & __rhs) const588 operator==(const regex_token_iterator& __rhs) const 589 { 590 if (_M_end_of_seq() && __rhs._M_end_of_seq()) 591 return true; 592 if (_M_suffix.matched && __rhs._M_suffix.matched 593 && _M_suffix == __rhs._M_suffix) 594 return true; 595 if (_M_end_of_seq() || _M_suffix.matched 596 || __rhs._M_end_of_seq() || __rhs._M_suffix.matched) 597 return false; 598 return _M_position == __rhs._M_position 599 && _M_n == __rhs._M_n 600 && _M_subs == __rhs._M_subs; 601 } 602 603 template<typename _Bi_iter, 604 typename _Ch_type, 605 typename _Rx_traits> 606 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>& 607 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: operator ++()608 operator++() 609 { 610 _Position __prev = _M_position; 611 if (_M_suffix.matched) 612 *this = regex_token_iterator(); 613 else if (_M_n + 1 < _M_subs.size()) 614 { 615 _M_n++; 616 _M_result = &_M_current_match(); 617 } 618 else 619 { 620 _M_n = 0; 621 ++_M_position; 622 if (_M_position != _Position()) 623 _M_result = &_M_current_match(); 624 else if (_M_has_m1 && __prev->suffix().length() != 0) 625 { 626 _M_suffix.matched = true; 627 _M_suffix.first = __prev->suffix().first; 628 _M_suffix.second = __prev->suffix().second; 629 _M_result = &_M_suffix; 630 } 631 else 632 *this = regex_token_iterator(); 633 } 634 return *this; 635 } 636 637 template<typename _Bi_iter, 638 typename _Ch_type, 639 typename _Rx_traits> 640 void 641 regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>:: _M_init(_Bi_iter __a,_Bi_iter __b)642 _M_init(_Bi_iter __a, _Bi_iter __b) 643 { 644 _M_has_m1 = false; 645 for (auto __it : _M_subs) 646 if (__it == -1) 647 { 648 _M_has_m1 = true; 649 break; 650 } 651 if (_M_position != _Position()) 652 _M_result = &_M_current_match(); 653 else if (_M_has_m1) 654 { 655 _M_suffix.matched = true; 656 _M_suffix.first = __a; 657 _M_suffix.second = __b; 658 _M_result = &_M_suffix; 659 } 660 else 661 _M_result = nullptr; 662 } 663 664 _GLIBCXX_END_NAMESPACE_VERSION 665 } // namespace 666 667