1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2020 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex.tcc
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 _GLIBCXX_BEGIN_NAMESPACE_VERSION
34 
35 namespace __detail
36 {
37   /// @cond undocumented
38 
39   // Result of merging regex_match and regex_search.
40   //
41   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
42   // the other one if possible, for test purpose).
43   //
44   // That __match_mode is true means regex_match, else regex_search.
45   template<typename _BiIter, typename _Alloc,
46 	   typename _CharT, typename _TraitsT,
47 	   _RegexExecutorPolicy __policy,
48 	   bool __match_mode>
49     bool
__regex_algo_impl(_BiIter __s,_BiIter __e,match_results<_BiIter,_Alloc> & __m,const basic_regex<_CharT,_TraitsT> & __re,regex_constants::match_flag_type __flags)50     __regex_algo_impl(_BiIter                              __s,
51 		      _BiIter                              __e,
52 		      match_results<_BiIter, _Alloc>&      __m,
53 		      const basic_regex<_CharT, _TraitsT>& __re,
54 		      regex_constants::match_flag_type     __flags)
55     {
56       if (__re._M_automaton == nullptr)
57 	return false;
58 
59       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
60       __m._M_begin = __s;
61       __m._M_resize(__re._M_automaton->_M_sub_count());
62 
63       bool __ret;
64       if ((__re.flags() & regex_constants::__polynomial)
65 	  || (__policy == _RegexExecutorPolicy::_S_alternate
66 	      && !__re._M_automaton->_M_has_backref))
67 	{
68 	  _Executor<_BiIter, _Alloc, _TraitsT, false>
69 	    __executor(__s, __e, __m, __re, __flags);
70 	  if (__match_mode)
71 	    __ret = __executor._M_match();
72 	  else
73 	    __ret = __executor._M_search();
74 	}
75       else
76 	{
77 	  _Executor<_BiIter, _Alloc, _TraitsT, true>
78 	    __executor(__s, __e, __m, __re, __flags);
79 	  if (__match_mode)
80 	    __ret = __executor._M_match();
81 	  else
82 	    __ret = __executor._M_search();
83 	}
84       if (__ret)
85 	{
86 	  for (auto& __it : __res)
87 	    if (!__it.matched)
88 	      __it.first = __it.second = __e;
89 	  auto& __pre = __m._M_prefix();
90 	  auto& __suf = __m._M_suffix();
91 	  if (__match_mode)
92 	    {
93 	      __pre.matched = false;
94 	      __pre.first = __s;
95 	      __pre.second = __s;
96 	      __suf.matched = false;
97 	      __suf.first = __e;
98 	      __suf.second = __e;
99 	    }
100 	  else
101 	    {
102 	      __pre.first = __s;
103 	      __pre.second = __res[0].first;
104 	      __pre.matched = (__pre.first != __pre.second);
105 	      __suf.first = __res[0].second;
106 	      __suf.second = __e;
107 	      __suf.matched = (__suf.first != __suf.second);
108 	    }
109 	}
110       else
111 	{
112 	  __m._M_establish_failed_match(__e);
113 	}
114       return __ret;
115     }
116   /// @endcond
117 } // namespace __detail
118 
119   /// @cond
120 
121   template<typename _Ch_type>
122   template<typename _Fwd_iter>
123     typename regex_traits<_Ch_type>::string_type
124     regex_traits<_Ch_type>::
lookup_collatename(_Fwd_iter __first,_Fwd_iter __last) const125     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
126     {
127       typedef std::ctype<char_type> __ctype_type;
128       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
129 
130       static const char* __collatenames[] =
131 	{
132 	  "NUL",
133 	  "SOH",
134 	  "STX",
135 	  "ETX",
136 	  "EOT",
137 	  "ENQ",
138 	  "ACK",
139 	  "alert",
140 	  "backspace",
141 	  "tab",
142 	  "newline",
143 	  "vertical-tab",
144 	  "form-feed",
145 	  "carriage-return",
146 	  "SO",
147 	  "SI",
148 	  "DLE",
149 	  "DC1",
150 	  "DC2",
151 	  "DC3",
152 	  "DC4",
153 	  "NAK",
154 	  "SYN",
155 	  "ETB",
156 	  "CAN",
157 	  "EM",
158 	  "SUB",
159 	  "ESC",
160 	  "IS4",
161 	  "IS3",
162 	  "IS2",
163 	  "IS1",
164 	  "space",
165 	  "exclamation-mark",
166 	  "quotation-mark",
167 	  "number-sign",
168 	  "dollar-sign",
169 	  "percent-sign",
170 	  "ampersand",
171 	  "apostrophe",
172 	  "left-parenthesis",
173 	  "right-parenthesis",
174 	  "asterisk",
175 	  "plus-sign",
176 	  "comma",
177 	  "hyphen",
178 	  "period",
179 	  "slash",
180 	  "zero",
181 	  "one",
182 	  "two",
183 	  "three",
184 	  "four",
185 	  "five",
186 	  "six",
187 	  "seven",
188 	  "eight",
189 	  "nine",
190 	  "colon",
191 	  "semicolon",
192 	  "less-than-sign",
193 	  "equals-sign",
194 	  "greater-than-sign",
195 	  "question-mark",
196 	  "commercial-at",
197 	  "A",
198 	  "B",
199 	  "C",
200 	  "D",
201 	  "E",
202 	  "F",
203 	  "G",
204 	  "H",
205 	  "I",
206 	  "J",
207 	  "K",
208 	  "L",
209 	  "M",
210 	  "N",
211 	  "O",
212 	  "P",
213 	  "Q",
214 	  "R",
215 	  "S",
216 	  "T",
217 	  "U",
218 	  "V",
219 	  "W",
220 	  "X",
221 	  "Y",
222 	  "Z",
223 	  "left-square-bracket",
224 	  "backslash",
225 	  "right-square-bracket",
226 	  "circumflex",
227 	  "underscore",
228 	  "grave-accent",
229 	  "a",
230 	  "b",
231 	  "c",
232 	  "d",
233 	  "e",
234 	  "f",
235 	  "g",
236 	  "h",
237 	  "i",
238 	  "j",
239 	  "k",
240 	  "l",
241 	  "m",
242 	  "n",
243 	  "o",
244 	  "p",
245 	  "q",
246 	  "r",
247 	  "s",
248 	  "t",
249 	  "u",
250 	  "v",
251 	  "w",
252 	  "x",
253 	  "y",
254 	  "z",
255 	  "left-curly-bracket",
256 	  "vertical-line",
257 	  "right-curly-bracket",
258 	  "tilde",
259 	  "DEL",
260 	};
261 
262       string __s;
263       for (; __first != __last; ++__first)
264 	__s += __fctyp.narrow(*__first, 0);
265 
266       for (const auto& __it : __collatenames)
267 	if (__s == __it)
268 	  return string_type(1, __fctyp.widen(
269 	    static_cast<char>(&__it - __collatenames)));
270 
271       // TODO Add digraph support:
272       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
273 
274       return string_type();
275     }
276 
277   template<typename _Ch_type>
278   template<typename _Fwd_iter>
279     typename regex_traits<_Ch_type>::char_class_type
280     regex_traits<_Ch_type>::
lookup_classname(_Fwd_iter __first,_Fwd_iter __last,bool __icase) const281     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
282     {
283       typedef std::ctype<char_type> __ctype_type;
284       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
285 
286       // Mappings from class name to class mask.
287       static const pair<const char*, char_class_type> __classnames[] =
288       {
289 	{"d", ctype_base::digit},
290 	{"w", {ctype_base::alnum, _RegexMask::_S_under}},
291 	{"s", ctype_base::space},
292 	{"alnum", ctype_base::alnum},
293 	{"alpha", ctype_base::alpha},
294 	{"blank", ctype_base::blank},
295 	{"cntrl", ctype_base::cntrl},
296 	{"digit", ctype_base::digit},
297 	{"graph", ctype_base::graph},
298 	{"lower", ctype_base::lower},
299 	{"print", ctype_base::print},
300 	{"punct", ctype_base::punct},
301 	{"space", ctype_base::space},
302 	{"upper", ctype_base::upper},
303 	{"xdigit", ctype_base::xdigit},
304       };
305 
306       string __s;
307       for (; __first != __last; ++__first)
308 	__s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
309 
310       for (const auto& __it : __classnames)
311 	if (__s == __it.first)
312 	  {
313 	    if (__icase
314 		&& ((__it.second
315 		     & (ctype_base::lower | ctype_base::upper)) != 0))
316 	      return ctype_base::alpha;
317 	    return __it.second;
318 	  }
319       return 0;
320     }
321 
322   template<typename _Ch_type>
323     bool
324     regex_traits<_Ch_type>::
isctype(_Ch_type __c,char_class_type __f) const325     isctype(_Ch_type __c, char_class_type __f) const
326     {
327       typedef std::ctype<char_type> __ctype_type;
328       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
329 
330       return __fctyp.is(__f._M_base, __c)
331 	// [[:w:]]
332 	|| ((__f._M_extended & _RegexMask::_S_under)
333 	    && __c == __fctyp.widen('_'));
334     }
335 
336   template<typename _Ch_type>
337     int
338     regex_traits<_Ch_type>::
value(_Ch_type __ch,int __radix) const339     value(_Ch_type __ch, int __radix) const
340     {
341       std::basic_istringstream<char_type> __is(string_type(1, __ch));
342       long __v;
343       if (__radix == 8)
344 	__is >> std::oct;
345       else if (__radix == 16)
346 	__is >> std::hex;
347       __is >> __v;
348       return __is.fail() ? -1 : __v;
349     }
350 
351   template<typename _Bi_iter, typename _Alloc>
352   template<typename _Out_iter>
353     _Out_iter
354     match_results<_Bi_iter, _Alloc>::
format(_Out_iter __out,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_first,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_last,match_flag_type __flags) const355     format(_Out_iter __out,
356 	   const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
357 	   const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
358 	   match_flag_type __flags) const
359     {
360       __glibcxx_assert( ready() );
361       regex_traits<char_type> __traits;
362       typedef std::ctype<char_type> __ctype_type;
363       const __ctype_type&
364 	__fctyp(use_facet<__ctype_type>(__traits.getloc()));
365 
366       auto __output = [&](size_t __idx)
367 	{
368 	  auto& __sub = (*this)[__idx];
369 	  if (__sub.matched)
370 	    __out = std::copy(__sub.first, __sub.second, __out);
371 	};
372 
373       if (__flags & regex_constants::format_sed)
374 	{
375 	  bool __escaping = false;
376 	  for (; __fmt_first != __fmt_last; __fmt_first++)
377 	    {
378 	      if (__escaping)
379 		{
380 		  __escaping = false;
381 		  if (__fctyp.is(__ctype_type::digit, *__fmt_first))
382 		    __output(__traits.value(*__fmt_first, 10));
383 		  else
384 		    *__out++ = *__fmt_first;
385 		  continue;
386 		}
387 	      if (*__fmt_first == '\\')
388 		{
389 		  __escaping = true;
390 		  continue;
391 		}
392 	      if (*__fmt_first == '&')
393 		{
394 		  __output(0);
395 		  continue;
396 		}
397 	      *__out++ = *__fmt_first;
398 	    }
399 	  if (__escaping)
400 	    *__out++ = '\\';
401 	}
402       else
403 	{
404 	  while (1)
405 	    {
406 	      auto __next = std::find(__fmt_first, __fmt_last, '$');
407 	      if (__next == __fmt_last)
408 		break;
409 
410 	      __out = std::copy(__fmt_first, __next, __out);
411 
412 	      auto __eat = [&](char __ch) -> bool
413 		{
414 		  if (*__next == __ch)
415 		    {
416 		      ++__next;
417 		      return true;
418 		    }
419 		  return false;
420 		};
421 
422 	      if (++__next == __fmt_last)
423 		*__out++ = '$';
424 	      else if (__eat('$'))
425 		*__out++ = '$';
426 	      else if (__eat('&'))
427 		__output(0);
428 	      else if (__eat('`'))
429 		{
430 		  auto& __sub = _M_prefix();
431 		  if (__sub.matched)
432 		    __out = std::copy(__sub.first, __sub.second, __out);
433 		}
434 	      else if (__eat('\''))
435 		{
436 		  auto& __sub = _M_suffix();
437 		  if (__sub.matched)
438 		    __out = std::copy(__sub.first, __sub.second, __out);
439 		}
440 	      else if (__fctyp.is(__ctype_type::digit, *__next))
441 		{
442 		  long __num = __traits.value(*__next, 10);
443 		  if (++__next != __fmt_last
444 		      && __fctyp.is(__ctype_type::digit, *__next))
445 		    {
446 		      __num *= 10;
447 		      __num += __traits.value(*__next++, 10);
448 		    }
449 		  if (0 <= __num && __num < this->size())
450 		    __output(__num);
451 		}
452 	      else
453 		*__out++ = '$';
454 	      __fmt_first = __next;
455 	    }
456 	  __out = std::copy(__fmt_first, __fmt_last, __out);
457 	}
458       return __out;
459     }
460 
461   template<typename _Out_iter, typename _Bi_iter,
462 	   typename _Rx_traits, typename _Ch_type>
463     _Out_iter
464     regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
465 		  const basic_regex<_Ch_type, _Rx_traits>& __e,
466 		  const _Ch_type* __fmt,
467 		  regex_constants::match_flag_type __flags)
468     {
469       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
470       _IterT __i(__first, __last, __e, __flags);
471       _IterT __end;
472       if (__i == __end)
473 	{
474 	  if (!(__flags & regex_constants::format_no_copy))
475 	    __out = std::copy(__first, __last, __out);
476 	}
477       else
478 	{
479 	  sub_match<_Bi_iter> __last;
480 	  auto __len = char_traits<_Ch_type>::length(__fmt);
481 	  for (; __i != __end; ++__i)
482 	    {
483 	      if (!(__flags & regex_constants::format_no_copy))
484 		__out = std::copy(__i->prefix().first, __i->prefix().second,
485 				  __out);
486 	      __out = __i->format(__out, __fmt, __fmt + __len, __flags);
487 	      __last = __i->suffix();
488 	      if (__flags & regex_constants::format_first_only)
489 		break;
490 	    }
491 	  if (!(__flags & regex_constants::format_no_copy))
492 	    __out = std::copy(__last.first, __last.second, __out);
493 	}
494       return __out;
495     }
496 
497   template<typename _Bi_iter,
498 	   typename _Ch_type,
499 	   typename _Rx_traits>
500     bool
501     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ==(const regex_iterator & __rhs) const502     operator==(const regex_iterator& __rhs) const noexcept
503     {
504       if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
505 	return true;
506       return _M_pregex == __rhs._M_pregex
507 	  && _M_begin == __rhs._M_begin
508 	  && _M_end == __rhs._M_end
509 	  && _M_flags == __rhs._M_flags
510 	  && _M_match[0] == __rhs._M_match[0];
511     }
512 
513   template<typename _Bi_iter,
514 	   typename _Ch_type,
515 	   typename _Rx_traits>
516     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
517     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ++()518     operator++()
519     {
520       // In all cases in which the call to regex_search returns true,
521       // match.prefix().first shall be equal to the previous value of
522       // match[0].second, and for each index i in the half-open range
523       // [0, match.size()) for which match[i].matched is true,
524       // match[i].position() shall return distance(begin, match[i].first).
525       // [28.12.1.4.5]
526       if (_M_match[0].matched)
527 	{
528 	  auto __start = _M_match[0].second;
529 	  auto __prefix_first = _M_match[0].second;
530 	  if (_M_match[0].first == _M_match[0].second)
531 	    {
532 	      if (__start == _M_end)
533 		{
534 		  _M_pregex = nullptr;
535 		  return *this;
536 		}
537 	      else
538 		{
539 		  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
540 				   _M_flags
541 				   | regex_constants::match_not_null
542 				   | regex_constants::match_continuous))
543 		    {
544 		      __glibcxx_assert(_M_match[0].matched);
545 		      auto& __prefix = _M_match._M_prefix();
546 		      __prefix.first = __prefix_first;
547 		      __prefix.matched = __prefix.first != __prefix.second;
548 		      // [28.12.1.4.5]
549 		      _M_match._M_begin = _M_begin;
550 		      return *this;
551 		    }
552 		  else
553 		    ++__start;
554 		}
555 	    }
556 	  _M_flags |= regex_constants::match_prev_avail;
557 	  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
558 	    {
559 	      __glibcxx_assert(_M_match[0].matched);
560 	      auto& __prefix = _M_match._M_prefix();
561 	      __prefix.first = __prefix_first;
562 	      __prefix.matched = __prefix.first != __prefix.second;
563 	      // [28.12.1.4.5]
564 	      _M_match._M_begin = _M_begin;
565 	    }
566 	  else
567 	    _M_pregex = nullptr;
568 	}
569       return *this;
570     }
571 
572   template<typename _Bi_iter,
573 	   typename _Ch_type,
574 	   typename _Rx_traits>
575     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
576     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator =(const regex_token_iterator & __rhs)577     operator=(const regex_token_iterator& __rhs)
578     {
579       _M_position = __rhs._M_position;
580       _M_subs = __rhs._M_subs;
581       _M_n = __rhs._M_n;
582       _M_suffix = __rhs._M_suffix;
583       _M_has_m1 = __rhs._M_has_m1;
584       _M_normalize_result();
585       return *this;
586     }
587 
588   template<typename _Bi_iter,
589 	   typename _Ch_type,
590 	   typename _Rx_traits>
591     bool
592     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ==(const regex_token_iterator & __rhs) const593     operator==(const regex_token_iterator& __rhs) const
594     {
595       if (_M_end_of_seq() && __rhs._M_end_of_seq())
596 	return true;
597       if (_M_suffix.matched && __rhs._M_suffix.matched
598 	  && _M_suffix == __rhs._M_suffix)
599 	return true;
600       if (_M_end_of_seq() || _M_suffix.matched
601 	  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
602 	return false;
603       return _M_position == __rhs._M_position
604 	&& _M_n == __rhs._M_n
605 	&& _M_subs == __rhs._M_subs;
606     }
607 
608   template<typename _Bi_iter,
609 	   typename _Ch_type,
610 	   typename _Rx_traits>
611     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
612     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ++()613     operator++()
614     {
615       _Position __prev = _M_position;
616       if (_M_suffix.matched)
617 	*this = regex_token_iterator();
618       else if (_M_n + 1 < _M_subs.size())
619 	{
620 	  _M_n++;
621 	  _M_result = &_M_current_match();
622 	}
623       else
624 	{
625 	  _M_n = 0;
626 	  ++_M_position;
627 	  if (_M_position != _Position())
628 	    _M_result = &_M_current_match();
629 	  else if (_M_has_m1 && __prev->suffix().length() != 0)
630 	    {
631 	      _M_suffix.matched = true;
632 	      _M_suffix.first = __prev->suffix().first;
633 	      _M_suffix.second = __prev->suffix().second;
634 	      _M_result = &_M_suffix;
635 	    }
636 	  else
637 	    *this = regex_token_iterator();
638 	}
639       return *this;
640     }
641 
642   template<typename _Bi_iter,
643 	   typename _Ch_type,
644 	   typename _Rx_traits>
645     void
646     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
_M_init(_Bi_iter __a,_Bi_iter __b)647     _M_init(_Bi_iter __a, _Bi_iter __b)
648     {
649       _M_has_m1 = false;
650       for (auto __it : _M_subs)
651 	if (__it == -1)
652 	  {
653 	    _M_has_m1 = true;
654 	    break;
655 	  }
656       if (_M_position != _Position())
657 	_M_result = &_M_current_match();
658       else if (_M_has_m1)
659 	{
660 	  _M_suffix.matched = true;
661 	  _M_suffix.first = __a;
662 	  _M_suffix.second = __b;
663 	  _M_result = &_M_suffix;
664 	}
665       else
666 	_M_result = nullptr;
667     }
668 
669   /// @endcond
670 
671 _GLIBCXX_END_NAMESPACE_VERSION
672 } // namespace
673