1 // class template regex -*- C++ -*-
2 
3 // Copyright (C) 2013-2016 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 /**
26  *  @file bits/regex.tcc
27  *  This is an internal header file, included by other library headers.
28  *  Do not attempt to use it directly. @headername{regex}
29  */
30 
31 namespace std _GLIBCXX_VISIBILITY(default)
32 {
33 namespace __detail
34 {
35 _GLIBCXX_BEGIN_NAMESPACE_VERSION
36 
37   // Result of merging regex_match and regex_search.
38   //
39   // __policy now can be _S_auto (auto dispatch) and _S_alternate (use
40   // the other one if possible, for test purpose).
41   //
42   // That __match_mode is true means regex_match, else regex_search.
43   template<typename _BiIter, typename _Alloc,
44 	   typename _CharT, typename _TraitsT,
45 	   _RegexExecutorPolicy __policy,
46 	   bool __match_mode>
47     bool
__regex_algo_impl(_BiIter __s,_BiIter __e,match_results<_BiIter,_Alloc> & __m,const basic_regex<_CharT,_TraitsT> & __re,regex_constants::match_flag_type __flags)48     __regex_algo_impl(_BiIter                              __s,
49 		      _BiIter                              __e,
50 		      match_results<_BiIter, _Alloc>&      __m,
51 		      const basic_regex<_CharT, _TraitsT>& __re,
52 		      regex_constants::match_flag_type     __flags)
53     {
54       if (__re._M_automaton == nullptr)
55 	return false;
56 
57       typename match_results<_BiIter, _Alloc>::_Base_type& __res = __m;
58       __m._M_begin = __s;
59       __m._M_resize(__re._M_automaton->_M_sub_count());
60       for (auto& __it : __res)
61 	__it.matched = false;
62 
63       bool __ret;
64       if ((__re.flags() & regex_constants::__polynomial)
65 	  || (__policy == _RegexExecutorPolicy::_S_alternate
66 	      && !__re._M_automaton->_M_has_backref))
67 	{
68 	  _Executor<_BiIter, _Alloc, _TraitsT, false>
69 	    __executor(__s, __e, __m, __re, __flags);
70 	  if (__match_mode)
71 	    __ret = __executor._M_match();
72 	  else
73 	    __ret = __executor._M_search();
74 	}
75       else
76 	{
77 	  _Executor<_BiIter, _Alloc, _TraitsT, true>
78 	    __executor(__s, __e, __m, __re, __flags);
79 	  if (__match_mode)
80 	    __ret = __executor._M_match();
81 	  else
82 	    __ret = __executor._M_search();
83 	}
84       if (__ret)
85 	{
86 	  for (auto& __it : __res)
87 	    if (!__it.matched)
88 	      __it.first = __it.second = __e;
89 	  auto& __pre = __m._M_prefix();
90 	  auto& __suf = __m._M_suffix();
91 	  if (__match_mode)
92 	    {
93 	      __pre.matched = false;
94 	      __pre.first = __s;
95 	      __pre.second = __s;
96 	      __suf.matched = false;
97 	      __suf.first = __e;
98 	      __suf.second = __e;
99 	    }
100 	  else
101 	    {
102 	      __pre.first = __s;
103 	      __pre.second = __res[0].first;
104 	      __pre.matched = (__pre.first != __pre.second);
105 	      __suf.first = __res[0].second;
106 	      __suf.second = __e;
107 	      __suf.matched = (__suf.first != __suf.second);
108 	    }
109 	}
110       else
111 	{
112 	  __m._M_resize(0);
113 	  for (auto& __it : __res)
114 	    {
115 	      __it.matched = false;
116 	      __it.first = __it.second = __e;
117 	    }
118 	}
119       return __ret;
120     }
121 
122 _GLIBCXX_END_NAMESPACE_VERSION
123 }
124 
125 _GLIBCXX_BEGIN_NAMESPACE_VERSION
126 
127   template<typename _Ch_type>
128   template<typename _Fwd_iter>
129     typename regex_traits<_Ch_type>::string_type
130     regex_traits<_Ch_type>::
lookup_collatename(_Fwd_iter __first,_Fwd_iter __last) const131     lookup_collatename(_Fwd_iter __first, _Fwd_iter __last) const
132     {
133       typedef std::ctype<char_type> __ctype_type;
134       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
135 
136       static const char* __collatenames[] =
137 	{
138 	  "NUL",
139 	  "SOH",
140 	  "STX",
141 	  "ETX",
142 	  "EOT",
143 	  "ENQ",
144 	  "ACK",
145 	  "alert",
146 	  "backspace",
147 	  "tab",
148 	  "newline",
149 	  "vertical-tab",
150 	  "form-feed",
151 	  "carriage-return",
152 	  "SO",
153 	  "SI",
154 	  "DLE",
155 	  "DC1",
156 	  "DC2",
157 	  "DC3",
158 	  "DC4",
159 	  "NAK",
160 	  "SYN",
161 	  "ETB",
162 	  "CAN",
163 	  "EM",
164 	  "SUB",
165 	  "ESC",
166 	  "IS4",
167 	  "IS3",
168 	  "IS2",
169 	  "IS1",
170 	  "space",
171 	  "exclamation-mark",
172 	  "quotation-mark",
173 	  "number-sign",
174 	  "dollar-sign",
175 	  "percent-sign",
176 	  "ampersand",
177 	  "apostrophe",
178 	  "left-parenthesis",
179 	  "right-parenthesis",
180 	  "asterisk",
181 	  "plus-sign",
182 	  "comma",
183 	  "hyphen",
184 	  "period",
185 	  "slash",
186 	  "zero",
187 	  "one",
188 	  "two",
189 	  "three",
190 	  "four",
191 	  "five",
192 	  "six",
193 	  "seven",
194 	  "eight",
195 	  "nine",
196 	  "colon",
197 	  "semicolon",
198 	  "less-than-sign",
199 	  "equals-sign",
200 	  "greater-than-sign",
201 	  "question-mark",
202 	  "commercial-at",
203 	  "A",
204 	  "B",
205 	  "C",
206 	  "D",
207 	  "E",
208 	  "F",
209 	  "G",
210 	  "H",
211 	  "I",
212 	  "J",
213 	  "K",
214 	  "L",
215 	  "M",
216 	  "N",
217 	  "O",
218 	  "P",
219 	  "Q",
220 	  "R",
221 	  "S",
222 	  "T",
223 	  "U",
224 	  "V",
225 	  "W",
226 	  "X",
227 	  "Y",
228 	  "Z",
229 	  "left-square-bracket",
230 	  "backslash",
231 	  "right-square-bracket",
232 	  "circumflex",
233 	  "underscore",
234 	  "grave-accent",
235 	  "a",
236 	  "b",
237 	  "c",
238 	  "d",
239 	  "e",
240 	  "f",
241 	  "g",
242 	  "h",
243 	  "i",
244 	  "j",
245 	  "k",
246 	  "l",
247 	  "m",
248 	  "n",
249 	  "o",
250 	  "p",
251 	  "q",
252 	  "r",
253 	  "s",
254 	  "t",
255 	  "u",
256 	  "v",
257 	  "w",
258 	  "x",
259 	  "y",
260 	  "z",
261 	  "left-curly-bracket",
262 	  "vertical-line",
263 	  "right-curly-bracket",
264 	  "tilde",
265 	  "DEL",
266 	};
267 
268       string __s;
269       for (; __first != __last; ++__first)
270 	__s += __fctyp.narrow(*__first, 0);
271 
272       for (const auto& __it : __collatenames)
273 	if (__s == __it)
274 	  return string_type(1, __fctyp.widen(
275 	    static_cast<char>(&__it - __collatenames)));
276 
277       // TODO Add digraph support:
278       // http://boost.sourceforge.net/libs/regex/doc/collating_names.html
279 
280       return string_type();
281     }
282 
283   template<typename _Ch_type>
284   template<typename _Fwd_iter>
285     typename regex_traits<_Ch_type>::char_class_type
286     regex_traits<_Ch_type>::
lookup_classname(_Fwd_iter __first,_Fwd_iter __last,bool __icase) const287     lookup_classname(_Fwd_iter __first, _Fwd_iter __last, bool __icase) const
288     {
289       typedef std::ctype<char_type> __ctype_type;
290       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
291 
292       // Mappings from class name to class mask.
293       static const pair<const char*, char_class_type> __classnames[] =
294       {
295 	{"d", ctype_base::digit},
296 	{"w", {ctype_base::alnum, _RegexMask::_S_under}},
297 	{"s", ctype_base::space},
298 	{"alnum", ctype_base::alnum},
299 	{"alpha", ctype_base::alpha},
300 	{"blank", ctype_base::blank},
301 	{"cntrl", ctype_base::cntrl},
302 	{"digit", ctype_base::digit},
303 	{"graph", ctype_base::graph},
304 	{"lower", ctype_base::lower},
305 	{"print", ctype_base::print},
306 	{"punct", ctype_base::punct},
307 	{"space", ctype_base::space},
308 	{"upper", ctype_base::upper},
309 	{"xdigit", ctype_base::xdigit},
310       };
311 
312       string __s;
313       for (; __first != __last; ++__first)
314 	__s += __fctyp.narrow(__fctyp.tolower(*__first), 0);
315 
316       for (const auto& __it : __classnames)
317 	if (__s == __it.first)
318 	  {
319 	    if (__icase
320 		&& ((__it.second
321 		     & (ctype_base::lower | ctype_base::upper)) != 0))
322 	      return ctype_base::alpha;
323 	    return __it.second;
324 	  }
325       return 0;
326     }
327 
328   template<typename _Ch_type>
329     bool
330     regex_traits<_Ch_type>::
isctype(_Ch_type __c,char_class_type __f) const331     isctype(_Ch_type __c, char_class_type __f) const
332     {
333       typedef std::ctype<char_type> __ctype_type;
334       const __ctype_type& __fctyp(use_facet<__ctype_type>(_M_locale));
335 
336       return __fctyp.is(__f._M_base, __c)
337 	// [[:w:]]
338 	|| ((__f._M_extended & _RegexMask::_S_under)
339 	    && __c == __fctyp.widen('_'));
340     }
341 
342   template<typename _Ch_type>
343     int
344     regex_traits<_Ch_type>::
value(_Ch_type __ch,int __radix) const345     value(_Ch_type __ch, int __radix) const
346     {
347       std::basic_istringstream<char_type> __is(string_type(1, __ch));
348       long __v;
349       if (__radix == 8)
350 	__is >> std::oct;
351       else if (__radix == 16)
352 	__is >> std::hex;
353       __is >> __v;
354       return __is.fail() ? -1 : __v;
355     }
356 
357   template<typename _Bi_iter, typename _Alloc>
358   template<typename _Out_iter>
359     _Out_iter match_results<_Bi_iter, _Alloc>::
format(_Out_iter __out,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_first,const match_results<_Bi_iter,_Alloc>::char_type * __fmt_last,match_flag_type __flags) const360     format(_Out_iter __out,
361 	   const match_results<_Bi_iter, _Alloc>::char_type* __fmt_first,
362 	   const match_results<_Bi_iter, _Alloc>::char_type* __fmt_last,
363 	   match_flag_type __flags) const
364     {
365       __glibcxx_assert( ready() );
366       regex_traits<char_type> __traits;
367       typedef std::ctype<char_type> __ctype_type;
368       const __ctype_type&
369 	__fctyp(use_facet<__ctype_type>(__traits.getloc()));
370 
371       auto __output = [&](size_t __idx)
372 	{
373 	  auto& __sub = (*this)[__idx];
374 	  if (__sub.matched)
375 	    __out = std::copy(__sub.first, __sub.second, __out);
376 	};
377 
378       if (__flags & regex_constants::format_sed)
379 	{
380 	  for (; __fmt_first != __fmt_last;)
381 	    if (*__fmt_first == '&')
382 	      {
383 		__output(0);
384 		++__fmt_first;
385 	      }
386 	    else if (*__fmt_first == '\\')
387 	      {
388 		if (++__fmt_first != __fmt_last
389 		    && __fctyp.is(__ctype_type::digit, *__fmt_first))
390 		  __output(__traits.value(*__fmt_first++, 10));
391 		else
392 		  *__out++ = '\\';
393 	      }
394 	    else
395 	      *__out++ = *__fmt_first++;
396 	}
397       else
398 	{
399 	  while (1)
400 	    {
401 	      auto __next = std::find(__fmt_first, __fmt_last, '$');
402 	      if (__next == __fmt_last)
403 		break;
404 
405 	      __out = std::copy(__fmt_first, __next, __out);
406 
407 	      auto __eat = [&](char __ch) -> bool
408 		{
409 		  if (*__next == __ch)
410 		    {
411 		      ++__next;
412 		      return true;
413 		    }
414 		  return false;
415 		};
416 
417 	      if (++__next == __fmt_last)
418 		*__out++ = '$';
419 	      else if (__eat('$'))
420 		*__out++ = '$';
421 	      else if (__eat('&'))
422 		__output(0);
423 	      else if (__eat('`'))
424 		{
425 		  auto& __sub = _M_prefix();
426 		  if (__sub.matched)
427 		    __out = std::copy(__sub.first, __sub.second, __out);
428 		}
429 	      else if (__eat('\''))
430 		{
431 		  auto& __sub = _M_suffix();
432 		  if (__sub.matched)
433 		    __out = std::copy(__sub.first, __sub.second, __out);
434 		}
435 	      else if (__fctyp.is(__ctype_type::digit, *__next))
436 		{
437 		  long __num = __traits.value(*__next, 10);
438 		  if (++__next != __fmt_last
439 		      && __fctyp.is(__ctype_type::digit, *__next))
440 		    {
441 		      __num *= 10;
442 		      __num += __traits.value(*__next++, 10);
443 		    }
444 		  if (0 <= __num && __num < this->size())
445 		    __output(__num);
446 		}
447 	      else
448 		*__out++ = '$';
449 	      __fmt_first = __next;
450 	    }
451 	  __out = std::copy(__fmt_first, __fmt_last, __out);
452 	}
453       return __out;
454     }
455 
456   template<typename _Out_iter, typename _Bi_iter,
457 	   typename _Rx_traits, typename _Ch_type>
458     _Out_iter
459     regex_replace(_Out_iter __out, _Bi_iter __first, _Bi_iter __last,
460 		  const basic_regex<_Ch_type, _Rx_traits>& __e,
461 		  const _Ch_type* __fmt,
462 		  regex_constants::match_flag_type __flags)
463     {
464       typedef regex_iterator<_Bi_iter, _Ch_type, _Rx_traits> _IterT;
465       _IterT __i(__first, __last, __e, __flags);
466       _IterT __end;
467       if (__i == __end)
468 	{
469 	  if (!(__flags & regex_constants::format_no_copy))
470 	    __out = std::copy(__first, __last, __out);
471 	}
472       else
473 	{
474 	  sub_match<_Bi_iter> __last;
475 	  auto __len = char_traits<_Ch_type>::length(__fmt);
476 	  for (; __i != __end; ++__i)
477 	    {
478 	      if (!(__flags & regex_constants::format_no_copy))
479 		__out = std::copy(__i->prefix().first, __i->prefix().second,
480 				  __out);
481 	      __out = __i->format(__out, __fmt, __fmt + __len, __flags);
482 	      __last = __i->suffix();
483 	      if (__flags & regex_constants::format_first_only)
484 		break;
485 	    }
486 	  if (!(__flags & regex_constants::format_no_copy))
487 	    __out = std::copy(__last.first, __last.second, __out);
488 	}
489       return __out;
490     }
491 
492   template<typename _Bi_iter,
493 	   typename _Ch_type,
494 	   typename _Rx_traits>
495     bool
496     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ==(const regex_iterator & __rhs) const497     operator==(const regex_iterator& __rhs) const
498     {
499       if (_M_pregex == nullptr && __rhs._M_pregex == nullptr)
500 	return true;
501       return _M_pregex == __rhs._M_pregex
502 	  && _M_begin == __rhs._M_begin
503 	  && _M_end == __rhs._M_end
504 	  && _M_flags == __rhs._M_flags
505 	  && _M_match[0] == __rhs._M_match[0];
506     }
507 
508   template<typename _Bi_iter,
509 	   typename _Ch_type,
510 	   typename _Rx_traits>
511     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
512     regex_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ++()513     operator++()
514     {
515       // In all cases in which the call to regex_search returns true,
516       // match.prefix().first shall be equal to the previous value of
517       // match[0].second, and for each index i in the half-open range
518       // [0, match.size()) for which match[i].matched is true,
519       // match[i].position() shall return distance(begin, match[i].first).
520       // [28.12.1.4.5]
521       if (_M_match[0].matched)
522 	{
523 	  auto __start = _M_match[0].second;
524 	  auto __prefix_first = _M_match[0].second;
525 	  if (_M_match[0].first == _M_match[0].second)
526 	    {
527 	      if (__start == _M_end)
528 		{
529 		  _M_pregex = nullptr;
530 		  return *this;
531 		}
532 	      else
533 		{
534 		  if (regex_search(__start, _M_end, _M_match, *_M_pregex,
535 				   _M_flags
536 				   | regex_constants::match_not_null
537 				   | regex_constants::match_continuous))
538 		    {
539 		      __glibcxx_assert(_M_match[0].matched);
540 		      auto& __prefix = _M_match._M_prefix();
541 		      __prefix.first = __prefix_first;
542 		      __prefix.matched = __prefix.first != __prefix.second;
543 		      // [28.12.1.4.5]
544 		      _M_match._M_begin = _M_begin;
545 		      return *this;
546 		    }
547 		  else
548 		    ++__start;
549 		}
550 	    }
551 	  _M_flags |= regex_constants::match_prev_avail;
552 	  if (regex_search(__start, _M_end, _M_match, *_M_pregex, _M_flags))
553 	    {
554 	      __glibcxx_assert(_M_match[0].matched);
555 	      auto& __prefix = _M_match._M_prefix();
556 	      __prefix.first = __prefix_first;
557 	      __prefix.matched = __prefix.first != __prefix.second;
558 	      // [28.12.1.4.5]
559 	      _M_match._M_begin = _M_begin;
560 	    }
561 	  else
562 	    _M_pregex = nullptr;
563 	}
564       return *this;
565     }
566 
567   template<typename _Bi_iter,
568 	   typename _Ch_type,
569 	   typename _Rx_traits>
570     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
571     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator =(const regex_token_iterator & __rhs)572     operator=(const regex_token_iterator& __rhs)
573     {
574       _M_position = __rhs._M_position;
575       _M_subs = __rhs._M_subs;
576       _M_n = __rhs._M_n;
577       _M_suffix = __rhs._M_suffix;
578       _M_has_m1 = __rhs._M_has_m1;
579       _M_normalize_result();
580       return *this;
581     }
582 
583   template<typename _Bi_iter,
584 	   typename _Ch_type,
585 	   typename _Rx_traits>
586     bool
587     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ==(const regex_token_iterator & __rhs) const588     operator==(const regex_token_iterator& __rhs) const
589     {
590       if (_M_end_of_seq() && __rhs._M_end_of_seq())
591 	return true;
592       if (_M_suffix.matched && __rhs._M_suffix.matched
593 	  && _M_suffix == __rhs._M_suffix)
594 	return true;
595       if (_M_end_of_seq() || _M_suffix.matched
596 	  || __rhs._M_end_of_seq() || __rhs._M_suffix.matched)
597 	return false;
598       return _M_position == __rhs._M_position
599 	&& _M_n == __rhs._M_n
600 	&& _M_subs == __rhs._M_subs;
601     }
602 
603   template<typename _Bi_iter,
604 	   typename _Ch_type,
605 	   typename _Rx_traits>
606     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>&
607     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
operator ++()608     operator++()
609     {
610       _Position __prev = _M_position;
611       if (_M_suffix.matched)
612 	*this = regex_token_iterator();
613       else if (_M_n + 1 < _M_subs.size())
614 	{
615 	  _M_n++;
616 	  _M_result = &_M_current_match();
617 	}
618       else
619 	{
620 	  _M_n = 0;
621 	  ++_M_position;
622 	  if (_M_position != _Position())
623 	    _M_result = &_M_current_match();
624 	  else if (_M_has_m1 && __prev->suffix().length() != 0)
625 	    {
626 	      _M_suffix.matched = true;
627 	      _M_suffix.first = __prev->suffix().first;
628 	      _M_suffix.second = __prev->suffix().second;
629 	      _M_result = &_M_suffix;
630 	    }
631 	  else
632 	    *this = regex_token_iterator();
633 	}
634       return *this;
635     }
636 
637   template<typename _Bi_iter,
638 	   typename _Ch_type,
639 	   typename _Rx_traits>
640     void
641     regex_token_iterator<_Bi_iter, _Ch_type, _Rx_traits>::
_M_init(_Bi_iter __a,_Bi_iter __b)642     _M_init(_Bi_iter __a, _Bi_iter __b)
643     {
644       _M_has_m1 = false;
645       for (auto __it : _M_subs)
646 	if (__it == -1)
647 	  {
648 	    _M_has_m1 = true;
649 	    break;
650 	  }
651       if (_M_position != _Position())
652 	_M_result = &_M_current_match();
653       else if (_M_has_m1)
654 	{
655 	  _M_suffix.matched = true;
656 	  _M_suffix.first = __a;
657 	  _M_suffix.second = __b;
658 	  _M_result = &_M_suffix;
659 	}
660       else
661 	_M_result = nullptr;
662     }
663 
664 _GLIBCXX_END_NAMESPACE_VERSION
665 } // namespace
666 
667