1 // Math overloads for simd -*- C++ -*-
2 
3 // Copyright (C) 2020-2021 Free Software Foundation, Inc.
4 //
5 // This file is part of the GNU ISO C++ Library.  This library is free
6 // software; you can redistribute it and/or modify it under the
7 // terms of the GNU General Public License as published by the
8 // Free Software Foundation; either version 3, or (at your option)
9 // any later version.
10 
11 // This library is distributed in the hope that it will be useful,
12 // but WITHOUT ANY WARRANTY; without even the implied warranty of
13 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
14 // GNU General Public License for more details.
15 
16 // Under Section 7 of GPL version 3, you are granted additional
17 // permissions described in the GCC Runtime Library Exception, version
18 // 3.1, as published by the Free Software Foundation.
19 
20 // You should have received a copy of the GNU General Public License and
21 // a copy of the GCC Runtime Library Exception along with this program;
22 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
23 // <http://www.gnu.org/licenses/>.
24 
25 #ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
26 #define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
27 
28 #if __cplusplus >= 201703L
29 
30 #include <utility>
31 #include <iomanip>
32 
33 _GLIBCXX_SIMD_BEGIN_NAMESPACE
// fixed_size_simd of element type _Tp with as many elements as the simd type _V.
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;
36 
37 // _Math_return_type {{{
// Maps the return type of the scalar <cmath> function — instantiated with
// double arguments in the _GLIBCXX_SIMD_MATH_CALL_ macros below — to the
// corresponding simd return type:
//   double -> simd<_Tp, _Abi>        (function returns the value type)
//   bool   -> simd_mask<_Tp, _Abi>   (classification functions)
//   other  -> fixed_size_simd<_DoubleR, N>  (e.g. the int return of ilogb)
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t =
    typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };
56 
57 //}}}
58 // _GLIBCXX_SIMD_MATH_CALL_ {{{
// Generates a unary [simd.math] overload `__name(simd<_Tp, _Abi>)` that
// forwards to the ABI-specific implementation _Abi::_SimdImpl::_S_<name>.
// The return type is deduced from the scalar std::__name(double) overload via
// _Math_return_type.  (No comments inside the macro body: a // comment before
// a line continuation would comment out the remaining lines.)
#define _GLIBCXX_SIMD_MATH_CALL_(__name)                                       \
template <typename _Tp, typename _Abi, typename...,                            \
	  typename _R = _Math_return_type_t<                                   \
	    decltype(std::__name(declval<double>())), _Tp, _Abi>>              \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
  __name(simd<_Tp, _Abi> __x)                                                  \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }
66 
67 // }}}
68 //_Extra_argument_type{{{
// Maps the type of an extra (second/third) scalar-math argument to the
// corresponding simd argument type.  `declval()` mirrors the scalar argument
// type so _GLIBCXX_SIMD_MATH_CALL[23]_ can deduce the scalar function's return
// type; `_S_data` unwraps the simd (or pointer-to-simd) argument for the
// implementation call.
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

// Output parameter of the value type itself (e.g. modf's _Tp* argument):
// becomes a pointer to simd<_Tp, _Abi>.
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    // double* because the scalar return type is probed with double arguments.
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// Integral output parameter (e.g. frexp's int* argument): becomes a pointer to
// a fixed_size_simd of matching width.
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

// By-value argument of the value type itself (e.g. atan2's second argument):
// becomes simd<_Tp, _Abi>.
template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

// By-value integral argument (e.g. ldexp's int argument): becomes a
// fixed_size_simd of matching width.
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };
119 
120 //}}}
121 // _GLIBCXX_SIMD_MATH_CALL2_ {{{
// Generates a binary [simd.math] overload `__name(simd, arg2)` forwarding to
// _Abi::_SimdImpl::_S_<name>.  arg2_ names the scalar argument type relative
// to _Tp (e.g. `_Tp` for atan2, `int` for ldexp) and is mapped through
// _Extra_argument_type.  The second template generates the mixed overload
// `__name(convertible-to-simd, simd)` — only when arg2_ is _Tp itself — and
// funnels it into the first overload after converting the first argument.
// (No comments inside the macro body: // before a line continuation would
// swallow the following lines.)
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, arg2_)                               \
template <                                                                     \
  typename _Tp, typename _Abi, typename...,                                    \
  typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>,                     \
  typename _R = _Math_return_type_t<                                           \
    decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>>    \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y)          \
  {                                                                            \
    return {__private_init,                                                    \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))};   \
  }                                                                            \
template <typename _Up, typename _Tp, typename _Abi>                           \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t<                                 \
    decltype(std::__name(                                                      \
      declval<double>(),                                                       \
      declval<enable_if_t<                                                     \
	conjunction_v<                                                         \
	  is_same<arg2_, _Tp>,                                                 \
	  negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>,           \
	  is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>,       \
	double>>())),                                                          \
    _Tp, _Abi>                                                                 \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy)                              \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }
147 
148 // }}}
149 // _GLIBCXX_SIMD_MATH_CALL3_ {{{
// Generates a ternary [simd.math] overload `__name(simd, arg2, arg3)` (e.g.
// fma, remquo) forwarding to _Abi::_SimdImpl::_S_<name>; arg2_/arg3_ are
// mapped through _Extra_argument_type as in _GLIBCXX_SIMD_MATH_CALL2_.  The
// second template accepts any mix of arguments convertible to the simd type —
// provided the first is not already a floating-point simd, which the first
// overload handles — converts all three, and forwards to the first overload.
// (No comments inside the macro body: // before a line continuation would
// swallow the following lines.)
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, arg2_, arg3_)                        \
template <typename _Tp, typename _Abi, typename...,                            \
	  typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>,             \
	  typename _Arg3 = _Extra_argument_type<arg3_, _Tp, _Abi>,             \
	  typename _R = _Math_return_type_t<                                   \
	    decltype(std::__name(declval<double>(), _Arg2::declval(),          \
				 _Arg3::declval())),                           \
	    _Tp, _Abi>>                                                        \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y,          \
	 const typename _Arg3::type& __z)                                      \
  {                                                                            \
    return {__private_init,                                                    \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y),     \
					 _Arg3::_S_data(__z))};                \
  }                                                                            \
template <                                                                     \
  typename _T0, typename _T1, typename _T2, typename...,                       \
  typename _U0 = __remove_cvref_t<_T0>,                                        \
  typename _U1 = __remove_cvref_t<_T1>,                                        \
  typename _U2 = __remove_cvref_t<_T2>,                                        \
  typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>,                    \
  typename = enable_if_t<conjunction_v<                                        \
    is_simd<_Simd>, is_convertible<_T0&&, _Simd>,                              \
    is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>,                \
    negation<conjunction<                                                      \
      is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>>    \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(),             \
					  declval<const _Simd&>(),             \
					  declval<const _Simd&>()))            \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz)                                   \
  {                                                                            \
    return __name(_Simd(static_cast<_T0&&>(__xx)),                             \
		  _Simd(static_cast<_T1&&>(__yy)),                             \
		  _Simd(static_cast<_T2&&>(__zz)));                            \
  }
186 
187 // }}}
188 // __cosSeries {{{
// Single-precision polynomial approximation of cos(__x) for arguments folded
// into [-π/4, π/4] (see __fold_input), evaluated with Horner's scheme.  The
// coefficients are tuned Taylor-series coefficients (see the comment above
// cos()); the evaluation order matters for precision — do not reorder.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  //  1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
    // -x²/2 and +1 are applied outside the Horner chain for precision.
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }
200 
201 template <typename _Abi>
202   _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
203   __cosSeries(const simd<double, _Abi>& __x)
204   {
205     const simd<double, _Abi> __x2 = __x * __x;
206     simd<double, _Abi> __y;
207     __y = 0x1.AC00000000000p-45;              //  1/16!
208     __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
209     __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
210     __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
211     __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
212     __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
213     __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
214     return (__y * __x2 - .5f) * __x2 + 1.f;
215   }
216 
217 // }}}
218 // __sinSeries {{{
// Single-precision polynomial approximation of sin(__x) for arguments folded
// into [-π/4, π/4] (see __fold_input), evaluated with Horner's scheme.  The
// coefficients are tuned Taylor-series coefficients (see the comment above
// cos()); the evaluation order matters for precision — do not reorder.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
    // The leading term x is added last, outside the Horner chain.
    return __y * (__x2 * __x) + __x;
  }
230 
// Double-precision polynomial approximation of sin(__x) for arguments folded
// into [-π/4, π/4] (see __fold_input), evaluated with Horner's scheme.  The
// coefficients are tuned Taylor-series coefficients (see the comment above
// cos()); the evaluation order matters for precision — do not reorder.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x  = [0, 0.7854 = pi/4]
    // __x² = [0, 0.6169 = pi²/8]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
    // The leading term x is added last, outside the Horner chain.
    return __y * (__x2 * __x) + __x;
  }
248 
249 // }}}
250 // __zero_low_bits {{{
// Clears the _Bits least-significant bits of the bit representation of every
// element of __x: the mask ~0 << _Bits is built in the equally-sized unsigned
// integer type, bit_cast to _Tp, and ANDed onto __x.  Used below to split a
// double into high/low parts with exactly representable products.
template <int _Bits, typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  __zero_low_bits(simd<_Tp, _Abi> __x)
  {
    // ~U() == all-ones in the unsigned type U; shifting left zeroes _Bits
    // low bits.
    const simd<_Tp, _Abi> __bitmask
      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
    return {__private_init,
	    _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
  }
260 
261 // }}}
262 // __fold_input {{{
263 
264 /**@internal
265  * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
266  * quadrant 0: [-¼π,  ¼π]
267  * quadrant 1: [ ¼π,  ¾π]
268  * quadrant 2: [ ¾π, 1¼π]
269  * quadrant 3: [1¼π, 1¾π]
270  *
271  * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
272  * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
273  * ```
274  * y = trunc(x / ¼π);
275  * y += fmod(y, 2);
276  * ```
277  * This can be simplified by moving the (implicit) division by 2 into the
278  * truncation expression. The `+= fmod` effect can the be achieved by using
279  * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision allows,
280  * `2/π * x` is better (faster).
281  */
// Result of __fold_input: the folded absolute value _M_x (in [-π/4, π/4]) and
// the quadrant index (0..3) each element's original argument came from.
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
  };
288 
// Constants for the trigonometric argument reduction in __fold_input.
namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
// π/2 split into a part with 5 trailing zero bits plus remainder, so that
// products with small integers stay exactly representable:
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
} // namespace __math_double
302 
// Argument reduction for float: fold |__x| into [-π/4, π/4] and record the
// quadrant (see the algorithm description above).  Three paths, chosen for
// the whole vector at once:
//   1. all |x| < π/4:    nothing to fold, quadrant 0
//   2. all |x| < 6·π/4:  single-precision reduction; π/2 is split into
//                        __pi_2_5bits0 + remainder so y·(π/2) subtracts
//                        without rounding error in the high part
//   3. otherwise:        fall back to double precision for the reduction
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x);
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make the
    // following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
	const _V __y = nearbyint(__r._M_x * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
	// Subtract y·(π/2) in two steps (high part then remainder) to keep
	// the reduction exact; do not merge these statements.
	__r._M_x -= __y * __pi_2_5bits0;
	__r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
	// Large inputs: reduce in double precision, then cast back.
	using __math_double::__2_over_pi;
	using __math_double::__pi_2;
	using _VD = rebind_simd_t<double, _V>;
	_VD __xd = static_simd_cast<_VD>(__r._M_x);
	_VD __y = nearbyint(__xd * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
	__r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }
346 
// Argument reduction for double: fold |__x| into [-π/4, π/4] and record the
// quadrant (see the algorithm description above).  After the all-small fast
// path, y = round(|x|/(π/2)) is subtracted as y·(π/2) using a multi-part
// split of π/2; the number of parts grows with the magnitude of y so every
// partial product stays exactly representable.
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
	// All elements already inside [-π/4, π/4]: quadrant 0, no reduction.
	__r._M_quadrant = 0;
	return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
	// x - y * pi/2, y uses no more than 11 mantissa bits
	__r._M_x -= __y * 0x1.921FB54443000p0;
	__r._M_x -= __y * -0x1.73DCB3B39A000p-43;
	__r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
	// x - y * pi/2, y uses no more than 29 mantissa bits
	__r._M_x -= __y * 0x1.921FB40000000p0;
	__r._M_x -= __y * 0x1.4442D00000000p-24;
	__r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
	// x - y * pi/2, y may require all mantissa bits
	// Split y into a high part with 26 zeroed low bits and the rest, so
	// products with the π/2 parts are exact; subtract the partial
	// products in decreasing order of magnitude (max before min).
	const _V __y_hi = __zero_low_bits<26>(__y);
	const _V __y_lo = __y - __y_hi;
	const auto __pi_2_1 = 0x1.921FB50000000p0;
	const auto __pi_2_2 = 0x1.110B460000000p-26;
	const auto __pi_2_3 = 0x1.1A62630000000p-54;
	const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
	__r._M_x = __r._M_x - __y_hi * __pi_2_1
		   - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		   - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		   - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		   - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		   - max(__y * __pi_2_4, __y_lo * __pi_2_3)
		   - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }
398 
399 // }}}
400 // __extract_exponent_as_int {{{
// Extracts the biased exponent bits of each element of __v as an int simd:
// mask with the exponent mask (the bit pattern of infinity), reinterpret as
// the equally-sized unsigned integer, and shift the mantissa bits out
// (__digits_v<_Tp> counts the implicit bit, hence the - 1).
template <typename _Tp, typename _Abi>
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
      __bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
      >> (__digits_v<_Tp> - 1));
  }
414 
415 // }}}
416 // __impl_or_fallback {{{
417 template <typename ImplFun, typename FallbackFun, typename... _Args>
418   _GLIBCXX_SIMD_INTRINSIC auto
419   __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
420 			      _Args&&... __args)
421     -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
422   { return __impl_fun(static_cast<_Args&&>(__args)...); }
423 
424 template <typename ImplFun, typename FallbackFun, typename... _Args>
425   inline auto
426   __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
427 			      _Args&&... __args)
428     -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
429   { return __fallback_fun(static_cast<_Args&&>(__args)...); }
430 
431 template <typename... _Args>
432   _GLIBCXX_SIMD_INTRINSIC auto
433   __impl_or_fallback(_Args&&... __args)
434   {
435     return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
436   }
437 //}}}
438 
439 // trigonometric functions {{{
// Element-wise inverse trigonometric functions, generated by the macros
// above; each forwards to _Abi::_SimdImpl::_S_<name>.
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)
444 
445 /*
446  * algorithm for sine and cosine:
447  *
448  * The result can be calculated with sine or cosine depending on the π/4 section
449  * the input is in. sine   ≈ __x + __x³ cosine ≈ 1 - __x²
450  *
451  * sine:
452  * Map -__x to __x and invert the output
453  * Extend precision of __x - n * π/4 by calculating
454  * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
455  *
456  * Calculate Taylor series with tuned coefficients.
457  * Fix sign.
458  */
459 // cos{{{
// Element-wise cos(__x).  Scalar and fixed-size ABIs forward to the impl;
// otherwise the argument is folded into [-π/4, π/4] and evaluated with the
// cos/sin polynomial matching the quadrant, with the sign fixed afterwards.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
	// Large float inputs are promoted to double so the argument folding
	// stays precise (threshold chosen by the implementation — presumably
	// the limit of the float reduction's accuracy; TODO confirm).
	if constexpr (is_same_v<_Tp, float>)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
	    return static_simd_cast<_V>(
	      cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	//        0 | cosSeries, +
	//        1 | sinSeries, -
	//        2 | cosSeries, -
	//        3 | sinSeries, +
	using namespace std::experimental::__float_bitwise_operators;
	// Sign bit set exactly for quadrants 1 and 2: bit 31 of
	// (1 + quadrant) << 30 is set iff 1 + quadrant is 2 or 3.
	const _V __sign_flip
	  = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

	const auto __need_cos = (__f._M_quadrant & 1) == 0;
	// Fast paths when all lanes need the same series; otherwise blend.
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else // some_of(__need_cos)
	  {
	    _V __r = __sinSeries(__f._M_x);
	    where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
	    return __r ^ __sign_flip;
	  }
      }
  }
497 
// Scalar-ABI overload: a single element, forward directly to std::cos.
template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  cos(simd<_Tp, simd_abi::scalar> __x)
  { return std::cos(__data(__x)); }
503 
504 //}}}
505 // sin{{{
// Element-wise sin(__x).  Scalar and fixed-size ABIs forward to the impl;
// otherwise the argument is folded into [-π/4, π/4] and evaluated with the
// sin/cos polynomial matching the quadrant, with the sign fixed afterwards.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
	// Large float inputs are promoted to double so the argument folding
	// stays precise (threshold chosen by the implementation — presumably
	// the limit of the float reduction's accuracy; TODO confirm).
	if constexpr (is_same_v<_Tp, float>)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
	    return static_simd_cast<_V>(
	      sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	//        0 | sinSeries
	//        1 | cosSeries
	//        2 | sinSeries, sign flip
	//        3 | cosSeries, sign flip
	using namespace std::experimental::__float_bitwise_operators;
	// sin is odd, so the input's sign bit combines (XOR) with the
	// quadrant-derived flip (sign bit of 1 - quadrant is set for
	// quadrants 2 and 3).
	const auto __sign_flip
	  = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

	const auto __need_sin = (__f._M_quadrant & 1) == 0;
	// Fast paths when all lanes need the same series; otherwise blend.
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else // some_of(__need_sin)
	  {
	    _V __r = __cosSeries(__f._M_x);
	    where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
	    return __sign_flip ^ __r;
	  }
      }
  }
543 
// Scalar-ABI overload: a single element, forward directly to std::sin.
template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  sin(simd<_Tp, simd_abi::scalar> __x)
  { return std::sin(__data(__x)); }
549 
550 //}}}
// tan and the hyperbolic functions, generated by the macro above; each
// forwards to _Abi::_SimdImpl::_S_<name>.
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
558 // }}}
559 // exponential functions {{{
// Element-wise exponential functions, generated by the macro above.
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)
563 
564 // }}}
565 // frexp {{{
566 #if _GLIBCXX_SIMD_X86INTRIN
// Wraps the AVX512 vgetexp instruction for the given vector type: selects the
// narrowest available intrinsic (AVX512VL for 128/256-bit vectors, otherwise
// widening to 512 bits under plain AVX512F).  Used by frexp below.
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }
594 
// Wraps the AVX512 vgetmant instruction (normalization interval [0.5, 1),
// sign taken from the source) for the given vector type, with the same
// intrinsic-selection strategy as __getexp.  Used by frexp below.
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x), _MM_MANT_NORM_p5_1,
					   _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
					      _MM_MANT_NORM_p5_1,
					      _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
628 #endif // _GLIBCXX_SIMD_X86INTRIN
629 
630 /**
631  * splits @p __v into exponent and mantissa, the sign is kept with the mantissa
632  *
633  * The return value will be in the range [0.5, 1.0[
634  * The @p __e value will be an integer defining the power-of-two exponent
635  */
// Element-wise frexp: returns the mantissa in ±[0.5, 1) and stores the
// power-of-two exponent through __exp.  Paths: single element -> std::frexp;
// fixed-size ABI -> impl; AVX512 -> vgetexp/vgetmant with a zero/blend for
// non-finite and zero inputs; otherwise a bit-manipulation fallback that
// rescales subnormals first.  NOTE the brace/#if interleaving: with
// _GLIBCXX_SIMD_X86INTRIN the fixed-size branch's closing brace belongs to
// the AVX512 branch's opening — keep the preprocessor structure as is.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
	// Single element: defer to the scalar library function.
	int __tmp;
	const auto __r = std::frexp(__x[0], &__tmp);
	(*__exp)[0] = __tmp;
	return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
	return {__private_init,
		_Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
      }
    else if constexpr (__have_avx512f)
      {
	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
	constexpr size_t _NI = _Np < 4 ? 4 : _Np;
	const auto __v = __data(__x);
	// vgetexp/vgetmant do not give frexp's required results for zero,
	// inf and NaN, so those lanes are blended back explicitly.
	const auto __isnonzero
	  = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
	// getexp returns floor(log2|x|); frexp's exponent is one larger.
	const _SimdWrapper<int, _NI> __exp_plus1
	  = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
	const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
	  _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
				      _SimdWrapper<int, _NI>(), __exp_plus1));
	simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
	return {__private_init,
		_Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
					      __isnonzero),
					    __v, __getmant_avx512(__v))};
#endif // _GLIBCXX_SIMD_X86INTRIN
      }
    else
      {
	// fallback implementation
	static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
	using _V = simd<_Tp, _Abi>;
	using _IV = rebind_simd_t<int, _V>;
	using namespace std::experimental::__proposed;
	using namespace std::experimental::__float_bitwise_operators;

	// Bias adjustment so the stored exponent field maps to frexp's
	// exponent; offset/scale handle subnormal inputs after rescaling.
	constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
	constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
	constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
	  = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
	  = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

	// Force the exponent field to that of [.5, 1) while keeping sign and
	// mantissa bits of __x.
	_V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
	const _IV __exponent_bits = __extract_exponent_as_int(__x);
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
	  {
	    // Fast path: every lane is a normal number.
	    *__exp
	      = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
	    return __mant;
	  }

#if __FINITE_MATH_ONLY__
	// at least one element of __x is 0 or subnormal, the rest is normal
	// (inf and NaN are excluded by -ffinite-math-only)
	const auto __iszero_inf_nan = __x == 0;
#else
	// Identify zero, inf and NaN lanes via the integer representation of
	// |x|: 0 has all exponent/mantissa bits clear, inf/NaN compare >= inf.
	const auto __as_int
	  = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(abs(__x));
	const auto __inf
	  = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(
	    _V(__infinity_v<_Tp>));
	const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
	  __as_int == 0 || __as_int >= __inf);
#endif

	// Subnormals: multiply into the normal range first, then extract the
	// mantissa and exponent of the scaled value.
	const _V __scaled_subnormal = __x * __subnorm_scale;
	const _V __mant_subnormal
	  = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
	where(!isnormal(__x), __mant) = __mant_subnormal;
	where(__iszero_inf_nan, __mant) = __x; // frexp passes 0/inf/NaN through
	_IV __e = __extract_exponent_as_int(__scaled_subnormal);
	// Pick a mask type whose element size matches _V for the __cvt blends.
	using _MaskType =
	  typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
				 _V, _IV>::mask_type;
	const _MaskType __value_isnormal = isnormal(__x).__cvt();
	where(__value_isnormal.__cvt(), __e) = __exponent_bits;
	static_assert(sizeof(_IV) == sizeof(__value_isnormal));
	// Per-lane exponent bias: __exp_adjust for normals, additionally
	// __exp_offset for rescaled subnormals (nonzero with zero exponent
	// field); zero/inf/NaN lanes get offset 0 so *__exp is 0 for them.
	const _IV __offset
	  = (__bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
	    | (__bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
			       & static_simd_cast<_MaskType>(__x != 0))
	       & _IV(__exp_adjust + __exp_offset));
	*__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
	return __mant;
      }
  }
733 
734 // }}}
// ldexp(simd, fixed_size_simd<int>) and ilogb(simd) -> fixed_size_simd<int>,
// generated by the macros above.
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)
737 
738 // logarithms {{{
// Element-wise logarithm functions, generated by the macro above.
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)
743 
744 //}}}
745 // logb{{{
// logb(x): the unbiased exponent of each element, returned as a
// floating-point value.  Special cases (matching Annex F logb): zero maps
// to -inf, NaN is propagated, infinity maps to +inf; subnormals are handled
// by rescaling with 2^112 and repeating the exponent extraction.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
	// fixed-size ABI: apply the native-ABI overload chunk by chunk
	return {__private_init,
		__data(__x)._M_apply_per_chunk([](auto __impl, auto __xx) {
		  using _V = typename decltype(__impl)::simd_type;
		  return __data(
		    std::experimental::logb(_V(__private_init, __xx)));
		})};
      }
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    // vgetexp computes this (including the 0/inf/NaN/subnormal cases) in
    // hardware; prefer it wherever available.
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
	      __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
	using _V = simd<_Tp, _Abi>;
	using namespace std::experimental::__proposed;
	auto __is_normal = isnormal(__x);

	// work on abs(__x) to reflect the return value on Linux for negative
	// inputs (domain-error => implementation-defined value is returned)
	const _V abs_x = abs(__x);

	// __exponent(__x) returns the exponent value (bias removed) as
	// simd<_Up> with integral _Up
	auto&& __exponent = [](const _V& __v) {
	  using namespace std::experimental::__proposed;
	  using _IV = rebind_simd_t<
	    conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
	  // shift out the mantissa bits, then subtract the exponent bias
	  return (__bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
		 - (__max_exponent_v<_Tp> - 1);
	};
	_V __r = static_simd_cast<_V>(__exponent(abs_x));
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
	  // without corner cases (nan, inf, subnormal, zero) we have our
	  // answer:
	  return __r;
	const auto __is_zero = __x == 0;
	const auto __is_nan = isnan(__x);
	const auto __is_inf = isinf(__x);
	where(__is_zero, __r) = -__infinity_v<_Tp>;
	where(__is_nan, __r) = __x;
	where(__is_inf, __r) = __infinity_v<_Tp>;
	__is_normal |= __is_zero || __is_nan || __is_inf;
	if (all_of(__is_normal))
	  // at this point everything but subnormals is handled
	  return __r;
	// subnormals repeat the exponent extraction after multiplication of the
	// input with a floating point value that has 112 (0x70) in its exponent
	// (not too big for sp and large enough for dp)
	const _V __scaled = abs_x * _Tp(0x1p112);
	_V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
	where(__is_normal, __scaled_exp) = __r;
	return __scaled_exp;
      }
  }
826 
827 //}}}
828 template <typename _Tp, typename _Abi>
829   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
830   modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
831   {
832     if constexpr (__is_scalar_abi<_Abi>()
833 		  || (__is_fixed_size_abi_v<
834 			_Abi> && simd_size_v<_Tp, _Abi> == 1))
835       {
836 	_Tp __tmp;
837 	_Tp __r = std::modf(__x[0], &__tmp);
838 	__iptr[0] = __tmp;
839 	return __r;
840       }
841     else
842       {
843 	const auto __integral = trunc(__x);
844 	*__iptr = __integral;
845 	auto __r = __x - __integral;
846 #if !__FINITE_MATH_ONLY__
847 	where(isinf(__x), __r) = _Tp();
848 #endif
849 	return copysign(__r, __x);
850       }
851   }
852 
// scalbn/scalbln: x * FLT_RADIX^exp with int resp. long exponent; cbrt:
// cube root; abs/fabs: floating-point absolute value (signed-integer
// overloads of abs/fabs follow below).
_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)
860 
// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
// abs/fabs for signed integral element types: forwards to the ABI's
// element-wise absolute-value implementation.
template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  fabs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }
872 
873 // the following are overloads for functions in <cstdlib> and not covered by
874 // [parallel.simd.math]. I don't see much value in making them work, though
875 /*
876 template <typename _Abi> simd<long, _Abi> labs(const simd<long, _Abi> &__x)
877 { return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
878 
879 template <typename _Abi> simd<long long, _Abi> llabs(const simd<long long, _Abi>
880 &__x)
881 { return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
882 */
883 
// _GLIBCXX_SIMD_CVTING2(name): defines the two "mixed" overloads of a binary
// simd math function where exactly one argument is deduced as
// simd<_Tp, _Abi> and the other (wrapped in __type_identity_t, a non-deduced
// context) merely has to be convertible to it.  Both overloads forward to
// the (simd, simd) overload after conversion.
// NB: comments cannot be placed inside the macro body — a '//' before a
// trailing backslash would swallow the continuation line.
#define _GLIBCXX_SIMD_CVTING2(_NAME)                                           \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y) \
  {                                                                            \
    return _NAME(__x, __y);                                                    \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y) \
  {                                                                            \
    return _NAME(__x, __y);                                                    \
  }
898 
// _GLIBCXX_SIMD_CVTING3(name): the ternary analogue of
// _GLIBCXX_SIMD_CVTING2 — defines the six overloads where one or two of the
// three arguments are non-deduced (__type_identity_t) and only convertible
// to simd<_Tp, _Abi>.  All forward to the (simd, simd, simd) overload.
#define _GLIBCXX_SIMD_CVTING3(_NAME)                                           \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const simd<_Tp, _Abi>& __z)                                                \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const simd<_Tp, _Abi>& __z)                                                \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,                    \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x, const __type_identity_t<simd<_Tp, _Abi>>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x, const simd<_Tp, _Abi>& __y, \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                             \
    const __type_identity_t<simd<_Tp, _Abi>>& __y, const simd<_Tp, _Abi>& __z) \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }
947 
948 template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
949   _GLIBCXX_SIMD_INTRINSIC _R
950   __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
951 		     const _Tps&... __args)
952   {
953     return {__private_init,
954 	    __data(__arg0)._M_apply_per_chunk(
955 	      [&](auto __impl, const auto&... __inner) {
956 		using _V = typename decltype(__impl)::simd_type;
957 		return __data(__apply(_V(__private_init, __inner)...));
958 	      },
959 	      __data(__args)...)};
960   }
961 
// __hypot(x, y): sqrt(x² + y²) without spurious overflow/underflow.
// Strategy: scale both inputs by an exact power of two near 1/|max| (only
// exponent bits change, so the scaling is error-free), take the sqrt of the
// scaled sum of squares, and scale back.  Annex F special cases are patched
// in at the end on non-fast-math builds.
template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
	// fixed-size: recurse chunk-wise into the native-ABI overloads
	return __fixed_size_apply<_V>([](auto __a,
					 auto __b) { return hypot(__a, __b); },
				      __x, __y);
      }
    else
      {
	// A simple solution for _Tp == float would be to cast to double and
	// simply calculate sqrt(x²+y²) as it can't over-/underflow anymore with
	// dp. It still needs the Annex F fixups though and isn't faster on
	// Skylake-AVX512 (not even for SSE and AVX vectors, and really bad for
	// AVX-512).
	using namespace __float_bitwise_operators;
	_V __absx = abs(__x);          // no error
	_V __absy = abs(__y);          // no error
	_V __hi = max(__absx, __absy); // no error
	_V __lo = min(__absy, __absx); // no error

	// round __hi down to the next power-of-2:
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	if constexpr (__have_neon && !__have_neon_a32)
	  { // With ARMv7 NEON, we have no subnormals and must use slightly
	    // different strategy
	    // (__hi & __inf isolates the exponent bits of __hi, i.e. rounds
	    // __hi down to a power of two)
	    const _V __hi_exp = __hi & __inf;
	    _V __scale_back = __hi_exp;
	    // For large exponents (max & max/2) the inversion comes too close
	    // to subnormals. Subtract 3 from the exponent:
	    where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
	    // Invert and adjust for the off-by-one error of inversion via xor:
	    const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
	    const _V __h1 = __hi * __scale;
	    const _V __l1 = __lo * __scale;
	    _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
	    // Fix up hypot(0, 0) to not be NaN:
	    where(__hi == 0, __r) = 0;
	    return __r;
	  }
#endif

#ifdef __FAST_MATH__
	// With fast-math, ignore precision of subnormals and inputs from
	// __finite_max_v/2 to __finite_max_v. This removes all
	// branching/masking.
	if constexpr (true)
#else
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
				    && all_of(isnormal(__y))))
#endif
	  {
	    // __hi & __inf isolates the exponent bits: a power of two <= __hi
	    const _V __hi_exp = __hi & __inf;
	    //((__hi + __hi) & __inf) ^ __inf almost works for computing
	    //__scale,
	    // except when (__hi + __hi) & __inf == __inf, in which case __scale
	    // becomes 0 (should be min/2 instead) and thus loses the
	    // information from __lo.
#ifdef __FAST_MATH__
	    // 2 * bits(1.0) - bits(2^k) == bits(2^-k): exact reciprocal of a
	    // power of two computed via integer arithmetic on the bit pattern
	    using _Ip = __int_for_sizeof_t<_Tp>;
	    using _IV = rebind_simd_t<_Ip, _V>;
	    const auto __as_int = __bit_cast<_IV>(__hi_exp);
	    const _V __scale
	      = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
	    const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
	    // __norm_min - __denorm_min is the largest subnormal: zero
	    // exponent field with all mantissa bits set => mantissa mask
	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
	      = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
	    // __h1 == __hi / __hi_exp exactly (mantissa of __hi, exponent 0)
	    const _V __h1 = (__hi & __mant_mask) | _V(1);
	    const _V __l1 = __lo * __scale;
	    return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
	  }
	else
	  {
	    // slower path to support subnormals
	    // if __hi is subnormal, avoid scaling by inf & final mul by 0
	    // (which yields NaN) by using min()
	    _V __scale = _V(1 / __norm_min_v<_Tp>);
	    // invert exponent w/o error and w/o using the slow divider unit:
	    // xor inverts the exponent but off by 1. Multiplication with .5
	    // adjusts for the discrepancy.
	    where(__hi >= __norm_min_v<_Tp>, __scale)
	      = ((__hi & __inf) ^ __inf) * _Tp(.5);
	    // adjust final exponent for subnormal inputs
	    _V __hi_exp = __norm_min_v<_Tp>;
	    where(__hi >= __norm_min_v<_Tp>, __hi_exp)
	      = __hi & __inf;         // no error
	    _V __h1 = __hi * __scale; // no error
	    _V __l1 = __lo * __scale; // no error

	    // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
	    // this ensures no overflow in the argument to sqrt
	    _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
	    // fixup for Annex F requirements
	    // the naive fixup goes like this:
	    //
	    // where(__l1 == 0, __r)                      = __hi;
	    // where(isunordered(__x, __y), __r)          = __quiet_NaN_v<_Tp>;
	    // where(isinf(__absx) || isinf(__absy), __r) = __inf;
	    //
	    // The fixup can be prepared in parallel with the sqrt, requiring a
	    // single blend step after hi_exp * sqrt, reducing latency and
	    // throughput:
	    _V __fixup = __hi; // __lo == 0
	    where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
	    where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
	    where(!(__lo == 0 || isunordered(__x, __y)
		    || (isinf(__absx) || isinf(__absy))),
		  __fixup)
	      = __r;
	    __r = __fixup;
#endif
	    return __r;
	  }
      }
  }
1088 
// hypot(x, y): public binary overload.  Fixed-size simds are handed to
// __hypot by const reference (presumably to avoid copying the potentially
// large fixed-size storage); other ABIs are passed by value.
template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
				 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
									   __y);
  }

_GLIBCXX_SIMD_CVTING2(hypot)
1099 
  // __hypot(x, y, z): 3D Euclidean norm sqrt(x² + y² + z²) without spurious
  // overflow/underflow.  Same scaling strategy as the binary __hypot above,
  // with the two smaller magnitudes (__l0, __l1) summed first for accuracy.
  template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y, _VV __z)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Abi = typename _V::abi_type;
    using _Tp = typename _V::value_type;
    /* FIXME: enable after PR77776 is resolved
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
    else
    */
    if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
      {
	// fixed-size: recurse chunk-wise into the native-ABI overloads
	return __fixed_size_apply<simd<_Tp, _Abi>>(
	  [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
	  __x, __y, __z);
      }
    else
      {
	using namespace __float_bitwise_operators;
	const _V __absx = abs(__x);                 // no error
	const _V __absy = abs(__y);                 // no error
	const _V __absz = abs(__z);                 // no error
	_V __hi = max(max(__absx, __absy), __absz); // no error
	_V __l0 = min(__absz, max(__absx, __absy)); // no error
	_V __l1 = min(__absy, __absx);              // no error
	if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
		      && __min_exponent_v<_Tp> == -0x3FFD && _V::size() == 1)
	  { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal or
	    // NaN. In this case the bit-tricks don't work, they require IEC559
	    // binary32 or binary64 format.
#ifdef __STDC_IEC_559__
	    // fixup for Annex F requirements
	    if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
	      return __infinity_v<_Tp>;
	    else if (isunordered(__absx[0], __absy[0] + __absz[0]))
	      return __quiet_NaN_v<_Tp>;
	    else if (__l0[0] == 0 && __l1[0] == 0)
	      return __hi;
#endif
	    // overwrite the significand with 2^63 (bit 63 only), keeping the
	    // sign/exponent half of __hi intact, to get a power of two
	    _V __hi_exp = __hi;
	    const _ULLong __tmp = 0x8000'0000'0000'0000ull;
	    __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
	    const _V __scale = 1 / __hi_exp;
	    __hi *= __scale;
	    __l0 *= __scale;
	    __l1 *= __scale;
	    return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
	  }
	else
	  {
	    // round __hi down to the next power-of-2:
	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	    if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
	      { // With ARMv7 NEON, we have no subnormals and must use slightly
		// different strategy
		const _V __hi_exp = __hi & __inf;
		_V __scale_back = __hi_exp;
		// For large exponents (max & max/2) the inversion comes too
		// close to subnormals. Subtract 3 from the exponent:
		where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
		// Invert and adjust for the off-by-one error of inversion via
		// xor:
		const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
		const _V __h1 = __hi * __scale;
		__l0 *= __scale;
		__l1 *= __scale;
		_V __lo = __l0 * __l0
			  + __l1 * __l1; // add the two smaller values first
		asm("" : "+m"(__lo));
		_V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
		// Fix up hypot(0, 0, 0) to not be NaN:
		where(__hi == 0, __r) = 0;
		return __r;
	      }
#endif

#ifdef __FAST_MATH__
	    // With fast-math, ignore precision of subnormals and inputs from
	    // __finite_max_v/2 to __finite_max_v. This removes all
	    // branching/masking.
	    if constexpr (true)
#else
	    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
					&& all_of(isnormal(__y))
					&& all_of(isnormal(__z))))
#endif
	      {
		// __hi & __inf isolates the exponent bits of __hi
		const _V __hi_exp = __hi & __inf;
		//((__hi + __hi) & __inf) ^ __inf almost works for computing
		//__scale, except when (__hi + __hi) & __inf == __inf, in which
		// case __scale
		// becomes 0 (should be min/2 instead) and thus loses the
		// information from __lo.
#ifdef __FAST_MATH__
		// 2 * bits(1.0) - bits(2^k) == bits(2^-k): exact reciprocal
		// of a power of two via integer arithmetic on the bit pattern
		using _Ip = __int_for_sizeof_t<_Tp>;
		using _IV = rebind_simd_t<_Ip, _V>;
		const auto __as_int = __bit_cast<_IV>(__hi_exp);
		const _V __scale
		  = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
		const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
		// largest subnormal == all mantissa bits set, exponent 0
		constexpr _Tp __mant_mask
		  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
		const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
		__l0 *= __scale;
		__l1 *= __scale;
		const _V __lo
		  = __l0 * __l0
		    + __l1 * __l1; // add the two smaller values first
		return __hi_exp * sqrt(__lo + __h1 * __h1);
	      }
	    else
	      {
		// slower path to support subnormals
		// if __hi is subnormal, avoid scaling by inf & final mul by 0
		// (which yields NaN) by using min()
		_V __scale = _V(1 / __norm_min_v<_Tp>);
		// invert exponent w/o error and w/o using the slow divider
		// unit: xor inverts the exponent but off by 1. Multiplication
		// with .5 adjusts for the discrepancy.
		where(__hi >= __norm_min_v<_Tp>, __scale)
		  = ((__hi & __inf) ^ __inf) * _Tp(.5);
		// adjust final exponent for subnormal inputs
		_V __hi_exp = __norm_min_v<_Tp>;
		where(__hi >= __norm_min_v<_Tp>, __hi_exp)
		  = __hi & __inf;         // no error
		_V __h1 = __hi * __scale; // no error
		__l0 *= __scale;          // no error
		__l1 *= __scale;          // no error
		_V __lo = __l0 * __l0
			  + __l1 * __l1; // add the two smaller values first
		_V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
		// fixup for Annex F requirements
		_V __fixup = __hi; // __lo == 0
		// where(__lo == 0, __fixup)                   = __hi;
		where(isunordered(__x, __y + __z), __fixup)
		  = __quiet_NaN_v<_Tp>;
		where(isinf(__absx) || isinf(__absy) || isinf(__absz), __fixup)
		  = __inf;
		// Instead of __lo == 0, the following could depend on __h1² ==
		// __h1² + __lo (i.e. __hi is so much larger than the other two
		// inputs that the result is exactly __hi). While this may
		// improve precision, it is likely to reduce efficiency if the
		// ISA has FMAs (because __h1² + __lo is an FMA, but the
		// intermediate
		// __h1² must be kept)
		where(!(__lo == 0 || isunordered(__x, __y + __z)
			|| isinf(__absx) || isinf(__absy) || isinf(__absz)),
		      __fixup)
		  = __r;
		__r = __fixup;
#endif
		return __r;
	      }
	  }
      }
  }
1263 
  // hypot(x, y, z): public ternary overload (3D Euclidean norm).  As with
  // the binary overload, fixed-size simds are forwarded by const reference,
  // other ABIs by value.
  template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
	const simd<_Tp, _Abi>& __z)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
				 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(__x,
									   __y,
									   __z);
  }

_GLIBCXX_SIMD_CVTING3(hypot)
1276 
// pow, sqrt, error/gamma functions, rounding, and remainder operations —
// all generated from the corresponding scalar std:: functions by the
// _GLIBCXX_SIMD_MATH_CALL*_ macros.
_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)
1300 
// copysign(x, y): |x| with the sign of y, element-wise.
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      return std::copysign(__x[0], __y[0]);
    else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12)
      // Remove this case once __bit_cast is implemented via __builtin_bit_cast.
      // It is necessary, because __signmask below cannot be computed at compile
      // time.
      return simd<_Tp, _Abi>(
	[&](auto __i) { return std::copysign(__x[__i], __y[__i]); });
    else
      {
	using _V = simd<_Tp, _Abi>;
	using namespace std::experimental::__float_bitwise_operators;
	// 1 and -1 differ only in the sign bit, so their xor is exactly the
	// sign-bit mask
	_GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
	// __x ^ __signmask flips only the sign bit of __x, so
	// __x & (__x ^ __signmask) clears it; or-in the sign bit of __y
	return (__x & (__x ^ __signmask)) | (__y & __signmask);
      }
  }
1321 
// remaining two/three-argument functions and classification helpers,
// generated from the scalar std:: overloads
_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)
1332 
// isnan and isinf require special treatment because old glibc may declare
// `int isinf(double)`.
// Both are written by hand (instead of via _GLIBCXX_SIMD_MATH_CALL_) and
// forward directly to the ABI implementation; the return type is the
// simd_mask corresponding to a `bool` scalar result.
template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }

template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }

_GLIBCXX_SIMD_MATH_CALL_(isnormal)
1348 
1349 template <typename..., typename _Tp, typename _Abi>
1350   simd_mask<_Tp, _Abi>
1351   signbit(simd<_Tp, _Abi> __x)
1352   {
1353     if constexpr (is_integral_v<_Tp>)
1354       {
1355 	if constexpr (is_unsigned_v<_Tp>)
1356 	  return simd_mask<_Tp, _Abi>{}; // false
1357 	else
1358 	  return __x < 0;
1359       }
1360     else
1361       return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
1362   }
1363 
// quiet (non-signaling) comparison predicates, element-wise
_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)
1370 
1371 /* not covered in [parallel.simd.math]
1372 template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
1373 template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
1374 template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);
1375 
1376 template <typename _V> struct simd_div_t {
1377     _V quot, rem;
1378 };
1379 
1380 template <typename _Abi>
1381 simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer,
1382 					 _SCharv<_Abi> denom);
1383 template <typename _Abi>
1384 simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer,
1385 					 __shortv<_Abi> denom);
1386 template <typename _Abi>
1387 simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
1388 template <typename _Abi>
1389 simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer,
1390 					__longv<_Abi> denom);
1391 template <typename _Abi>
1392 simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer,
1393 					 __llongv<_Abi> denom);
1394 */
1395 
1396 // special math {{{
1397 template <typename _Tp, typename _Abi>
1398   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1399   assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1400 		 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1401 		 const simd<_Tp, _Abi>& __x)
1402   {
1403     return simd<_Tp, _Abi>([&](auto __i) {
1404       return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
1405     });
1406   }
1407 
1408 template <typename _Tp, typename _Abi>
1409   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1410   assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1411 		 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1412 		 const simd<_Tp, _Abi>& __x)
1413   {
1414     return simd<_Tp, _Abi>([&](auto __i) {
1415       return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
1416     });
1417   }
1418 
// mathematical special functions with no integer parameters, generated from
// the scalar std:: overloads
_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)
1431 
1432 template <typename _Tp, typename _Abi>
1433   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1434   hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1435 	  const simd<_Tp, _Abi>& __x)
1436   {
1437     return simd<_Tp, _Abi>(
1438       [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
1439   }
1440 
1441 template <typename _Tp, typename _Abi>
1442   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1443   laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1444 	   const simd<_Tp, _Abi>& __x)
1445   {
1446     return simd<_Tp, _Abi>(
1447       [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
1448   }
1449 
1450 template <typename _Tp, typename _Abi>
1451   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1452   legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1453 	   const simd<_Tp, _Abi>& __x)
1454   {
1455     return simd<_Tp, _Abi>(
1456       [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
1457   }
1458 
1459 _GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)
1460 
1461 template <typename _Tp, typename _Abi>
1462   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1463   sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1464 	     const simd<_Tp, _Abi>& __x)
1465   {
1466     return simd<_Tp, _Abi>(
1467       [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
1468   }
1469 
1470 template <typename _Tp, typename _Abi>
1471   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1472   sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
1473 	       const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
1474 	       const simd<_Tp, _Abi>& theta)
1475   {
1476     return simd<_Tp, _Abi>([&](auto __i) {
1477       return std::assoc_legendre(__l[__i], __m[__i], theta[__i]);
1478     });
1479   }
1480 
1481 template <typename _Tp, typename _Abi>
1482   enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
1483   sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
1484 	      const simd<_Tp, _Abi>& __x)
1485   {
1486     return simd<_Tp, _Abi>(
1487       [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
1488   }
1489 // }}}
1490 
1491 #undef _GLIBCXX_SIMD_MATH_CALL_
1492 #undef _GLIBCXX_SIMD_MATH_CALL2_
1493 #undef _GLIBCXX_SIMD_MATH_CALL3_
1494 
1495 _GLIBCXX_SIMD_END_NAMESPACE
1496 
1497 #endif // __cplusplus >= 201703L
1498 #endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
1499 
1500 // vim: foldmethod=marker sw=2 ts=8 noet sts=2
1501