// Math overloads for simd -*- C++ -*-

// Copyright (C) 2020-2021 Free Software Foundation, Inc.
//
// This file is part of the GNU ISO C++ Library.  This library is free
// software; you can redistribute it and/or modify it under the
// terms of the GNU General Public License as published by the
// Free Software Foundation; either version 3, or (at your option)
// any later version.

// This library is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
// GNU General Public License for more details.

// Under Section 7 of GPL version 3, you are granted additional
// permissions described in the GCC Runtime Library Exception, version
// 3.1, as published by the Free Software Foundation.

// You should have received a copy of the GNU General Public License and
// a copy of the GCC Runtime Library Exception along with this program;
// see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
// <http://www.gnu.org/licenses/>.

#ifndef _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_
#define _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

#if __cplusplus >= 201703L

#include <utility>
#include <iomanip>

_GLIBCXX_SIMD_BEGIN_NAMESPACE
template <typename _Tp, typename _V>
  using _Samesize = fixed_size_simd<_Tp, _V::size()>;

// _Math_return_type {{{
template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type;

template <typename _DoubleR, typename _Tp, typename _Abi>
  using _Math_return_type_t
    = typename _Math_return_type<_DoubleR, _Tp, _Abi>::type;

template <typename _Tp, typename _Abi>
  struct _Math_return_type<double, _Tp, _Abi>
  { using type = simd<_Tp, _Abi>; };

template <typename _Tp, typename _Abi>
  struct _Math_return_type<bool, _Tp, _Abi>
  { using type = simd_mask<_Tp, _Abi>; };

template <typename _DoubleR, typename _Tp, typename _Abi>
  struct _Math_return_type
  { using type = fixed_size_simd<_DoubleR, simd_size_v<_Tp, _Abi>>; };

//}}}
// _GLIBCXX_SIMD_MATH_CALL_ {{{
#define _GLIBCXX_SIMD_MATH_CALL_(__name)                                      \
template <typename _Tp, typename _Abi, typename...,                          \
	  typename _R = _Math_return_type_t<                                  \
	    decltype(std::__name(declval<double>())), _Tp, _Abi>>             \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                   \
  __name(simd<_Tp, _Abi> __x)                                                 \
  { return {__private_init, _Abi::_SimdImpl::_S_##__name(__data(__x))}; }

// }}}
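// As an illustration (hypothetical expansion, not part of the header):
// _GLIBCXX_SIMD_MATH_CALL_(acos) produces roughly the following overload.
// Since std::acos(double) returns double, _Math_return_type_t selects
// simd<_Tp, _Abi> as the return type:
//
//   template <typename _Tp, typename _Abi, typename...,
//             typename _R = _Math_return_type_t<double, _Tp, _Abi>>
//     enable_if_t<is_floating_point_v<_Tp>, _R>
//     acos(simd<_Tp, _Abi> __x)
//     { return {__private_init, _Abi::_SimdImpl::_S_acos(__data(__x))}; }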
//_Extra_argument_type{{{
template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type;

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp*, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>*;
    static constexpr double* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Up*, _Tp, _Abi>
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>*;
    static constexpr _Up* declval();
    static constexpr bool __needs_temporary_scalar = true;

    _GLIBCXX_SIMD_INTRINSIC static constexpr auto _S_data(type __x)
    { return &__data(*__x); }
  };

template <typename _Tp, typename _Abi>
  struct _Extra_argument_type<_Tp, _Tp, _Abi>
  {
    using type = simd<_Tp, _Abi>;
    static constexpr double declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

template <typename _Up, typename _Tp, typename _Abi>
  struct _Extra_argument_type
  {
    static_assert(is_integral_v<_Up>);
    using type = fixed_size_simd<_Up, simd_size_v<_Tp, _Abi>>;
    static constexpr _Up declval();
    static constexpr bool __needs_temporary_scalar = false;

    _GLIBCXX_SIMD_INTRINSIC static constexpr decltype(auto)
    _S_data(const type& __x)
    { return __data(__x); }
  };

//}}}
// _GLIBCXX_SIMD_MATH_CALL2_ {{{
#define _GLIBCXX_SIMD_MATH_CALL2_(__name, arg2_)                              \
template <                                                                    \
  typename _Tp, typename _Abi, typename...,                                   \
  typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>,                    \
  typename _R = _Math_return_type_t<                                          \
    decltype(std::__name(declval<double>(), _Arg2::declval())), _Tp, _Abi>>   \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                   \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y)         \
  {                                                                           \
    return {__private_init,                                                   \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y))};  \
  }                                                                           \
template <typename _Up, typename _Tp, typename _Abi>                          \
  _GLIBCXX_SIMD_INTRINSIC _Math_return_type_t<                                \
    decltype(std::__name(                                                     \
      declval<double>(),                                                      \
      declval<enable_if_t<                                                    \
	conjunction_v<                                                        \
	  is_same<arg2_, _Tp>,                                                \
	  negation<is_same<__remove_cvref_t<_Up>, simd<_Tp, _Abi>>>,          \
	  is_convertible<_Up, simd<_Tp, _Abi>>, is_floating_point<_Tp>>,      \
	double>>())),                                                         \
    _Tp, _Abi>                                                                \
  __name(_Up&& __xx, const simd<_Tp, _Abi>& __yy)                             \
  { return __name(simd<_Tp, _Abi>(static_cast<_Up&&>(__xx)), __yy); }

// }}}
// _GLIBCXX_SIMD_MATH_CALL3_ {{{
#define _GLIBCXX_SIMD_MATH_CALL3_(__name, arg2_, arg3_)                       \
template <typename _Tp, typename _Abi, typename...,                           \
	  typename _Arg2 = _Extra_argument_type<arg2_, _Tp, _Abi>,             \
	  typename _Arg3 = _Extra_argument_type<arg3_, _Tp, _Abi>,             \
	  typename _R = _Math_return_type_t<                                   \
	    decltype(std::__name(declval<double>(), _Arg2::declval(),          \
				 _Arg3::declval())),                           \
	    _Tp, _Abi>>                                                        \
  enable_if_t<is_floating_point_v<_Tp>, _R>                                    \
  __name(const simd<_Tp, _Abi>& __x, const typename _Arg2::type& __y,          \
	 const typename _Arg3::type& __z)                                      \
  {                                                                            \
    return {__private_init,                                                    \
	    _Abi::_SimdImpl::_S_##__name(__data(__x), _Arg2::_S_data(__y),     \
					 _Arg3::_S_data(__z))};                \
  }                                                                            \
template <                                                                     \
  typename _T0, typename _T1, typename _T2, typename...,                       \
  typename _U0 = __remove_cvref_t<_T0>,                                        \
  typename _U1 = __remove_cvref_t<_T1>,                                        \
  typename _U2 = __remove_cvref_t<_T2>,                                        \
  typename _Simd = conditional_t<is_simd_v<_U1>, _U1, _U2>,                    \
  typename = enable_if_t<conjunction_v<                                        \
    is_simd<_Simd>, is_convertible<_T0&&, _Simd>,                              \
    is_convertible<_T1&&, _Simd>, is_convertible<_T2&&, _Simd>,                \
    negation<conjunction<                                                      \
      is_simd<_U0>, is_floating_point<__value_type_or_identity_t<_U0>>>>>>>    \
  _GLIBCXX_SIMD_INTRINSIC decltype(__name(declval<const _Simd&>(),             \
					  declval<const _Simd&>(),             \
					  declval<const _Simd&>()))            \
  __name(_T0&& __xx, _T1&& __yy, _T2&& __zz)                                   \
  {                                                                            \
    return __name(_Simd(static_cast<_T0&&>(__xx)),                             \
		  _Simd(static_cast<_T1&&>(__yy)),                             \
		  _Simd(static_cast<_T2&&>(__zz)));                            \
  }

// }}}
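// Illustration of the _Extra_argument_type mapping (derived from the
// specializations above, not additional code): with
// _GLIBCXX_SIMD_MATH_CALL2_(ldexp, int) the primary template turns the
// scalar int parameter of std::ldexp into a fixed_size_simd, yielding
//
//   simd<_Tp, _Abi>
//   ldexp(const simd<_Tp, _Abi>& __x,
//         const fixed_size_simd<int, simd_size_v<_Tp, _Abi>>& __exp);
//
// arg2_ = _Tp (e.g. atan2, pow) maps to a second simd<_Tp, _Abi>, and
// arg2_ = int* (remquo) maps to fixed_size_simd<int, N>*.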
// __cosSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __cosSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = 0x1.ap-16f;                  //  1/8!
    __y = __y * __x2 - 0x1.6c1p-10f;   // -1/6!
    __y = __y * __x2 + 0x1.555556p-5f; //  1/4!
    return __y * (__x2 * __x2) - .5f * __x2 + 1.f;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __cosSeries(const simd<double, _Abi>& __x)
  {
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = 0x1.AC00000000000p-45;              //  1/16!
    __y = __y * __x2 - 0x1.9394000000000p-37; // -1/14!
    __y = __y * __x2 + 0x1.1EED8C0000000p-29; //  1/12!
    __y = __y * __x2 - 0x1.27E4FB7400000p-22; // -1/10!
    __y = __y * __x2 + 0x1.A01A01A018000p-16; //  1/8!
    __y = __y * __x2 - 0x1.6C16C16C16C00p-10; // -1/6!
    __y = __y * __x2 + 0x1.5555555555554p-5;  //  1/4!
    return (__y * __x2 - .5f) * __x2 + 1.f;
  }

// }}}
// __sinSeries {{{
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<float, _Abi>
  __sinSeries(const simd<float, _Abi>& __x)
  {
    const simd<float, _Abi> __x2 = __x * __x;
    simd<float, _Abi> __y;
    __y = -0x1.9CC000p-13f;            // -1/7!
    __y = __y * __x2 + 0x1.111100p-7f; //  1/5!
    __y = __y * __x2 - 0x1.555556p-3f; // -1/3!
    return __y * (__x2 * __x) + __x;
  }

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE static simd<double, _Abi>
  __sinSeries(const simd<double, _Abi>& __x)
  {
    // __x  = [0, 0.7854 = pi/4]
    // __x² = [0, 0.6169 = pi²/8]
    const simd<double, _Abi> __x2 = __x * __x;
    simd<double, _Abi> __y;
    __y = -0x1.ACF0000000000p-41;             // -1/15!
    __y = __y * __x2 + 0x1.6124400000000p-33; //  1/13!
    __y = __y * __x2 - 0x1.AE64567000000p-26; // -1/11!
    __y = __y * __x2 + 0x1.71DE3A5540000p-19; //  1/9!
    __y = __y * __x2 - 0x1.A01A01A01A000p-13; // -1/7!
    __y = __y * __x2 + 0x1.1111111111110p-7;  //  1/5!
    __y = __y * __x2 - 0x1.5555555555555p-3;  // -1/3!
    return __y * (__x2 * __x) + __x;
  }

// }}}
// __zero_low_bits {{{
template <int _Bits, typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  __zero_low_bits(simd<_Tp, _Abi> __x)
  {
    const simd<_Tp, _Abi> __bitmask
      = __bit_cast<_Tp>(~make_unsigned_t<__int_for_sizeof_t<_Tp>>() << _Bits);
    return {__private_init,
	    _Abi::_SimdImpl::_S_bit_and(__data(__x), __data(__bitmask))};
  }

// }}}
// __fold_input {{{

/**@internal
 * Fold @p x into [-¼π, ¼π] and remember the quadrant it came from:
 * quadrant 0: [-¼π,  ¼π]
 * quadrant 1: [ ¼π,  ¾π]
 * quadrant 2: [ ¾π, 1¼π]
 * quadrant 3: [1¼π, 1¾π]
 *
 * The algorithm determines `y` as the multiple `x - y * ¼π = [-¼π, ¼π]`. Using
 * a bitmask, `y` is reduced to `quadrant`. `y` can be calculated as
 * ```
 * y = trunc(x / ¼π);
 * y += fmod(y, 2);
 * ```
 * This can be simplified by moving the (implicit) division by 2 into the
 * truncation expression. The `+= fmod` effect can then be achieved by using
 * rounding instead of truncation: `y = round(x / ½π) * 2`. If precision
 * allows, `2/π * x` is better (faster).
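 *
 * A scalar sketch of the folding (illustrative only; the vector code below
 * additionally splits π/2 into multiple parts to keep the subtraction
 * precise):
 * ```
 * double fold_scalar(double x, int* quadrant) // hypothetical helper
 * {
 *   const double y = nearbyint(x * (2 / M_PI)); // = round(x / ½π)
 *   *quadrant = static_cast<int>(y) & 3;        // y mod 4
 *   return x - y * (M_PI / 2);                  // result in [-¼π, ¼π]
 * }
 * ```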
 */
template <typename _Tp, typename _Abi>
  struct _Folded
  {
    simd<_Tp, _Abi> _M_x;
    rebind_simd_t<int, simd<_Tp, _Abi>> _M_quadrant;
  };

namespace __math_float {
inline constexpr float __pi_over_4 = 0x1.921FB6p-1f; // π/4
inline constexpr float __2_over_pi = 0x1.45F306p-1f; // 2/π
inline constexpr float __pi_2_5bits0
  = 0x1.921fc0p0f; // π/2, 5 0-bits (least significant)
inline constexpr float __pi_2_5bits0_rem
  = -0x1.5777a6p-21f; // π/2 - __pi_2_5bits0
} // namespace __math_float
namespace __math_double {
inline constexpr double __pi_over_4 = 0x1.921fb54442d18p-1; // π/4
inline constexpr double __2_over_pi = 0x1.45F306DC9C883p-1; // 2/π
inline constexpr double __pi_2 = 0x1.921fb54442d18p0;       // π/2
} // namespace __math_double

template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<float, _Abi>
  __fold_input(const simd<float, _Abi>& __x)
  {
    using _V = simd<float, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_float;
    _Folded<float, _Abi> __r;
    __r._M_x = abs(__x);
#if 0
    // zero most mantissa bits:
    constexpr float __1_over_pi = 0x1.45F306p-2f; // 1/π
    const auto __y = (__r._M_x * __1_over_pi + 0x1.8p23f) - 0x1.8p23f;
    // split π into 4 parts, the first three with 13 trailing zeros (to make
    // the following multiplications precise):
    constexpr float __pi0 = 0x1.920000p1f;
    constexpr float __pi1 = 0x1.fb4000p-11f;
    constexpr float __pi2 = 0x1.444000p-23f;
    constexpr float __pi3 = 0x1.68c234p-38f;
    __r._M_x - __y*__pi0 - __y*__pi1 - __y*__pi2 - __y*__pi3
#else
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      __r._M_quadrant = 0;
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 6 * __pi_over_4)))
      {
	const _V __y = nearbyint(__r._M_x * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // __y mod 4
	__r._M_x -= __y * __pi_2_5bits0;
	__r._M_x -= __y * __pi_2_5bits0_rem;
      }
    else
      {
	using __math_double::__2_over_pi;
	using __math_double::__pi_2;
	using _VD = rebind_simd_t<double, _V>;
	_VD __xd = static_simd_cast<_VD>(__r._M_x);
	_VD __y = nearbyint(__xd * __2_over_pi);
	__r._M_quadrant = static_simd_cast<_IV>(__y) & 3; // = __y mod 4
	__r._M_x = static_simd_cast<_V>(__xd - __y * __pi_2);
      }
#endif
    return __r;
  }

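// Note on the two-step subtraction in the second branch above (a
// Cody & Waite-style argument reduction): __pi_2_5bits0 has its five least
// significant mantissa bits zeroed, and __y is an integer no larger than 3
// in that branch (x < 3π/2), so __y * __pi_2_5bits0 needs at most two extra
// mantissa bits and is computed exactly. The rounding error of the reduction
// is thereby confined to the much smaller term __y * __pi_2_5bits0_rem.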
template <typename _Abi>
  _GLIBCXX_SIMD_ALWAYS_INLINE _Folded<double, _Abi>
  __fold_input(const simd<double, _Abi>& __x)
  {
    using _V = simd<double, _Abi>;
    using _IV = rebind_simd_t<int, _V>;
    using namespace __math_double;

    _Folded<double, _Abi> __r;
    __r._M_x = abs(__x);
    if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__r._M_x < __pi_over_4)))
      {
	__r._M_quadrant = 0;
	return __r;
      }
    const _V __y = nearbyint(__r._M_x / (2 * __pi_over_4));
    __r._M_quadrant = static_simd_cast<_IV>(__y) & 3;

    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__r._M_x < 1025 * __pi_over_4)))
      {
	// x - y * pi/2, y uses no more than 11 mantissa bits
	__r._M_x -= __y * 0x1.921FB54443000p0;
	__r._M_x -= __y * -0x1.73DCB3B39A000p-43;
	__r._M_x -= __y * 0x1.45C06E0E68948p-86;
      }
    else if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__y <= 0x1.0p30)))
      {
	// x - y * pi/2, y uses no more than 29 mantissa bits
	__r._M_x -= __y * 0x1.921FB40000000p0;
	__r._M_x -= __y * 0x1.4442D00000000p-24;
	__r._M_x -= __y * 0x1.8469898CC5170p-48;
      }
    else
      {
	// x - y * pi/2, y may require all mantissa bits
	const _V __y_hi = __zero_low_bits<26>(__y);
	const _V __y_lo = __y - __y_hi;
	const auto __pi_2_1 = 0x1.921FB50000000p0;
	const auto __pi_2_2 = 0x1.110B460000000p-26;
	const auto __pi_2_3 = 0x1.1A62630000000p-54;
	const auto __pi_2_4 = 0x1.8A2E03707344Ap-81;
	__r._M_x = __r._M_x - __y_hi * __pi_2_1
		     - max(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		     - min(__y_hi * __pi_2_2, __y_lo * __pi_2_1)
		     - max(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		     - min(__y_hi * __pi_2_3, __y_lo * __pi_2_2)
		     - max(__y * __pi_2_4, __y_lo * __pi_2_3)
		     - min(__y * __pi_2_4, __y_lo * __pi_2_3);
      }
    return __r;
  }

// }}}
// __extract_exponent_as_int {{{
template <typename _Tp, typename _Abi>
  rebind_simd_t<int, simd<_Tp, _Abi>>
  __extract_exponent_as_int(const simd<_Tp, _Abi>& __v)
  {
    using _Vp = simd<_Tp, _Abi>;
    using _Up = make_unsigned_t<__int_for_sizeof_t<_Tp>>;
    using namespace std::experimental::__float_bitwise_operators;
    const _Vp __exponent_mask
      = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
    return static_simd_cast<rebind_simd_t<int, _Vp>>(
      __bit_cast<rebind_simd_t<_Up, _Vp>>(__v & __exponent_mask)
	>> (__digits_v<_Tp> - 1));
  }

// }}}
// __impl_or_fallback {{{
template <typename ImplFun, typename FallbackFun, typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback_dispatch(int, ImplFun&& __impl_fun, FallbackFun&&,
			      _Args&&... __args)
    -> decltype(__impl_fun(static_cast<_Args&&>(__args)...))
  { return __impl_fun(static_cast<_Args&&>(__args)...); }

template <typename ImplFun, typename FallbackFun, typename... _Args>
  inline auto
  __impl_or_fallback_dispatch(float, ImplFun&&, FallbackFun&& __fallback_fun,
			      _Args&&... __args)
    -> decltype(__fallback_fun(static_cast<_Args&&>(__args)...))
  { return __fallback_fun(static_cast<_Args&&>(__args)...); }

template <typename... _Args>
  _GLIBCXX_SIMD_INTRINSIC auto
  __impl_or_fallback(_Args&&... __args)
  {
    return __impl_or_fallback_dispatch(int(), static_cast<_Args&&>(__args)...);
  }
//}}}

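// For illustration, __impl_or_fallback selects between two callables via the
// int -> float overload ranking above: the first overload is preferred but
// SFINAEd away when the impl callable is not invocable with the arguments.
// A hypothetical use (the member functions are made up):
//
//   __impl_or_fallback(
//     [](auto __x) -> decltype(__x.__fast_path()) { return __x.__fast_path(); },
//     [](auto __x) { return __x.__generic_path(); },
//     __obj);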
// trigonometric functions {{{
_GLIBCXX_SIMD_MATH_CALL_(acos)
_GLIBCXX_SIMD_MATH_CALL_(asin)
_GLIBCXX_SIMD_MATH_CALL_(atan)
_GLIBCXX_SIMD_MATH_CALL2_(atan2, _Tp)

/*
 * algorithm for sine and cosine:
 *
 * The result can be calculated with sine or cosine depending on the π/4
 * section the input is in.
 *   sine   ≈ __x + __x³
 *   cosine ≈ 1 - __x²
 *
 * sine:
 * Map -__x to __x and invert the output.
 * Extend precision of __x - n * π/4 by calculating
 * ((__x - n * p1) - n * p2) - n * p3 (p1 + p2 + p3 = π/4)
 *
 * Calculate Taylor series with tuned coefficients.
 * Fix sign.
 */
// cos{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  cos(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_cos(__data(__x))};
    else
      {
	if constexpr (is_same_v<_Tp, float>)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 393382)))
	    return static_simd_cast<_V>(
	      cos(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	//        0 | cosSeries, +
	//        1 | sinSeries, -
	//        2 | cosSeries, -
	//        3 | sinSeries, +
	using namespace std::experimental::__float_bitwise_operators;
	const _V __sign_flip
	  = _V(-0.f) & static_simd_cast<_V>((1 + __f._M_quadrant) << 30);

	const auto __need_cos = (__f._M_quadrant & 1) == 0;
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_cos)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_cos)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else // some_of(__need_cos)
	  {
	    _V __r = __sinSeries(__f._M_x);
	    where(__need_cos.__cvt(), __r) = __cosSeries(__f._M_x);
	    return __r ^ __sign_flip;
	  }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  cos(simd<_Tp, simd_abi::scalar> __x)
  { return std::cos(__data(__x)); }

//}}}
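// Illustration of the __sign_flip computation in cos() above: as a 32-bit
// int, (1 + quadrant) << 30 is negative exactly for quadrants 1 and 2 (bit
// patterns 0x80000000 and 0xC0000000). static_simd_cast<_V> converts the
// value to floating point, which preserves the sign, and the AND with
// _V(-0.f) keeps only the sign bit. XORing the result onto the series value
// therefore flips the sign for quadrants 1 and 2, matching the quadrant
// table.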
// sin{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sin(const simd<_Tp, _Abi>& __x)
  {
    using _V = simd<_Tp, _Abi>;
    if constexpr (__is_scalar_abi<_Abi>() || __is_fixed_size_abi_v<_Abi>)
      return {__private_init, _Abi::_SimdImpl::_S_sin(__data(__x))};
    else
      {
	if constexpr (is_same_v<_Tp, float>)
	  if (_GLIBCXX_SIMD_IS_UNLIKELY(any_of(abs(__x) >= 527449)))
	    return static_simd_cast<_V>(
	      sin(static_simd_cast<rebind_simd_t<double, _V>>(__x)));

	const auto __f = __fold_input(__x);
	// quadrant | effect
	//        0 | sinSeries
	//        1 | cosSeries
	//        2 | sinSeries, sign flip
	//        3 | cosSeries, sign flip
	using namespace std::experimental::__float_bitwise_operators;
	const auto __sign_flip
	  = (__x ^ static_simd_cast<_V>(1 - __f._M_quadrant)) & _V(_Tp(-0.));

	const auto __need_sin = (__f._M_quadrant & 1) == 0;
	if (_GLIBCXX_SIMD_IS_UNLIKELY(all_of(__need_sin)))
	  return __sign_flip ^ __sinSeries(__f._M_x);
	else if (_GLIBCXX_SIMD_IS_UNLIKELY(none_of(__need_sin)))
	  return __sign_flip ^ __cosSeries(__f._M_x);
	else // some_of(__need_sin)
	  {
	    _V __r = __cosSeries(__f._M_x);
	    where(__need_sin.__cvt(), __r) = __sinSeries(__f._M_x);
	    return __sign_flip ^ __r;
	  }
      }
  }

template <typename _Tp>
  _GLIBCXX_SIMD_ALWAYS_INLINE
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, simd_abi::scalar>>
  sin(simd<_Tp, simd_abi::scalar> __x)
  { return std::sin(__data(__x)); }

//}}}
_GLIBCXX_SIMD_MATH_CALL_(tan)
_GLIBCXX_SIMD_MATH_CALL_(acosh)
_GLIBCXX_SIMD_MATH_CALL_(asinh)
_GLIBCXX_SIMD_MATH_CALL_(atanh)
_GLIBCXX_SIMD_MATH_CALL_(cosh)
_GLIBCXX_SIMD_MATH_CALL_(sinh)
_GLIBCXX_SIMD_MATH_CALL_(tanh)
// }}}
// exponential functions {{{
_GLIBCXX_SIMD_MATH_CALL_(exp)
_GLIBCXX_SIMD_MATH_CALL_(exp2)
_GLIBCXX_SIMD_MATH_CALL_(expm1)

// }}}
// frexp {{{
#if _GLIBCXX_SIMD_X86INTRIN
template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getexp(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getexp_ps(__to_intrin(__x)));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getexp_ps(__auto_bitcast(__to_intrin(__x))));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getexp_ps(__x);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getexp_ps(__auto_bitcast(__x)));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getexp_pd(__x);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getexp_pd(__auto_bitcast(__x)));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getexp_ps(__x);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getexp_pd(__x);
    else
      __assert_unreachable<_Tp>();
  }

template <typename _Tp, size_t _Np>
  _SimdWrapper<_Tp, _Np>
  __getmant_avx512(_SimdWrapper<_Tp, _Np> __x)
  {
    if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm_getmant_ps(__to_intrin(__x),
					   _MM_MANT_NORM_p5_1,
					   _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512f && __is_sse_ps<_Tp, _Np>())
      return __auto_bitcast(_mm512_getmant_ps(__auto_bitcast(__to_intrin(__x)),
					      _MM_MANT_NORM_p5_1,
					      _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return _mm_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_sse_pd<_Tp, _Np>())
      return __lo128(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return _mm256_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return __lo256(_mm512_getmant_ps(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return _mm256_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return __lo256(_mm512_getmant_pd(__auto_bitcast(__x), _MM_MANT_NORM_p5_1,
				       _MM_MANT_SIGN_src));
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return _mm512_getmant_ps(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return _mm512_getmant_pd(__x, _MM_MANT_NORM_p5_1, _MM_MANT_SIGN_src);
    else
      __assert_unreachable<_Tp>();
  }
#endif // _GLIBCXX_SIMD_X86INTRIN

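// Note on the intrinsics above: the AVX512 getexp/getmant instructions
// provide frexp-like semantics in hardware. getexp(x) returns
// floor(log2(|x|)), i.e. the unbiased exponent, as a floating-point value;
// getmant with _MM_MANT_NORM_p5_1/_MM_MANT_SIGN_src returns the mantissa
// normalized to [0.5, 1) carrying the sign of the source. The `1 +` in
// frexp() below converts between the two conventions:
//   x == getmant(x) * 2^(getexp(x) + 1)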
/**
 * splits @p __v into exponent and mantissa; the sign is kept with the
 * mantissa
 *
 * The return value will be in the range [0.5, 1.0[
 * The @p __e value will be an integer defining the power-of-two exponent
 */
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  frexp(const simd<_Tp, _Abi>& __x, _Samesize<int, simd<_Tp, _Abi>>* __exp)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      {
	int __tmp;
	const auto __r = std::frexp(__x[0], &__tmp);
	(*__exp)[0] = __tmp;
	return __r;
      }
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
	return {__private_init,
		_Abi::_SimdImpl::_S_frexp(__data(__x), __data(*__exp))};
#if _GLIBCXX_SIMD_X86INTRIN
      }
    else if constexpr (__have_avx512f)
      {
	constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
	constexpr size_t _NI = _Np < 4 ? 4 : _Np;
	const auto __v = __data(__x);
	const auto __isnonzero
	  = _Abi::_SimdImpl::_S_isnonzerovalue_mask(__v._M_data);
	const _SimdWrapper<int, _NI> __exp_plus1
	  = 1 + __convert<_SimdWrapper<int, _NI>>(__getexp(__v))._M_data;
	const _SimdWrapper<int, _Np> __e = __wrapper_bitcast<int, _Np>(
	  _Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _NI>(__isnonzero),
				      _SimdWrapper<int, _NI>(), __exp_plus1));
	simd_abi::deduce_t<int, _Np>::_CommonImpl::_S_store(__e, __exp);
	return {__private_init,
		_Abi::_CommonImpl::_S_blend(_SimdWrapper<bool, _Np>(
					      __isnonzero),
					    __v, __getmant_avx512(__v))};
#endif // _GLIBCXX_SIMD_X86INTRIN
      }
    else
      {
	// fallback implementation
	static_assert(sizeof(_Tp) == 4 || sizeof(_Tp) == 8);
	using _V = simd<_Tp, _Abi>;
	using _IV = rebind_simd_t<int, _V>;
	using namespace std::experimental::__proposed;
	using namespace std::experimental::__float_bitwise_operators;

	constexpr int __exp_adjust = sizeof(_Tp) == 4 ? 0x7e : 0x3fe;
	constexpr int __exp_offset = sizeof(_Tp) == 4 ? 0x70 : 0x200;
	constexpr _Tp __subnorm_scale = sizeof(_Tp) == 4 ? 0x1p112 : 0x1p512;
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __exponent_mask
	  = __infinity_v<_Tp>; // 0x7f800000 or 0x7ff0000000000000
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __p5_1_exponent
	  = -(2 - __epsilon_v<_Tp>) / 2; // 0xbf7fffff or 0xbfefffffffffffff

	_V __mant = __p5_1_exponent & (__exponent_mask | __x); // +/-[.5, 1)
	const _IV __exponent_bits = __extract_exponent_as_int(__x);
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))))
	  {
	    *__exp
	      = simd_cast<_Samesize<int, _V>>(__exponent_bits - __exp_adjust);
	    return __mant;
	  }

#if __FINITE_MATH_ONLY__
	// at least one element of __x is 0 or subnormal, the rest is normal
	// (inf and NaN are excluded by -ffinite-math-only)
	const auto __iszero_inf_nan = __x == 0;
#else
	const auto __as_int
	  = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(abs(__x));
	const auto __inf
	  = __bit_cast<rebind_simd_t<__int_for_sizeof_t<_Tp>, _V>>(
	      _V(__infinity_v<_Tp>));
	const auto __iszero_inf_nan = static_simd_cast<typename _V::mask_type>(
	  __as_int == 0 || __as_int >= __inf);
#endif

	const _V __scaled_subnormal = __x * __subnorm_scale;
	const _V __mant_subnormal
	  = __p5_1_exponent & (__exponent_mask | __scaled_subnormal);
	where(!isnormal(__x), __mant) = __mant_subnormal;
	where(__iszero_inf_nan, __mant) = __x;
	_IV __e = __extract_exponent_as_int(__scaled_subnormal);
	using _MaskType =
	  typename conditional_t<sizeof(typename _V::value_type) == sizeof(int),
				 _V, _IV>::mask_type;
	const _MaskType __value_isnormal = isnormal(__x).__cvt();
	where(__value_isnormal.__cvt(), __e) = __exponent_bits;
	static_assert(sizeof(_IV) == sizeof(__value_isnormal));
	const _IV __offset
	  = (__bit_cast<_IV>(__value_isnormal) & _IV(__exp_adjust))
	      | (__bit_cast<_IV>(static_simd_cast<_MaskType>(__exponent_bits == 0)
				   & static_simd_cast<_MaskType>(__x != 0))
		   & _IV(__exp_adjust + __exp_offset));
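	// __offset selects the exponent-bias correction per element:
	// __exp_adjust (0x7e resp. 0x3fe) for normal inputs,
	// __exp_adjust + __exp_offset for subnormal inputs (the extra 0x70
	// resp. 0x200 undoes the multiplication by __subnorm_scale above),
	// and 0 for zero, inf, and NaN.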
	*__exp = simd_cast<_Samesize<int, _V>>(__e - __offset);
	return __mant;
      }
  }

// }}}
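// Example of the frexp contract established above (illustrative):
//
//   simd<float> __v = 12.f;
//   _Samesize<int, simd<float>> __e;
//   const simd<float> __m = frexp(__v, &__e);
//   // all_of(__m == .75f) and all_of(__e == 4): 12 == 0.75 * 2^4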
_GLIBCXX_SIMD_MATH_CALL2_(ldexp, int)
_GLIBCXX_SIMD_MATH_CALL_(ilogb)

// logarithms {{{
_GLIBCXX_SIMD_MATH_CALL_(log)
_GLIBCXX_SIMD_MATH_CALL_(log10)
_GLIBCXX_SIMD_MATH_CALL_(log1p)
_GLIBCXX_SIMD_MATH_CALL_(log2)

//}}}
// logb{{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point<_Tp>::value, simd<_Tp, _Abi>>
  logb(const simd<_Tp, _Abi>& __x)
  {
    constexpr size_t _Np = simd_size_v<_Tp, _Abi>;
    if constexpr (_Np == 1)
      return std::logb(__x[0]);
    else if constexpr (__is_fixed_size_abi_v<_Abi>)
      {
	return {__private_init,
		__data(__x)._M_apply_per_chunk([](auto __impl, auto __xx) {
		  using _V = typename decltype(__impl)::simd_type;
		  return __data(
		    std::experimental::logb(_V(__private_init, __xx)));
		})};
      }
#if _GLIBCXX_SIMD_X86INTRIN // {{{
    else if constexpr (__have_avx512vl && __is_sse_ps<_Tp, _Np>())
      return {__private_init,
	      __auto_bitcast(_mm_getexp_ps(__to_intrin(__as_vector(__x))))};
    else if constexpr (__have_avx512vl && __is_sse_pd<_Tp, _Np>())
      return {__private_init, _mm_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_ps<_Tp, _Np>())
      return {__private_init, _mm256_getexp_ps(__data(__x))};
    else if constexpr (__have_avx512vl && __is_avx_pd<_Tp, _Np>())
      return {__private_init, _mm256_getexp_pd(__data(__x))};
    else if constexpr (__have_avx512f && __is_avx_ps<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_ps(__auto_bitcast(__data(__x))))};
    else if constexpr (__have_avx512f && __is_avx_pd<_Tp, _Np>())
      return {__private_init,
	      __lo256(_mm512_getexp_pd(__auto_bitcast(__data(__x))))};
    else if constexpr (__is_avx512_ps<_Tp, _Np>())
      return {__private_init, _mm512_getexp_ps(__data(__x))};
    else if constexpr (__is_avx512_pd<_Tp, _Np>())
      return {__private_init, _mm512_getexp_pd(__data(__x))};
#endif // _GLIBCXX_SIMD_X86INTRIN }}}
    else
      {
	using _V = simd<_Tp, _Abi>;
	using namespace std::experimental::__proposed;
	auto __is_normal = isnormal(__x);

	// work on abs(__x) to reflect the return value on Linux for negative
	// inputs (domain-error => implementation-defined value is returned)
	const _V abs_x = abs(__x);

	// __exponent(__x) returns the exponent value (bias removed) as
	// simd<_Up> with integral _Up
	auto&& __exponent = [](const _V& __v) {
	  using namespace std::experimental::__proposed;
	  using _IV = rebind_simd_t<
	    conditional_t<sizeof(_Tp) == sizeof(_LLong), _LLong, int>, _V>;
	  return (__bit_cast<_IV>(__v) >> (__digits_v<_Tp> - 1))
		   - (__max_exponent_v<_Tp> - 1);
	};
	_V __r = static_simd_cast<_V>(__exponent(abs_x));
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(__is_normal)))
	  // without corner cases (nan, inf, subnormal, zero) we have our
	  // answer:
	  return __r;
	const auto __is_zero = __x == 0;
	const auto __is_nan = isnan(__x);
	const auto __is_inf = isinf(__x);
	where(__is_zero, __r) = -__infinity_v<_Tp>;
	where(__is_nan, __r) = __x;
	where(__is_inf, __r) = __infinity_v<_Tp>;
	__is_normal |= __is_zero || __is_nan || __is_inf;
	if (all_of(__is_normal))
	  // at this point everything but subnormals is handled
	  return __r;
	// subnormals repeat the exponent extraction after multiplication of
	// the input with a floating point value that has 112 (0x70) in its
	// exponent (not too big for sp and large enough for dp)
	const _V __scaled = abs_x * _Tp(0x1p112);
	_V __scaled_exp = static_simd_cast<_V>(__exponent(__scaled) - 112);
	where(__is_normal, __scaled_exp) = __r;
	return __scaled_exp;
      }
  }

//}}}
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  modf(const simd<_Tp, _Abi>& __x, simd<_Tp, _Abi>* __iptr)
  {
    if constexpr (__is_scalar_abi<_Abi>()
		    || (__is_fixed_size_abi_v<_Abi>
			  && simd_size_v<_Tp, _Abi> == 1))
      {
	_Tp __tmp;
	_Tp __r = std::modf(__x[0], &__tmp);
	__iptr[0] = __tmp;
	return __r;
      }
    else
      {
	const auto __integral = trunc(__x);
	*__iptr = __integral;
	auto __r = __x - __integral;
#if !__FINITE_MATH_ONLY__
	where(isinf(__x), __r) = _Tp();
#endif
	return copysign(__r, __x);
      }
  }

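// Example of the modf semantics implemented above (illustrative):
//
//   simd<double> __whole;
//   const simd<double> __frac = modf(simd<double>(-3.25), &__whole);
//   // all_of(__frac == -.25) and all_of(__whole == -3.)
//
// For ±infinity the isinf fixup and the final copysign yield a fractional
// part of ±0 while *__iptr receives ±infinity, as Annex F requires.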
_GLIBCXX_SIMD_MATH_CALL2_(scalbn, int)
_GLIBCXX_SIMD_MATH_CALL2_(scalbln, long)

_GLIBCXX_SIMD_MATH_CALL_(cbrt)

_GLIBCXX_SIMD_MATH_CALL_(abs)
_GLIBCXX_SIMD_MATH_CALL_(fabs)

// [parallel.simd.math] only asks for is_floating_point_v<_Tp> and forgot to
// allow signed integral _Tp
template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  abs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

template <typename _Tp, typename _Abi>
  enable_if_t<!is_floating_point_v<_Tp> && is_signed_v<_Tp>, simd<_Tp, _Abi>>
  fabs(const simd<_Tp, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::_S_abs(__data(__x))}; }

// the following are overloads for functions in <cstdlib> and not covered by
// [parallel.simd.math]. I don't see much value in making them work, though.
/*
template <typename _Abi>
  simd<long, _Abi>
  labs(const simd<long, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }

template <typename _Abi>
  simd<long long, _Abi>
  llabs(const simd<long long, _Abi>& __x)
  { return {__private_init, _Abi::_SimdImpl::abs(__data(__x))}; }
*/

#define _GLIBCXX_SIMD_CVTING2(_NAME)                                           \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x,                                                \
    const __type_identity_t<simd<_Tp, _Abi>>& __y)                             \
  {                                                                            \
    return _NAME(__x, __y);                                                    \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                             \
    const simd<_Tp, _Abi>& __y)                                                \
  {                                                                            \
    return _NAME(__x, __y);                                                    \
  }

#define _GLIBCXX_SIMD_CVTING3(_NAME)                                           \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                             \
    const simd<_Tp, _Abi>& __y, const simd<_Tp, _Abi>& __z)                    \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x,                                                \
    const __type_identity_t<simd<_Tp, _Abi>>& __y,                             \
    const simd<_Tp, _Abi>& __z)                                                \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,                    \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const simd<_Tp, _Abi>& __x,                                                \
    const __type_identity_t<simd<_Tp, _Abi>>& __y,                             \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                             \
    const simd<_Tp, _Abi>& __y,                                                \
    const __type_identity_t<simd<_Tp, _Abi>>& __z)                             \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }                                                                            \
                                                                               \
template <typename _Tp, typename _Abi>                                         \
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi> _NAME(                               \
    const __type_identity_t<simd<_Tp, _Abi>>& __x,                             \
    const __type_identity_t<simd<_Tp, _Abi>>& __y,                             \
    const simd<_Tp, _Abi>& __z)                                                \
  {                                                                            \
    return _NAME(__x, __y, __z);                                               \
  }

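// Note on the _GLIBCXX_SIMD_CVTING{2,3} overloads above: __type_identity_t
// establishes a non-deduced context, so _Tp and _Abi are deduced from the
// plain simd parameter(s) alone and the remaining arguments are implicitly
// converted. This is what allows mixed calls such as (illustrative)
//
//   hypot(simd<float>(3.f), 4.f); // broadcasts 4.f to simd<float>
//
// The body then resolves to the overload taking only simd parameters.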
template <typename _R, typename _ToApply, typename _Tp, typename... _Tps>
  _GLIBCXX_SIMD_INTRINSIC _R
  __fixed_size_apply(_ToApply&& __apply, const _Tp& __arg0,
		     const _Tps&... __args)
  {
    return {__private_init,
	    __data(__arg0)._M_apply_per_chunk(
	      [&](auto __impl, const auto&... __inner) {
		using _V = typename decltype(__impl)::simd_type;
		return __data(__apply(_V(__private_init, __inner)...));
	      },
	      __data(__args)...)};
  }

template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Tp = typename _V::value_type;
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]));
    else if constexpr (__is_fixed_size_abi_v<typename _V::abi_type>)
      {
	return __fixed_size_apply<_V>([](auto __a, auto __b) {
					return hypot(__a, __b);
				      }, __x, __y);
      }
    else
      {
	// A simple solution for _Tp == float would be to cast to double and
	// simply calculate sqrt(x²+y²), as it can't over-/underflow anymore
	// with dp. It still needs the Annex F fixups though and isn't faster
	// on Skylake-AVX512 (not even for SSE and AVX vectors, and really
	// bad for AVX-512).
	using namespace __float_bitwise_operators;
	_V __absx = abs(__x);          // no error
	_V __absy = abs(__y);          // no error
	_V __hi = max(__absx, __absy); // no error
	_V __lo = min(__absy, __absx); // no error

	// round __hi down to the next power-of-2:
	_GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	if constexpr (__have_neon && !__have_neon_a32)
	  { // With ARMv7 NEON, we have no subnormals and must use a slightly
	    // different strategy
	    const _V __hi_exp = __hi & __inf;
	    _V __scale_back = __hi_exp;
	    // For large exponents (max & max/2) the inversion comes too
	    // close to subnormals. Subtract 3 from the exponent:
	    where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
	    // Invert and adjust for the off-by-one error of inversion via
	    // xor:
	    const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
	    const _V __h1 = __hi * __scale;
	    const _V __l1 = __lo * __scale;
	    _V __r = __scale_back * sqrt(__h1 * __h1 + __l1 * __l1);
	    // Fix up hypot(0, 0) to not be NaN:
	    where(__hi == 0, __r) = 0;
	    return __r;
	  }
#endif

#ifdef __FAST_MATH__
	// With fast-math, ignore precision of subnormals and inputs from
	// __finite_max_v/2 to __finite_max_v. This removes all
	// branching/masking.
	if constexpr (true)
#else
	if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
				      && all_of(isnormal(__y))))
#endif
	  {
	    const _V __hi_exp = __hi & __inf;
	    // ((__hi + __hi) & __inf) ^ __inf almost works for computing
	    // __scale, except when (__hi + __hi) & __inf == __inf, in which
	    // case __scale becomes 0 (should be min/2 instead) and thus
	    // loses the information from __lo.
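	    // The __FAST_MATH__ variant below computes 1 / __hi_exp with
	    // integer arithmetic: for a power of two x, the bit pattern of
	    // 1/x equals 2 * bits(1.0) - bits(x), i.e. the biased exponent
	    // reflected around the bias. E.g. for float:
	    //   2 * 0x3f800000 - bits(2.f)
	    //     == 0x7f000000 - 0x40000000 == 0x3f000000 == bits(.5f)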
#ifdef __FAST_MATH__
	    using _Ip = __int_for_sizeof_t<_Tp>;
	    using _IV = rebind_simd_t<_Ip, _V>;
	    const auto __as_int = __bit_cast<_IV>(__hi_exp);
	    const _V __scale
	      = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
	    const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __mant_mask
	      = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
	    const _V __h1 = (__hi & __mant_mask) | _V(1);
	    const _V __l1 = __lo * __scale;
	    return __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
	  }
	else
	  {
	    // slower path to support subnormals
	    // if __hi is subnormal, avoid scaling by inf & final mul by 0
	    // (which yields NaN) by using min()
	    _V __scale = _V(1 / __norm_min_v<_Tp>);
	    // invert exponent w/o error and w/o using the slow divider unit:
	    // xor inverts the exponent but off by 1. Multiplication with .5
	    // adjusts for the discrepancy.
	    where(__hi >= __norm_min_v<_Tp>, __scale)
	      = ((__hi & __inf) ^ __inf) * _Tp(.5);
	    // adjust final exponent for subnormal inputs
	    _V __hi_exp = __norm_min_v<_Tp>;
	    where(__hi >= __norm_min_v<_Tp>, __hi_exp)
	      = __hi & __inf;          // no error
	    _V __h1 = __hi * __scale;  // no error
	    _V __l1 = __lo * __scale;  // no error

	    // sqrt(x²+y²) = e*sqrt((x/e)²+(y/e)²):
	    // this ensures no overflow in the argument to sqrt
	    _V __r = __hi_exp * sqrt(__h1 * __h1 + __l1 * __l1);
#ifdef __STDC_IEC_559__
	    // fixup for Annex F requirements
	    // the naive fixup goes like this:
	    //
	    // where(__l1 == 0, __r) = __hi;
	    // where(isunordered(__x, __y), __r) = __quiet_NaN_v<_Tp>;
	    // where(isinf(__absx) || isinf(__absy), __r) = __inf;
	    //
	    // The fixup can be prepared in parallel with the sqrt, requiring
	    // a single blend step after hi_exp * sqrt, reducing latency and
	    // improving throughput:
	    _V __fixup = __hi; // __lo == 0
	    where(isunordered(__x, __y), __fixup) = __quiet_NaN_v<_Tp>;
	    where(isinf(__absx) || isinf(__absy), __fixup) = __inf;
	    where(!(__lo == 0 || isunordered(__x, __y)
		      || (isinf(__absx) || isinf(__absy))),
		  __fixup)
	      = __r;
	    __r = __fixup;
#endif
	    return __r;
	  }
      }
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
				 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(
	     __x, __y);
  }

_GLIBCXX_SIMD_CVTING2(hypot)

template <typename _VV>
  __remove_cvref_t<_VV>
  __hypot(_VV __x, _VV __y, _VV __z)
  {
    using _V = __remove_cvref_t<_VV>;
    using _Abi = typename _V::abi_type;
    using _Tp = typename _V::value_type;
    /* FIXME: enable after PR77776 is resolved
    if constexpr (_V::size() == 1)
      return std::hypot(_Tp(__x[0]), _Tp(__y[0]), _Tp(__z[0]));
    else
    */
    if constexpr (__is_fixed_size_abi_v<_Abi> && _V::size() > 1)
      {
	return __fixed_size_apply<simd<_Tp, _Abi>>(
	  [](auto __a, auto __b, auto __c) { return hypot(__a, __b, __c); },
	  __x, __y, __z);
      }
    else
      {
	using namespace __float_bitwise_operators;
	const _V __absx = abs(__x);                 // no error
	const _V __absy = abs(__y);                 // no error
	const _V __absz = abs(__z);                 // no error
	_V __hi = max(max(__absx, __absy), __absz); // no error
	_V __l0 = min(__absz, max(__absx, __absy)); // no error
	_V __l1 = min(__absy, __absx);              // no error
	if constexpr (__digits_v<_Tp> == 64 && __max_exponent_v<_Tp> == 0x4000
			&& __min_exponent_v<_Tp> == -0x3FFD
			&& _V::size() == 1)
	  { // Seems like x87 fp80, where bit 63 is always 1 unless subnormal
	    // or NaN. In this case the bit-tricks don't work; they require
	    // IEC559 binary32 or binary64 format.
#ifdef __STDC_IEC_559__
	    // fixup for Annex F requirements
	    if (isinf(__absx[0]) || isinf(__absy[0]) || isinf(__absz[0]))
	      return __infinity_v<_Tp>;
	    else if (isunordered(__absx[0], __absy[0] + __absz[0]))
	      return __quiet_NaN_v<_Tp>;
	    else if (__l0[0] == 0 && __l1[0] == 0)
	      return __hi;
#endif
	    _V __hi_exp = __hi;
	    const _ULLong __tmp = 0x8000'0000'0000'0000ull;
	    __builtin_memcpy(&__data(__hi_exp), &__tmp, 8);
	    const _V __scale = 1 / __hi_exp;
	    __hi *= __scale;
	    __l0 *= __scale;
	    __l1 *= __scale;
	    return __hi_exp * sqrt((__l0 * __l0 + __l1 * __l1) + __hi * __hi);
	  }
	else
	  {
	    // round __hi down to the next power-of-2:
	    _GLIBCXX_SIMD_USE_CONSTEXPR_API _V __inf(__infinity_v<_Tp>);

#ifndef __FAST_MATH__
	    if constexpr (_V::size() > 1 && __have_neon && !__have_neon_a32)
	      { // With ARMv7 NEON, we have no subnormals and must use a
		// slightly different strategy
		const _V __hi_exp = __hi & __inf;
		_V __scale_back = __hi_exp;
		// For large exponents (max & max/2) the inversion comes too
		// close to subnormals. Subtract 3 from the exponent:
		where(__hi_exp > 1, __scale_back) = __hi_exp * _Tp(0.125);
		// Invert and adjust for the off-by-one error of inversion
		// via xor:
		const _V __scale = (__scale_back ^ __inf) * _Tp(.5);
		const _V __h1 = __hi * __scale;
		__l0 *= __scale;
		__l1 *= __scale;
		_V __lo = __l0 * __l0
			    + __l1 * __l1; // add the two smaller values first
		asm("" : "+m"(__lo));
		_V __r = __scale_back * sqrt(__h1 * __h1 + __lo);
		// Fix up hypot(0, 0, 0) to not be NaN:
		where(__hi == 0, __r) = 0;
		return __r;
	      }
#endif

#ifdef __FAST_MATH__
	    // With fast-math, ignore precision of subnormals and inputs
	    // from __finite_max_v/2 to __finite_max_v. This removes all
	    // branching/masking.
	    if constexpr (true)
#else
	    if (_GLIBCXX_SIMD_IS_LIKELY(all_of(isnormal(__x))
					  && all_of(isnormal(__y))
					  && all_of(isnormal(__z))))
#endif
	      {
		const _V __hi_exp = __hi & __inf;
		// ((__hi + __hi) & __inf) ^ __inf almost works for computing
		// __scale, except when (__hi + __hi) & __inf == __inf, in
		// which case __scale becomes 0 (should be min/2 instead) and
		// thus loses the information from __lo.
#ifdef __FAST_MATH__
		using _Ip = __int_for_sizeof_t<_Tp>;
		using _IV = rebind_simd_t<_Ip, _V>;
		const auto __as_int = __bit_cast<_IV>(__hi_exp);
		const _V __scale
		  = __bit_cast<_V>(2 * __bit_cast<_Ip>(_Tp(1)) - __as_int);
#else
		const _V __scale = (__hi_exp ^ __inf) * _Tp(.5);
#endif
		constexpr _Tp __mant_mask
		  = __norm_min_v<_Tp> - __denorm_min_v<_Tp>;
		const _V __h1 = (__hi & _V(__mant_mask)) | _V(1);
		__l0 *= __scale;
		__l1 *= __scale;
		const _V __lo = __l0 * __l0
				  + __l1 * __l1; // add the two smaller values
						 // first
		return __hi_exp * sqrt(__lo + __h1 * __h1);
	      }
	    else
	      {
		// slower path to support subnormals
		// if __hi is subnormal, avoid scaling by inf & final mul by
		// 0 (which yields NaN) by using min()
		_V __scale = _V(1 / __norm_min_v<_Tp>);
		// invert exponent w/o error and w/o using the slow divider
		// unit: xor inverts the exponent but off by 1. Multiplication
		// with .5 adjusts for the discrepancy.
		where(__hi >= __norm_min_v<_Tp>, __scale)
		  = ((__hi & __inf) ^ __inf) * _Tp(.5);
		// adjust final exponent for subnormal inputs
		_V __hi_exp = __norm_min_v<_Tp>;
		where(__hi >= __norm_min_v<_Tp>, __hi_exp)
		  = __hi & __inf;          // no error
		_V __h1 = __hi * __scale;  // no error
		__l0 *= __scale;           // no error
		__l1 *= __scale;           // no error
		_V __lo = __l0 * __l0
			    + __l1 * __l1; // add the two smaller values first
		_V __r = __hi_exp * sqrt(__lo + __h1 * __h1);
#ifdef __STDC_IEC_559__
		// fixup for Annex F requirements
		_V __fixup = __hi; // __lo == 0
		// where(__lo == 0, __fixup) = __hi;
		where(isunordered(__x, __y + __z), __fixup)
		  = __quiet_NaN_v<_Tp>;
		where(isinf(__absx) || isinf(__absy) || isinf(__absz),
		      __fixup)
		  = __inf;
		// Instead of __lo == 0, the following could depend on
		// __h1² == __h1² + __lo (i.e. __hi is so much larger than
		// the other two inputs that the result is exactly __hi).
		// While this may improve precision, it is likely to reduce
		// efficiency if the ISA has FMAs (because __h1² + __lo is an
		// FMA, but the intermediate __h1² must be kept).
		where(!(__lo == 0 || isunordered(__x, __y + __z)
			  || isinf(__absx) || isinf(__absy) || isinf(__absz)),
		      __fixup)
		  = __r;
		__r = __fixup;
#endif
		return __r;
	      }
	  }
      }
  }

template <typename _Tp, typename _Abi>
  _GLIBCXX_SIMD_INTRINSIC simd<_Tp, _Abi>
  hypot(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y,
	const simd<_Tp, _Abi>& __z)
  {
    return __hypot<conditional_t<__is_fixed_size_abi_v<_Abi>,
				 const simd<_Tp, _Abi>&, simd<_Tp, _Abi>>>(
	     __x, __y, __z);
  }

_GLIBCXX_SIMD_CVTING3(hypot)

_GLIBCXX_SIMD_MATH_CALL2_(pow, _Tp)

_GLIBCXX_SIMD_MATH_CALL_(sqrt)
_GLIBCXX_SIMD_MATH_CALL_(erf)
_GLIBCXX_SIMD_MATH_CALL_(erfc)
_GLIBCXX_SIMD_MATH_CALL_(lgamma)
_GLIBCXX_SIMD_MATH_CALL_(tgamma)
_GLIBCXX_SIMD_MATH_CALL_(ceil)
_GLIBCXX_SIMD_MATH_CALL_(floor)
_GLIBCXX_SIMD_MATH_CALL_(nearbyint)
_GLIBCXX_SIMD_MATH_CALL_(rint)
_GLIBCXX_SIMD_MATH_CALL_(lrint)
_GLIBCXX_SIMD_MATH_CALL_(llrint)

_GLIBCXX_SIMD_MATH_CALL_(round)
_GLIBCXX_SIMD_MATH_CALL_(lround)
_GLIBCXX_SIMD_MATH_CALL_(llround)

_GLIBCXX_SIMD_MATH_CALL_(trunc)

_GLIBCXX_SIMD_MATH_CALL2_(fmod, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(remainder, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(remquo, _Tp, int*)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  copysign(const simd<_Tp, _Abi>& __x, const simd<_Tp, _Abi>& __y)
  {
    if constexpr (simd_size_v<_Tp, _Abi> == 1)
      return std::copysign(__x[0], __y[0]);
    else if constexpr (is_same_v<_Tp, long double> && sizeof(_Tp) == 12)
      // Remove this case once __bit_cast is implemented via
      // __builtin_bit_cast. It is necessary because __signmask below cannot
      // be computed at compile time.
      return simd<_Tp, _Abi>(
	[&](auto __i) { return std::copysign(__x[__i], __y[__i]); });
    else
      {
	using _V = simd<_Tp, _Abi>;
	using namespace std::experimental::__float_bitwise_operators;
	_GLIBCXX_SIMD_USE_CONSTEXPR_API auto __signmask = _V(1) ^ _V(-1);
	return (__x & (__x ^ __signmask)) | (__y & __signmask);
      }
  }

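// Note on __signmask above: with __float_bitwise_operators, _V(1) ^ _V(-1)
// XORs the bit patterns of +1.0 and -1.0, which differ only in the sign bit
// (e.g. 0x3f800000 ^ 0xbf800000 == 0x80000000 for float). Consequently
// (__x & (__x ^ __signmask)) clears the sign bit of __x, and
// (__y & __signmask) extracts the sign bit of __y.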
_GLIBCXX_SIMD_MATH_CALL2_(nextafter, _Tp)
// not covered in [parallel.simd.math]:
// _GLIBCXX_SIMD_MATH_CALL2_(nexttoward, long double)
_GLIBCXX_SIMD_MATH_CALL2_(fdim, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmax, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(fmin, _Tp)

_GLIBCXX_SIMD_MATH_CALL3_(fma, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(fpclassify)
_GLIBCXX_SIMD_MATH_CALL_(isfinite)

// isnan and isinf require special treatment because old glibc may declare
// `int isinf(double)`.
template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isinf(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isinf(__data(__x))}; }

template <typename _Tp, typename _Abi, typename...,
	  typename _R = _Math_return_type_t<bool, _Tp, _Abi>>
  enable_if_t<is_floating_point_v<_Tp>, _R>
  isnan(simd<_Tp, _Abi> __x)
  { return {__private_init, _Abi::_SimdImpl::_S_isnan(__data(__x))}; }

_GLIBCXX_SIMD_MATH_CALL_(isnormal)

template <typename..., typename _Tp, typename _Abi>
  simd_mask<_Tp, _Abi>
  signbit(simd<_Tp, _Abi> __x)
  {
    if constexpr (is_integral_v<_Tp>)
      {
	if constexpr (is_unsigned_v<_Tp>)
	  return simd_mask<_Tp, _Abi>{}; // false
	else
	  return __x < 0;
      }
    else
      return {__private_init, _Abi::_SimdImpl::_S_signbit(__data(__x))};
  }

_GLIBCXX_SIMD_MATH_CALL2_(isgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isgreaterequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isless, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessequal, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(islessgreater, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(isunordered, _Tp)

/* not covered in [parallel.simd.math]
template <typename _Abi> __doublev<_Abi> nan(const char* tagp);
template <typename _Abi> __floatv<_Abi> nanf(const char* tagp);
template <typename _Abi> __ldoublev<_Abi> nanl(const char* tagp);

template <typename _V> struct simd_div_t {
  _V quot, rem;
};

template <typename _Abi>
  simd_div_t<_SCharv<_Abi>> div(_SCharv<_Abi> numer, _SCharv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__shortv<_Abi>> div(__shortv<_Abi> numer, __shortv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__intv<_Abi>> div(__intv<_Abi> numer, __intv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__longv<_Abi>> div(__longv<_Abi> numer, __longv<_Abi> denom);
template <typename _Abi>
  simd_div_t<__llongv<_Abi>> div(__llongv<_Abi> numer, __llongv<_Abi> denom);
*/

// special math {{{
template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
		 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
		 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
	     return std::assoc_laguerre(__n[__i], __m[__i], __x[__i]);
	   });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  assoc_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
		 const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
		 const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
	     return std::assoc_legendre(__n[__i], __m[__i], __x[__i]);
	   });
  }

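// The special math functions in this section have no vectorized
// implementation; like assoc_laguerre and assoc_legendre above, the
// overloads below evaluate element-wise via the simd generator constructor,
// which invokes the lambda once per element index __i.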
_GLIBCXX_SIMD_MATH_CALL2_(beta, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_1)
_GLIBCXX_SIMD_MATH_CALL_(comp_ellint_2)
_GLIBCXX_SIMD_MATH_CALL2_(comp_ellint_3, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_i, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_j, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_bessel_k, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(cyl_neumann, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_1, _Tp)
_GLIBCXX_SIMD_MATH_CALL2_(ellint_2, _Tp)
_GLIBCXX_SIMD_MATH_CALL3_(ellint_3, _Tp, _Tp)
_GLIBCXX_SIMD_MATH_CALL_(expint)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  hermite(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
	  const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
	     [&](auto __i) { return std::hermite(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  laguerre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
	   const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
	     [&](auto __i) { return std::laguerre(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
	   const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
	     [&](auto __i) { return std::legendre(__n[__i], __x[__i]); });
  }

_GLIBCXX_SIMD_MATH_CALL_(riemann_zeta)

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_bessel(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
	     const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
	     [&](auto __i) { return std::sph_bessel(__n[__i], __x[__i]); });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_legendre(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __l,
	       const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __m,
	       const simd<_Tp, _Abi>& theta)
  {
    return simd<_Tp, _Abi>([&](auto __i) {
	     return std::sph_legendre(__l[__i], __m[__i], theta[__i]);
	   });
  }

template <typename _Tp, typename _Abi>
  enable_if_t<is_floating_point_v<_Tp>, simd<_Tp, _Abi>>
  sph_neumann(const fixed_size_simd<unsigned, simd_size_v<_Tp, _Abi>>& __n,
	      const simd<_Tp, _Abi>& __x)
  {
    return simd<_Tp, _Abi>(
	     [&](auto __i) { return std::sph_neumann(__n[__i], __x[__i]); });
  }
// }}}

#undef _GLIBCXX_SIMD_MATH_CALL_
#undef _GLIBCXX_SIMD_MATH_CALL2_
#undef _GLIBCXX_SIMD_MATH_CALL3_

_GLIBCXX_SIMD_END_NAMESPACE

#endif // __cplusplus >= 201703L
#endif // _GLIBCXX_EXPERIMENTAL_SIMD_MATH_H_

// vim: foldmethod=marker sw=2 ts=8 noet sts=2