1/* Optimized with sse2 version of sinf 2 Copyright (C) 2012-2021 Free Software Foundation, Inc. 3 This file is part of the GNU C Library. 4 5 The GNU C Library is free software; you can redistribute it and/or 6 modify it under the terms of the GNU Lesser General Public 7 License as published by the Free Software Foundation; either 8 version 2.1 of the License, or (at your option) any later version. 9 10 The GNU C Library is distributed in the hope that it will be useful, 11 but WITHOUT ANY WARRANTY; without even the implied warranty of 12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 13 Lesser General Public License for more details. 14 15 You should have received a copy of the GNU Lesser General Public 16 License along with the GNU C Library; if not, see 17 <https://www.gnu.org/licenses/>. */ 18 19#include <sysdep.h> 20#include <errno.h> 21 22/* Short algorithm description: 23 * 24 * 1) if |x| == 0: return x. 25 * 2) if |x| < 2^-27: return x-x*DP_SMALL, raise underflow only when needed. 26 * 3) if |x| < 2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1. 27 * 4) if |x| < Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))). 28 * 5) if |x| < 9*Pi/4: 29 * 5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1, 30 * t=|x|-j*Pi/4. 31 * 5.2) Reconstruction: 32 * s = sign(x) * (-1.0)^((n>>2)&1) 33 * if(n&2 != 0) { 34 * using cos(t) polynomial for |t|<Pi/4, result is 35 * s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))). 36 * } else { 37 * using sin(t) polynomial for |t|<Pi/4, result is 38 * s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))). 39 * } 40 * 6) if |x| < 2^23, large args: 41 * 6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, 42 * t=|x|-j*Pi/4. 43 * 6.2) Reconstruction same as (5.2). 44 * 7) if |x| >= 2^23, very large args: 45 * 7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1, 46 * t=|x|-j*Pi/4. 47 * 7.2) Reconstruction same as (5.2). 48 * 8) if x is Inf, return x-x, and set errno=EDOM. 49 * 9) if x is NaN, return x-x. 50 * 51 * Special cases: 52 * sin(+-0) = +-0 not raising inexact/underflow, 53 * sin(subnormal) raises inexact/underflow, 54 * sin(min_normalized) raises inexact/underflow, 55 * sin(normalized) raises inexact, 56 * sin(Inf) = NaN, raises invalid, sets errno to EDOM, 57 * sin(NaN) = NaN. 58 */ 59 60#ifdef PIC 61# define MO1(symbol) L(symbol)##@GOTOFF(%ebx) 62# define MO2(symbol,reg2,_scale) L(symbol)##@GOTOFF(%ebx,reg2,_scale) 63# define CFI_PUSH(REG) cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0) 64# define CFI_POP(REG) cfi_adjust_cfa_offset(-4); cfi_restore(REG) 65# define PUSH(REG) pushl REG; CFI_PUSH(REG) 66# define POP(REG) popl REG; CFI_POP(REG) 67# define ENTRANCE PUSH(%ebx); LOAD_PIC_REG(bx) 68# define RETURN POP(%ebx); ret; CFI_PUSH(%ebx) 69# define ARG_X 8(%esp) 70#else 71# define MO1(symbol) L(symbol) 72# define MO2(symbol,reg2,_scale) L(symbol)(,reg2,_scale) 73# define ENTRANCE 74# define RETURN ret 75# define ARG_X 4(%esp) 76#endif 77 78 .text 79ENTRY(__sinf_sse2) 80 /* Input: single precision x on stack at address ARG_X */ 81 82 ENTRANCE 83 movl ARG_X, %eax /* Bits of x */ 84 cvtss2sd ARG_X, %xmm0 /* DP x */ 85 andl $0x7fffffff, %eax /* |x| */ 86 87 cmpl $0x3f490fdb, %eax /* |x|<Pi/4? */ 88 jb L(arg_less_pio4) 89 90 /* Here if |x|>=Pi/4 */ 91 movd %eax, %xmm3 /* SP |x| */ 92 andpd MO1(DP_ABS_MASK),%xmm0 /* DP |x| */ 93 movss MO1(SP_INVPIO4), %xmm2 /* SP 1/(Pi/4) */ 94 95 cmpl $0x40e231d6, %eax /* |x|<9*Pi/4? */ 96 jae L(large_args) 97 98 /* Here if Pi/4<=|x|<9*Pi/4 */ 99 mulss %xmm3, %xmm2 /* SP |x|/(Pi/4) */ 100 movl ARG_X, %ecx /* Load x */ 101 cvttss2si %xmm2, %eax /* k, number of Pi/4 in x */ 102 shrl $31, %ecx /* sign of x */ 103 addl $1, %eax /* k+1 */ 104 movl $0x0e, %edx 105 andl %eax, %edx /* j = (k+1)&0x0e */ 106 subsd MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */ 107 108L(reconstruction): 109 /* Input: %eax=n, %xmm0=t, %ecx=sign(x) */ 110 testl $2, %eax /* n&2 != 0? */ 111 jz L(sin_poly) 112 113/*L(cos_poly):*/ 114 /* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4: 115 * y = t*t; z = y*y; 116 * s = sign(x) * (-1.0)^((n>>2)&1) 117 * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))) 118 */ 119 shrl $2, %eax /* n>>2 */ 120 mulsd %xmm0, %xmm0 /* y=t^2 */ 121 andl $1, %eax /* (n>>2)&1 */ 122 movaps %xmm0, %xmm1 /* y */ 123 mulsd %xmm0, %xmm0 /* z=t^4 */ 124 125 movsd MO1(DP_C4), %xmm4 /* C4 */ 126 mulsd %xmm0, %xmm4 /* z*C4 */ 127 xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */ 128 movsd MO1(DP_C3), %xmm3 /* C3 */ 129 mulsd %xmm0, %xmm3 /* z*C3 */ 130 addsd MO1(DP_C2), %xmm4 /* C2+z*C4 */ 131 mulsd %xmm0, %xmm4 /* z*(C2+z*C4) */ 132 lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ 133 addsd MO1(DP_C1), %xmm3 /* C1+z*C3 */ 134 mulsd %xmm0, %xmm3 /* z*(C1+z*C3) */ 135 addsd MO1(DP_C0), %xmm4 /* C0+z*(C2+z*C4) */ 136 mulsd %xmm1, %xmm4 /* y*(C0+z*(C2+z*C4)) */ 137 138 addsd %xmm4, %xmm3 /* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ 139 /* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */ 140 addsd MO1(DP_ONES), %xmm3 141 142 mulsd MO2(DP_ONES,%ecx,8), %xmm3 /* DP result */ 143 movsd %xmm3, 0(%esp) /* Move result from sse... */ 144 fldl 0(%esp) /* ...to FPU. */ 145 /* Return back 4 bytes of stack frame */ 146 lea 8(%esp), %esp 147 RETURN 148 149 .p2align 4 150L(sin_poly): 151 /* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4: 152 * y = t*t; z = y*y; 153 * s = sign(x) * (-1.0)^((n>>2)&1) 154 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))) 155 */ 156 157 movaps %xmm0, %xmm4 /* t */ 158 shrl $2, %eax /* n>>2 */ 159 mulsd %xmm0, %xmm0 /* y=t^2 */ 160 andl $1, %eax /* (n>>2)&1 */ 161 movaps %xmm0, %xmm1 /* y */ 162 xorl %eax, %ecx /* (-1.0)^((n>>2)&1) XOR sign(x) */ 163 mulsd %xmm0, %xmm0 /* z=t^4 */ 164 165 movsd MO1(DP_S4), %xmm2 /* S4 */ 166 mulsd %xmm0, %xmm2 /* z*S4 */ 167 movsd MO1(DP_S3), %xmm3 /* S3 */ 168 mulsd %xmm0, %xmm3 /* z*S3 */ 169 lea -8(%esp), %esp /* Borrow 4 bytes of stack frame */ 170 addsd MO1(DP_S2), %xmm2 /* S2+z*S4 */ 171 mulsd %xmm0, %xmm2 /* z*(S2+z*S4) */ 172 addsd MO1(DP_S1), %xmm3 /* S1+z*S3 */ 173 mulsd %xmm0, %xmm3 /* z*(S1+z*S3) */ 174 addsd MO1(DP_S0), %xmm2 /* S0+z*(S2+z*S4) */ 175 mulsd %xmm1, %xmm2 /* y*(S0+z*(S2+z*S4)) */ 176 /* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */ 177 mulsd MO2(DP_ONES,%ecx,8), %xmm4 178 addsd %xmm2, %xmm3 /* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ 179 /* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ 180 mulsd %xmm4, %xmm3 181 /* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ 182 addsd %xmm4, %xmm3 183 movsd %xmm3, 0(%esp) /* Move result from sse... */ 184 fldl 0(%esp) /* ...to FPU. */ 185 /* Return back 4 bytes of stack frame */ 186 lea 8(%esp), %esp 187 RETURN 188 189 .p2align 4 190L(large_args): 191 /* Here if |x|>=9*Pi/4 */ 192 cmpl $0x7f800000, %eax /* x is Inf or NaN? */ 193 jae L(arg_inf_or_nan) 194 195 /* Here if finite |x|>=9*Pi/4 */ 196 cmpl $0x4b000000, %eax /* |x|<2^23? */ 197 jae L(very_large_args) 198 199 /* Here if 9*Pi/4<=|x|<2^23 */ 200 movsd MO1(DP_INVPIO4), %xmm1 /* 1/(Pi/4) */ 201 mulsd %xmm0, %xmm1 /* |x|/(Pi/4) */ 202 cvttsd2si %xmm1, %eax /* k=trunc(|x|/(Pi/4)) */ 203 addl $1, %eax /* k+1 */ 204 movl %eax, %edx 205 andl $0xfffffffe, %edx /* j=(k+1)&0xfffffffe */ 206 cvtsi2sdl %edx, %xmm4 /* DP j */ 207 movl ARG_X, %ecx /* Load x */ 208 movsd MO1(DP_PIO4HI), %xmm2 /* -PIO4HI = high part of -Pi/4 */ 209 shrl $31, %ecx /* sign bit of x */ 210 mulsd %xmm4, %xmm2 /* -j*PIO4HI */ 211 movsd MO1(DP_PIO4LO), %xmm3 /* -PIO4LO = low part of -Pi/4 */ 212 addsd %xmm2, %xmm0 /* |x| - j*PIO4HI */ 213 mulsd %xmm3, %xmm4 /* j*PIO4LO */ 214 addsd %xmm4, %xmm0 /* t = |x| - j*PIO4HI - j*PIO4LO */ 215 jmp L(reconstruction) 216 217 .p2align 4 218L(very_large_args): 219 /* Here if finite |x|>=2^23 */ 220 221 /* bitpos = (ix>>23) - BIAS_32 + 59; */ 222 shrl $23, %eax /* eb = biased exponent of x */ 223 /* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */ 224 subl $68, %eax 225 movl $28, %ecx /* %cl=28 */ 226 movl %eax, %edx /* bitpos copy */ 227 228 /* j = bitpos/28; */ 229 div %cl /* j in register %al=%ax/%cl */ 230 movapd %xmm0, %xmm3 /* |x| */ 231 /* clear unneeded remainder from %ah */ 232 andl $0xff, %eax 233 234 imull $28, %eax, %ecx /* j*28 */ 235 movsd MO1(DP_HI_MASK), %xmm4 /* DP_HI_MASK */ 236 movapd %xmm0, %xmm5 /* |x| */ 237 mulsd -2*8+MO2(_FPI,%eax,8), %xmm3 /* tmp3 = FPI[j-2]*|x| */ 238 movapd %xmm0, %xmm1 /* |x| */ 239 mulsd -1*8+MO2(_FPI,%eax,8), %xmm5 /* tmp2 = FPI[j-1]*|x| */ 240 mulsd 0*8+MO2(_FPI,%eax,8), %xmm0 /* tmp0 = FPI[j]*|x| */ 241 addl $19, %ecx /* j*28+19 */ 242 mulsd 1*8+MO2(_FPI,%eax,8), %xmm1 /* tmp1 = FPI[j+1]*|x| */ 243 cmpl %ecx, %edx /* bitpos>=j*28+19? */ 244 jl L(very_large_skip1) 245 246 /* Here if bitpos>=j*28+19 */ 247 andpd %xmm3, %xmm4 /* HI(tmp3) */ 248 subsd %xmm4, %xmm3 /* tmp3 = tmp3 - HI(tmp3) */ 249L(very_large_skip1): 250 251 movsd MO1(DP_2POW52), %xmm6 252 movapd %xmm5, %xmm2 /* tmp2 copy */ 253 addsd %xmm3, %xmm5 /* tmp5 = tmp3 + tmp2 */ 254 movl $1, %edx 255 addsd %xmm5, %xmm6 /* tmp6 = tmp5 + 2^52 */ 256 movsd 8+MO1(DP_2POW52), %xmm4 257 movd %xmm6, %eax /* k = I64_LO(tmp6); */ 258 addsd %xmm6, %xmm4 /* tmp4 = tmp6 - 2^52 */ 259 movl ARG_X, %ecx /* Load x */ 260 comisd %xmm5, %xmm4 /* tmp4 > tmp5? */ 261 jbe L(very_large_skip2) 262 263 /* Here if tmp4 > tmp5 */ 264 subl $1, %eax /* k-- */ 265 addsd 8+MO1(DP_ONES), %xmm4 /* tmp4 -= 1.0 */ 266L(very_large_skip2): 267 268 andl %eax, %edx /* k&1 */ 269 subsd %xmm4, %xmm3 /* tmp3 -= tmp4 */ 270 addsd MO2(DP_ZERONE,%edx,8), %xmm3 /* t = DP_ZERONE[k&1] + tmp3 */ 271 addsd %xmm2, %xmm3 /* t += tmp2 */ 272 shrl $31, %ecx /* sign of x */ 273 addsd %xmm3, %xmm0 /* t += tmp0 */ 274 addl $1, %eax /* n=k+1 */ 275 addsd %xmm1, %xmm0 /* t += tmp1 */ 276 mulsd MO1(DP_PIO4), %xmm0 /* t *= PI04 */ 277 278 jmp L(reconstruction) /* end of very_large_args peth */ 279 280 .p2align 4 281L(arg_less_pio4): 282 /* Here if |x|<Pi/4 */ 283 cmpl $0x3d000000, %eax /* |x|<2^-5? */ 284 jl L(arg_less_2pn5) 285 286 /* Here if 2^-5<=|x|<Pi/4 */ 287 movaps %xmm0, %xmm3 /* x */ 288 mulsd %xmm0, %xmm0 /* y=x^2 */ 289 movaps %xmm0, %xmm1 /* y */ 290 mulsd %xmm0, %xmm0 /* z=x^4 */ 291 movsd MO1(DP_S4), %xmm4 /* S4 */ 292 mulsd %xmm0, %xmm4 /* z*S4 */ 293 movsd MO1(DP_S3), %xmm5 /* S3 */ 294 mulsd %xmm0, %xmm5 /* z*S3 */ 295 addsd MO1(DP_S2), %xmm4 /* S2+z*S4 */ 296 mulsd %xmm0, %xmm4 /* z*(S2+z*S4) */ 297 addsd MO1(DP_S1), %xmm5 /* S1+z*S3 */ 298 mulsd %xmm0, %xmm5 /* z*(S1+z*S3) */ 299 addsd MO1(DP_S0), %xmm4 /* S0+z*(S2+z*S4) */ 300 mulsd %xmm1, %xmm4 /* y*(S0+z*(S2+z*S4)) */ 301 mulsd %xmm3, %xmm5 /* x*z*(S1+z*S3) */ 302 mulsd %xmm3, %xmm4 /* x*y*(S0+z*(S2+z*S4)) */ 303 /* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ 304 addsd %xmm5, %xmm4 305 /* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */ 306 addsd %xmm4, %xmm3 307 cvtsd2ss %xmm3, %xmm3 /* SP result */ 308 309L(epilogue): 310 lea -4(%esp), %esp /* Borrow 4 bytes of stack frame */ 311 movss %xmm3, 0(%esp) /* Move result from sse... */ 312 flds 0(%esp) /* ...to FPU. */ 313 /* Return back 4 bytes of stack frame */ 314 lea 4(%esp), %esp 315 RETURN 316 317 .p2align 4 318L(arg_less_2pn5): 319 /* Here if |x|<2^-5 */ 320 cmpl $0x32000000, %eax /* |x|<2^-27? */ 321 jl L(arg_less_2pn27) 322 323 /* Here if 2^-27<=|x|<2^-5 */ 324 movaps %xmm0, %xmm1 /* DP x */ 325 mulsd %xmm0, %xmm0 /* DP x^2 */ 326 movsd MO1(DP_SIN2_1), %xmm3 /* DP DP_SIN2_1 */ 327 mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_1 */ 328 addsd MO1(DP_SIN2_0), %xmm3 /* DP DP_SIN2_0+x^2*DP_SIN2_1 */ 329 mulsd %xmm0, %xmm3 /* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */ 330 mulsd %xmm1, %xmm3 /* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ 331 addsd %xmm1, %xmm3 /* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */ 332 cvtsd2ss %xmm3, %xmm3 /* SP result */ 333 jmp L(epilogue) 334 335 .p2align 4 336L(arg_less_2pn27): 337 movss ARG_X, %xmm3 /* SP x */ 338 cmpl $0, %eax /* x=0? */ 339 je L(epilogue) /* in case x=0 return sin(+-0)==+-0 */ 340 /* Here if |x|<2^-27 */ 341 /* 342 * Special cases here: 343 * sin(subnormal) raises inexact/underflow 344 * sin(min_normalized) raises inexact/underflow 345 * sin(normalized) raises inexact 346 */ 347 movaps %xmm0, %xmm3 /* Copy of DP x */ 348 mulsd MO1(DP_SMALL), %xmm0 /* x*DP_SMALL */ 349 subsd %xmm0, %xmm3 /* Result is x-x*DP_SMALL */ 350 cvtsd2ss %xmm3, %xmm3 /* Result converted to SP */ 351 jmp L(epilogue) 352 353 .p2align 4 354L(arg_inf_or_nan): 355 /* Here if |x| is Inf or NAN */ 356 jne L(skip_errno_setting) /* in case of x is NaN */ 357 358 /* Here if x is Inf. Set errno to EDOM. */ 359 call JUMPTARGET(__errno_location) 360 movl $EDOM, (%eax) 361 362 .p2align 4 363L(skip_errno_setting): 364 /* Here if |x| is Inf or NAN. Continued. */ 365 movss ARG_X, %xmm3 /* load x */ 366 subss %xmm3, %xmm3 /* Result is NaN */ 367 jmp L(epilogue) 368END(__sinf_sse2) 369 370 .section .rodata, "a" 371 .p2align 3 372L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */ 373 .long 0x00000000,0x00000000 374 .long 0x54442d18,0x3fe921fb 375 .long 0x54442d18,0x3ff921fb 376 .long 0x7f3321d2,0x4002d97c 377 .long 0x54442d18,0x400921fb 378 .long 0x2955385e,0x400f6a7a 379 .long 0x7f3321d2,0x4012d97c 380 .long 0xe9bba775,0x4015fdbb 381 .long 0x54442d18,0x401921fb 382 .long 0xbeccb2bb,0x401c463a 383 .long 0x2955385e,0x401f6a7a 384 .type L(PIO4J), @object 385 ASM_SIZE_DIRECTIVE(L(PIO4J)) 386 387 .p2align 3 388L(_FPI): /* 4/Pi broken into sum of positive DP values */ 389 .long 0x00000000,0x00000000 390 .long 0x6c000000,0x3ff45f30 391 .long 0x2a000000,0x3e3c9c88 392 .long 0xa8000000,0x3c54fe13 393 .long 0xd0000000,0x3aaf47d4 394 .long 0x6c000000,0x38fbb81b 395 .long 0xe0000000,0x3714acc9 396 .long 0x7c000000,0x3560e410 397 .long 0x56000000,0x33bca2c7 398 .long 0xac000000,0x31fbd778 399 .long 0xe0000000,0x300b7246 400 .long 0xe8000000,0x2e5d2126 401 .long 0x48000000,0x2c970032 402 .long 0xe8000000,0x2ad77504 403 .long 0xe0000000,0x290921cf 404 .long 0xb0000000,0x274deb1c 405 .long 0xe0000000,0x25829a73 406 .long 0xbe000000,0x23fd1046 407 .long 0x10000000,0x2224baed 408 .long 0x8e000000,0x20709d33 409 .long 0x80000000,0x1e535a2f 410 .long 0x64000000,0x1cef904e 411 .long 0x30000000,0x1b0d6398 412 .long 0x24000000,0x1964ce7d 413 .long 0x16000000,0x17b908bf 414 .type L(_FPI), @object 415 ASM_SIZE_DIRECTIVE(L(_FPI)) 416 417/* Coefficients of polynomial 418 for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5. */ 419 .p2align 3 420L(DP_SIN2_0): 421 .long 0x5543d49d,0xbfc55555 422 .type L(DP_SIN2_0), @object 423 ASM_SIZE_DIRECTIVE(L(DP_SIN2_0)) 424 425 .p2align 3 426L(DP_SIN2_1): 427 .long 0x75cec8c5,0x3f8110f4 428 .type L(DP_SIN2_1), @object 429 ASM_SIZE_DIRECTIVE(L(DP_SIN2_1)) 430 431 .p2align 3 432L(DP_ZERONE): 433 .long 0x00000000,0x00000000 /* 0.0 */ 434 .long 0x00000000,0xbff00000 /* 1.0 */ 435 .type L(DP_ZERONE), @object 436 ASM_SIZE_DIRECTIVE(L(DP_ZERONE)) 437 438 .p2align 3 439L(DP_ONES): 440 .long 0x00000000,0x3ff00000 /* +1.0 */ 441 .long 0x00000000,0xbff00000 /* -1.0 */ 442 .type L(DP_ONES), @object 443 ASM_SIZE_DIRECTIVE(L(DP_ONES)) 444 445/* Coefficients of polynomial 446 for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4. */ 447 .p2align 3 448L(DP_S3): 449 .long 0x64e6b5b4,0x3ec71d72 450 .type L(DP_S3), @object 451 ASM_SIZE_DIRECTIVE(L(DP_S3)) 452 453 .p2align 3 454L(DP_S1): 455 .long 0x10c2688b,0x3f811111 456 .type L(DP_S1), @object 457 ASM_SIZE_DIRECTIVE(L(DP_S1)) 458 459 .p2align 3 460L(DP_S4): 461 .long 0x1674b58a,0xbe5a947e 462 .type L(DP_S4), @object 463 ASM_SIZE_DIRECTIVE(L(DP_S4)) 464 465 .p2align 3 466L(DP_S2): 467 .long 0x8b4bd1f9,0xbf2a019f 468 .type L(DP_S2), @object 469 ASM_SIZE_DIRECTIVE(L(DP_S2)) 470 471 .p2align 3 472L(DP_S0): 473 .long 0x55551cd9,0xbfc55555 474 .type L(DP_S0), @object 475 ASM_SIZE_DIRECTIVE(L(DP_S0)) 476 477 .p2align 3 478L(DP_SMALL): 479 .long 0x00000000,0x3cd00000 /* 2^(-50) */ 480 .type L(DP_SMALL), @object 481 ASM_SIZE_DIRECTIVE(L(DP_SMALL)) 482 483/* Coefficients of polynomial 484 for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4. */ 485 .p2align 3 486L(DP_C3): 487 .long 0x9ac43cc0,0x3efa00eb 488 .type L(DP_C3), @object 489 ASM_SIZE_DIRECTIVE(L(DP_C3)) 490 491 .p2align 3 492L(DP_C1): 493 .long 0x545c50c7,0x3fa55555 494 .type L(DP_C1), @object 495 ASM_SIZE_DIRECTIVE(L(DP_C1)) 496 497 .p2align 3 498L(DP_C4): 499 .long 0xdd8844d7,0xbe923c97 500 .type L(DP_C4), @object 501 ASM_SIZE_DIRECTIVE(L(DP_C4)) 502 503 .p2align 3 504L(DP_C2): 505 .long 0x348b6874,0xbf56c16b 506 .type L(DP_C2), @object 507 ASM_SIZE_DIRECTIVE(L(DP_C2)) 508 509 .p2align 3 510L(DP_C0): 511 .long 0xfffe98ae,0xbfdfffff 512 .type L(DP_C0), @object 513 ASM_SIZE_DIRECTIVE(L(DP_C0)) 514 515 .p2align 3 516L(DP_PIO4): 517 .long 0x54442d18,0x3fe921fb /* Pi/4 */ 518 .type L(DP_PIO4), @object 519 ASM_SIZE_DIRECTIVE(L(DP_PIO4)) 520 521 .p2align 3 522L(DP_2POW52): 523 .long 0x00000000,0x43300000 /* +2^52 */ 524 .long 0x00000000,0xc3300000 /* -2^52 */ 525 .type L(DP_2POW52), @object 526 ASM_SIZE_DIRECTIVE(L(DP_2POW52)) 527 528 .p2align 3 529L(DP_INVPIO4): 530 .long 0x6dc9c883,0x3ff45f30 /* 4/Pi */ 531 .type L(DP_INVPIO4), @object 532 ASM_SIZE_DIRECTIVE(L(DP_INVPIO4)) 533 534 .p2align 3 535L(DP_PIO4HI): 536 .long 0x54000000,0xbfe921fb /* High part of Pi/4 */ 537 .type L(DP_PIO4HI), @object 538 ASM_SIZE_DIRECTIVE(L(DP_PIO4HI)) 539 540 .p2align 3 541L(DP_PIO4LO): 542 .long 0x11A62633,0xbe010b46 /* Low part of Pi/4 */ 543 .type L(DP_PIO4LO), @object 544 ASM_SIZE_DIRECTIVE(L(DP_PIO4LO)) 545 546 .p2align 2 547L(SP_INVPIO4): 548 .long 0x3fa2f983 /* 4/Pi */ 549 .type L(SP_INVPIO4), @object 550 ASM_SIZE_DIRECTIVE(L(SP_INVPIO4)) 551 552 .p2align 4 553L(DP_ABS_MASK): /* Mask for getting DP absolute value */ 554 .long 0xffffffff,0x7fffffff 555 .long 0xffffffff,0x7fffffff 556 .type L(DP_ABS_MASK), @object 557 ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK)) 558 559 .p2align 3 560L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */ 561 .long 0x00000000,0xffffffff 562 .type L(DP_HI_MASK), @object 563 ASM_SIZE_DIRECTIVE(L(DP_HI_MASK)) 564 565weak_alias (__sinf, sinf) 566