1/* Optimized with sse2 version of sinf
2   Copyright (C) 2012-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20#include <errno.h>
21
22/* Short algorithm description:
23 *
24 *  1) if |x| == 0: return x.
25 *  2) if |x| <  2^-27: return x-x*DP_SMALL, raise underflow only when needed.
26 *  3) if |x| <  2^-5 : return x+x^3*DP_SIN2_0+x^5*DP_SIN2_1.
27 *  4) if |x| <   Pi/4: return x+x^3*(S0+x^2*(S1+x^2*(S2+x^2*(S3+x^2*S4)))).
28 *  5) if |x| < 9*Pi/4:
29 *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+1,
30 *           t=|x|-j*Pi/4.
31 *      5.2) Reconstruction:
32 *          s = sign(x) * (-1.0)^((n>>2)&1)
33 *          if(n&2 != 0) {
34 *              using cos(t) polynomial for |t|<Pi/4, result is
35 *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
36 *          } else {
37 *              using sin(t) polynomial for |t|<Pi/4, result is
38 *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
39 *          }
40 *  6) if |x| < 2^23, large args:
41 *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
42 *           t=|x|-j*Pi/4.
43 *      6.2) Reconstruction same as (5.2).
44 *  7) if |x| >= 2^23, very large args:
45 *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+1,
46 *           t=|x|-j*Pi/4.
47 *      7.2) Reconstruction same as (5.2).
48 *  8) if x is Inf, return x-x, and set errno=EDOM.
49 *  9) if x is NaN, return x-x.
50 *
51 * Special cases:
52 *  sin(+-0) = +-0 not raising inexact/underflow,
53 *  sin(subnormal) raises inexact/underflow,
54 *  sin(min_normalized) raises inexact/underflow,
55 *  sin(normalized) raises inexact,
56 *  sin(Inf) = NaN, raises invalid, sets errno to EDOM,
57 *  sin(NaN) = NaN.
58 */
59
60#ifdef	PIC
61# define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
62# define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
63# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
64# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
65# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
66# define POP(REG)			popl REG; CFI_POP(REG)
67# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
68# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
69# define ARG_X				8(%esp)
70#else
71# define MO1(symbol)			L(symbol)
72# define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
73# define ENTRANCE
74# define RETURN				ret
75# define ARG_X				4(%esp)
76#endif
77
78	.text
79ENTRY(__sinf_sse2)
80	/* Input: single precision x on stack at address ARG_X */
81
82	ENTRANCE
83	movl	ARG_X, %eax		/* Bits of x */
84	cvtss2sd ARG_X, %xmm0		/* DP x */
85	andl	$0x7fffffff, %eax	/* |x| */
86
87	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4?  */
88	jb	L(arg_less_pio4)
89
90	/* Here if |x|>=Pi/4 */
91	movd	%eax, %xmm3		/* SP |x| */
92	andpd	MO1(DP_ABS_MASK),%xmm0	/* DP |x| */
93	movss	MO1(SP_INVPIO4), %xmm2	/* SP 1/(Pi/4) */
94
95	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4?  */
96	jae	L(large_args)
97
98	/* Here if Pi/4<=|x|<9*Pi/4 */
99	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
100	movl	ARG_X, %ecx		/* Load x */
101	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
102	shrl	$31, %ecx		/* sign of x */
103	addl	$1, %eax		/* k+1 */
104	movl	$0x0e, %edx
105	andl	%eax, %edx		/* j = (k+1)&0x0e */
106	subsd	MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
107
108L(reconstruction):
109	/* Input: %eax=n, %xmm0=t, %ecx=sign(x) */
110	testl	$2, %eax		/* n&2 != 0?  */
111	jz	L(sin_poly)
112
113/*L(cos_poly):*/
114	/* Here if sin(x) calculated using cos(t) polynomial for |t|<Pi/4:
115	 * y = t*t; z = y*y;
116	 * s = sign(x) * (-1.0)^((n>>2)&1)
117	 * result = s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
118	 */
119	shrl	$2, %eax		/* n>>2 */
120	mulsd	%xmm0, %xmm0		/* y=t^2 */
121	andl	$1, %eax		/* (n>>2)&1 */
122	movaps	%xmm0, %xmm1		/* y */
123	mulsd	%xmm0, %xmm0		/* z=t^4 */
124
125	movsd	MO1(DP_C4), %xmm4	/* C4 */
126	mulsd	%xmm0, %xmm4		/* z*C4 */
127	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
128	movsd	MO1(DP_C3), %xmm3	/* C3 */
129	mulsd	%xmm0, %xmm3		/* z*C3 */
130	addsd	MO1(DP_C2), %xmm4	/* C2+z*C4 */
131	mulsd	%xmm0, %xmm4		/* z*(C2+z*C4) */
132	lea	-8(%esp), %esp		/* Borrow 4 bytes of stack frame */
133	addsd	MO1(DP_C1), %xmm3	/* C1+z*C3 */
134	mulsd	%xmm0, %xmm3		/* z*(C1+z*C3) */
135	addsd	MO1(DP_C0), %xmm4	/* C0+z*(C2+z*C4) */
136	mulsd	%xmm1, %xmm4		/* y*(C0+z*(C2+z*C4)) */
137
138	addsd	%xmm4, %xmm3		/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
139	/* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
140	addsd	MO1(DP_ONES), %xmm3
141
142	mulsd	MO2(DP_ONES,%ecx,8), %xmm3 /* DP result */
143	movsd	%xmm3, 0(%esp)		/* Move result from sse...  */
144	fldl	0(%esp)			/* ...to FPU.  */
145	/* Return back 4 bytes of stack frame */
146	lea	8(%esp), %esp
147	RETURN
148
149	.p2align	4
150L(sin_poly):
151	/* Here if sin(x) calculated using sin(t) polynomial for |t|<Pi/4:
152	 * y = t*t; z = y*y;
153	 * s = sign(x) * (-1.0)^((n>>2)&1)
154	 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
155	 */
156
157	movaps	%xmm0, %xmm4		/* t */
158	shrl	$2, %eax		/* n>>2 */
159	mulsd	%xmm0, %xmm0		/* y=t^2 */
160	andl	$1, %eax		/* (n>>2)&1 */
161	movaps	%xmm0, %xmm1		/* y */
162	xorl	%eax, %ecx		/* (-1.0)^((n>>2)&1) XOR sign(x) */
163	mulsd	%xmm0, %xmm0		/* z=t^4 */
164
165	movsd	MO1(DP_S4), %xmm2	/* S4 */
166	mulsd	%xmm0, %xmm2		/* z*S4 */
167	movsd	MO1(DP_S3), %xmm3	/* S3 */
168	mulsd	%xmm0, %xmm3		/* z*S3 */
169	lea	-8(%esp), %esp		/* Borrow 4 bytes of stack frame */
170	addsd	MO1(DP_S2), %xmm2	/* S2+z*S4 */
171	mulsd	%xmm0, %xmm2		/* z*(S2+z*S4) */
172	addsd	MO1(DP_S1), %xmm3	/* S1+z*S3 */
173	mulsd	%xmm0, %xmm3		/* z*(S1+z*S3) */
174	addsd	MO1(DP_S0), %xmm2	/* S0+z*(S2+z*S4) */
175	mulsd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
176	/* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
177	mulsd	MO2(DP_ONES,%ecx,8), %xmm4
178	addsd	%xmm2, %xmm3		/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
179	/* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
180	mulsd	%xmm4, %xmm3
181	/* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
182	addsd	%xmm4, %xmm3
183	movsd	%xmm3, 0(%esp)		/* Move result from sse...  */
184	fldl	0(%esp)			/* ...to FPU.  */
185	/* Return back 4 bytes of stack frame */
186	lea	8(%esp), %esp
187	RETURN
188
189	.p2align	4
190L(large_args):
191	/* Here if |x|>=9*Pi/4 */
192	cmpl	$0x7f800000, %eax	/* x is Inf or NaN?  */
193	jae	L(arg_inf_or_nan)
194
195	/* Here if finite |x|>=9*Pi/4 */
196	cmpl	$0x4b000000, %eax	/* |x|<2^23?  */
197	jae	L(very_large_args)
198
199	/* Here if 9*Pi/4<=|x|<2^23 */
200	movsd	MO1(DP_INVPIO4), %xmm1	/* 1/(Pi/4) */
201	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
202	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
203	addl	$1, %eax		/* k+1 */
204	movl	%eax, %edx
205	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
206	cvtsi2sdl %edx, %xmm4		/* DP j */
207	movl	ARG_X, %ecx		/* Load x */
208	movsd	MO1(DP_PIO4HI), %xmm2	/* -PIO4HI = high part of -Pi/4 */
209	shrl	$31, %ecx		/* sign bit of x */
210	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
211	movsd	MO1(DP_PIO4LO), %xmm3	/* -PIO4LO = low part of -Pi/4 */
212	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
213	mulsd	%xmm3, %xmm4		/* j*PIO4LO */
214	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
215	jmp	L(reconstruction)
216
217	.p2align	4
218L(very_large_args):
219	/* Here if finite |x|>=2^23 */
220
221	/* bitpos = (ix>>23) - BIAS_32 + 59; */
222	shrl	$23, %eax		/* eb = biased exponent of x */
223	/* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
224	subl	$68, %eax
225	movl	$28, %ecx		/* %cl=28 */
226	movl	%eax, %edx		/* bitpos copy */
227
228	/* j = bitpos/28; */
229	div	%cl			/* j in register %al=%ax/%cl */
230	movapd	%xmm0, %xmm3		/* |x| */
231	/* clear unneeded remainder from %ah */
232	andl	$0xff, %eax
233
234	imull	$28, %eax, %ecx		/* j*28 */
235	movsd	MO1(DP_HI_MASK), %xmm4	/* DP_HI_MASK */
236	movapd	%xmm0, %xmm5		/* |x| */
237	mulsd	-2*8+MO2(_FPI,%eax,8), %xmm3	/* tmp3 = FPI[j-2]*|x| */
238	movapd	%xmm0, %xmm1		/* |x| */
239	mulsd	-1*8+MO2(_FPI,%eax,8), %xmm5	/* tmp2 = FPI[j-1]*|x| */
240	mulsd	0*8+MO2(_FPI,%eax,8), %xmm0	/* tmp0 = FPI[j]*|x| */
241	addl	$19, %ecx		/* j*28+19 */
242	mulsd	1*8+MO2(_FPI,%eax,8), %xmm1	/* tmp1 = FPI[j+1]*|x| */
243	cmpl	%ecx, %edx		/* bitpos>=j*28+19?   */
244	jl	L(very_large_skip1)
245
246	/* Here if bitpos>=j*28+19 */
247	andpd	%xmm3, %xmm4		/* HI(tmp3) */
248	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
249L(very_large_skip1):
250
251	movsd	MO1(DP_2POW52), %xmm6
252	movapd	%xmm5, %xmm2		/* tmp2 copy */
253	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
254	movl	$1, %edx
255	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
256	movsd	8+MO1(DP_2POW52), %xmm4
257	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
258	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
259	movl	ARG_X, %ecx		/* Load x */
260	comisd	%xmm5, %xmm4		/* tmp4 > tmp5?  */
261	jbe	L(very_large_skip2)
262
263	/* Here if tmp4 > tmp5 */
264	subl	$1, %eax		/* k-- */
265	addsd	8+MO1(DP_ONES), %xmm4	/* tmp4 -= 1.0 */
266L(very_large_skip2):
267
268	andl	%eax, %edx		/* k&1 */
269	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
270	addsd	MO2(DP_ZERONE,%edx,8), %xmm3 /* t  = DP_ZERONE[k&1] + tmp3 */
271	addsd	%xmm2, %xmm3		/* t += tmp2 */
272	shrl	$31, %ecx		/* sign of x */
273	addsd	%xmm3, %xmm0		/* t += tmp0 */
274	addl	$1, %eax		/* n=k+1 */
275	addsd	%xmm1, %xmm0		/* t += tmp1 */
276	mulsd	MO1(DP_PIO4), %xmm0	/* t *= PI04 */
277
278	jmp	L(reconstruction)	/* end of very_large_args peth */
279
280	.p2align	4
281L(arg_less_pio4):
282	/* Here if |x|<Pi/4 */
283	cmpl	$0x3d000000, %eax	/* |x|<2^-5?  */
284	jl	L(arg_less_2pn5)
285
286	/* Here if 2^-5<=|x|<Pi/4 */
287	movaps	%xmm0, %xmm3		/* x */
288	mulsd	%xmm0, %xmm0		/* y=x^2 */
289	movaps	%xmm0, %xmm1		/* y */
290	mulsd	%xmm0, %xmm0		/* z=x^4 */
291	movsd	MO1(DP_S4), %xmm4	/* S4 */
292	mulsd	%xmm0, %xmm4		/* z*S4 */
293	movsd	MO1(DP_S3), %xmm5	/* S3 */
294	mulsd	%xmm0, %xmm5		/* z*S3 */
295	addsd	MO1(DP_S2), %xmm4	/* S2+z*S4 */
296	mulsd	%xmm0, %xmm4		/* z*(S2+z*S4) */
297	addsd	MO1(DP_S1), %xmm5	/* S1+z*S3 */
298	mulsd	%xmm0, %xmm5		/* z*(S1+z*S3) */
299	addsd	MO1(DP_S0), %xmm4	/* S0+z*(S2+z*S4) */
300	mulsd	%xmm1, %xmm4		/* y*(S0+z*(S2+z*S4)) */
301	mulsd	%xmm3, %xmm5		/* x*z*(S1+z*S3) */
302	mulsd	%xmm3, %xmm4		/* x*y*(S0+z*(S2+z*S4)) */
303	/* x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
304	addsd	%xmm5, %xmm4
305	/* x + x*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
306	addsd	%xmm4, %xmm3
307	cvtsd2ss %xmm3, %xmm3		/* SP result */
308
309L(epilogue):
310	lea	-4(%esp), %esp		/* Borrow 4 bytes of stack frame */
311	movss	%xmm3, 0(%esp)		/* Move result from sse...  */
312	flds	0(%esp)			/* ...to FPU.  */
313	/* Return back 4 bytes of stack frame */
314	lea	4(%esp), %esp
315	RETURN
316
317	.p2align	4
318L(arg_less_2pn5):
319	/* Here if |x|<2^-5 */
320	cmpl	$0x32000000, %eax	/* |x|<2^-27?  */
321	jl	L(arg_less_2pn27)
322
323	/* Here if 2^-27<=|x|<2^-5 */
324	movaps	%xmm0, %xmm1		/* DP x */
325	mulsd	%xmm0, %xmm0		/* DP x^2 */
326	movsd	MO1(DP_SIN2_1), %xmm3	/* DP DP_SIN2_1 */
327	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_1 */
328	addsd	MO1(DP_SIN2_0), %xmm3	/* DP DP_SIN2_0+x^2*DP_SIN2_1 */
329	mulsd	%xmm0, %xmm3		/* DP x^2*DP_SIN2_0+x^4*DP_SIN2_1 */
330	mulsd	%xmm1, %xmm3		/* DP x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
331	addsd	%xmm1, %xmm3		/* DP x+x^3*DP_SIN2_0+x^5*DP_SIN2_1 */
332	cvtsd2ss %xmm3, %xmm3		/* SP result */
333	jmp	L(epilogue)
334
335	.p2align	4
336L(arg_less_2pn27):
337	movss	ARG_X, %xmm3		/* SP x */
338	cmpl	$0, %eax		/* x=0?  */
339	je	L(epilogue)		/* in case x=0 return sin(+-0)==+-0 */
340	/* Here if |x|<2^-27 */
341	/*
342	 * Special cases here:
343	 *  sin(subnormal) raises inexact/underflow
344	 *  sin(min_normalized) raises inexact/underflow
345	 *  sin(normalized) raises inexact
346	 */
347	movaps	%xmm0, %xmm3		/* Copy of DP x */
348	mulsd	MO1(DP_SMALL), %xmm0	/* x*DP_SMALL */
349	subsd	%xmm0, %xmm3		/* Result is x-x*DP_SMALL */
350	cvtsd2ss %xmm3, %xmm3		/* Result converted to SP */
351	jmp	L(epilogue)
352
353	.p2align	4
354L(arg_inf_or_nan):
355	/* Here if |x| is Inf or NAN */
356	jne	L(skip_errno_setting)	/* in case of x is NaN */
357
358	/* Here if x is Inf. Set errno to EDOM.  */
359	call	JUMPTARGET(__errno_location)
360	movl	$EDOM, (%eax)
361
362	.p2align	4
363L(skip_errno_setting):
364	/* Here if |x| is Inf or NAN. Continued.  */
365	movss	ARG_X, %xmm3		/* load x */
366	subss	%xmm3, %xmm3		/* Result is NaN */
367	jmp	L(epilogue)
368END(__sinf_sse2)
369
370	.section .rodata, "a"
371	.p2align 3
372L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
373	.long	0x00000000,0x00000000
374	.long	0x54442d18,0x3fe921fb
375	.long	0x54442d18,0x3ff921fb
376	.long	0x7f3321d2,0x4002d97c
377	.long	0x54442d18,0x400921fb
378	.long	0x2955385e,0x400f6a7a
379	.long	0x7f3321d2,0x4012d97c
380	.long	0xe9bba775,0x4015fdbb
381	.long	0x54442d18,0x401921fb
382	.long	0xbeccb2bb,0x401c463a
383	.long	0x2955385e,0x401f6a7a
384	.type L(PIO4J), @object
385	ASM_SIZE_DIRECTIVE(L(PIO4J))
386
387	.p2align 3
388L(_FPI): /* 4/Pi broken into sum of positive DP values */
389	.long	0x00000000,0x00000000
390	.long	0x6c000000,0x3ff45f30
391	.long	0x2a000000,0x3e3c9c88
392	.long	0xa8000000,0x3c54fe13
393	.long	0xd0000000,0x3aaf47d4
394	.long	0x6c000000,0x38fbb81b
395	.long	0xe0000000,0x3714acc9
396	.long	0x7c000000,0x3560e410
397	.long	0x56000000,0x33bca2c7
398	.long	0xac000000,0x31fbd778
399	.long	0xe0000000,0x300b7246
400	.long	0xe8000000,0x2e5d2126
401	.long	0x48000000,0x2c970032
402	.long	0xe8000000,0x2ad77504
403	.long	0xe0000000,0x290921cf
404	.long	0xb0000000,0x274deb1c
405	.long	0xe0000000,0x25829a73
406	.long	0xbe000000,0x23fd1046
407	.long	0x10000000,0x2224baed
408	.long	0x8e000000,0x20709d33
409	.long	0x80000000,0x1e535a2f
410	.long	0x64000000,0x1cef904e
411	.long	0x30000000,0x1b0d6398
412	.long	0x24000000,0x1964ce7d
413	.long	0x16000000,0x17b908bf
414	.type L(_FPI), @object
415	ASM_SIZE_DIRECTIVE(L(_FPI))
416
417/* Coefficients of polynomial
418   for sin(x)~=x+x^3*DP_SIN2_0+x^5*DP_SIN2_1, |x|<2^-5.  */
419	.p2align 3
420L(DP_SIN2_0):
421	.long	0x5543d49d,0xbfc55555
422	.type L(DP_SIN2_0), @object
423	ASM_SIZE_DIRECTIVE(L(DP_SIN2_0))
424
425	.p2align 3
426L(DP_SIN2_1):
427	.long	0x75cec8c5,0x3f8110f4
428	.type L(DP_SIN2_1), @object
429	ASM_SIZE_DIRECTIVE(L(DP_SIN2_1))
430
431	.p2align 3
432L(DP_ZERONE):
433	.long	0x00000000,0x00000000	/* 0.0 */
434	.long	0x00000000,0xbff00000	/* 1.0 */
435	.type L(DP_ZERONE), @object
436	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
437
438	.p2align 3
439L(DP_ONES):
440	.long	0x00000000,0x3ff00000	/* +1.0 */
441	.long	0x00000000,0xbff00000	/* -1.0 */
442	.type L(DP_ONES), @object
443	ASM_SIZE_DIRECTIVE(L(DP_ONES))
444
445/* Coefficients of polynomial
446   for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
447	.p2align 3
448L(DP_S3):
449	.long	0x64e6b5b4,0x3ec71d72
450	.type L(DP_S3), @object
451	ASM_SIZE_DIRECTIVE(L(DP_S3))
452
453	.p2align 3
454L(DP_S1):
455	.long	0x10c2688b,0x3f811111
456	.type L(DP_S1), @object
457	ASM_SIZE_DIRECTIVE(L(DP_S1))
458
459	.p2align 3
460L(DP_S4):
461	.long	0x1674b58a,0xbe5a947e
462	.type L(DP_S4), @object
463	ASM_SIZE_DIRECTIVE(L(DP_S4))
464
465	.p2align 3
466L(DP_S2):
467	.long	0x8b4bd1f9,0xbf2a019f
468	.type L(DP_S2), @object
469	ASM_SIZE_DIRECTIVE(L(DP_S2))
470
471	.p2align 3
472L(DP_S0):
473	.long	0x55551cd9,0xbfc55555
474	.type L(DP_S0), @object
475	ASM_SIZE_DIRECTIVE(L(DP_S0))
476
477	.p2align 3
478L(DP_SMALL):
479	.long	0x00000000,0x3cd00000	/* 2^(-50) */
480	.type L(DP_SMALL), @object
481	ASM_SIZE_DIRECTIVE(L(DP_SMALL))
482
483/* Coefficients of polynomial
484   for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
485	.p2align 3
486L(DP_C3):
487	.long	0x9ac43cc0,0x3efa00eb
488	.type L(DP_C3), @object
489	ASM_SIZE_DIRECTIVE(L(DP_C3))
490
491	.p2align 3
492L(DP_C1):
493	.long	0x545c50c7,0x3fa55555
494	.type L(DP_C1), @object
495	ASM_SIZE_DIRECTIVE(L(DP_C1))
496
497	.p2align 3
498L(DP_C4):
499	.long	0xdd8844d7,0xbe923c97
500	.type L(DP_C4), @object
501	ASM_SIZE_DIRECTIVE(L(DP_C4))
502
503	.p2align 3
504L(DP_C2):
505	.long	0x348b6874,0xbf56c16b
506	.type L(DP_C2), @object
507	ASM_SIZE_DIRECTIVE(L(DP_C2))
508
509	.p2align 3
510L(DP_C0):
511	.long	0xfffe98ae,0xbfdfffff
512	.type L(DP_C0), @object
513	ASM_SIZE_DIRECTIVE(L(DP_C0))
514
515	.p2align 3
516L(DP_PIO4):
517	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
518	.type L(DP_PIO4), @object
519	ASM_SIZE_DIRECTIVE(L(DP_PIO4))
520
521	.p2align 3
522L(DP_2POW52):
523	.long	0x00000000,0x43300000	/* +2^52 */
524	.long	0x00000000,0xc3300000	/* -2^52 */
525	.type L(DP_2POW52), @object
526	ASM_SIZE_DIRECTIVE(L(DP_2POW52))
527
528	.p2align 3
529L(DP_INVPIO4):
530	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
531	.type L(DP_INVPIO4), @object
532	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
533
534	.p2align 3
535L(DP_PIO4HI):
536	.long	0x54000000,0xbfe921fb	/* High part of Pi/4 */
537	.type L(DP_PIO4HI), @object
538	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
539
540	.p2align 3
541L(DP_PIO4LO):
542	.long	0x11A62633,0xbe010b46	/* Low part of Pi/4 */
543	.type L(DP_PIO4LO), @object
544	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
545
546	.p2align 2
547L(SP_INVPIO4):
548	.long	0x3fa2f983		/* 4/Pi */
549	.type L(SP_INVPIO4), @object
550	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
551
552	.p2align 4
553L(DP_ABS_MASK): /* Mask for getting DP absolute value */
554	.long	0xffffffff,0x7fffffff
555	.long	0xffffffff,0x7fffffff
556	.type L(DP_ABS_MASK), @object
557	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
558
559	.p2align 3
560L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
561	.long	0x00000000,0xffffffff
562	.type L(DP_HI_MASK), @object
563	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
564
565weak_alias (__sinf, sinf)
566