1/* Optimized with sse2 version of cosf
2   Copyright (C) 2012-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20#include <errno.h>
21
22/* Short algorithm description:
23 *
24 *  1) if |x| == 0: return 1.0-|x|.
25 *  2) if |x| <  2^-27: return 1.0-|x|.
26 *  3) if |x| <  2^-5 : return 1.0+x^2*DP_COS2_0+x^5*DP_COS2_1.
27 *  4) if |x| <   Pi/4: return 1.0+x^2*(C0+x^2*(C1+x^2*(C2+x^2*(C3+x^2*C4)))).
28 *  5) if |x| < 9*Pi/4:
29 *      5.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0x0e, n=k+3,
30 *           t=|x|-j*Pi/4.
31 *      5.2) Reconstruction:
32 *          s = (-1.0)^((n>>2)&1)
33 *          if(n&2 != 0) {
34 *              using cos(t) polynomial for |t|<Pi/4, result is
35 *              s     * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4))))).
36 *          } else {
37 *              using sin(t) polynomial for |t|<Pi/4, result is
38 *              s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4))))).
39 *          }
40 *  6) if |x| < 2^23, large args:
41 *      6.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
42 *           t=|x|-j*Pi/4.
43 *      6.2) Reconstruction same as (5.2).
44 *  7) if |x| >= 2^23, very large args:
45 *      7.1) Range reduction: k=trunc(|x|/(Pi/4)), j=(k+1)&0xfffffffe, n=k+3,
46 *           t=|x|-j*Pi/4.
47 *      7.2) Reconstruction same as (5.2).
48 *  8) if x is Inf, return x-x, and set errno=EDOM.
49 *  9) if x is NaN, return x-x.
50 *
51 * Special cases:
52 *  cos(+-0) = 1 not raising inexact,
53 *  cos(subnormal) raises inexact,
54 *  cos(min_normalized) raises inexact,
55 *  cos(normalized) raises inexact,
56 *  cos(Inf) = NaN, raises invalid, sets errno to EDOM,
57 *  cos(NaN) = NaN.
58 */
59
60#ifdef	PIC
61# define MO1(symbol)			L(symbol)##@GOTOFF(%ebx)
62# define MO2(symbol,reg2,_scale)	L(symbol)##@GOTOFF(%ebx,reg2,_scale)
63# define CFI_PUSH(REG)	cfi_adjust_cfa_offset(4); cfi_rel_offset(REG,0)
64# define CFI_POP(REG)	cfi_adjust_cfa_offset(-4); cfi_restore(REG)
65# define PUSH(REG)			pushl REG; CFI_PUSH(REG)
66# define POP(REG)			popl REG; CFI_POP(REG)
67# define ENTRANCE			PUSH(%ebx); LOAD_PIC_REG(bx)
68# define RETURN				POP(%ebx); ret; CFI_PUSH(%ebx)
69# define ARG_X				8(%esp)
70#else
71# define MO1(symbol)			L(symbol)
72# define MO2(symbol,reg2,_scale)	L(symbol)(,reg2,_scale)
73# define ENTRANCE
74# define RETURN				ret
75# define ARG_X				4(%esp)
76#endif
77
78	.text
79ENTRY(__cosf_sse2)
80	/* Input: single precision x on stack at address ARG_X */
81
82	ENTRANCE
83	movl	ARG_X, %eax		/* Bits of x */
84	cvtss2sd ARG_X, %xmm0		/* DP x */
85	andl	$0x7fffffff, %eax	/* |x| */
86
87	cmpl	$0x3f490fdb, %eax	/* |x|<Pi/4?  */
88	jb	L(arg_less_pio4)
89
90	/* Here if |x|>=Pi/4 */
91	movd	%eax, %xmm3		/* SP |x| */
92	andpd	MO1(DP_ABS_MASK),%xmm0	/* DP |x| */
93	movss	MO1(SP_INVPIO4), %xmm2	/* SP 1/(Pi/4) */
94
95	cmpl	$0x40e231d6, %eax	/* |x|<9*Pi/4?  */
96	jae	L(large_args)
97
98	/* Here if Pi/4<=|x|<9*Pi/4 */
99	mulss	%xmm3, %xmm2		/* SP |x|/(Pi/4) */
100	cvttss2si %xmm2, %eax		/* k, number of Pi/4 in x */
101	addl	$1, %eax		/* k+1 */
102	movl	$0x0e, %edx
103	andl	%eax, %edx		/* j = (k+1)&0x0e */
104	addl	$2, %eax		/* n */
105	subsd	MO2(PIO4J,%edx,8), %xmm0 /* t = |x| - j * Pi/4 */
106
107L(reconstruction):
108	/* Input: %eax=n, %xmm0=t */
109	testl	$2, %eax		/* n&2 != 0?  */
110	jz	L(sin_poly)
111
112/*L(cos_poly):*/
113	/* Here if cos(x) calculated using cos(t) polynomial for |t|<Pi/4:
114	 * y = t*t; z = y*y;
115	 * s = sign(x) * (-1.0)^((n>>2)&1)
116	 * result = s * (1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))))
117	 */
118	shrl	$2, %eax		/* n>>2 */
119	mulsd	%xmm0, %xmm0		/* y=t^2 */
120	andl	$1, %eax		/* (n>>2)&1 */
121	movaps	%xmm0, %xmm1		/* y */
122	mulsd	%xmm0, %xmm0		/* z=t^4 */
123
124	movsd	MO1(DP_C4), %xmm4	/* C4 */
125	mulsd	%xmm0, %xmm4		/* z*C4 */
126	movsd	MO1(DP_C3), %xmm3	/* C3 */
127	mulsd	%xmm0, %xmm3		/* z*C3 */
128	addsd	MO1(DP_C2), %xmm4	/* C2+z*C4 */
129	mulsd	%xmm0, %xmm4		/* z*(C2+z*C4) */
130	lea	-8(%esp), %esp		/* Borrow 4 bytes of stack frame */
131	addsd	MO1(DP_C1), %xmm3	/* C1+z*C3 */
132	mulsd	%xmm0, %xmm3		/* z*(C1+z*C3) */
133	addsd	MO1(DP_C0), %xmm4	/* C0+z*(C2+z*C4) */
134	mulsd	%xmm1, %xmm4		/* y*(C0+z*(C2+z*C4)) */
135
136	addsd	%xmm4, %xmm3		/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
137	/* 1.0+y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
138	addsd	MO1(DP_ONES), %xmm3
139
140	mulsd	MO2(DP_ONES,%eax,8), %xmm3 /* DP result */
141	movsd	%xmm3, 0(%esp)		/* Move result from sse...  */
142	fldl	0(%esp)			/* ...to FPU.  */
143	/* Return back 4 bytes of stack frame */
144	lea	8(%esp), %esp
145	RETURN
146
147	.p2align	4
148L(sin_poly):
149	/* Here if cos(x) calculated using sin(t) polynomial for |t|<Pi/4:
150	 * y = t*t; z = y*y;
151	 * s = sign(x) * (-1.0)^((n>>2)&1)
152	 * result = s * t * (1.0+t^2*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))))
153	 */
154
155	movaps	%xmm0, %xmm4		/* t */
156	shrl	$2, %eax		/* n>>2 */
157	mulsd	%xmm0, %xmm0		/* y=t^2 */
158	andl	$1, %eax		/* (n>>2)&1 */
159	movaps	%xmm0, %xmm1		/* y */
160	mulsd	%xmm0, %xmm0		/* z=t^4 */
161
162	movsd	MO1(DP_S4), %xmm2	/* S4 */
163	mulsd	%xmm0, %xmm2		/* z*S4 */
164	movsd	MO1(DP_S3), %xmm3	/* S3 */
165	mulsd	%xmm0, %xmm3		/* z*S3 */
166	lea	-8(%esp), %esp		/* Borrow 4 bytes of stack frame */
167	addsd	MO1(DP_S2), %xmm2	/* S2+z*S4 */
168	mulsd	%xmm0, %xmm2		/* z*(S2+z*S4) */
169	addsd	MO1(DP_S1), %xmm3	/* S1+z*S3 */
170	mulsd	%xmm0, %xmm3		/* z*(S1+z*S3) */
171	addsd	MO1(DP_S0), %xmm2	/* S0+z*(S2+z*S4) */
172	mulsd	%xmm1, %xmm2		/* y*(S0+z*(S2+z*S4)) */
173	/* t*s, where s = sign(x) * (-1.0)^((n>>2)&1) */
174	mulsd	MO2(DP_ONES,%eax,8), %xmm4
175	addsd	%xmm2, %xmm3		/* y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
176	/* t*s*y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
177	mulsd	%xmm4, %xmm3
178	/* t*s*(1.0+y*(S0+y*(S1+y*(S2+y*(S3+y*S4)))) */
179	addsd	%xmm4, %xmm3
180	movsd	%xmm3, 0(%esp)		/* Move result from sse...   */
181	fldl	0(%esp)			/* ...to FPU.  */
182	/* Return back 4 bytes of stack frame */
183	lea	8(%esp), %esp
184	RETURN
185
186	.p2align	4
187L(large_args):
188	/* Here if |x|>=9*Pi/4 */
189	cmpl	$0x7f800000, %eax	/* x is Inf or NaN?  */
190	jae	L(arg_inf_or_nan)
191
192	/* Here if finite |x|>=9*Pi/4 */
193	cmpl	$0x4b000000, %eax	/* |x|<2^23?  */
194	jae	L(very_large_args)
195
196	/* Here if 9*Pi/4<=|x|<2^23 */
197	movsd	MO1(DP_INVPIO4), %xmm1	/* 1/(Pi/4) */
198	mulsd	%xmm0, %xmm1		/* |x|/(Pi/4) */
199	cvttsd2si %xmm1, %eax		/* k=trunc(|x|/(Pi/4)) */
200	addl	$1, %eax		/* k+1 */
201	movl	%eax, %edx
202	andl	$0xfffffffe, %edx	/* j=(k+1)&0xfffffffe */
203	cvtsi2sdl %edx, %xmm4		/* DP j */
204	movsd	MO1(DP_PIO4HI), %xmm2	/* -PIO4HI = high part of -Pi/4 */
205	mulsd	%xmm4, %xmm2		/* -j*PIO4HI */
206	movsd	MO1(DP_PIO4LO), %xmm3	/* -PIO4LO = low part of -Pi/4 */
207	addsd	%xmm2, %xmm0		/* |x| - j*PIO4HI */
208	addl	$2, %eax		/* n */
209	mulsd	%xmm3, %xmm4		/* j*PIO4LO */
210	addsd	%xmm4, %xmm0		/* t = |x| - j*PIO4HI - j*PIO4LO */
211	jmp	L(reconstruction)
212
213	.p2align	4
214L(very_large_args):
215	/* Here if finite |x|>=2^23 */
216
217	/* bitpos = (ix>>23) - BIAS_32 + 59; */
218	shrl	$23, %eax		/* eb = biased exponent of x */
219	/* bitpos = eb - 0x7f + 59, where 0x7f is exponent bias */
220	subl	$68, %eax
221	movl	$28, %ecx		/* %cl=28 */
222	movl	%eax, %edx		/* bitpos copy */
223
224	/* j = bitpos/28; */
225	div	%cl			/* j in register %al=%ax/%cl */
226	movapd	%xmm0, %xmm3		/* |x| */
227	/* clear unneeded remainder from %ah */
228	andl	$0xff, %eax
229
230	imull	$28, %eax, %ecx		/* j*28 */
231	movsd	MO1(DP_HI_MASK), %xmm4	/* DP_HI_MASK */
232	movapd	%xmm0, %xmm5		/* |x| */
233	mulsd	-2*8+MO2(_FPI,%eax,8), %xmm3	/* tmp3 = FPI[j-2]*|x| */
234	movapd	%xmm0, %xmm1		/* |x| */
235	mulsd	-1*8+MO2(_FPI,%eax,8), %xmm5	/* tmp2 = FPI[j-1]*|x| */
236	mulsd	0*8+MO2(_FPI,%eax,8), %xmm0	/* tmp0 = FPI[j]*|x| */
237	addl	$19, %ecx		/* j*28+19 */
238	mulsd	1*8+MO2(_FPI,%eax,8), %xmm1	/* tmp1 = FPI[j+1]*|x| */
239	cmpl	%ecx, %edx		/* bitpos>=j*28+19?  */
240	jl	L(very_large_skip1)
241
242	/* Here if bitpos>=j*28+19 */
243	andpd	%xmm3, %xmm4		/* HI(tmp3) */
244	subsd	%xmm4, %xmm3		/* tmp3 = tmp3 - HI(tmp3) */
245L(very_large_skip1):
246
247	movsd	MO1(DP_2POW52), %xmm6
248	movapd	%xmm5, %xmm2		/* tmp2 copy */
249	addsd	%xmm3, %xmm5		/* tmp5 = tmp3 + tmp2 */
250	movl	$1, %edx
251	addsd	%xmm5, %xmm6		/* tmp6 = tmp5 + 2^52 */
252	movsd	8+MO1(DP_2POW52), %xmm4
253	movd	%xmm6, %eax		/* k = I64_LO(tmp6); */
254	addsd	%xmm6, %xmm4		/* tmp4 = tmp6 - 2^52 */
255	comisd	%xmm5, %xmm4		/* tmp4 > tmp5?  */
256	jbe	L(very_large_skip2)
257
258	/* Here if tmp4 > tmp5 */
259	subl	$1, %eax		/* k-- */
260	addsd	8+MO1(DP_ONES), %xmm4	/* tmp4 -= 1.0 */
261L(very_large_skip2):
262
263	andl	%eax, %edx		/* k&1 */
264	subsd	%xmm4, %xmm3		/* tmp3 -= tmp4 */
265	addsd	MO2(DP_ZERONE,%edx,8), %xmm3 /* t  = DP_ZERONE[k&1] + tmp3 */
266	addsd	%xmm2, %xmm3		/* t += tmp2 */
267	addsd	%xmm3, %xmm0		/* t += tmp0 */
268	addl	$3, %eax		/* n=k+3 */
269	addsd	%xmm1, %xmm0		/* t += tmp1 */
270	mulsd	MO1(DP_PIO4), %xmm0	/* t *= PI04 */
271
272	jmp	L(reconstruction)	/* end of very_large_args peth */
273
274	.p2align	4
275L(arg_less_pio4):
276	/* Here if |x|<Pi/4 */
277	cmpl	$0x3d000000, %eax	/* |x|<2^-5?  */
278	jl	L(arg_less_2pn5)
279
280	/* Here if 2^-5<=|x|<Pi/4 */
281	mulsd	%xmm0, %xmm0		/* y=x^2 */
282	movaps	%xmm0, %xmm1		/* y */
283	mulsd	%xmm0, %xmm0		/* z=x^4 */
284	movsd	MO1(DP_C4), %xmm3	/* C4 */
285	mulsd	%xmm0, %xmm3		/* z*C4 */
286	movsd	MO1(DP_C3), %xmm5	/* C3 */
287	mulsd	%xmm0, %xmm5		/* z*C3 */
288	addsd	MO1(DP_C2), %xmm3	/* C2+z*C4 */
289	mulsd	%xmm0, %xmm3		/* z*(C2+z*C4) */
290	addsd	MO1(DP_C1), %xmm5	/* C1+z*C3 */
291	mulsd	%xmm0, %xmm5		/* z*(C1+z*C3) */
292	addsd	MO1(DP_C0), %xmm3	/* C0+z*(C2+z*C4) */
293	mulsd	%xmm1, %xmm3		/* y*(C0+z*(C2+z*C4)) */
294	addsd	%xmm5, %xmm3		/* y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
295	/* 1.0 + y*(C0+y*(C1+y*(C2+y*(C3+y*C4)))) */
296	addsd	MO1(DP_ONES), %xmm3
297	cvtsd2ss %xmm3, %xmm3		/* SP result */
298
299L(epilogue):
300	lea	-4(%esp), %esp		/* Borrow 4 bytes of stack frame */
301	movss	%xmm3, 0(%esp)		/* Move result from sse...  */
302	flds	0(%esp)			/* ...to FPU.  */
303	/* Return back 4 bytes of stack frame */
304	lea	4(%esp), %esp
305	RETURN
306
307	.p2align	4
308L(arg_less_2pn5):
309	/* Here if |x|<2^-5 */
310	cmpl	$0x32000000, %eax	/* |x|<2^-27?  */
311	jl	L(arg_less_2pn27)
312
313	/* Here if 2^-27<=|x|<2^-5 */
314	mulsd	%xmm0, %xmm0		/* DP x^2 */
315	movsd	MO1(DP_COS2_1), %xmm3	/* DP DP_COS2_1 */
316	mulsd	%xmm0, %xmm3		/* DP x^2*DP_COS2_1 */
317	addsd	MO1(DP_COS2_0), %xmm3	/* DP DP_COS2_0+x^2*DP_COS2_1 */
318	mulsd	%xmm0, %xmm3		/* DP x^2*DP_COS2_0+x^4*DP_COS2_1 */
319	/* DP 1.0+x^2*DP_COS2_0+x^4*DP_COS2_1 */
320	addsd	MO1(DP_ONES), %xmm3
321	cvtsd2ss %xmm3, %xmm3		/* SP result */
322	jmp	L(epilogue)
323
324	.p2align	4
325L(arg_less_2pn27):
326	/* Here if |x|<2^-27 */
327	movss	ARG_X, %xmm0		/* x */
328	andps	MO1(SP_ABS_MASK),%xmm0	/* |x| */
329	movss	MO1(SP_ONE), %xmm3	/* 1.0 */
330	subss	%xmm0, %xmm3		/* result is 1.0-|x| */
331	jmp	L(epilogue)
332
333	.p2align	4
334L(arg_inf_or_nan):
335	/* Here if |x| is Inf or NAN */
336	jne	L(skip_errno_setting)	/* in case of x is NaN */
337
338	/* Here if x is Inf. Set errno to EDOM.  */
339	call	JUMPTARGET(__errno_location)
340	movl	$EDOM, (%eax)
341
342	.p2align	4
343L(skip_errno_setting):
344	/* Here if |x| is Inf or NAN. Continued.  */
345	movss	ARG_X, %xmm3		/* load x */
346	subss	%xmm3, %xmm3		/* Result is NaN */
347	jmp	L(epilogue)
348END(__cosf_sse2)
349
350	.section .rodata, "a"
351	.p2align 3
352L(PIO4J): /* Table of j*Pi/4, for j=0,1,..,10 */
353	.long	0x00000000,0x00000000
354	.long	0x54442d18,0x3fe921fb
355	.long	0x54442d18,0x3ff921fb
356	.long	0x7f3321d2,0x4002d97c
357	.long	0x54442d18,0x400921fb
358	.long	0x2955385e,0x400f6a7a
359	.long	0x7f3321d2,0x4012d97c
360	.long	0xe9bba775,0x4015fdbb
361	.long	0x54442d18,0x401921fb
362	.long	0xbeccb2bb,0x401c463a
363	.long	0x2955385e,0x401f6a7a
364	.type L(PIO4J), @object
365	ASM_SIZE_DIRECTIVE(L(PIO4J))
366
367	.p2align 3
368L(_FPI): /* 4/Pi broken into sum of positive DP values */
369	.long	0x00000000,0x00000000
370	.long	0x6c000000,0x3ff45f30
371	.long	0x2a000000,0x3e3c9c88
372	.long	0xa8000000,0x3c54fe13
373	.long	0xd0000000,0x3aaf47d4
374	.long	0x6c000000,0x38fbb81b
375	.long	0xe0000000,0x3714acc9
376	.long	0x7c000000,0x3560e410
377	.long	0x56000000,0x33bca2c7
378	.long	0xac000000,0x31fbd778
379	.long	0xe0000000,0x300b7246
380	.long	0xe8000000,0x2e5d2126
381	.long	0x48000000,0x2c970032
382	.long	0xe8000000,0x2ad77504
383	.long	0xe0000000,0x290921cf
384	.long	0xb0000000,0x274deb1c
385	.long	0xe0000000,0x25829a73
386	.long	0xbe000000,0x23fd1046
387	.long	0x10000000,0x2224baed
388	.long	0x8e000000,0x20709d33
389	.long	0x80000000,0x1e535a2f
390	.long	0x64000000,0x1cef904e
391	.long	0x30000000,0x1b0d6398
392	.long	0x24000000,0x1964ce7d
393	.long	0x16000000,0x17b908bf
394	.type L(_FPI), @object
395	ASM_SIZE_DIRECTIVE(L(_FPI))
396
397/* Coefficients of polynomial
398 for cos(x)~=1.0+x^2*DP_COS2_0+x^4*DP_COS2_1, |x|<2^-5.  */
399	.p2align 3
400L(DP_COS2_0):
401	.long	0xff5cc6fd,0xbfdfffff
402	.type L(DP_COS2_0), @object
403	ASM_SIZE_DIRECTIVE(L(DP_COS2_0))
404
405	.p2align 3
406L(DP_COS2_1):
407	.long	0xb178dac5,0x3fa55514
408	.type L(DP_COS2_1), @object
409	ASM_SIZE_DIRECTIVE(L(DP_COS2_1))
410
411	.p2align 3
412L(DP_ZERONE):
413	.long	0x00000000,0x00000000	/* 0.0 */
414	.long	0x00000000,0xbff00000	/* 1.0 */
415	.type L(DP_ZERONE),@object
416	ASM_SIZE_DIRECTIVE(L(DP_ZERONE))
417
418	.p2align 3
419L(DP_ONES):
420	.long	0x00000000,0x3ff00000	/* +1.0 */
421	.long	0x00000000,0xbff00000	/* -1.0 */
422	.type L(DP_ONES), @object
423	ASM_SIZE_DIRECTIVE(L(DP_ONES))
424
425/* Coefficients of polynomial
426 for sin(t)~=t+t^3*(S0+t^2*(S1+t^2*(S2+t^2*(S3+t^2*S4)))), |t|<Pi/4.  */
427	.p2align 3
428L(DP_S3):
429	.long	0x64e6b5b4,0x3ec71d72
430	.type L(DP_S3), @object
431	ASM_SIZE_DIRECTIVE(L(DP_S3))
432
433	.p2align 3
434L(DP_S1):
435	.long	0x10c2688b,0x3f811111
436	.type L(DP_S1), @object
437	ASM_SIZE_DIRECTIVE(L(DP_S1))
438
439	.p2align 3
440L(DP_S4):
441	.long	0x1674b58a,0xbe5a947e
442	.type L(DP_S4), @object
443	ASM_SIZE_DIRECTIVE(L(DP_S4))
444
445	.p2align 3
446L(DP_S2):
447	.long	0x8b4bd1f9,0xbf2a019f
448	.type L(DP_S2), @object
449	ASM_SIZE_DIRECTIVE(L(DP_S2))
450
451	.p2align 3
452L(DP_S0):
453	.long	0x55551cd9,0xbfc55555
454	.type L(DP_S0), @object
455	ASM_SIZE_DIRECTIVE(L(DP_S0))
456
457/* Coefficients of polynomial
458 for cos(t)~=1.0+t^2*(C0+t^2*(C1+t^2*(C2+t^2*(C3+t^2*C4)))), |t|<Pi/4.  */
459	.p2align 3
460L(DP_C3):
461	.long	0x9ac43cc0,0x3efa00eb
462	.type L(DP_C3), @object
463	ASM_SIZE_DIRECTIVE(L(DP_C3))
464
465	.p2align 3
466L(DP_C1):
467	.long	0x545c50c7,0x3fa55555
468	.type L(DP_C1), @object
469	ASM_SIZE_DIRECTIVE(L(DP_C1))
470
471	.p2align 3
472L(DP_C4):
473	.long	0xdd8844d7,0xbe923c97
474	.type L(DP_C4), @object
475	ASM_SIZE_DIRECTIVE(L(DP_C4))
476
477	.p2align 3
478L(DP_C2):
479	.long	0x348b6874,0xbf56c16b
480	.type L(DP_C2), @object
481	ASM_SIZE_DIRECTIVE(L(DP_C2))
482
483	.p2align 3
484L(DP_C0):
485	.long	0xfffe98ae,0xbfdfffff
486	.type L(DP_C0), @object
487	ASM_SIZE_DIRECTIVE(L(DP_C0))
488
489	.p2align 3
490L(DP_PIO4):
491	.long	0x54442d18,0x3fe921fb	/* Pi/4 */
492	.type L(DP_PIO4), @object
493	ASM_SIZE_DIRECTIVE(L(DP_PIO4))
494
495	.p2align 3
496L(DP_2POW52):
497	.long	0x00000000,0x43300000	/* +2^52 */
498	.long	0x00000000,0xc3300000	/* -2^52 */
499	.type L(DP_2POW52), @object
500	ASM_SIZE_DIRECTIVE(L(DP_2POW52))
501
502	.p2align 3
503L(DP_INVPIO4):
504	.long	0x6dc9c883,0x3ff45f30	/* 4/Pi */
505	.type L(DP_INVPIO4), @object
506	ASM_SIZE_DIRECTIVE(L(DP_INVPIO4))
507
508	.p2align 3
509L(DP_PIO4HI):
510	.long	0x54000000,0xbfe921fb	/* High part of Pi/4 */
511	.type L(DP_PIO4HI), @object
512	ASM_SIZE_DIRECTIVE(L(DP_PIO4HI))
513
514	.p2align 3
515L(DP_PIO4LO):
516	.long	0x11A62633,0xbe010b46	/* Low part of Pi/4 */
517	.type L(DP_PIO4LO), @object
518	ASM_SIZE_DIRECTIVE(L(DP_PIO4LO))
519
520	.p2align 2
521L(SP_INVPIO4):
522	.long	0x3fa2f983		/* 4/Pi */
523	.type L(SP_INVPIO4), @object
524	ASM_SIZE_DIRECTIVE(L(SP_INVPIO4))
525
526	.p2align 4
527L(DP_ABS_MASK): /* Mask for getting DP absolute value */
528	.long	0xffffffff,0x7fffffff
529	.long	0xffffffff,0x7fffffff
530	.type L(DP_ABS_MASK), @object
531	ASM_SIZE_DIRECTIVE(L(DP_ABS_MASK))
532
533	.p2align 3
534L(DP_HI_MASK): /* Mask for getting high 21 bits of DP value */
535	.long	0x00000000,0xffffffff
536	.type L(DP_HI_MASK), @object
537	ASM_SIZE_DIRECTIVE(L(DP_HI_MASK))
538
539	.p2align 4
540L(SP_ABS_MASK): /* Mask for getting SP absolute value */
541	.long	0x7fffffff,0x7fffffff
542	.long	0x7fffffff,0x7fffffff
543	.type L(SP_ABS_MASK), @object
544	ASM_SIZE_DIRECTIVE(L(SP_ABS_MASK))
545
546	.p2align 2
547L(SP_ONE):
548	.long	0x3f800000		/* 1.0 */
549	.type L(SP_ONE), @object
550	ASM_SIZE_DIRECTIVE(L(SP_ONE))
551
552weak_alias (__cosf, cosf)
553