1 #ifndef X86_FENV_PRIVATE_H
2 #define X86_FENV_PRIVATE_H 1
3 
4 #include <bits/floatn.h>
5 #include <fenv.h>
6 #include <fpu_control.h>
7 
/* Shared by the 32-bit and the 64-bit ports.  Only the 64-bit fenv_t
   has a dedicated field for the mxcsr; on the 32-bit port the saved
   value is kept in __eip instead, the only 32-bit field that exists in
   that struct.  */
#if !defined __x86_64__
# define __mxcsr	__eip
#endif
14 
15 
16 /* All of these functions are private to libm, and are all used in pairs
17    to save+change the fp state and restore the original state.  Thus we
18    need not care for both the 387 and the sse unit, only the one we're
19    actually using.  */
20 
/* Instruction mnemonics used to store and load the MXCSR.  The
   "v"-prefixed spellings are selected when __AVX__ or SSE2AVX is
   defined, the plain spellings otherwise.  */
#if !defined __AVX__ && !defined SSE2AVX
# define STMXCSR "stmxcsr"
# define LDMXCSR "ldmxcsr"
#else
# define STMXCSR "vstmxcsr"
# define LDMXCSR "vldmxcsr"
#endif
28 
29 static __always_inline void
libc_feholdexcept_sse(fenv_t * e)30 libc_feholdexcept_sse (fenv_t *e)
31 {
32   unsigned int mxcsr;
33   asm (STMXCSR " %0" : "=m" (*&mxcsr));
34   e->__mxcsr = mxcsr;
35   mxcsr = (mxcsr | 0x1f80) & ~0x3f;
36   asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
37 }
38 
/* Store the x87 environment in E and clear the pending x87 exception
   flags.  Storing the environment with fnstenv also leaves every x87
   exception masked.  */
static __always_inline void
libc_feholdexcept_387 (fenv_t *e)
{
  /* All eight x87 stack registers are listed as clobbered so that the
     TOS field of the stored environment is 0.  */
  __asm__ __volatile__ ("fnstenv %0; fnclex"
			: "=m" (*e)
			: /* No inputs.  */
			: "st", "st(1)", "st(2)", "st(3)",
			  "st(4)", "st(5)", "st(6)", "st(7)");
}
49 
50 static __always_inline void
libc_fesetround_sse(int r)51 libc_fesetround_sse (int r)
52 {
53   unsigned int mxcsr;
54   asm (STMXCSR " %0" : "=m" (*&mxcsr));
55   mxcsr = (mxcsr & ~0x6000) | (r << 3);
56   asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
57 }
58 
/* Replace the rounding field (mask 0xc00) of the x87 control word with
   R; every other control bit is written back unchanged.  */
static __always_inline void
libc_fesetround_387 (int r)
{
  fpu_control_t old_cw;
  fpu_control_t new_cw;

  _FPU_GETCW (old_cw);
  new_cw = (old_cw & ~0xc00) | r;
  _FPU_SETCW (new_cw);
}
67 
68 static __always_inline void
libc_feholdexcept_setround_sse(fenv_t * e,int r)69 libc_feholdexcept_setround_sse (fenv_t *e, int r)
70 {
71   unsigned int mxcsr;
72   asm (STMXCSR " %0" : "=m" (*&mxcsr));
73   e->__mxcsr = mxcsr;
74   mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
75   asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
76 }
77 
/* Hold x87 exceptions, saving the environment in E, and then install
   the rounding mode and precision control that the caller combined in
   R.  Common helper behind libc_feholdexcept_setround_387 and
   libc_feholdexcept_setround_387_53bit.  */
static __always_inline void
libc_feholdexcept_setround_387_prec (fenv_t *e, int r)
{
  fpu_control_t new_cw;

  libc_feholdexcept_387 (e);

  /* Start from the saved control word, drop its rounding and precision
     fields, then add R and keep the low six (exception mask) bits
     set.  */
  new_cw = (e->__control_word & ~(_FPU_RC_ZERO | _FPU_EXTENDED)) | r | 0x3f;
  _FPU_SETCW (new_cw);
}
90 
/* Hold x87 exceptions into E and select rounding mode R together with
   the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387 (fenv_t *e, int r)
{
  const int mode = _FPU_EXTENDED | r;

  libc_feholdexcept_setround_387_prec (e, mode);
}
96 
/* Hold x87 exceptions into E and select rounding mode R together with
   the _FPU_DOUBLE (53-bit) precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387_53bit (fenv_t *e, int r)
{
  const int mode = _FPU_DOUBLE | r;

  libc_feholdexcept_setround_387_prec (e, mode);
}
102 
103 static __always_inline int
libc_fetestexcept_sse(int e)104 libc_fetestexcept_sse (int e)
105 {
106   unsigned int mxcsr;
107   asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
108   return mxcsr & e & FE_ALL_EXCEPT;
109 }
110 
/* Return the subset of the exceptions in EX (restricted to
   FE_ALL_EXCEPT) whose flags are currently set in the x87 status
   word.  */
static __always_inline int
libc_fetestexcept_387 (int ex)
{
  fexcept_t status;

  __asm__ __volatile__ ("fnstsw %0" : "=a" (status));

  const int wanted = ex & FE_ALL_EXCEPT;
  return status & wanted;
}
118 
119 static __always_inline void
libc_fesetenv_sse(fenv_t * e)120 libc_fesetenv_sse (fenv_t *e)
121 {
122   asm volatile (LDMXCSR " %0" : : "m" (e->__mxcsr));
123 }
124 
/* Reload the x87 environment previously stored in E.  */
static __always_inline void
libc_fesetenv_387 (fenv_t *e)
{
  /* All eight x87 stack registers are listed as clobbered so that the
     TOS value saved earlier agrees with what the compiler currently
     assumes about the register stack.  */
  __asm__ __volatile__ ("fldenv %0"
			: /* No outputs.  */
			: "m" (*e)
			: "st", "st(1)", "st(2)", "st(3)",
			  "st(4)", "st(5)", "st(6)", "st(7)");
}
135 
136 static __always_inline int
libc_feupdateenv_test_sse(fenv_t * e,int ex)137 libc_feupdateenv_test_sse (fenv_t *e, int ex)
138 {
139   unsigned int mxcsr, old_mxcsr, cur_ex;
140   asm volatile (STMXCSR " %0" : "=m" (*&mxcsr));
141   cur_ex = mxcsr & FE_ALL_EXCEPT;
142 
143   /* Merge current exceptions with the old environment.  */
144   old_mxcsr = e->__mxcsr;
145   mxcsr = old_mxcsr | cur_ex;
146   asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
147 
148   /* Raise SIGFPE for any new exceptions since the hold.  Expect that
149      the normal environment has all exceptions masked.  */
150   if (__glibc_unlikely (~(old_mxcsr >> 7) & cur_ex))
151     __feraiseexcept (cur_ex);
152 
153   /* Test for exceptions raised since the hold.  */
154   return cur_ex & ex;
155 }
156 
/* Restore the x87 environment saved in E, re-raise the exceptions that
   were pending before the restore, and return those of them that are
   also in EX.  */
static __always_inline int
libc_feupdateenv_test_387 (fenv_t *e, int ex)
{
  fexcept_t status;

  /* Capture the pending exceptions before the environment is
     replaced.  */
  __asm__ __volatile__ ("fnstsw %0" : "=a" (status));
  const int raised = status & FE_ALL_EXCEPT;

  /* Bring back the original environment ...  */
  libc_fesetenv_387 (e);

  /* ... and merge the captured exceptions into it.  */
  __feraiseexcept (raised);

  /* Exceptions raised since the hold, filtered by EX.  */
  return raised & ex;
}
175 
/* Same as libc_feupdateenv_test_sse, for callers that do not need the
   test result.  */
static __always_inline void
libc_feupdateenv_sse (fenv_t *e)
{
  (void) libc_feupdateenv_test_sse (e, 0);
}
181 
/* Same as libc_feupdateenv_test_387, for callers that do not need the
   test result.  */
static __always_inline void
libc_feupdateenv_387 (fenv_t *e)
{
  (void) libc_feupdateenv_test_387 (e, 0);
}
187 
188 static __always_inline void
libc_feholdsetround_sse(fenv_t * e,int r)189 libc_feholdsetround_sse (fenv_t *e, int r)
190 {
191   unsigned int mxcsr;
192   asm (STMXCSR " %0" : "=m" (*&mxcsr));
193   e->__mxcsr = mxcsr;
194   mxcsr = (mxcsr & ~0x6000) | (r << 3);
195   asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
196 }
197 
/* Save the current x87 control word in E, then replace its rounding
   and precision fields with the settings the caller combined in R.
   Exception masks are not changed.  */
static __always_inline void
libc_feholdsetround_387_prec (fenv_t *e, int r)
{
  fpu_control_t old_cw;
  fpu_control_t new_cw;

  _FPU_GETCW (old_cw);
  e->__control_word = old_cw;

  new_cw = (old_cw & ~(_FPU_RC_ZERO | _FPU_EXTENDED)) | r;
  _FPU_SETCW (new_cw);
}
209 
/* Save the x87 control word in E and select rounding mode R together
   with the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdsetround_387 (fenv_t *e, int r)
{
  const int mode = _FPU_EXTENDED | r;

  libc_feholdsetround_387_prec (e, mode);
}
215 
/* Save the x87 control word in E and select rounding mode R together
   with the _FPU_DOUBLE (53-bit) precision setting.  */
static __always_inline void
libc_feholdsetround_387_53bit (fenv_t *e, int r)
{
  const int mode = _FPU_DOUBLE | r;

  libc_feholdsetround_387_prec (e, mode);
}
221 
222 static __always_inline void
libc_feresetround_sse(fenv_t * e)223 libc_feresetround_sse (fenv_t *e)
224 {
225   unsigned int mxcsr;
226   asm (STMXCSR " %0" : "=m" (*&mxcsr));
227   mxcsr = (mxcsr & ~0x6000) | (e->__mxcsr & 0x6000);
228   asm volatile (LDMXCSR " %0" : : "m" (*&mxcsr));
229 }
230 
/* Reload the whole x87 control word from the value saved in E.  */
static __always_inline void
libc_feresetround_387 (fenv_t *e)
{
  fpu_control_t saved_cw = e->__control_word;

  _FPU_SETCW (saved_cw);
}
236 
/* Helpers with the "f" suffix: the x87 versions unless __SSE_MATH__ is
   defined, in which case the SSE versions are used.  */
#if !defined __SSE_MATH__
# define libc_feholdexceptf		libc_feholdexcept_387
# define libc_fesetroundf		libc_fesetround_387
# define libc_feholdexcept_setroundf	libc_feholdexcept_setround_387
# define libc_fetestexceptf		libc_fetestexcept_387
# define libc_fesetenvf			libc_fesetenv_387
# define libc_feupdateenv_testf		libc_feupdateenv_test_387
# define libc_feupdateenvf		libc_feupdateenv_387
# define libc_feholdsetroundf		libc_feholdsetround_387
# define libc_feresetroundf		libc_feresetround_387
#else
# define libc_feholdexceptf		libc_feholdexcept_sse
# define libc_fesetroundf		libc_fesetround_sse
# define libc_feholdexcept_setroundf	libc_feholdexcept_setround_sse
# define libc_fetestexceptf		libc_fetestexcept_sse
# define libc_fesetenvf			libc_fesetenv_sse
# define libc_feupdateenv_testf		libc_feupdateenv_test_sse
# define libc_feupdateenvf		libc_feupdateenv_sse
# define libc_feholdsetroundf		libc_feholdsetround_sse
# define libc_feresetroundf		libc_feresetround_sse
#endif /* !__SSE_MATH__ */
258 
/* Helpers without a suffix: the x87 versions unless __SSE2_MATH__ is
   defined, in which case the SSE versions are used.  */
#if !defined __SSE2_MATH__
# define libc_feholdexcept		libc_feholdexcept_387
# define libc_fesetround		libc_fesetround_387
# define libc_feholdexcept_setround	libc_feholdexcept_setround_387
# define libc_fetestexcept		libc_fetestexcept_387
# define libc_fesetenv			libc_fesetenv_387
# define libc_feupdateenv_test		libc_feupdateenv_test_387
# define libc_feupdateenv		libc_feupdateenv_387
# define libc_feholdsetround		libc_feholdsetround_387
# define libc_feresetround		libc_feresetround_387
#else
# define libc_feholdexcept		libc_feholdexcept_sse
# define libc_fesetround		libc_fesetround_sse
# define libc_feholdexcept_setround	libc_feholdexcept_setround_sse
# define libc_fetestexcept		libc_fetestexcept_sse
# define libc_fesetenv			libc_fesetenv_sse
# define libc_feupdateenv_test		libc_feupdateenv_test_sse
# define libc_feupdateenv		libc_feupdateenv_sse
# define libc_feholdsetround		libc_feholdsetround_sse
# define libc_feresetround		libc_feresetround_sse
#endif /* !__SSE2_MATH__ */
280 
/* Helpers with the "l" suffix always map to the x87 versions,
   whichever of __SSE_MATH__ and __SSE2_MATH__ are defined.  */
#define libc_feholdexceptl           libc_feholdexcept_387
#define libc_fesetroundl             libc_fesetround_387
#define libc_feholdexcept_setroundl  libc_feholdexcept_setround_387
#define libc_fetestexceptl           libc_fetestexcept_387
#define libc_fesetenvl               libc_fesetenv_387
#define libc_feupdateenv_testl       libc_feupdateenv_test_387
#define libc_feupdateenvl            libc_feupdateenv_387
#define libc_feholdsetroundl         libc_feholdsetround_387
#define libc_feresetroundl           libc_feresetround_387
290 
/* The 53-bit precision variants are provided only when __SSE2_MATH__
   is not defined.  */
#if !defined __SSE2_MATH__
# define libc_feholdexcept_setround_53bit \
  libc_feholdexcept_setround_387_53bit
# define libc_feholdsetround_53bit \
  libc_feholdsetround_387_53bit
#endif
295 
#ifndef __x86_64__
/* On 32-bit, soft-fp uses the 387 rounding mode, but whether it uses
   387 or SSE exceptions depends on whether libgcc was built for SSE
   math, which is not known while glibc is being built.  Defer to the
   default helpers.  */
# define libc_feholdexcept_setroundf128	default_libc_feholdexcept_setround
# define libc_feupdateenv_testf128	default_libc_feupdateenv_test
#else
/* On x86_64, soft-fp (libgcc and glibc) uses the SSE rounding mode, so
   that is the one to set for float128 computations.  */
# define SET_RESTORE_ROUNDF128(RM) \
  SET_RESTORE_ROUND_GENERIC (RM, libc_feholdsetround_sse, libc_feresetround_sse)
# define libc_feholdexcept_setroundf128	libc_feholdexcept_setround_sse
# define libc_feupdateenv_testf128	libc_feupdateenv_test_sse
#endif /* !__x86_64__ */

/* Rounding mode context helpers (the *_ctx functions) are
   supported.  */
#define HAVE_RM_CTX 1
313 
314 static __always_inline void
libc_feholdexcept_setround_sse_ctx(struct rm_ctx * ctx,int r)315 libc_feholdexcept_setround_sse_ctx (struct rm_ctx *ctx, int r)
316 {
317   unsigned int mxcsr, new_mxcsr;
318   asm (STMXCSR " %0" : "=m" (*&mxcsr));
319   new_mxcsr = ((mxcsr | 0x1f80) & ~0x603f) | (r << 3);
320 
321   ctx->env.__mxcsr = mxcsr;
322   if (__glibc_unlikely (mxcsr != new_mxcsr))
323     {
324       asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr));
325       ctx->updated_status = true;
326     }
327   else
328     ctx->updated_status = false;
329 }
330 
331 /* Unconditional since we want to overwrite any exceptions that occurred in the
332    context.  This is also why all fehold* functions unconditionally write into
333    ctx->env.  */
334 static __always_inline void
libc_fesetenv_sse_ctx(struct rm_ctx * ctx)335 libc_fesetenv_sse_ctx (struct rm_ctx *ctx)
336 {
337   libc_fesetenv_sse (&ctx->env);
338 }
339 
340 static __always_inline void
libc_feupdateenv_sse_ctx(struct rm_ctx * ctx)341 libc_feupdateenv_sse_ctx (struct rm_ctx *ctx)
342 {
343   if (__glibc_unlikely (ctx->updated_status))
344     libc_feupdateenv_test_sse (&ctx->env, 0);
345 }
346 
347 static __always_inline void
libc_feholdexcept_setround_387_prec_ctx(struct rm_ctx * ctx,int r)348 libc_feholdexcept_setround_387_prec_ctx (struct rm_ctx *ctx, int r)
349 {
350   libc_feholdexcept_387 (&ctx->env);
351 
352   fpu_control_t cw = ctx->env.__control_word;
353   fpu_control_t old_cw = cw;
354   cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
355   cw |= r | 0x3f;
356 
357   if (__glibc_unlikely (old_cw != cw))
358     {
359       _FPU_SETCW (cw);
360       ctx->updated_status = true;
361     }
362   else
363     ctx->updated_status = false;
364 }
365 
/* Context variant: hold x87 exceptions and select rounding mode R with
   the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387_ctx (struct rm_ctx *ctx, int r)
{
  const int mode = _FPU_EXTENDED | r;

  libc_feholdexcept_setround_387_prec_ctx (ctx, mode);
}
371 
/* Context variant: hold x87 exceptions and select rounding mode R with
   the _FPU_DOUBLE (53-bit) precision setting.  */
static __always_inline void
libc_feholdexcept_setround_387_53bit_ctx (struct rm_ctx *ctx, int r)
{
  const int mode = _FPU_DOUBLE | r;

  libc_feholdexcept_setround_387_prec_ctx (ctx, mode);
}
377 
378 static __always_inline void
libc_feholdsetround_387_prec_ctx(struct rm_ctx * ctx,int r)379 libc_feholdsetround_387_prec_ctx (struct rm_ctx *ctx, int r)
380 {
381   fpu_control_t cw, new_cw;
382 
383   _FPU_GETCW (cw);
384   new_cw = cw;
385   new_cw &= ~(_FPU_RC_ZERO | _FPU_EXTENDED);
386   new_cw |= r;
387 
388   ctx->env.__control_word = cw;
389   if (__glibc_unlikely (new_cw != cw))
390     {
391       _FPU_SETCW (new_cw);
392       ctx->updated_status = true;
393     }
394   else
395     ctx->updated_status = false;
396 }
397 
/* Context variant: save the x87 control word and select rounding mode
   R with the _FPU_EXTENDED precision setting.  */
static __always_inline void
libc_feholdsetround_387_ctx (struct rm_ctx *ctx, int r)
{
  const int mode = _FPU_EXTENDED | r;

  libc_feholdsetround_387_prec_ctx (ctx, mode);
}
403 
/* Context variant: save the x87 control word and select rounding mode
   R with the _FPU_DOUBLE (53-bit) precision setting.  */
static __always_inline void
libc_feholdsetround_387_53bit_ctx (struct rm_ctx *ctx, int r)
{
  const int mode = _FPU_DOUBLE | r;

  libc_feholdsetround_387_prec_ctx (ctx, mode);
}
409 
410 static __always_inline void
libc_feholdsetround_sse_ctx(struct rm_ctx * ctx,int r)411 libc_feholdsetround_sse_ctx (struct rm_ctx *ctx, int r)
412 {
413   unsigned int mxcsr, new_mxcsr;
414 
415   asm (STMXCSR " %0" : "=m" (*&mxcsr));
416   new_mxcsr = (mxcsr & ~0x6000) | (r << 3);
417 
418   ctx->env.__mxcsr = mxcsr;
419   if (__glibc_unlikely (new_mxcsr != mxcsr))
420     {
421       asm volatile (LDMXCSR " %0" : : "m" (*&new_mxcsr));
422       ctx->updated_status = true;
423     }
424   else
425     ctx->updated_status = false;
426 }
427 
428 static __always_inline void
libc_feresetround_sse_ctx(struct rm_ctx * ctx)429 libc_feresetround_sse_ctx (struct rm_ctx *ctx)
430 {
431   if (__glibc_unlikely (ctx->updated_status))
432     libc_feresetround_sse (&ctx->env);
433 }
434 
435 static __always_inline void
libc_feresetround_387_ctx(struct rm_ctx * ctx)436 libc_feresetround_387_ctx (struct rm_ctx *ctx)
437 {
438   if (__glibc_unlikely (ctx->updated_status))
439     _FPU_SETCW (ctx->env.__control_word);
440 }
441 
442 static __always_inline void
libc_feupdateenv_387_ctx(struct rm_ctx * ctx)443 libc_feupdateenv_387_ctx (struct rm_ctx *ctx)
444 {
445   if (__glibc_unlikely (ctx->updated_status))
446     libc_feupdateenv_test_387 (&ctx->env, 0);
447 }
448 
/* Context helpers with the "f" suffix: the x87 versions unless
   __SSE_MATH__ is defined.  libc_fesetenvf_ctx is defined here only in
   the SSE case.  */
#if !defined __SSE_MATH__
# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_387_ctx
# define libc_feupdateenvf_ctx		libc_feupdateenv_387_ctx
# define libc_feholdsetroundf_ctx	libc_feholdsetround_387_ctx
# define libc_feresetroundf_ctx		libc_feresetround_387_ctx
#else
# define libc_feholdexcept_setroundf_ctx libc_feholdexcept_setround_sse_ctx
# define libc_fesetenvf_ctx		libc_fesetenv_sse_ctx
# define libc_feupdateenvf_ctx		libc_feupdateenv_sse_ctx
# define libc_feholdsetroundf_ctx	libc_feholdsetround_sse_ctx
# define libc_feresetroundf_ctx		libc_feresetround_sse_ctx
#endif /* !__SSE_MATH__ */
461 
/* Context helpers without a suffix.  Three cases:
   - __SSE2_MATH__ not defined: the x87 versions (libc_fesetenv_ctx is
     not defined here in this case);
   - __SSE2_MATH__ defined, not x86_64, and MATH_SET_BOTH_ROUNDING_MODES
     defined: the default_* versions, which are not defined in this
     file;
   - otherwise: the SSE versions.  */
#ifndef __SSE2_MATH__
# define libc_feholdexcept_setround_ctx	libc_feholdexcept_setround_387_ctx
# define libc_feupdateenv_ctx		libc_feupdateenv_387_ctx
# define libc_feholdsetround_ctx	libc_feholdsetround_387_ctx
# define libc_feresetround_ctx		libc_feresetround_387_ctx
#elif !defined (__x86_64__) && defined (MATH_SET_BOTH_ROUNDING_MODES)
# define libc_feholdexcept_setround_ctx	default_libc_feholdexcept_setround_ctx
# define libc_fesetenv_ctx		default_libc_fesetenv_ctx
# define libc_feupdateenv_ctx		default_libc_feupdateenv_ctx
# define libc_feholdsetround_ctx	default_libc_feholdsetround_ctx
# define libc_feresetround_ctx		default_libc_feresetround_ctx
#else
# define libc_feholdexcept_setround_ctx	libc_feholdexcept_setround_sse_ctx
# define libc_fesetenv_ctx		libc_fesetenv_sse_ctx
# define libc_feupdateenv_ctx		libc_feupdateenv_sse_ctx
# define libc_feholdsetround_ctx	libc_feholdsetround_sse_ctx
# define libc_feresetround_ctx		libc_feresetround_sse_ctx
#endif /* __SSE2_MATH__ */
482 
/* Context helpers with the "l" suffix always map to the x87
   versions.  */
#define libc_feholdexcept_setroundl_ctx  libc_feholdexcept_setround_387_ctx
#define libc_feupdateenvl_ctx            libc_feupdateenv_387_ctx
#define libc_feholdsetroundl_ctx         libc_feholdsetround_387_ctx
#define libc_feresetroundl_ctx           libc_feresetround_387_ctx
487 
/* The 53-bit context variants are provided only when __SSE2_MATH__ is
   not defined.  The reset side uses the plain x87 reset helper.  */
#if !defined __SSE2_MATH__
# define libc_feholdsetround_53bit_ctx \
  libc_feholdsetround_387_53bit_ctx
# define libc_feresetround_53bit_ctx \
  libc_feresetround_387_ctx
#endif
492 
493 #undef __mxcsr
494 
495 #include_next <fenv_private.h>
496 
497 #endif /* X86_FENV_PRIVATE_H */
498