# Objects added to libmvec when building the mathvec subdirectory:
# the per-vector-length SVML-style function objects plus their shared
# data tables.
ifeq ($(subdir),mathvec)
# Vector-length/object-name suffixes for the double-precision entry
# points (2-, 4-, 4-with-AVX- and 8-lane variants).
libmvec-double-func-list = \
  2_core \
  4_core \
  4_core_avx \
  8_core
# Vector-length/object-name suffixes for the single-precision entry
# points (4-, 8-, 8-with-AVX- and 16-lane variants).
libmvec-float-func-list = \
  f4_core \
  f8_core \
  f8_core_avx \
  f16_core
# Data tables, plus one object per function x vector-length pair,
# produced by crossing $(libmvec-funcs) (defined elsewhere in the
# build) with the suffix lists above.
libmvec-support += \
  svml_d_exp_data \
  svml_d_log_data \
  svml_d_pow_data \
  svml_d_trig_data \
  svml_s_expf_data \
  svml_s_logf_data \
  svml_s_powf_data \
  svml_s_trig_data \
  $(foreach l,$(libmvec-double-func-list), \
    $(addprefix svml_d_,$(addsuffix $(l),$(libmvec-funcs)))) \
  $(foreach l,$(libmvec-float-func-list), \
    $(addprefix svml_s_,$(addsuffix $(l),$(libmvec-funcs))))
endif

# Variables for libmvec tests.
ifeq ($(subdir)$(build-mathvec),mathyes)
libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \
		 float-vlen4 float-vlen8 float-vlen8-avx2 \
		 double-vlen8 float-vlen16
# ABI test lists are expected to be populated elsewhere in the build;
# they are grouped by the ISA extension they require.
tests += \
  $(libmvec-abi-func-tests) \
  $(libmvec-abi-func-avx-tests) \
  $(libmvec-abi-func-avx2-tests) \
  $(libmvec-abi-func-avx512f-tests)

# Every vector-length test variant exercises the same set of functions.
double-vlen2-funcs = $(libmvec-funcs)
double-vlen4-funcs = $(libmvec-funcs)
double-vlen4-avx2-funcs = $(libmvec-funcs)
double-vlen8-funcs = $(libmvec-funcs)
float-vlen4-funcs = $(libmvec-funcs)
float-vlen8-funcs = $(libmvec-funcs)
float-vlen8-avx2-funcs = $(libmvec-funcs)
float-vlen16-funcs = $(libmvec-funcs)

# ISA-extension flags needed to compile each vector-length variant.
double-vlen4-arch-ext-cflags = -mavx
double-vlen4-arch-ext2-cflags = -mavx2
double-vlen8-arch-ext-cflags = -mavx512f

float-vlen8-arch-ext-cflags = -mavx
float-vlen8-arch-ext2-cflags = -mavx2
float-vlen16-arch-ext-cflags = -mavx512f

# -fno-inline keeps the wrappers callable as distinct functions;
# -fopenmp is used by the vector-ABI pragmas in the tests.
libmvec-abi-test-cflags = \
  $(libm-test-fast-math-cflags) \
  -fno-inline -fopenmp -Wno-unknown-pragmas

CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags)

CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags)
endif

# Workaround applied only when building math/ and the compiler supports
# -mprefer-vector-width (config-cflags-mprefer-vector-width=yes).
ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)
# When compiled with -O3 -march=skylake, GCC 8 and 9 optimize some loops
# in branred.c with 256-bit vector instructions, which leads to store
# forward stall:
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579
#
# Limit vector width to 128 bits to work around this issue.  It improves
# performance of sin and cos by more than 40% on Skylake.
CFLAGS-branred.c = -mprefer-vector-width=128
endif

# libmvec benchmark support (benchtests subdirectory only).
ifeq ($(subdir),benchtests)
# ISA-extension flags, mirroring the test definitions above.
double-vlen4-arch-ext-cflags = -mavx
double-vlen4-arch-ext2-cflags = -mavx2
double-vlen8-arch-ext-cflags = -mavx512f

float-vlen8-arch-ext-cflags = -mavx
float-vlen8-arch-ext2-cflags = -mavx2
float-vlen16-arch-ext-cflags = -mavx512f

bench-libmvec := $(bench-libmvec-double) $(bench-libmvec-float)

# Only add the libmvec benchmarks to the default set when the user has
# not restricted the run with BENCHSET.
ifeq (${BENCHSET},)
bench += $(bench-libmvec)
endif

# Link the benchmarks against the static or shared libmvec/libm,
# depending on STATIC-BENCHTESTS.
ifeq (${STATIC-BENCHTESTS},yes)
libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a $(common-objpfx)math/libm.a
else
libmvec-benchtests = $(libmvec) $(libm)
endif

$(addprefix $(objpfx)bench-,$(bench-libmvec-double)): $(libmvec-benchtests)
$(addprefix $(objpfx)bench-,$(bench-libmvec-float)): $(libmvec-benchtests)
# Regenerate the benchmark sources whenever the skeleton, the timing
# header or this Makefile changes.
bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c bench-timing.h Makefile

# Generate each float benchmark source: emit the optional per-benchmark
# include first, then the script output, writing atomically via $@-tmp.
$(objpfx)bench-float-%.c: $(bench-libmvec-deps)
	{ if [ -n "$($*-INCLUDE)" ]; then \
	  cat $($*-INCLUDE); \
	fi; \
	$(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp
	mv -f $@-tmp $@

# Same generation scheme for the double benchmarks.
$(objpfx)bench-double-%.c: $(bench-libmvec-deps)
	{ if [ -n "$($*-INCLUDE)" ]; then \
	  cat $($*-INCLUDE); \
	fi; \
	$(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp
	mv -f $@-tmp $@
endif
