ifeq ($(subdir),mathvec)
libmvec-double-func-list = \
  2_core \
  4_core \
  4_core_avx \
  8_core
libmvec-float-func-list = \
  f4_core \
  f8_core \
  f8_core_avx \
  f16_core
libmvec-support += \
  svml_d_exp_data \
  svml_d_log_data \
  svml_d_pow_data \
  svml_d_trig_data \
  svml_s_expf_data \
  svml_s_logf_data \
  svml_s_powf_data \
  svml_s_trig_data \
  $(foreach l,$(libmvec-double-func-list), \
    $(addprefix svml_d_,$(addsuffix $(l),$(libmvec-funcs)))) \
  $(foreach l,$(libmvec-float-func-list), \
    $(addprefix svml_s_,$(addsuffix $(l),$(libmvec-funcs))))
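
# For illustration: assuming $(libmvec-funcs) contains, e.g., cos, the
# double-precision foreach above expands to svml_d_cos2_core
# svml_d_cos4_core svml_d_cos4_core_avx svml_d_cos8_core, i.e. one SVML
# kernel per supported vector length; the float foreach does the same
# with the svml_s_ prefix and the f*_core suffixes.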
endif

# Variables for libmvec tests.
ifeq ($(subdir)$(build-mathvec),mathyes)
libmvec-tests += double-vlen2 double-vlen4 double-vlen4-avx2 \
		 float-vlen4 float-vlen8 float-vlen8-avx2 \
		 double-vlen8 float-vlen16
tests += \
  $(libmvec-abi-func-tests) \
  $(libmvec-abi-func-avx-tests) \
  $(libmvec-abi-func-avx2-tests) \
  $(libmvec-abi-func-avx512f-tests)

double-vlen2-funcs = $(libmvec-funcs)
double-vlen4-funcs = $(libmvec-funcs)
double-vlen4-avx2-funcs = $(libmvec-funcs)
double-vlen8-funcs = $(libmvec-funcs)
float-vlen4-funcs = $(libmvec-funcs)
float-vlen8-funcs = $(libmvec-funcs)
float-vlen8-avx2-funcs = $(libmvec-funcs)
float-vlen16-funcs = $(libmvec-funcs)

double-vlen4-arch-ext-cflags = -mavx
double-vlen4-arch-ext2-cflags = -mavx2
double-vlen8-arch-ext-cflags = -mavx512f

float-vlen8-arch-ext-cflags = -mavx
float-vlen8-arch-ext2-cflags = -mavx2
float-vlen16-arch-ext-cflags = -mavx512f

libmvec-abi-test-cflags = \
  $(libm-test-fast-math-cflags) \
  -fno-inline -fopenmp -Wno-unknown-pragmas

CFLAGS-test-double-vlen4-avx2-wrappers.c = $(double-vlen4-arch-ext2-cflags)

CFLAGS-test-float-vlen8-avx2-wrappers.c = $(float-vlen8-arch-ext2-cflags)
endif

ifeq ($(subdir)$(config-cflags-mprefer-vector-width),mathyes)
# When compiled with -O3 -march=skylake, GCC 8 and 9 optimize some loops
# in branred.c with 256-bit vector instructions, which leads to a
# store-forwarding stall:
#
# https://gcc.gnu.org/bugzilla/show_bug.cgi?id=90579
#
# Limit the vector width to 128 bits to work around this issue; it
# improves the performance of sin and cos by more than 40% on Skylake.
CFLAGS-branred.c = -mprefer-vector-width=128
endif

ifeq ($(subdir),benchtests)
double-vlen4-arch-ext-cflags = -mavx
double-vlen4-arch-ext2-cflags = -mavx2
double-vlen8-arch-ext-cflags = -mavx512f

float-vlen8-arch-ext-cflags = -mavx
float-vlen8-arch-ext2-cflags = -mavx2
float-vlen16-arch-ext-cflags = -mavx512f

bench-libmvec := $(bench-libmvec-double) $(bench-libmvec-float)

ifeq (${BENCHSET},)
bench += $(bench-libmvec)
endif

ifeq (${STATIC-BENCHTESTS},yes)
libmvec-benchtests = $(common-objpfx)mathvec/libmvec.a \
		     $(common-objpfx)math/libm.a
else
libmvec-benchtests = $(libmvec) $(libm)
endif

$(addprefix $(objpfx)bench-,$(bench-libmvec-double)): $(libmvec-benchtests)
$(addprefix $(objpfx)bench-,$(bench-libmvec-float)): $(libmvec-benchtests)
bench-libmvec-deps = $(..)sysdeps/x86_64/fpu/bench-libmvec-skeleton.c \
		     bench-timing.h Makefile

$(objpfx)bench-float-%.c: $(bench-libmvec-deps)
	{ if [ -n "$($*-INCLUDE)" ]; then \
	  cat $($*-INCLUDE); \
	  fi; \
	  $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp
	mv -f $@-tmp $@

$(objpfx)bench-double-%.c: $(bench-libmvec-deps)
	{ if [ -n "$($*-INCLUDE)" ]; then \
	  cat $($*-INCLUDE); \
	  fi; \
	  $(PYTHON) $(..)sysdeps/x86_64/fpu/scripts/bench_libmvec.py $(basename $(@F)); } > $@-tmp
	mv -f $@-tmp $@
endif
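
# For illustration: given a hypothetical target such as
# $(objpfx)bench-float-cosf.c, the bench-float-%.c rule above matches
# with a stem of cosf, so the recipe first emits the contents of the
# file named by $(cosf-INCLUDE) (if that variable is set) and then runs
# bench_libmvec.py with the argument bench-float-cosf, the basename of
# the file being generated.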