1# Copyright (C) 2013-2021 Free Software Foundation, Inc.
2# This file is part of the GNU C Library.
3
4# The GNU C Library is free software; you can redistribute it and/or
5# modify it under the terms of the GNU Lesser General Public
6# License as published by the Free Software Foundation; either
7# version 2.1 of the License, or (at your option) any later version.
8
9# The GNU C Library is distributed in the hope that it will be useful,
10# but WITHOUT ANY WARRANTY; without even the implied warranty of
11# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12# Lesser General Public License for more details.
13
14# You should have received a copy of the GNU Lesser General Public
15# License along with the GNU C Library; if not, see
16# <https://www.gnu.org/licenses/>.
17
# Makefile for benchmark tests.  The main target here is `bench`; the
# `bench-build`, `bench-set`, `bench-func`, `bench-malloc` and `bench-clean`
# targets are also available.  Add benchmark functions in alphabetical order.
20
# Name of this subdirectory.  It is set before ../Makeconfig is included;
# presumably the shared build configuration reads it -- confirm there.
subdir := benchtests

include ../Makeconfig
24
# Math function benchmarks.  Each NAME listed here names the benchmark
# program bench-NAME, which is linked against libm.  Keep one entry per
# line, in alphabetical order, each ending in a backslash.  The trailing
# comment line terminates the list, so entries can be added or removed
# without touching their neighbours and the list no longer depends on
# being followed by a blank line.
bench-math := \
  acos \
  acosh \
  asin \
  asinh \
  atan \
  atan2 \
  atanh \
  cbrt \
  cos \
  cosf \
  cosh \
  erf \
  erfc \
  exp \
  exp10 \
  exp10f \
  exp2 \
  exp2f \
  expf \
  expm1 \
  fmax \
  fmaxf \
  fmin \
  fminf \
  hypot \
  hypotf \
  ilogb \
  ilogbf \
  isfinite \
  isinf \
  isnan \
  j0 \
  j1 \
  lgamma \
  log \
  log10 \
  log1p \
  log2 \
  log2f \
  logb \
  logbf \
  logf \
  modf \
  pow \
  powf \
  rint \
  roundeven \
  roundevenf \
  sin \
  sincos \
  sincosf \
  sinf \
  sinh \
  sqrt \
  tan \
  tanh \
  tgamma \
  trunc \
  truncf \
  y0 \
  y1 \
  # bench-math

# cbrtl is benchmarked only when $(float96-fcts) contains `yes'.
ifneq ($(filter yes,$(float96-fcts)),)
bench-math += cbrtl
endif

# The *f128 benchmarks are added when either $(float128-fcts) or
# $(float128-alias-fcts) contains `yes'.
ifneq ($(filter yes,$(float128-fcts) $(float128-alias-fcts)),)
bench-math += \
  expf128 \
  powf128 \
  sinf128 \
  ilogbf128
endif
95
# Thread-related benchmarks; their programs are linked against the thread
# library further down.
bench-pthread := pthread_once thread_create pthread-locks

bench-string := ffs ffsll

# Select the bench-* groups to build and run.  With no BENCHSET all three
# groups are used; otherwise only the bench-* groups named in BENCHSET.
# BENCHSET is stripped before the test so that a whitespace-only value
# counts as unset, matching the BENCHSET validation later in this file.
ifeq ($(strip ${BENCHSET}),)
bench := $(bench-math) $(bench-pthread) $(bench-string)
else
bench := $(foreach B,$(filter bench-%,${BENCHSET}), ${${B}})
endif
105
# String function benchmarks.
string-benchset := \
  memccpy memchr memcmp memcmpeq memcpy memmem memmove mempcpy memset \
  rawmemchr stpcpy stpncpy strcasecmp strcasestr strcat strchr strchrnul \
  strcmp strcpy strcspn strlen strncasecmp strncat strncmp strncpy strnlen \
  strpbrk strrchr strspn strstr strcpy_chk stpcpy_chk memrchr strsep \
  strtok strcoll memcpy-large memcpy-random memmove-large memset-large \
  memcpy-walk memset-walk memmove-walk

# Wide-character benchmarks are built and run only when building natively;
# the set stays empty when cross-compiling.
wcsmbs-benchset :=
ifeq ($(cross-compiling),no)
wcsmbs-benchset := \
  wcslen wcsnlen wcscpy wcpcpy wcsncpy wcpncpy wcscat wcsncat wcscmp \
  wcsncmp wcschr wcschrnul wcsrchr wcsspn wcspbrk wcscspn wmemchr wmemset \
  wmemcmp
endif

string-benchset-all := $(string-benchset) $(wcsmbs-benchset)
125
ifeq ($(cross-compiling),no)
# Locales that have to be generated for a native build.  Every entry uses
# the UTF-8 charset, so the suffix is added in one place.
LOCALES := $(addsuffix .UTF-8, \
             en_US tr_TR cs_CZ fa_IR fr_FR ja_JP si_LK en_GB vi_VN ar_SA \
             da_DK pl_PL pt_PT el_GR ru_RU he_IL is_IS es_ES hi_IN sv_SE \
             hu_HU it_IT sr_RS zh_CN)
include ../gen-locales.mk
endif
135
stdlib-benchset := strtod

stdio-common-benchset := sprintf

math-benchset := math-inlines

# Select the *-benchset groups to build and run.  With no BENCHSET all of
# them are used; otherwise only the *-benchset groups named in BENCHSET.
# BENCHSET is stripped before the test so that a whitespace-only value
# counts as unset, matching the BENCHSET validation later in this file.
ifeq ($(strip ${BENCHSET}),)
benchset := $(string-benchset-all) $(stdlib-benchset) $(stdio-common-benchset) \
	    $(math-benchset)
else
benchset := $(foreach B,$(filter %-benchset,${BENCHSET}), ${${B}})
endif
148
# Benchmarks compiled with -fno-builtin (presumably so that the library
# function is called rather than a compiler builtin -- confirm).
CFLAGS-bench-ffs.c += -fno-builtin
CFLAGS-bench-ffsll.c += -fno-builtin
CFLAGS-bench-fmax.c += -fno-builtin
CFLAGS-bench-fmaxf.c += -fno-builtin
CFLAGS-bench-fmin.c += -fno-builtin
CFLAGS-bench-fminf.c += -fno-builtin
CFLAGS-bench-roundeven.c += -fno-builtin
CFLAGS-bench-roundevenf.c += -fno-builtin
CFLAGS-bench-sqrt.c += -fno-builtin
CFLAGS-bench-trunc.c += -fno-builtin
CFLAGS-bench-truncf.c += -fno-builtin

# Classification benchmarks compiled with -fsignaling-nans.
CFLAGS-bench-isfinite.c += -fsignaling-nans
CFLAGS-bench-isinf.c += -fsignaling-nans
CFLAGS-bench-isnan.c += -fsignaling-nans
163
# Select the malloc benchmarks.  With no BENCHSET both are used; otherwise
# only the malloc-* entries named in BENCHSET.  BENCHSET is stripped before
# the test so that a whitespace-only value counts as unset, matching the
# BENCHSET validation later in this file.
ifeq ($(strip ${BENCHSET}),)
bench-malloc := malloc-thread malloc-simple
else
bench-malloc := $(filter malloc-%,${BENCHSET})
endif
169
# Choose how the benchmark programs are linked: statically when
# STATIC-BENCHTESTS is `yes', with the regular test linkage otherwise.
# Both branches set the same four variables, in the same order.
ifeq (yes,${STATIC-BENCHTESTS})
+link-benchtests = $(+link-static-tests)
link-libc-benchtests = $(link-libc-static)
libm-benchtests = $(common-objpfx)math/libm.a
thread-library-benchtests = $(static-thread-library)
else
+link-benchtests = $(+link-tests)
link-libc-benchtests = $(link-libc)
libm-benchtests = $(libm)
thread-library-benchtests = $(shared-thread-library)
endif
181
# Extra library prerequisites of the benchmark programs.
# Math benchmarks need libm.
$(patsubst %,$(objpfx)bench-%,$(bench-math)): $(libm-benchtests)
$(patsubst %,$(objpfx)bench-%,$(math-benchset)): $(libm-benchtests)
# Thread and malloc benchmarks need the thread library.
$(patsubst %,$(objpfx)bench-%,$(bench-pthread)): $(thread-library-benchtests)
$(patsubst %,$(objpfx)bench-%,$(bench-malloc)): $(thread-library-benchtests)
# pthread-locks additionally needs libm.
$(objpfx)bench-pthread-locks: $(libm-benchtests)
187
188
189
190# Rules to build and execute the benchmarks.  Do not put any benchmark
191# parameters beyond this point.
192
# Benchmark programs must not run concurrently, since that could distort
# their measurements, so parallel execution is disabled for this makefile.
.NOTPARALLEL:

# Helper objects shared by the benchmark programs.
bench-extra-objs = json-lib.o

others-extras = $(bench-extra-objs)
extra-objs += $(bench-extra-objs)

include ../Rules

# Paths of the benchmark programs in each selected group.
binaries-bench := $(patsubst %,$(objpfx)bench-%,$(bench))
binaries-benchset := $(patsubst %,$(objpfx)bench-%,$(benchset))
binaries-bench-malloc := $(patsubst %,$(objpfx)bench-%,$(bench-malloc))
207
# The default duration: 1 second.
ifndef BENCH_DURATION
BENCH_DURATION := 1
endif

CPPFLAGS-nonlib += -DDURATION=$(BENCH_DURATION) -D_ISOMAC

# Timing source.  The default is the architecture-specific high precision
# timing instructions.  USE_CLOCK_GETTIME selects clock_gettime instead and
# takes precedence over USE_RDTSCP.  On x86 processors USE_RDTSCP selects
# RDTSCP rather than RDTSC; all x86 processors since 2010 support the
# RDTSCP instruction.
ifdef USE_CLOCK_GETTIME
CPPFLAGS-nonlib += -DUSE_CLOCK_GETTIME
else ifdef USE_RDTSCP
CPPFLAGS-nonlib += -DUSE_RDTSCP
endif
226
# Option handed to the bench-func programs: -d when DETAILED is set,
# nothing otherwise.
ifdef DETAILED
DETAILED_OPT := -d
else
DETAILED_OPT :=
endif

# Files every generated benchmark source depends on.
bench-deps := bench-skeleton.c bench-timing.h Makefile

# Command that runs the program held in the shell variable `run' inside a
# recipe loop.  It is expanded lazily, at recipe time.
run-bench = $(test-wrapper-env) $(run-program-env) $($*-ENV) \
	    $(test-via-rtld-prefix) $${run}

timing-type := $(objpfx)bench-timing-type
240
# Mark the sources of all benchmark programs as belonging to `nonlib', so
# that CPPFLAGS-nonlib and CFLAGS-nonlib are passed when compiling them.
# libof-iterator.mk is included once per entry of cpp-srcs-left.
lib := nonlib
cpp-srcs-left := $(addsuffix .c,$(binaries-benchset) $(binaries-bench) \
			     $(binaries-bench-malloc) $(timing-type))
include $(patsubst %,$(..)libof-iterator.mk,$(cpp-srcs-left))
247
# Remove the benchmark programs, their objects and the shared helper
# objects.  Declared phony so that a stray file named bench-clean cannot
# make this target look up to date.
.PHONY: bench-clean
bench-clean:
	rm -f $(binaries-bench) $(addsuffix .o,$(binaries-bench))
	rm -f $(binaries-benchset) $(addsuffix .o,$(binaries-benchset))
	rm -f $(binaries-bench-malloc) $(addsuffix .o,$(binaries-bench-malloc))
	rm -f $(timing-type) $(addsuffix .o,$(timing-type))
	rm -f $(addprefix $(objpfx),$(bench-extra-objs))
254
# Validate the passed in BENCHSET: stop with an error, after listing the
# offending and the accepted names, if it contains an unknown entry.
ifneq (,$(strip ${BENCHSET}))
VALIDBENCHSETNAMES := \
  bench-pthread bench-math bench-string \
  string-benchset wcsmbs-benchset stdlib-benchset stdio-common-benchset \
  math-benchset \
  malloc-thread malloc-simple
INVALIDBENCHSETNAMES := $(filter-out ${VALIDBENCHSETNAMES},${BENCHSET})
ifneq (,${INVALIDBENCHSETNAMES})
$(info The following values in BENCHSET are invalid: ${INVALIDBENCHSETNAMES})
$(info The valid ones are: ${VALIDBENCHSETNAMES})
$(error Invalid BENCHSET value)
endif
endif
267
# `bench' builds everything and then runs each group of benchmarks.
# `bench' and `bench-build' name actions, not files, so they are declared
# phony: a stray file with either name cannot make them look up to date.
.PHONY: bench bench-build
bench: bench-build bench-set bench-func bench-malloc

# Target to only build the benchmark without running it.  We generate locales
# only if we're building natively; when present they stay the first
# prerequisite, as before.
ifeq (no,$(cross-compiling))
bench-build: $(gen-locales)
endif
bench-build: $(timing-type) $(binaries-bench) $(binaries-benchset) \
	$(binaries-bench-malloc)
279
# Run every selected benchset program in turn, writing the output of
# PROGRAM to PROGRAM.out.  Declared phony because it names an action, not
# a file.
.PHONY: bench-set
bench-set: $(binaries-benchset)
	for run in $^; do \
	  echo "Running $${run}"; \
	  $(run-bench) > $${run}.out; \
	done
285
# Run the selected malloc benchmarks.  Each program is run once per
# argument, with the output of `PROGRAM ARG' written to PROGRAM-ARG.out.
# bench-malloc-thread is run with 1, 8, 16 and 32; any other program is
# run with the powers of two from 8 to 4096.  Declared phony because it
# names an action, not a file.
.PHONY: bench-malloc
bench-malloc: $(binaries-bench-malloc)
	for run in $^; do \
	  echo "$${run}"; \
	  case `basename $${run}` in \
	    bench-malloc-thread) \
	      args="1 8 16 32" ;; \
	    *) \
	      args="8 16 32 64 128 256 512 1024 2048 4096" ;; \
	  esac; \
	  for arg in $${args}; do \
	    echo "Running $${run} $${arg}"; \
	    $(run-bench) $${arg} > $${run}-$${arg}.out; \
	  done; \
	done
301
# Build and execute the benchmark functions.  This target generates JSON
# formatted bench.out.  Each of the programs produce independent JSON output,
# so one could even execute them individually and process it using any JSON
# capable language or tool.
#
# The recipe runs $(timing-type) to obtain the "timing_type" value, so that
# program is an order-only prerequisite: it is built when missing, but it
# stays out of $^ and $<, which must list only the benchmark programs.
# A previous bench.out is kept as bench.out.old, and the new file is
# checked against scripts/benchout.schema.json.  Declared phony because it
# names an action, not a file.
.PHONY: bench-func
bench-func: $(binaries-bench) | $(timing-type)
	if [ -n '$^' ] ; then \
	{ timing_type=$$($(test-wrapper-env) \
			 $(run-program-env) \
			 $(test-via-rtld-prefix) \
			 $(timing-type)); \
	  echo "{\"timing_type\": \"$${timing_type}\","; \
	  echo " \"functions\": {"; \
	  for run in $^; do \
	    if ! [ "x$${run}" = "x$<" ]; then \
	      echo ","; \
	    fi; \
	    echo "Running $${run}" >&2; \
	    $(run-bench) $(DETAILED_OPT); \
	  done; \
	  echo; \
	  echo " }"; \
	  echo "}"; \
	  } > $(objpfx)bench.out-tmp; \
	  if [ -f $(objpfx)bench.out ]; then \
	    mv -f $(objpfx)bench.out $(objpfx)bench.out.old; \
	  fi; \
	  mv -f $(objpfx)bench.out-tmp $(objpfx)bench.out; \
	  $(PYTHON) scripts/validate_benchout.py $(objpfx)bench.out \
	  scripts/benchout.schema.json; \
	fi
332
# Extra linker flag used when $(bind-now) is `yes'.
ifeq (yes,$(bind-now))
link-bench-bind-now = -Wl,-z,now
endif

# Every program this makefile links.
bench-link-targets = $(timing-type) $(binaries-bench) $(binaries-benchset) \
		     $(binaries-bench-malloc)

$(bench-link-targets): LDFLAGS += $(link-bench-bind-now)

# Link each program from its own object plus the shared helper objects,
# using the link command selected for the benchmarks.
$(bench-link-targets): %: %.o $(addprefix $(objpfx),$(bench-extra-objs)) \
  $(link-extra-libs-tests) \
  $(sort $(filter $(common-objpfx)lib%,$(link-libc-benchtests))) \
  $(csu-objpfx)start.o $(+preinit) $(+postinit)
	$(+link-benchtests)
347
# Generate the source of benchmark NAME from NAME-inputs.  The files named
# by NAME-INCLUDE, if any, are copied in first, followed by the output of
# scripts/bench.py.  The result is written to a temporary file and moved
# into place only once it has been generated.
$(objpfx)bench-%.c: %-inputs $(bench-deps)
	{ test -z "$($*-INCLUDE)" || cat $($*-INCLUDE); \
	  $(PYTHON) scripts/bench.py $(<:%-inputs=%); } > $@-tmp
	mv -f $@-tmp $@
354