/* PLT trampolines.  x86-64 version.
   Copyright (C) 2009-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

	.text
#ifdef _dl_runtime_resolve

# undef REGISTER_SAVE_AREA
# undef LOCAL_STORAGE_AREA
# undef BASE

# if (STATE_SAVE_ALIGNMENT % 16) != 0
#  error STATE_SAVE_ALIGNMENT must be a multiple of 16
# endif

# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
#  error STATE_SAVE_OFFSET must be a multiple of STATE_SAVE_ALIGNMENT
# endif

# if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Local stack area before jumping to function address: RBX.  */
#  define LOCAL_STORAGE_AREA	8
#  define BASE			rbx
#  ifdef USE_FXSAVE
/* Use fxsave to save XMM registers.  */
#   define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET)
#   if (REGISTER_SAVE_AREA % 16) != 0
#    error REGISTER_SAVE_AREA must be a multiple of 16
#   endif
#  endif
# else
#  ifndef USE_FXSAVE
#   error USE_FXSAVE must be defined
#  endif
/* Use fxsave to save XMM registers.  */
#  define REGISTER_SAVE_AREA	(512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address:  All saved
   registers.  */
#  define LOCAL_STORAGE_AREA	REGISTER_SAVE_AREA
#  define BASE			rsp
#  if (REGISTER_SAVE_AREA % 16) != 8
#   error REGISTER_SAVE_AREA must be an odd multiple of 8
#  endif
# endif
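/* The link_map and reloc_index pushed by the PLT are read through BASE:
   via the saved %rbx when the stack is realigned, directly via %rsp
   otherwise (see the argument loads before the _dl_fixup call).  */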

	.globl _dl_runtime_resolve
	.hidden _dl_runtime_resolve
	.type _dl_runtime_resolve, @function
	.align 16
	cfi_startproc
_dl_runtime_resolve:
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	_CET_ENDBR
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
#  if LOCAL_STORAGE_AREA != 8
#   error LOCAL_STORAGE_AREA must be 8
#  endif
	pushq %rbx			# push decrements %rsp by 8.
	cfi_adjust_cfa_offset(8)
	cfi_rel_offset(%rbx, 0)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)
	and $-STATE_SAVE_ALIGNMENT, %RSP_LP
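	# %rbx still holds the entry %rsp, through which the PLT-pushed
	# arguments are read; %rsp itself is now aligned for fxsave/xsave.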
# endif
# ifdef REGISTER_SAVE_AREA
	sub $REGISTER_SAVE_AREA, %RSP_LP
#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
	cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
#  endif
# else
	# Allocate stack space of the required size to save the state.
#  if IS_IN (rtld)
	sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  else
	sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  endif
# endif
	# Preserve registers otherwise clobbered.
	movq %rax, REGISTER_SAVE_RAX(%rsp)
	movq %rcx, REGISTER_SAVE_RCX(%rsp)
	movq %rdx, REGISTER_SAVE_RDX(%rsp)
	movq %rsi, REGISTER_SAVE_RSI(%rsp)
	movq %rdi, REGISTER_SAVE_RDI(%rsp)
	movq %r8, REGISTER_SAVE_R8(%rsp)
	movq %r9, REGISTER_SAVE_R9(%rsp)
# ifdef USE_FXSAVE
	fxsave STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
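	# xsave/xsavec (and xrstor below) take the state-component mask
	# in %edx:%eax.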
	# Clear the XSAVE Header.
#  ifdef USE_XSAVE
	movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
#  endif
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
	movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
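	# xsave/xsavec do not write the whole 64-byte header, so it must
	# start out zeroed; otherwise the xrstor below could fault on
	# stale header bits.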
#  ifdef USE_XSAVE
	xsave STATE_SAVE_OFFSET(%rsp)
#  else
	xsavec STATE_SAVE_OFFSET(%rsp)
#  endif
# endif
	# Copy args pushed by PLT into registers.
	# %rdi: link_map, %rsi: reloc_index
	mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
	mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
	call _dl_fixup		# Call resolver.
	mov %RAX_LP, %R11_LP	# Save return value.
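	# %r11 is neither callee-saved nor used for argument passing, so
	# it can carry the resolved address across the restores below.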
	# Get register content back.
# ifdef USE_FXSAVE
	fxrstor STATE_SAVE_OFFSET(%rsp)
# else
	movl $STATE_SAVE_MASK, %eax
	xorl %edx, %edx
	xrstor STATE_SAVE_OFFSET(%rsp)
# endif
	movq REGISTER_SAVE_R9(%rsp), %r9
	movq REGISTER_SAVE_R8(%rsp), %r8
	movq REGISTER_SAVE_RDI(%rsp), %rdi
	movq REGISTER_SAVE_RSI(%rsp), %rsi
	movq REGISTER_SAVE_RDX(%rsp), %rdx
	movq REGISTER_SAVE_RCX(%rsp), %rcx
	movq REGISTER_SAVE_RAX(%rsp), %rax
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
	mov %RBX_LP, %RSP_LP
	cfi_def_cfa_register(%rsp)
	movq (%rsp), %rbx
	cfi_restore(%rbx)
# endif
	# Adjust stack (PLT did 2 pushes).
	add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
	cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
	jmp *%r11		# Jump to function address.
	cfi_endproc
	.size _dl_runtime_resolve, .-_dl_runtime_resolve
#endif


#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
#  error LR_VECTOR_OFFSET must be a multiple of VEC_SIZE
# endif

	.globl _dl_runtime_profile
	.hidden _dl_runtime_profile
	.type _dl_runtime_profile, @function
	.align 16
_dl_runtime_profile:
	cfi_startproc
	cfi_adjust_cfa_offset(16) # Incorporate PLT
	_CET_ENDBR
	/* The La_x86_64_regs data structure pointed to by the
	   fourth parameter must be VEC_SIZE-byte aligned.  This must
	   be explicitly enforced.  We have to set up a dynamically
	   sized stack frame.  %rbx points to the top half which
	   has a fixed size and preserves the original stack pointer.  */

	sub $32, %RSP_LP	# Allocate the local storage.
	cfi_adjust_cfa_offset(32)
	movq %rbx, (%rsp)
	cfi_rel_offset(%rbx, 0)

	/* On the stack:
		56(%rbx)	parameter #1
		48(%rbx)	return address

		40(%rbx)	reloc index
		32(%rbx)	link_map

		24(%rbx)	La_x86_64_regs pointer
		16(%rbx)	framesize
		 8(%rbx)	rax
		  (%rbx)	rbx
	*/

	movq %rax, 8(%rsp)
	mov %RSP_LP, %RBX_LP
	cfi_def_cfa_register(%rbx)

	/* Actively align the La_x86_64_regs structure.  */
	and $-VEC_SIZE, %RSP_LP
	/* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
	   to detect if any xmm0-xmm7 registers are changed by an audit
	   module.  */
	sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
	movq %rsp, 24(%rbx)

	/* Fill the La_x86_64_regs structure.  */
	movq %rdx, LR_RDX_OFFSET(%rsp)
	movq %r8,  LR_R8_OFFSET(%rsp)
	movq %r9,  LR_R9_OFFSET(%rsp)
	movq %rcx, LR_RCX_OFFSET(%rsp)
	movq %rsi, LR_RSI_OFFSET(%rsp)
	movq %rdi, LR_RDI_OFFSET(%rsp)
	movq %rbp, LR_RBP_OFFSET(%rsp)

	lea 48(%rbx), %RAX_LP
	movq %rax, LR_RSP_OFFSET(%rsp)
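	# lr_rsp: the application's stack pointer at the call site, i.e.
	# the address where the return address is stored.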

	/* We always store the XMM registers even if AVX is available.
	   This is to provide backward binary compatibility for existing
	   audit modules.  */
	movaps %xmm0,		   (LR_XMM_OFFSET)(%rsp)
	movaps %xmm1, (LR_XMM_OFFSET +   XMM_SIZE)(%rsp)
	movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
	movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
	movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
	movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
	movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
	movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0),		      (LR_VECTOR_OFFSET)(%rsp)
	VMOVA %VEC(1), (LR_VECTOR_OFFSET +   VECTOR_SIZE)(%rsp)
	VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

	/* Save xmm0-xmm7 registers to detect if any of them are
	   changed by an audit module.  */
	vmovdqa %xmm0,		    (LR_SIZE)(%rsp)
	vmovdqa %xmm1, (LR_SIZE +   XMM_SIZE)(%rsp)
	vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
	vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
	vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
	vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
	vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
	vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
# endif

	mov %RSP_LP, %RCX_LP	# La_x86_64_regs pointer to %rcx.
	mov 48(%rbx), %RDX_LP	# Load return address if needed.
	mov 40(%rbx), %RSI_LP	# Copy args pushed by PLT into registers.
	mov 32(%rbx), %RDI_LP	# %rdi: link_map, %rsi: reloc_index
	lea 16(%rbx), %R8_LP	# Address of framesize
	call _dl_profile_fixup	# Call resolver.
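	# %rax now holds the target address.  framesize at 16(%rbx) stays
	# negative unless _dl_audit_pltexit has to be called (tested below).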

	mov %RAX_LP, %R11_LP	# Save return value.

	movq 8(%rbx), %rax	# Get back register content.
	movq LR_RDX_OFFSET(%rsp), %rdx
	movq  LR_R8_OFFSET(%rsp), %r8
	movq  LR_R9_OFFSET(%rsp), %r9

	movaps		    (LR_XMM_OFFSET)(%rsp), %xmm0
	movaps	 (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
	movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
	movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
	movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
	movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
	movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
	movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef RESTORE_AVX
	/* Check if any xmm0-xmm7 registers are changed by an audit
	   module.  */
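	/* For each register: if the auditor left the xmm value unchanged,
	   reload the full-width vector saved before the call; otherwise
	   keep the auditor's xmm value and copy it into the saved vector
	   slot so the modified value is the one used from here on.  */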
	vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa	%xmm0, (LR_VECTOR_OFFSET)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
	vmovdqa	%xmm0, (LR_XMM_OFFSET)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa	%xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
	vmovdqa	%xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa	%xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
	vmovdqa	%xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa	%xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
	vmovdqa	%xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa	%xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
	vmovdqa	%xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa	%xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
	vmovdqa	%xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa	%xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
	vmovdqa	%xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:	vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
	vpmovmskb %xmm8, %esi
	cmpl $0xffff, %esi
	je 2f
	vmovdqa	%xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
	jmp 1f
2:	VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
	vmovdqa	%xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
# endif

	mov  16(%rbx), %R10_LP	# Anything in framesize?
	test %R10_LP, %R10_LP
	jns 3f

	/* The framesize is negative, so there will be no call to
	   _dl_audit_pltexit.  */

	/* Get back register content.  */
	movq LR_RCX_OFFSET(%rsp), %rcx
	movq LR_RSI_OFFSET(%rsp), %rsi
	movq LR_RDI_OFFSET(%rsp), %rdi

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return value
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	jmp *%r11		# Jump to function address.

3:
	cfi_adjust_cfa_offset(48)
	cfi_rel_offset(%rbx, 0)
	cfi_def_cfa_register(%rbx)

	/* At this point we need to prepare a new stack for the function
	   which has to be called.  We copy the original stack to a
	   temporary buffer of the size specified by the 'framesize'
	   returned from _dl_profile_fixup.  */

	lea LR_RSP_OFFSET(%rbx), %RSI_LP # stack
	add $8, %R10_LP
	and $-16, %R10_LP
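	# %r10 = (framesize + 8) rounded down to a multiple of 16: the
	# number of bytes to copy, which keeps %rsp 16-byte aligned.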
	mov %R10_LP, %RCX_LP
	sub %R10_LP, %RSP_LP
	mov %RSP_LP, %RDI_LP
	shr $3, %RCX_LP
	rep
	movsq

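	/* rep movsq advanced %rdi by exactly the copy size, so it points
	   at the La_x86_64_regs structure again; the offsets below are
	   lr_rcx, lr_rsi and lr_rdi.  */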
	movq 24(%rdi), %rcx	# Get back register content.
	movq 32(%rdi), %rsi
	movq 40(%rdi), %rdi

	call *%r11

	mov 24(%rbx), %RSP_LP	# Drop the copied stack content.

	/* Now we have to prepare the La_x86_64_retval structure for
	   _dl_audit_pltexit.  %rsp points to the La_x86_64_regs structure
	   again, so we just need to allocate sizeof(La_x86_64_retval) bytes
	   on the stack, since the alignment has already been taken care
	   of.  */
# ifdef RESTORE_AVX
	/* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
	   registers to detect if xmm0/xmm1 registers are changed
	   by an audit module.  Since rsp is aligned to VEC_SIZE, we
	   need to make sure that the address of La_x86_64_retval +
	   LRV_VECTOR0_OFFSET is aligned to VEC_SIZE.  */
#  define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
#  define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
#  if LRV_MISALIGNED == 0
	sub $LRV_SPACE, %RSP_LP
#  else
	sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
#  endif
# else
	sub $LRV_SIZE, %RSP_LP	# sizeof(La_x86_64_retval)
# endif
	mov %RSP_LP, %RCX_LP	# La_x86_64_retval argument to %rcx.

	/* Fill in the La_x86_64_retval structure.  */
	movq %rax, LRV_RAX_OFFSET(%rcx)
	movq %rdx, LRV_RDX_OFFSET(%rcx)

	movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
	movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef RESTORE_AVX
	/* This is to support AVX audit modules.  */
	VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
	VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

	/* Save xmm0/xmm1 registers to detect if they are changed
	   by an audit module.  */
	vmovdqa %xmm0,		  (LRV_SIZE)(%rcx)
	vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
# endif

	fstpt LRV_ST0_OFFSET(%rcx)
	fstpt LRV_ST1_OFFSET(%rcx)
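	# fstpt pops the x87 stack; st(0) and st(1) are pushed back in
	# reverse order (fldt) after _dl_audit_pltexit returns.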

	movq 24(%rbx), %rdx	# La_x86_64_regs argument to %rdx.
	movq 40(%rbx), %rsi	# Copy args pushed by PLT into registers.
	movq 32(%rbx), %rdi	# %rdi: link_map, %rsi: reloc_index
	call _dl_audit_pltexit

	/* Restore return registers.  */
	movq LRV_RAX_OFFSET(%rsp), %rax
	movq LRV_RDX_OFFSET(%rsp), %rdx

	movaps LRV_XMM0_OFFSET(%rsp), %xmm0
	movaps LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef RESTORE_AVX
	/* Check if xmm0/xmm1 registers are changed by an audit module.  */
	vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:	vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
	vpmovmskb %xmm2, %esi
	cmpl $0xffff, %esi
	jne 1f
	VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
# endif

	fldt LRV_ST1_OFFSET(%rsp)
	fldt LRV_ST0_OFFSET(%rsp)

	mov %RBX_LP, %RSP_LP
	movq (%rsp), %rbx
	cfi_restore(%rbx)
	cfi_def_cfa_register(%rsp)

	add $48, %RSP_LP	# Adjust the stack to the return value
				# (eats the reloc index and link_map).
	cfi_adjust_cfa_offset(-48)
	retq

	cfi_endproc
	.size _dl_runtime_profile, .-_dl_runtime_profile
#endif