/* PLT trampolines.  x86-64 version.
   Copyright (C) 2009-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

        .text
#ifdef _dl_runtime_resolve

# undef REGISTER_SAVE_AREA
# undef LOCAL_STORAGE_AREA
# undef BASE

# if (STATE_SAVE_ALIGNMENT % 16) != 0
#  error STATE_SAVE_ALIGNMENT must be a multiple of 16
# endif

# if (STATE_SAVE_OFFSET % STATE_SAVE_ALIGNMENT) != 0
#  error STATE_SAVE_OFFSET must be a multiple of STATE_SAVE_ALIGNMENT
# endif

# if DL_RUNTIME_RESOLVE_REALIGN_STACK
/* Local stack area before jumping to function address: RBX.  */
#  define LOCAL_STORAGE_AREA 8
#  define BASE rbx
#  ifdef USE_FXSAVE
/* Use fxsave to save XMM registers.  */
#   define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET)
#   if (REGISTER_SAVE_AREA % 16) != 0
#    error REGISTER_SAVE_AREA must be a multiple of 16
#   endif
#  endif
# else
#  ifndef USE_FXSAVE
#   error USE_FXSAVE must be defined
#  endif
/* Use fxsave to save XMM registers.  */
#  define REGISTER_SAVE_AREA (512 + STATE_SAVE_OFFSET + 8)
/* Local stack area before jumping to function address: all saved
   registers.  */
#  define LOCAL_STORAGE_AREA REGISTER_SAVE_AREA
#  define BASE rsp
#  if (REGISTER_SAVE_AREA % 16) != 8
#   error REGISTER_SAVE_AREA must be an odd multiple of 8
#  endif
# endif

        .globl _dl_runtime_resolve
        .hidden _dl_runtime_resolve
        .type _dl_runtime_resolve, @function
        .align 16
        cfi_startproc
_dl_runtime_resolve:
        cfi_adjust_cfa_offset(16) # Incorporate PLT
        _CET_ENDBR
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
#  if LOCAL_STORAGE_AREA != 8
#   error LOCAL_STORAGE_AREA must be 8
#  endif
        pushq %rbx                      # push subtracts 8 from %rsp.
        cfi_adjust_cfa_offset(8)
        cfi_rel_offset(%rbx, 0)
        mov %RSP_LP, %RBX_LP
        cfi_def_cfa_register(%rbx)
        and $-STATE_SAVE_ALIGNMENT, %RSP_LP
# endif
# ifdef REGISTER_SAVE_AREA
        sub $REGISTER_SAVE_AREA, %RSP_LP
#  if !DL_RUNTIME_RESOLVE_REALIGN_STACK
        cfi_adjust_cfa_offset(REGISTER_SAVE_AREA)
#  endif
# else
        # Allocate stack space of the required size to save the state.
#  if IS_IN (rtld)
        sub _rtld_local_ro+RTLD_GLOBAL_RO_DL_X86_CPU_FEATURES_OFFSET+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  else
        sub _dl_x86_cpu_features+XSAVE_STATE_SIZE_OFFSET(%rip), %RSP_LP
#  endif
# endif
        # Preserve registers otherwise clobbered.
        movq %rax, REGISTER_SAVE_RAX(%rsp)
        movq %rcx, REGISTER_SAVE_RCX(%rsp)
        movq %rdx, REGISTER_SAVE_RDX(%rsp)
        movq %rsi, REGISTER_SAVE_RSI(%rsp)
        movq %rdi, REGISTER_SAVE_RDI(%rsp)
        movq %r8, REGISTER_SAVE_R8(%rsp)
        movq %r9, REGISTER_SAVE_R9(%rsp)
# ifdef USE_FXSAVE
        fxsave STATE_SAVE_OFFSET(%rsp)
# else
        movl $STATE_SAVE_MASK, %eax
        xorl %edx, %edx
        # Clear the XSAVE Header.
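        /* Note: the XSAVE header is the 64 bytes at offset 512 of the
           save area; XSTATE_BV occupies its first 8 bytes, XCOMP_BV the
           next 8, and the remainder is reserved.  The stack memory used
           for the save area is uninitialized and xrstor faults when
           reserved header bits are set, so the header is zeroed before
           saving.  With xsavec the first two quadwords are written by
           the instruction itself, which is why they are pre-cleared
           only in the plain-xsave case.  */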
#  ifdef USE_XSAVE
        movq %rdx, (STATE_SAVE_OFFSET + 512)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8)(%rsp)
#  endif
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 2)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 3)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 4)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 5)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 6)(%rsp)
        movq %rdx, (STATE_SAVE_OFFSET + 512 + 8 * 7)(%rsp)
#  ifdef USE_XSAVE
        xsave STATE_SAVE_OFFSET(%rsp)
#  else
        xsavec STATE_SAVE_OFFSET(%rsp)
#  endif
# endif
        # Copy the args pushed by the PLT into registers.
        # %rdi: link_map, %rsi: reloc_index
        mov (LOCAL_STORAGE_AREA + 8)(%BASE), %RSI_LP
        mov LOCAL_STORAGE_AREA(%BASE), %RDI_LP
        call _dl_fixup          # Call resolver.
        mov %RAX_LP, %R11_LP    # Save return value.
        # Get the register content back.
# ifdef USE_FXSAVE
        fxrstor STATE_SAVE_OFFSET(%rsp)
# else
        movl $STATE_SAVE_MASK, %eax
        xorl %edx, %edx
        xrstor STATE_SAVE_OFFSET(%rsp)
# endif
        movq REGISTER_SAVE_R9(%rsp), %r9
        movq REGISTER_SAVE_R8(%rsp), %r8
        movq REGISTER_SAVE_RDI(%rsp), %rdi
        movq REGISTER_SAVE_RSI(%rsp), %rsi
        movq REGISTER_SAVE_RDX(%rsp), %rdx
        movq REGISTER_SAVE_RCX(%rsp), %rcx
        movq REGISTER_SAVE_RAX(%rsp), %rax
# if DL_RUNTIME_RESOLVE_REALIGN_STACK
        mov %RBX_LP, %RSP_LP
        cfi_def_cfa_register(%rsp)
        movq (%rsp), %rbx
        cfi_restore(%rbx)
# endif
        # Adjust the stack (the PLT did 2 pushes).
        add $(LOCAL_STORAGE_AREA + 16), %RSP_LP
        cfi_adjust_cfa_offset(-(LOCAL_STORAGE_AREA + 16))
        jmp *%r11               # Jump to function address.
        cfi_endproc
        .size _dl_runtime_resolve, .-_dl_runtime_resolve
#endif


#if !defined PROF && defined _dl_runtime_profile
# if (LR_VECTOR_OFFSET % VEC_SIZE) != 0
#  error LR_VECTOR_OFFSET must be a multiple of VEC_SIZE
# endif

        .globl _dl_runtime_profile
        .hidden _dl_runtime_profile
        .type _dl_runtime_profile, @function
        .align 16
_dl_runtime_profile:
        cfi_startproc
        cfi_adjust_cfa_offset(16) # Incorporate PLT
        _CET_ENDBR
        /* The La_x86_64_regs data structure pointed to by the
           fourth parameter must be VEC_SIZE-byte aligned.  This must
           be explicitly enforced.  We have to set up a dynamically
           sized stack frame.  %rbx points to the top half, which
           has a fixed size and preserves the original stack pointer.  */

        sub $32, %RSP_LP        # Allocate the local storage.
        cfi_adjust_cfa_offset(32)
        movq %rbx, (%rsp)
        cfi_rel_offset(%rbx, 0)

        /* On the stack:
                56(%rbx)        parameter #1
                48(%rbx)        return address

                40(%rbx)        reloc index
                32(%rbx)        link_map

                24(%rbx)        La_x86_64_regs pointer
                16(%rbx)        framesize
                 8(%rbx)        rax
                  (%rbx)        rbx
        */

        movq %rax, 8(%rsp)
        mov %RSP_LP, %RBX_LP
        cfi_def_cfa_register(%rbx)

        /* Actively align the La_x86_64_regs structure.  */
        and $-VEC_SIZE, %RSP_LP
        /* sizeof(La_x86_64_regs).  Need extra space for 8 SSE registers
           to detect if any of xmm0-xmm7 are changed by an audit
           module.  */
        sub $(LR_SIZE + XMM_SIZE*8), %RSP_LP
        movq %rsp, 24(%rbx)

        /* Fill the La_x86_64_regs structure.  */
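        /* The layout below roughly mirrors struct La_x86_64_regs from
           <link.h>: the integer argument registers, %rbp, the caller's
           stack pointer, and the vector argument registers.
           _dl_profile_fixup passes a pointer to this block to an
           LD_AUDIT module's la_x86_64_gnu_pltenter hook (if one is
           loaded), which may inspect or modify the arguments before
           the call is resolved.  */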
        movq %rdx, LR_RDX_OFFSET(%rsp)
        movq %r8,  LR_R8_OFFSET(%rsp)
        movq %r9,  LR_R9_OFFSET(%rsp)
        movq %rcx, LR_RCX_OFFSET(%rsp)
        movq %rsi, LR_RSI_OFFSET(%rsp)
        movq %rdi, LR_RDI_OFFSET(%rsp)
        movq %rbp, LR_RBP_OFFSET(%rsp)

        lea 48(%rbx), %RAX_LP
        movq %rax, LR_RSP_OFFSET(%rsp)

        /* We always store the XMM registers even if AVX is available.
           This is to provide backward binary compatibility for existing
           audit modules.  */
        movaps %xmm0, (LR_XMM_OFFSET)(%rsp)
        movaps %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)
        movaps %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)
        movaps %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)
        movaps %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)
        movaps %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)
        movaps %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)
        movaps %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

# ifdef RESTORE_AVX
        /* This is to support AVX audit modules.  */
        VMOVA %VEC(0), (LR_VECTOR_OFFSET)(%rsp)
        VMOVA %VEC(1), (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
        VMOVA %VEC(2), (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
        VMOVA %VEC(3), (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
        VMOVA %VEC(4), (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
        VMOVA %VEC(5), (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
        VMOVA %VEC(6), (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
        VMOVA %VEC(7), (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)

        /* Save xmm0-xmm7 to detect if any of them are changed by an
           audit module.  */
        vmovdqa %xmm0, (LR_SIZE)(%rsp)
        vmovdqa %xmm1, (LR_SIZE + XMM_SIZE)(%rsp)
        vmovdqa %xmm2, (LR_SIZE + XMM_SIZE*2)(%rsp)
        vmovdqa %xmm3, (LR_SIZE + XMM_SIZE*3)(%rsp)
        vmovdqa %xmm4, (LR_SIZE + XMM_SIZE*4)(%rsp)
        vmovdqa %xmm5, (LR_SIZE + XMM_SIZE*5)(%rsp)
        vmovdqa %xmm6, (LR_SIZE + XMM_SIZE*6)(%rsp)
        vmovdqa %xmm7, (LR_SIZE + XMM_SIZE*7)(%rsp)
# endif

        mov %RSP_LP, %RCX_LP    # La_x86_64_regs pointer to %rcx.
        mov 48(%rbx), %RDX_LP   # Load return address if needed.
        mov 40(%rbx), %RSI_LP   # Copy args pushed by PLT in register.
        mov 32(%rbx), %RDI_LP   # %rdi: link_map, %rsi: reloc_index
        lea 16(%rbx), %R8_LP    # Address of framesize.
        call _dl_profile_fixup  # Call resolver.

        mov %RAX_LP, %R11_LP    # Save return value.

        movq 8(%rbx), %rax      # Get back register content.
        movq LR_RDX_OFFSET(%rsp), %rdx
        movq LR_R8_OFFSET(%rsp), %r8
        movq LR_R9_OFFSET(%rsp), %r9

        movaps (LR_XMM_OFFSET)(%rsp), %xmm0
        movaps (LR_XMM_OFFSET + XMM_SIZE)(%rsp), %xmm1
        movaps (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp), %xmm2
        movaps (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp), %xmm3
        movaps (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp), %xmm4
        movaps (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp), %xmm5
        movaps (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp), %xmm6
        movaps (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp), %xmm7

# ifdef RESTORE_AVX
        /* Check if any of xmm0-xmm7 were changed by an audit
           module.  */
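        /* The test below: vpcmpeqq compares the live XMM register with
           the copy saved above at LR_SIZE(%rsp), setting each 64-bit
           lane to all-ones on equality; vpmovmskb then collects one bit
           per byte, so %esi == 0xffff exactly when all 16 bytes are
           unchanged.  If unchanged, the original full-width vector
           saved at LR_VECTOR_OFFSET is reloaded; if the audit module
           changed the XMM value, that new value is written to the
           vector slot instead.  */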
        vpcmpeqq (LR_SIZE)(%rsp), %xmm0, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm0, (LR_VECTOR_OFFSET)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET)(%rsp), %VEC(0)
        vmovdqa %xmm0, (LR_XMM_OFFSET)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm1, (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE)(%rsp), %VEC(1)
        vmovdqa %xmm1, (LR_XMM_OFFSET + XMM_SIZE)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*2)(%rsp), %xmm2, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm2, (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*2)(%rsp), %VEC(2)
        vmovdqa %xmm2, (LR_XMM_OFFSET + XMM_SIZE*2)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*3)(%rsp), %xmm3, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm3, (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*3)(%rsp), %VEC(3)
        vmovdqa %xmm3, (LR_XMM_OFFSET + XMM_SIZE*3)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*4)(%rsp), %xmm4, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm4, (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*4)(%rsp), %VEC(4)
        vmovdqa %xmm4, (LR_XMM_OFFSET + XMM_SIZE*4)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*5)(%rsp), %xmm5, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm5, (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*5)(%rsp), %VEC(5)
        vmovdqa %xmm5, (LR_XMM_OFFSET + XMM_SIZE*5)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*6)(%rsp), %xmm6, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm6, (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*6)(%rsp), %VEC(6)
        vmovdqa %xmm6, (LR_XMM_OFFSET + XMM_SIZE*6)(%rsp)

1:      vpcmpeqq (LR_SIZE + XMM_SIZE*7)(%rsp), %xmm7, %xmm8
        vpmovmskb %xmm8, %esi
        cmpl $0xffff, %esi
        je 2f
        vmovdqa %xmm7, (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp)
        jmp 1f
2:      VMOVA (LR_VECTOR_OFFSET + VECTOR_SIZE*7)(%rsp), %VEC(7)
        vmovdqa %xmm7, (LR_XMM_OFFSET + XMM_SIZE*7)(%rsp)

1:
# endif

        mov 16(%rbx), %R10_LP   # Anything in framesize?
        test %R10_LP, %R10_LP
        jns 3f

        /* The framesize is negative, so there will be no call to
           _dl_audit_pltexit.  */

        /* Get back the register content.  */
        movq LR_RCX_OFFSET(%rsp), %rcx
        movq LR_RSI_OFFSET(%rsp), %rsi
        movq LR_RDI_OFFSET(%rsp), %rdi

        mov %RBX_LP, %RSP_LP
        movq (%rsp), %rbx
        cfi_restore(%rbx)
        cfi_def_cfa_register(%rsp)

        add $48, %RSP_LP        # Adjust the stack to the return value
                                # (eats the reloc index and link_map).
        cfi_adjust_cfa_offset(-48)
        jmp *%r11               # Jump to function address.

3:
        cfi_adjust_cfa_offset(48)
        cfi_rel_offset(%rbx, 0)
        cfi_def_cfa_register(%rbx)

        /* At this point we need to prepare a new stack for the function
           which has to be called.  We copy the original stack to a
           temporary buffer of the size specified by the 'framesize'
           returned from _dl_profile_fixup.  */
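        /* Sketch of the copy (the offsets assume the usual
           link-defines.sym values): %r10 holds the framesize returned
           via _dl_profile_fixup; add $8 / and $-16 rounds it to a
           16-byte multiple so the new outgoing frame stays aligned.
           The source, LR_RSP_OFFSET(%rbx), is 56(%rbx), i.e. the first
           stack-passed parameter in the layout pictured above, and
           rep movsq copies %r10/8 quadwords of it to just below the
           current stack pointer.  Afterwards %rdi has advanced exactly
           to the La_x86_64_regs block, which is why the 24/32/40(%rdi)
           loads below recover the saved %rcx, %rsi and %rdi.  */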
        lea LR_RSP_OFFSET(%rbx), %RSI_LP        # stack
        add $8, %R10_LP
        and $-16, %R10_LP
        mov %R10_LP, %RCX_LP
        sub %R10_LP, %RSP_LP
        mov %RSP_LP, %RDI_LP
        shr $3, %RCX_LP
        rep
        movsq

        movq 24(%rdi), %rcx     # Get back register content.
        movq 32(%rdi), %rsi
        movq 40(%rdi), %rdi

        call *%r11

        mov 24(%rbx), %RSP_LP   # Drop the copied stack content.

        /* Now we have to prepare the La_x86_64_retval structure for
           _dl_audit_pltexit.  %rsp points to the La_x86_64_regs
           structure again, so we just need to allocate
           sizeof(La_x86_64_retval) space on the stack; the alignment
           has already been taken care of.  */
# ifdef RESTORE_AVX
        /* sizeof(La_x86_64_retval).  Need extra space for 2 SSE
           registers to detect if the xmm0/xmm1 registers are changed
           by an audit module.  Since %rsp is aligned to VEC_SIZE, we
           need to make sure that the address of La_x86_64_retval +
           LRV_VECTOR0_OFFSET is aligned to VEC_SIZE.  */
#  define LRV_SPACE (LRV_SIZE + XMM_SIZE*2)
#  define LRV_MISALIGNED ((LRV_SIZE + LRV_VECTOR0_OFFSET) & (VEC_SIZE - 1))
#  if LRV_MISALIGNED == 0
        sub $LRV_SPACE, %RSP_LP
#  else
        sub $(LRV_SPACE + VEC_SIZE - LRV_MISALIGNED), %RSP_LP
#  endif
# else
        sub $LRV_SIZE, %RSP_LP  # sizeof(La_x86_64_retval)
# endif
        mov %RSP_LP, %RCX_LP    # La_x86_64_retval argument to %rcx.

        /* Fill in the La_x86_64_retval structure.  */
        movq %rax, LRV_RAX_OFFSET(%rcx)
        movq %rdx, LRV_RDX_OFFSET(%rcx)

        movaps %xmm0, LRV_XMM0_OFFSET(%rcx)
        movaps %xmm1, LRV_XMM1_OFFSET(%rcx)

# ifdef RESTORE_AVX
        /* This is to support AVX audit modules.  */
        VMOVA %VEC(0), LRV_VECTOR0_OFFSET(%rcx)
        VMOVA %VEC(1), LRV_VECTOR1_OFFSET(%rcx)

        /* Save xmm0/xmm1 to detect if they are changed by an audit
           module.  */
        vmovdqa %xmm0, (LRV_SIZE)(%rcx)
        vmovdqa %xmm1, (LRV_SIZE + XMM_SIZE)(%rcx)
# endif

        fstpt LRV_ST0_OFFSET(%rcx)
        fstpt LRV_ST1_OFFSET(%rcx)

        movq 24(%rbx), %rdx     # La_x86_64_regs argument to %rdx.
        movq 40(%rbx), %rsi     # Copy args pushed by PLT in register.
        movq 32(%rbx), %rdi     # %rdi: link_map, %rsi: reloc_index
        call _dl_audit_pltexit

        /* Restore return registers.  */
        movq LRV_RAX_OFFSET(%rsp), %rax
        movq LRV_RDX_OFFSET(%rsp), %rdx

        movaps LRV_XMM0_OFFSET(%rsp), %xmm0
        movaps LRV_XMM1_OFFSET(%rsp), %xmm1

# ifdef RESTORE_AVX
        /* Check if the xmm0/xmm1 registers were changed by an audit
           module.  */
        vpcmpeqq (LRV_SIZE)(%rsp), %xmm0, %xmm2
        vpmovmskb %xmm2, %esi
        cmpl $0xffff, %esi
        jne 1f
        VMOVA LRV_VECTOR0_OFFSET(%rsp), %VEC(0)

1:      vpcmpeqq (LRV_SIZE + XMM_SIZE)(%rsp), %xmm1, %xmm2
        vpmovmskb %xmm2, %esi
        cmpl $0xffff, %esi
        jne 1f
        VMOVA LRV_VECTOR1_OFFSET(%rsp), %VEC(1)

1:
# endif

        fldt LRV_ST1_OFFSET(%rsp)
        fldt LRV_ST0_OFFSET(%rsp)

        mov %RBX_LP, %RSP_LP
        movq (%rsp), %rbx
        cfi_restore(%rbx)
        cfi_def_cfa_register(%rsp)

        add $48, %RSP_LP        # Adjust the stack to the return value
                                # (eats the reloc index and link_map).
        cfi_adjust_cfa_offset(-48)
        retq

        cfi_endproc
        .size _dl_runtime_profile, .-_dl_runtime_profile
#endif