/* PLT trampolines.  ia64 version.
   Copyright (C) 2005-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#undef ret

/*
   This code is used in dl-runtime.c to call the `_dl_fixup' function
   and then redirect to the address it returns.  `_dl_fixup()' takes two
   arguments, however _dl_profile_fixup() takes five.

   The ABI specifies that we will never see more than 8 input
   registers to a function call, thus it is safe to simply allocate
   those, and simpler than playing stack games.  */

/* Used to save and restore 8 incoming fp registers */
#define RESOLVE_FRAME_SIZE (16*8)

/* _dl_runtime_resolve

   Lazy-binding trampoline.  The values in r15 and r16 are set up by
   code outside this file:

     r16  is forwarded unchanged as the first argument of _dl_fixup.
     r15  is multiplied by 24 (the relocation record size) and
          forwarded as the second argument of _dl_fixup.

   The incoming argument registers in0-in7 stay untouched in the
   register frame; f8-f15 are spilled to the stack and r8-r11 are kept
   in loc2-loc5 across the call.  _dl_fixup's ret0 becomes the branch
   target (b6) and ret1 becomes gp.  Control is transferred with a
   plain branch after b0 and ar.pfs have been restored, so the target
   sees the register state of the original call.

   Local register use:
     loc0  ar.pfs        loc1  b0 (return pointer)
     loc2  r8            loc3  r9
     loc4  r10           loc5  r11  */
ENTRY(_dl_runtime_resolve)
        { .mmi
          .prologue
          .save ar.pfs, r40
          alloc loc0 = ar.pfs, 8, 6, 2, 0
          /* Use the 16 byte scratch area. r2 will start at f8 and
             r3 will start at f9.  */
          adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
          adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
        }
        { .mii
          .fframe RESOLVE_FRAME_SIZE
          adds r12 = -RESOLVE_FRAME_SIZE, r12
          .save rp, loc1
          mov loc1 = b0
          .body
          mov loc2 = r8         /* preserve struct value register */
          ;;
        }
        { .mii
          mov loc3 = r9         /* preserve language specific register */
          mov loc4 = r10        /* preserve language specific register */
          mov loc5 = r11        /* preserve language specific register */
        }
        { .mmi
          stf.spill [r2] = f8, 32
          stf.spill [r3] = f9, 32
          mov out0 = r16
          ;;
        }
        { .mmi
          stf.spill [r2] = f10, 32
          stf.spill [r3] = f11, 32
          shl out1 = r15, 4     /* out1 = r15 * 16 */
          ;;
        }
        { .mmi
          stf.spill [r2] = f12, 32
          stf.spill [r3] = f13, 32
          /* Relocation record is 24 byte.  out1 = r15 * 8 + r15 * 16.  */
          shladd out1 = r15, 3, out1
          ;;
        }
        { .mmb
          stf.spill [r2] = f14
          stf.spill [r3] = f15
          br.call.sptk.many b0 = _dl_fixup
        }
        { .mii
          /* Skip the 16byte scratch area.  */
          adds r2 = 16, r12
          adds r3 = 32, r12
          mov b6 = ret0         /* address to branch to */
          ;;
        }
        { .mmi
          ldf.fill f8 = [r2], 32
          ldf.fill f9 = [r3], 32
          mov b0 = loc1
          ;;
        }
        { .mmi
          ldf.fill f10 = [r2], 32
          ldf.fill f11 = [r3], 32
          mov gp = ret1         /* gp for the branch target */
          ;;
        }
        { .mmi
          ldf.fill f12 = [r2], 32
          ldf.fill f13 = [r3], 32
          mov ar.pfs = loc0
          ;;
        }
        { .mmi
          ldf.fill f14 = [r2], 32
          ldf.fill f15 = [r3], 32
          .restore sp           /* pop the unwind frame state */
          adds r12 = RESOLVE_FRAME_SIZE, r12
          ;;
        }
        { .mii
          mov r9 = loc3         /* restore language specific register */
          mov r10 = loc4        /* restore language specific register */
          mov r11 = loc5        /* restore language specific register */
        }
        { .mii
          mov r8 = loc2         /* restore struct value register */
          ;;
        }
        /* An alloc is needed for the break system call to work.
           We don't care about the old value of the pfs register.  */
        { .mmb
          .prologue
          .body
          alloc r2 = ar.pfs, 0, 0, 8, 0
          br.sptk.many b6
          ;;
        }
END(_dl_runtime_resolve)


/* The fourth argument to _dl_profile_fixup and the third one to
   _dl_audit_pltexit are a pointer to La_ia64_regs:

   8byte r8
   8byte r9
   8byte r10
   8byte r11
   8byte in0
   8byte in1
   8byte in2
   8byte in3
   8byte in4
   8byte in5
   8byte in6
   8byte in7
   16byte f8
   16byte f9
   16byte f10
   16byte f11
   16byte f12
   16byte f13
   16byte f14
   16byte f15
   8byte ar.unat
   8byte sp

   The fifth argument to _dl_profile_fixup is a pointer to long int.
   The fourth argument to _dl_audit_pltexit is a pointer to
   La_ia64_retval:

   8byte r8
   8byte r9
   8byte r10
   8byte r11
   16byte f8
   16byte f9
   16byte f10
   16byte f11
   16byte f12
   16byte f13
   16byte f14
   16byte f15

  Since stack has to be 16 byte aligned, the stack allocation is in
  16byte increment. Before calling _dl_profile_fixup, the stack will
  look like

  psp   new frame_size
  +16   La_ia64_regs
  sp    scratch

 */

#define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
#define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)

#ifndef PROF
/* _dl_runtime_profile

   Auditing/profiling variant of the trampoline.  It fills in a
   La_ia64_regs block on the stack, calls _dl_profile_fixup with

     out0 = r16
     out1 = r15 * 24 (relocation record size)
     out2 = b0 on entry
     out3 = pointer to La_ia64_regs (sp + 16)
     out4 = previous sp, where the new frame size is written

   and then inspects the frame size stored at the previous sp:

     -1     restore state and branch straight to the resolved
            function (ret0), skipping _dl_audit_pltexit.
      0     call the resolved function without copying any stack,
            then call _dl_audit_pltexit.
     other  round the size up to a multiple of 16, copy that many
            bytes from above the caller's scratch area to a newly
            allocated stack area, call the resolved function, then
            call _dl_audit_pltexit.

   After _dl_audit_pltexit returns, r8-r11 and f8-f15 are reloaded
   from the La_ia64_retval block and control returns to the original
   caller through b0.

   Local register use:
     loc0   ar.pfs       loc1   b0 (return pointer)
     loc2   r8           loc3   r9
     loc4   r10          loc5   r11
     loc6   ar.lc        loc7   pointer to La_ia64_regs
     loc8   r16          loc9   r15 * 24
     loc10  sp on entry  loc11  gp on entry  */
ENTRY(_dl_runtime_profile)
        { .mii
          .prologue
          .save ar.pfs, r40
          alloc loc0 = ar.pfs, 8, 12, 8, 0
          .vframe loc10
          mov loc10 = r12
          .save rp, loc1
          mov loc1 = b0
        }
        { .mii
          .save ar.unat, r17
          mov r17 = ar.unat
          .save ar.lc, loc6
          mov loc6 = ar.lc
          mov loc11 = gp
        }
        { .mii
          .body
          /* There is a 16 byte scratch area. r2 will start at r8 and
             r3 will start at r9 for La_ia64_regs.  */
          adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
          adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
          adds r12 = -PLTENTER_FRAME_SIZE, r12
          ;;
        }
        { .mmi
          st8 [r2] = r8, 16
          st8 [r3] = r9, 16
          mov out2 = b0         /* needed by _dl_profile_fixup */
          ;;
        }
        { .mmi
          st8 [r2] = r10, 16
          st8 [r3] = r11, 16
          adds out3 = 16, r12   /* pointer to La_ia64_regs */
          ;;
        }
        { .mmi
          .mem.offset 0, 0
          st8.spill [r2] = in0, 16
          .mem.offset 8, 0
          st8.spill [r3] = in1, 16
          mov out4 = loc10      /* pointer to new frame size  */
          ;;
        }
        { .mmi
          .mem.offset 0, 0
          st8.spill [r2] = in2, 16
          .mem.offset 8, 0
          st8.spill [r3] = in3, 16
          mov loc2 = r8         /* preserve struct value register */
          ;;
        }
        { .mmi
          .mem.offset 0, 0
          st8.spill [r2] = in4, 16
          .mem.offset 8, 0
          st8.spill [r3] = in5, 16
          mov loc3 = r9         /* preserve language specific register */
          ;;
        }
        { .mmi
          /* in6/in7 are spilled exactly like in0-in5.  A plain st8 of
             a register whose NaT bit is set faults, and the NaT bits
             would be missing from the ar.unat value that is stored
             in La_ia64_regs below.  */
          .mem.offset 0, 0
          st8.spill [r2] = in6, 16
          .mem.offset 8, 0
          st8.spill [r3] = in7, 24      /* adjust for f9 */
          mov loc4 = r10        /* preserve language specific register */
          ;;
        }
        { .mii
          mov r18 = ar.unat     /* save it in La_ia64_regs */
          mov loc7 = out3       /* save it for _dl_audit_pltexit */
          mov loc5 = r11        /* preserve language specific register */
        }
        { .mmi
          stf.spill [r2] = f8, 32
          stf.spill [r3] = f9, 32
          mov out0 = r16        /* needed by _dl_profile_fixup */
          ;;
        }
        { .mii
          mov ar.unat = r17     /* restore it for function call */
          mov loc8 = r16        /* save it for _dl_audit_pltexit */
          nop.i 0x0
        }
        { .mmi
          stf.spill [r2] = f10, 32
          stf.spill [r3] = f11, 32
          shl out1 = r15, 4     /* out1 = r15 * 16 */
          ;;
        }
        { .mmi
          stf.spill [r2] = f12, 32
          stf.spill [r3] = f13, 32
          /* Relocation record is 24 byte.  out1 = r15 * 8 + r15 * 16.  */
          shladd out1 = r15, 3, out1
          ;;
        }
        { .mmi
          stf.spill [r2] = f14, 32
          stf.spill [r3] = f15, 24
          mov loc9 = out1       /* save it for _dl_audit_pltexit */
          ;;
        }
        { .mmb
          st8 [r2] = r18        /* store ar.unat */
          st8 [r3] = loc10      /* store sp */
          br.call.sptk.many b0 = _dl_profile_fixup
        }
        { .mii
          /* Skip the 16byte scratch area, 4 language specific GRs and
             8 incoming GRs to restore incoming fp registers.  */
          adds r2 = (4*8 + 8*8 + 16), r12
          adds r3 = (4*8 + 8*8 + 32), r12
          mov b6 = ret0         /* address of the resolved function */
          ;;
        }
        { .mmi
          ldf.fill f8 = [r2], 32
          ldf.fill f9 = [r3], 32
          mov gp = ret1         /* gp for the resolved function */
          ;;
        }
        { .mmi
          ldf.fill f10 = [r2], 32
          ldf.fill f11 = [r3], 32
          mov r8 = loc2         /* restore struct value register */
          ;;
        }
        { .mmi
          ldf.fill f12 = [r2], 32
          ldf.fill f13 = [r3], 32
          mov r9 = loc3         /* restore language specific register */
          ;;
        }
        { .mmi
          ldf.fill f14 = [r2], 32
          ldf.fill f15 = [r3], 32
          mov r10 = loc4        /* restore language specific register */
          ;;
        }
        { .mii
          ld8 r15 = [loc10]     /* load the new frame size */
          mov r11 = loc5        /* restore language specific register */
          ;;
          /* p6: frame size is -1, skip _dl_audit_pltexit.
             p7: frame size is not -1.  */
          cmp.eq p6, p7 = -1, r15
          ;;
        }
        { .mii
          /* p8: frame size is 0, nothing to copy.
             p9: frame size is neither -1 nor 0.  */
(p7)      cmp.eq p8, p9 = 0, r15
(p6)      mov b0 = loc1
(p6)      mov ar.lc = loc6
        }
        { .mib
          nop.m 0x0
(p6)      mov ar.pfs = loc0
(p6)      br.cond.dptk.many .Lresolved
          ;;
        }

        /* At this point, the stack looks like

          +psp  free
          +16   La_ia64_regs
          sp    scratch

          We need to keep the current stack and call the resolved
          function by copying the r15 byte from sp + PLTENTER_FRAME_SIZE
          + 16 (scratch area) to sp + 16 (scratch area). Since stack
          has to be 16byte aligned, we round r15 up to 16byte.  */

        { .mbb
(p9)      adds r15 = 15, r15
(p8)      br.cond.dptk.many .Lno_new_frame
          nop.b 0x0
          ;;
        }
        { .mmi
          and r15 = -16, r15
          ;;
          /* We don't copy the 16byte scratch area. Prepare r16/r17 as
             destination.  */
          sub r16 = r12, r15
          sub r17 = r12, r15
          ;;
        }
        { .mii
          adds r16 = 16, r16
          adds r17 = 24, r17
          sub r12 = r12, r15    /* Adjust stack  */
          ;;
        }
        { .mii
          nop.m 0x0
          shr r15 = r15, 4      /* number of 16 byte chunks */
          ;;
          adds r15 = -1, r15    /* ar.lc holds the iteration count - 1 */
          ;;
        }
        { .mii
          /* Skip the 16byte scratch area. Prepare r2/r3 as source.  */
          adds r2 = 16, loc10
          adds r3 = 24, loc10
          mov ar.lc = r15
          ;;
        }
.Lcopy:
        /* Copy 16 bytes per iteration, as two interleaved 8 byte
           streams.  */
        { .mmi
          ld8 r18 = [r2], 16
          ld8 r19 = [r3], 16
          nop.i 0x0
          ;;
        }
        { .mmb
          st8 [r16] = r18, 16
          st8 [r17] = r19, 16
          br.cloop.sptk.few .Lcopy
        }
.Lno_new_frame:
        { .mii
          mov out0 = in0
          mov out1 = in1
          mov out2 = in2
        }
        { .mii
          mov out3 = in3
          mov out4 = in4
          mov out5 = in5
        }
        { .mib
          mov out6 = in6
          mov out7 = in7
          /* Call the resolved function */
          br.call.sptk.many b0 = b6
        }
        { .mii
          /* Prepare stack for _dl_audit_pltexit. Loc10 has the original
             stack pointer.  */
          adds r12 = -PLTEXIT_FRAME_SIZE, loc10
          adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
          adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
          ;;
        }
        { .mmi
          /* Store all possible return values into buffer.  */
          st8 [r2] = r8, 16
          st8 [r3] = r9, 16
          mov out0 = loc8
          ;;
        }
        { .mmi
          st8 [r2] = r10, 16
          st8 [r3] = r11, 24    /* adjust for f9 */
          mov out1 = loc9
          ;;
        }
        { .mmi
          stf.spill [r2] = f8, 32
          stf.spill [r3] = f9, 32
          mov out2 = loc7       /* Pointer to La_ia64_regs */
          ;;
        }
        { .mmi
          stf.spill [r2] = f10, 32
          stf.spill [r3] = f11, 32
          adds out3 = 16, r12   /* Pointer to La_ia64_retval */
          ;;
        }
        { .mmi
          stf.spill [r2] = f12, 32
          stf.spill [r3] = f13, 32
          /* We need to restore gp for _dl_audit_pltexit. */
          mov gp = loc11
          ;;
        }
        { .mmb
          stf.spill [r2] = f14
          stf.spill [r3] = f15
          br.call.sptk.many b0 = _dl_audit_pltexit
        }
        { .mmi
          /* Load all the non-floating and floating return values. Skip
             the 16byte scratch area.  */
          adds r2 = 16, r12
          adds r3 = 24, r12
          nop.i 0x0
          ;;
        }
        { .mmi
          ld8 r8 = [r2], 16
          ld8 r9 = [r3], 16
          nop.i 0x0
          ;;
        }
        { .mmi
          ld8 r10 = [r2], 16
          ld8 r11 = [r3], 24    /* adjust for f9 */
          nop.i 0x0
          ;;
        }
        { .mmi
          ldf.fill f8 = [r2], 32
          ldf.fill f9 = [r3], 32
          mov ar.lc = loc6
          ;;
        }
        { .mmi
          ldf.fill f10 = [r2], 32
          ldf.fill f11 = [r3], 32
          mov ar.pfs = loc0
          ;;
        }
        { .mmi
          ldf.fill f12 = [r2], 32
          ldf.fill f13 = [r3], 32
          mov b0 = loc1
          ;;
        }
        { .mmi
          ldf.fill f14 = [r2]
          ldf.fill f15 = [r3]
          /* We know that the previous stack pointer, loc10, isn't 0.
             We use it to reload p7.  */
          cmp.ne p7, p0 = 0, loc10
          ;;
        }
.Lresolved:
        /* Reached with p7 set after _dl_audit_pltexit (return to the
           caller) or with p7 clear from the frame size == -1 path
           (fall through and branch to the resolved function).  */
        { .mmb
          .restore sp
          mov r12 = loc10
(p7)      br.ret.sptk.many b0
          ;;
        }
        /* An alloc is needed for the break system call to work. We
           don't care about the old value of the pfs register. After
           this alloc, we can't use any rotating registers. Otherwise
           assembler won't be happy. This has to be at the end.  */
        { .mmb
          .prologue
          .body
          alloc r2 = ar.pfs, 0, 0, 8, 0
          br.sptk.many b6
          ;;
        }
END(_dl_runtime_profile)
#endif