1/* PLT trampolines.  ia64 version.
2   Copyright (C) 2005-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20#undef ret
21
22/*
23   This code is used in dl-runtime.c to call the `_dl_fixup' function
24   and then redirect to the address it returns. `_dl_fixup()' takes two
25   arguments, however _dl_profile_fixup() takes five.
26
27   The ABI specifies that we will never see more than 8 input
28   registers to a function call, thus it is safe to simply allocate
29   those, and simpler than playing stack games.  */
30
31/* Size of the stack frame _dl_runtime_resolve allocates to save and
   restore the 8 incoming fp registers f8-f15, 16 bytes per register.  */
32#define RESOLVE_FRAME_SIZE (16*8)
33
/* _dl_runtime_resolve

   Calls _dl_fixup (r16, r15 * 24) and then branches to the address
   returned in ret0, with gp loaded from ret1.  Around the call it
   preserves:
     - in0-in7, by declaring 8 input registers in the alloc;
     - ar.pfs, b0 and r8-r11 in loc0-loc5;
     - f8-f15, spilled to the memory stack.
   b0 is restored before the final branch, so the target is entered
   with the same return address this trampoline was entered with.

   NOTE(review): r15 and r16 are set up by whatever jumps here, which
   is not visible in this file; presumably r16 identifies the object
   and r15 is the relocation index -- confirm against the PLT stub.  */
34ENTRY(_dl_runtime_resolve)
35	{ .mmi
36	  .prologue
37	  .save ar.pfs, r40
	  /* With 8 input registers, loc0 is r40.  */
38	  alloc loc0 = ar.pfs, 8, 6, 2, 0
39	  /* Use the 16 byte scratch area. r2 will start at f8 and
40	     r3 will start at f9.  */
	  /* The spill slots run from new sp + 16 (f8) to
	     new sp + RESOLVE_FRAME_SIZE (f15), so f15 is stored in the
	     16 bytes at the incoming sp and the 16 bytes at the new sp
	     are left untouched.  */
41	  adds r2 = -(RESOLVE_FRAME_SIZE - 16), r12
42	  adds r3 = -(RESOLVE_FRAME_SIZE - 32), r12
43	}
44	{ .mii
45	  .fframe RESOLVE_FRAME_SIZE
46	  adds r12 = -RESOLVE_FRAME_SIZE, r12
47	  .save rp, loc1
48	  mov loc1 = b0
49	  .body
50	  mov loc2 = r8		/* preserve struct value register */
51	  ;;
52	}
53	{ .mii
54	  mov loc3 = r9		/* preserve language specific register */
55	  mov loc4 = r10	/* preserve language specific register */
56	  mov loc5 = r11	/* preserve language specific register */
57	}
	/* r2 walks the even-numbered fp registers and r3 the odd-numbered
	   ones; each pointer advances by 32, i.e. two 16 byte slots.  */
58	{ .mmi
59	  stf.spill [r2] = f8, 32
60	  stf.spill [r3] = f9, 32
61	  mov out0 = r16	/* first argument of _dl_fixup */
62	  ;;
63	}
64	{ .mmi
65	  stf.spill [r2] = f10, 32
66	  stf.spill [r3] = f11, 32
67	  shl out1 = r15, 4	/* out1 = r15 * 16 */
68	  ;;
69	}
70	{ .mmi
71	  stf.spill [r2] = f12, 32
72	  stf.spill [r3] = f13, 32
73	  /* Relocation record is 24 byte. */
	  /* out1 = r15 * 8 + r15 * 16 = r15 * 24, the second argument
	     of _dl_fixup.  */
74	  shladd out1 = r15, 3, out1
75	  ;;
76	}
77	{ .mmb
78	  stf.spill [r2] = f14
79	  stf.spill [r3] = f15
80	  br.call.sptk.many b0 = _dl_fixup
81	}
82	{ .mii
83	  /* Skip the 16byte scratch area.  */
84	  adds r2 = 16, r12
85	  adds r3 = 32, r12
86	  mov b6 = ret0		/* address to branch to at the end */
87	  ;;
88	}
89	{ .mmi
90	  ldf.fill f8 = [r2], 32
91	  ldf.fill f9 = [r3], 32
92	  mov b0 = loc1		/* restore the incoming return address */
93	  ;;
94	}
95	{ .mmi
96	  ldf.fill f10 = [r2], 32
97	  ldf.fill f11 = [r3], 32
98	  mov gp = ret1		/* gp value returned together with ret0 */
99	  ;;
100	}
101	{ .mmi
102	  ldf.fill f12 = [r2], 32
103	  ldf.fill f13 = [r3], 32
104	  mov ar.pfs = loc0
105	  ;;
106	}
107	{ .mmi
108	  ldf.fill f14 = [r2], 32
109	  ldf.fill f15 = [r3], 32
110	  .restore sp		/* pop the unwind frame state */
111	  adds r12 = RESOLVE_FRAME_SIZE, r12
112	  ;;
113	}
114	{ .mii
115	  mov r9 = loc3		/* restore language specific register */
116	  mov r10 = loc4	/* restore language specific register */
117	  mov r11 = loc5	/* restore language specific register */
118	}
119	{ .mii
120	  mov r8 = loc2		/* restore struct value register */
121	  ;;
122	}
123	/* An alloc is needed for the break system call to work.
124	   We don't care about the old value of the pfs register.  */
	/* The new frame has no inputs or locals and 8 outputs, so the
	   loc registers must not be referenced from here on.
	   NOTE(review): presumably the 8 outputs are the same registers
	   that held in0-in7, which is how the incoming arguments reach
	   the target -- confirm against the architecture manual.  */
125	{ .mmb
126	  .prologue
127	  .body
128	  alloc r2 = ar.pfs, 0, 0, 8, 0
129	  br.sptk.many b6
130	  ;;
131	}
132END(_dl_runtime_resolve)
133
134
135/* The fourth argument to _dl_profile_fixup and the third one to
136   _dl_audit_pltexit are a pointer to La_ia64_regs:
137
138   8byte r8
139   8byte r9
140   8byte r10
141   8byte r11
142   8byte in0
143   8byte in1
144   8byte in2
145   8byte in3
146   8byte in4
147   8byte in5
148   8byte in6
149   8byte in7
150   16byte f8
151   16byte f9
152   16byte f10
153   16byte f11
154   16byte f12
155   16byte f13
156   16byte f14
157   16byte f15
158   8byte ar.unat
159   8byte sp
160
161   The fifth argument to _dl_profile_fixup is a pointer to long int.
162   The fourth argument to _dl_audit_pltexit is a pointer to
163   La_ia64_retval:
164
165   8byte r8
166   8byte r9
167   8byte r10
168   8byte r11
169   16byte f8
170   16byte f9
171   16byte f10
172   16byte f11
173   16byte f12
174   16byte f13
175   16byte f14
176   16byte f15
177
178  Since the stack has to be 16-byte aligned, stack space is allocated
179  in 16-byte increments. Before calling _dl_profile_fixup, the stack
180  will look like
181
182  psp	new frame_size
183  +16	La_ia64_regs
184  sp	scratch
185
186 */
187
/* PLTENTER_FRAME_SIZE: 4 8-byte slots for r8-r11, 8 8-byte slots for
   in0-in7, 8 16-byte slots for f8-f15, 2 8-byte slots for ar.unat and
   sp (together the La_ia64_regs layout), plus the 16 byte scratch area.
   PLTEXIT_FRAME_SIZE additionally holds r8-r11 and f8-f15 in the
   La_ia64_retval layout.  */
188#define PLTENTER_FRAME_SIZE (4*8 + 8*8 + 8*16 + 2*8 + 16)
189#define PLTEXIT_FRAME_SIZE (PLTENTER_FRAME_SIZE + 4*8 + 8*16)
190
191#ifndef PROF
/* _dl_runtime_profile

   Saves the incoming register state into a La_ia64_regs block on the
   stack and calls
     _dl_profile_fixup (r16, r15 * 24, b0, &regs, &framesize)
   where the frame size word lives at the incoming sp.  The address
   returned in ret0 is placed in b6 and ret1 in gp.

   The frame size word is then examined:
     - if it is -1, the trampoline restores b0, ar.lc, ar.pfs and sp
       and branches to b6, as _dl_runtime_resolve does;
     - otherwise the frame size (rounded up to a multiple of 16) bytes
       starting at incoming sp + 16 are copied to a new outgoing area
       (nothing is copied when it is 0), the target is called through
       b6, its return registers are saved into a La_ia64_retval block,
       _dl_audit_pltexit is called, the return registers are reloaded
       and the trampoline returns to its own caller.

   Local register usage:
     loc0	ar.pfs
     loc1	b0 (incoming return address)
     loc2-loc5	r8-r11
     loc6	ar.lc
     loc7	pointer to La_ia64_regs
     loc8	r16
     loc9	r15 * 24
     loc10	incoming sp
     loc11	incoming gp

   NOTE(review): r15 and r16 are set up by whatever jumps here, which
   is not visible in this file -- confirm their meaning against the
   PLT stub.  */
192ENTRY(_dl_runtime_profile)
193	{ .mii
194	  .prologue
195	  .save ar.pfs, r40
196	  alloc loc0 = ar.pfs, 8, 12, 8, 0
197	  .vframe loc10
198	  mov loc10 = r12
199	  .save rp, loc1
200	  mov loc1 = b0
201	}
202	{ .mii
203	  .save ar.unat, r17
204	  mov r17 = ar.unat
205	  .save ar.lc, loc6
206	  mov loc6 = ar.lc
207	  mov loc11 = gp
208	}
209	{ .mii
210	  .body
211	  /* There is a 16 byte scratch area. r2 will start at r8 and
212	     r3 will start at r9 for La_ia64_regs.  */
213	  adds r2 = -(PLTENTER_FRAME_SIZE - 16), r12
214	  adds r3 = -(PLTENTER_FRAME_SIZE - 24), r12
215	  adds r12 = -PLTENTER_FRAME_SIZE, r12
216	  ;;
217	}
	/* r2 and r3 fill alternate 8 byte slots, each advancing by 16.  */
218	{ .mmi
219	  st8 [r2] = r8, 16;
220	  st8 [r3] = r9, 16;
221	  mov out2 = b0		/* needed by _dl_profile_fixup */
222	  ;;
223	}
224	{ .mmi
225	  st8 [r2] = r10, 16;
226	  st8 [r3] = r11, 16;
227	  adds out3 = 16, r12	/* pointer to La_ia64_regs */
228	  ;;
229	}
230	{ .mmi
231	  .mem.offset 0, 0
232	  st8.spill [r2] = in0, 16
233	  .mem.offset 8, 0
234	  st8.spill [r3] = in1, 16
235	  mov out4 = loc10	/* pointer to new frame size  */
236	  ;;
237	}
238	{ .mmi
239	  .mem.offset 0, 0
240	  st8.spill [r2] = in2, 16
241	  .mem.offset 8, 0
242	  st8.spill [r3] = in3, 16
243	  mov loc2 = r8		/* preserve struct value register */
244	  ;;
245	}
246	{ .mmi
247	  .mem.offset 0, 0
248	  st8.spill [r2] = in4, 16
249	  .mem.offset 8, 0
250	  st8.spill [r3] = in5, 16
251	  mov loc3 = r9		/* preserve language specific register */
252	  ;;
253	}
	/* NOTE(review): in6 and in7 are stored with plain st8 while
	   in0-in5 use st8.spill -- confirm this difference is
	   intentional.  */
254	{ .mmi
255	  .mem.offset 0, 0
256	  st8 [r2] = in6, 16
257	  .mem.offset 8, 0
258	  st8 [r3] = in7, 24	/* adjust for f9 */
259	  mov loc4 = r10	/* preserve language specific register */
260	  ;;
261	}
262	{ .mii
	  /* r18 is ar.unat as left by the st8.spill's above; it is
	     stored into La_ia64_regs just before the call.  */
263	  mov r18 = ar.unat	/* save it in La_ia64_regs */
264	  mov loc7 = out3	/* save it for _dl_audit_pltexit */
265	  mov loc5 = r11	/* preserve language specific register */
266	}
	/* From here r2 and r3 fill alternate 16 byte fp slots, each
	   advancing by 32.  */
267	{ .mmi
268	  stf.spill [r2] = f8, 32
269	  stf.spill [r3] = f9, 32
270	  mov out0 = r16	/* needed by _dl_profile_fixup */
271	  ;;
272	}
273	{ .mii
	  /* r17 holds the ar.unat value read on entry.  */
274	  mov ar.unat = r17	/* restore it for function call */
275	  mov loc8 = r16	/* save it for _dl_audit_pltexit */
276	  nop.i 0x0
277	}
278	{ .mmi
279	  stf.spill [r2] = f10, 32
280	  stf.spill [r3] = f11, 32
281	  shl out1 = r15, 4	/* out1 = r15 * 16 */
282	  ;;
283	}
284	{ .mmi
285	  stf.spill [r2] = f12, 32
286	  stf.spill [r3] = f13, 32
287	  /* Relocation record is 24 byte. */
	  /* out1 = r15 * 8 + r15 * 16 = r15 * 24.  */
288	  shladd out1 = r15, 3, out1
289	  ;;
290	}
291	{ .mmi
292	  stf.spill [r2] = f14, 32
293	  stf.spill [r3] = f15, 24	/* adjust for the 8 byte sp slot */
294	  mov loc9 = out1	/* save it for _dl_audit_pltexit */
295	  ;;
296	}
297	{ .mmb
298	  st8 [r2] = r18	/* store ar.unat */
299	  st8 [r3] = loc10	/* store sp */
300	  br.call.sptk.many b0 = _dl_profile_fixup
301	}
302	{ .mii
303	  /* Skip the 16byte scratch area, 4 language specific GRs and
304	     8 incoming GRs to restore incoming fp registers.  */
305	  adds r2 = (4*8 + 8*8 + 16), r12
306	  adds r3 = (4*8 + 8*8 + 32), r12
307	  mov b6 = ret0		/* address of the function to enter */
308	  ;;
309	}
310	{ .mmi
311	  ldf.fill f8 = [r2], 32
312	  ldf.fill f9 = [r3], 32
313	  mov gp = ret1		/* gp value returned together with ret0 */
314	  ;;
315	}
316	{ .mmi
317	  ldf.fill f10 = [r2], 32
318	  ldf.fill f11 = [r3], 32
319	  mov r8 = loc2		/* restore struct value register */
320	  ;;
321	}
322	{ .mmi
323	  ldf.fill f12 = [r2], 32
324	  ldf.fill f13 = [r3], 32
325	  mov r9 = loc3		/* restore language specific register */
326	  ;;
327	}
328	{ .mmi
329	  ldf.fill f14 = [r2], 32
330	  ldf.fill f15 = [r3], 32
331	  mov r10 = loc4	/* restore language specific register */
332	  ;;
333	}
334	{ .mii
335	  ld8 r15 = [loc10]	/* load the new frame size */
336	  mov r11 = loc5	/* restore language specific register */
337	  ;;
	  /* p6: frame size is -1, branch straight to the target.
	     p7: otherwise, call the target and then _dl_audit_pltexit.  */
338	  cmp.eq p6, p7 = -1, r15
339	  ;;
340	}
341	{ .mii
	  /* p8: frame size is 0, nothing to copy.  p9: copy needed.  */
342(p7)	  cmp.eq p8, p9 = 0, r15
343(p6)	  mov b0 = loc1
344(p6)	  mov ar.lc = loc6
345	}
346	{ .mib
347	  nop.m 0x0
348(p6)	  mov ar.pfs = loc0
	  /* p7 is false on this path, so the br.ret at .Lresolved is
	     skipped and control falls through to the branch to b6.  */
349(p6)	  br.cond.dptk.many .Lresolved
350	  ;;
351	}
352
353	/* At this point, the stack looks like
354
355	  +psp	free
356	  +16	La_ia64_regs
357	  sp	scratch
358
359	  We need to keep the current stack and call the resolved
360	  function by copying the r15 byte from sp + PLTENTER_FRAME_SIZE
361	  + 16 (scratch area) to sp + 16 (scratch area). Since stack
362	  has to be 16byte aligned, we round r15 up to 16byte.  */
363
364	{ .mbb
365(p9)	  adds r15 = 15, r15
366(p8)	  br.cond.dptk.many .Lno_new_frame
367	  nop.b 0x0
368	  ;;
369	}
370	{ .mmi
371	  and r15 = -16, r15
372	  ;;
373	  /* We don't copy the 16byte scratch area. Prepare r16/r17 as
374	     destination.  */
375	  sub r16 = r12, r15
376	  sub r17 = r12, r15
377	  ;;
378	}
379	{ .mii
380	  adds r16 = 16, r16
381	  adds r17 = 24, r17
382	  sub r12 = r12, r15		/* Adjust stack  */
383	  ;;
384	}
385	{ .mii
386	  nop.m 0x0
	  /* Each loop iteration copies 16 bytes, and ar.lc is loaded
	     with the number of iterations minus one.  */
387	  shr r15 = r15, 4
388	  ;;
389	  adds r15 = -1, r15
390	  ;;
391	}
392	{ .mii
393	  /* Skip the 16byte scratch area. Prepare r2/r3 as source.  */
394	  adds r2 = 16, loc10
395	  adds r3 = 24, loc10
396	  mov ar.lc = r15
397	  ;;
398	}
399.Lcopy:
400	{ .mmi
401	  ld8 r18 = [r2], 16
402	  ld8 r19 = [r3], 16
403	  nop.i 0x0
404	  ;;
405	}
406	{ .mmb
407	  st8 [r16] = r18, 16
408	  st8 [r17] = r19, 16
409	  br.cloop.sptk.few .Lcopy
410	}
411.Lno_new_frame:
	/* Pass the incoming arguments on unchanged.  */
412	{ .mii
413	  mov out0 = in0
414	  mov out1 = in1
415	  mov out2 = in2
416	}
417	{ .mii
418	  mov out3 = in3
419	  mov out4 = in4
420	  mov out5 = in5
421	}
422	{ .mib
423	  mov out6 = in6
424	  mov out7 = in7
425	  /* Call the resolved function  */
426	  br.call.sptk.many b0 = b6
427	}
428	{ .mii
429	  /* Prepare stack for _dl_audit_pltexit. Loc10 has the original
430	     stack pointer.  */
	  /* sp is recomputed from loc10, which discards any outgoing
	     area set up by the copy loop.  La_ia64_retval starts at
	     new sp + 16.  */
431	  adds r12 = -PLTEXIT_FRAME_SIZE, loc10
432	  adds r2 = -(PLTEXIT_FRAME_SIZE - 16), loc10
433	  adds r3 = -(PLTEXIT_FRAME_SIZE - 24), loc10
434	  ;;
435	}
436	{ .mmi
437	  /* Store all possible return values into the buffer.  */
438	  st8 [r2] = r8, 16
439	  st8 [r3] = r9, 16
440	  mov out0 = loc8	/* r16 as saved on entry */
441	  ;;
442	}
443	{ .mmi
444	  st8 [r2] = r10, 16
445	  st8 [r3] = r11, 24	/* adjust for f9 */
446	  mov out1 = loc9	/* r15 * 24 as computed on entry */
447	  ;;
448	}
449	{ .mmi
450	  stf.spill [r2] = f8, 32
451	  stf.spill [r3] = f9, 32
452	  mov out2 = loc7		/* Pointer to La_ia64_regs */
453	  ;;
454	}
455	{ .mmi
456	  stf.spill [r2] = f10, 32
457	  stf.spill [r3] = f11, 32
458	  adds out3 = 16, r12		/* Pointer to La_ia64_retval */
459	  ;;
460	}
461	{ .mmi
462	  stf.spill [r2] = f12, 32
463	  stf.spill [r3] = f13, 32
464	  /* We need to restore gp for _dl_audit_pltexit. */
465	  mov gp = loc11
466	  ;;
467	}
468	{ .mmb
469	  stf.spill [r2] = f14
470	  stf.spill [r3] = f15
471	  br.call.sptk.many b0 = _dl_audit_pltexit
472	}
473	{ .mmi
474	  /* Load all the non-floating and floating return values. Skip
475	     the 16byte scratch area.  */
476	  adds r2 = 16, r12
477	  adds r3 = 24, r12
478	  nop.i 0x0
479	  ;;
480	}
481	{ .mmi
482	  ld8 r8 = [r2], 16
483	  ld8 r9 = [r3], 16
484	  nop.i 0x0
485	  ;;
486	}
487	{ .mmi
488	  ld8 r10 = [r2], 16
489	  ld8 r11 = [r3], 24	/* adjust for f9 */
490	  nop.i 0x0
491	  ;;
492	}
493	{ .mmi
494	  ldf.fill f8 = [r2], 32
495	  ldf.fill f9 = [r3], 32
496	  mov ar.lc = loc6
497	  ;;
498	}
499	{ .mmi
500	  ldf.fill f10 = [r2], 32
501	  ldf.fill f11 = [r3], 32
502	  mov ar.pfs = loc0
503	  ;;
504	}
505	{ .mmi
506	  ldf.fill f12 = [r2], 32
507	  ldf.fill f13 = [r3], 32
508	  mov b0 = loc1
509	  ;;
510	}
511	{ .mmi
512	  ldf.fill f14 = [r2]
513	  ldf.fill f15 = [r3]
514	  /* We know that the previous stack pointer, loc10, isn't 0.
515	     We use it to reload p7.  */
	  /* p7 must be true here so that the br.ret at .Lresolved is
	     taken on this path.  */
516	  cmp.ne p7, p0 = 0, loc10
517	  ;;
518	}
519.Lresolved:
	/* Reached with p7 false from the frame size == -1 path (fall
	   through to the branch to b6) and with p7 true after
	   _dl_audit_pltexit (return to the caller).  */
520	{ .mmb
521	  .restore sp
522	  mov r12 = loc10
523(p7)	  br.ret.sptk.many b0
524	  ;;
525	}
526	/* An alloc is needed for the break system call to work. We
527	   don't care about the old value of the pfs register. After
528	   this alloc, we can't use any rotating registers. Otherwise
529	   assembler won't be happy. This has to be at the end.  */
530	{ .mmb
531	  .prologue
532	  .body
533	  alloc r2 = ar.pfs, 0, 0, 8, 0
534	  br.sptk.many b6
535	  ;;
536	}
537END(_dl_runtime_profile)
538#endif
539