1/* PLT trampolines.  PPC64 version.
2   Copyright (C) 2005-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20#include <rtld-global-offsets.h>
21
22
23	.section ".text"
/* On entry r0 contains the index of the PLT entry we need to fixup
   and r11 contains the link_map (from PLT0+16).  The link_map becomes
   parm1 (r3) and the index (r0) needs to be converted to an offset
   (index * 24) in parm2 (r4).  */
28
/* The frame is FRAME_MIN_SIZE bytes followed by 64 bytes of local
   space: one doubleword for each of r3 thru r10.  */
#define FRAME_SIZE (FRAME_MIN_SIZE+64)
/* The parameter registers r3 thru r10 are spilled into local variable
   space instead of the parameter save area, because gcc as of 2010/05
   doesn't allocate a proper stack frame for a function that makes no
   calls except for __tls_get_addr, and we might be here resolving that
   very __tls_get_addr call.  */
#define INT_PARMS FRAME_MIN_SIZE
ENTRY (_dl_runtime_resolve, 4)
/* Allocate the frame.  */
	stdu	r1,-FRAME_SIZE(r1)
	cfi_adjust_cfa_offset (FRAME_SIZE)
/* Spill the incoming parameter registers.  */
	std	r3,INT_PARMS+0(r1)
	std	r4,INT_PARMS+8(r1)
	std	r5,INT_PARMS+16(r1)
	std	r6,INT_PARMS+24(r1)
	std	r7,INT_PARMS+32(r1)
	std	r8,INT_PARMS+40(r1)
	std	r9,INT_PARMS+48(r1)
	std	r10,INT_PARMS+56(r1)
/* Build the arguments: r3 = link_map, r4 = index * 24.  */
	mr	r3,r11
	sldi	r4,r0,1		/* index * 2 */
	add	r4,r4,r0	/* index * 3 */
	sldi	r4,r4,3		/* index * 24 */
/* Store the LR in the LR save area of the frame that was current on
   entry (it now sits FRAME_SIZE bytes above r1).  */
	mflr	r0
	std	r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
	cfi_offset (lr, FRAME_LR_SAVE)
	bl	JUMPTARGET(_dl_fixup)
#ifndef SHARED
	nop
#endif
/* Reload the LR that was live on entry.  */
	ld	r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
	mtlr	r0
/* Put the parameter registers back; r3 still holds the value returned
   by fixup, so it goes last.  */
	ld	r4,INT_PARMS+8(r1)
	ld	r5,INT_PARMS+16(r1)
	ld	r6,INT_PARMS+24(r1)
	ld	r7,INT_PARMS+32(r1)
	ld	r8,INT_PARMS+40(r1)
	ld	r9,INT_PARMS+48(r1)
	ld	r10,INT_PARMS+56(r1)
/* Prepare for calling the function returned by fixup, then recover the
   original first parameter.  */
	PPC64_LOAD_FUNCPTR r3
	ld	r3,INT_PARMS+0(r1)
#if _CALL_ELF == 2
/* Restore the caller's TOC in case we jump to a local entry point.  */
	ld	r2,FRAME_SIZE+FRAME_TOC_SAVE(r1)
#endif
/* Release the frame and jump to the target through CTR.  */
	addi	r1,r1,FRAME_SIZE
	bctr
END(_dl_runtime_resolve)
#undef FRAME_SIZE
#undef INT_PARMS
82
83	/* Stack layout:		ELFv2 ABI.
84					+752   previous backchain
85					+744   spill_r31
86					+736   spill_r30
87					+720   v8
88					+704   v7
89					+688   v6
90					+672   v5
91					+656   v4
92					+640   v3
93					+624   v2
94					+608   v1
95					+600   fp10
96	  ELFv1 ABI			+592   fp9
97	  +592   previous backchain	+584   fp8
98	  +584   spill_r31		+576   fp7
99	  +576   spill_r30		+568   fp6
100	  +560   v1			+560   fp5
101	  +552   fp4			+552   fp4
102	  +544   fp3			+544   fp3
103	  +536   fp2			+536   fp2
104	  +528   fp1			+528   fp1
105	  +520   r4			+520   r4
106	  +512   r3			+512   r3
107	   return values
108          +504   free
109	  +496   stackframe
110	  +488   lr
111	  +480   r1
112	  +464   v13
113	  +448   v12
114	  +432   v11
115	  +416   v10
116	  +400   v9
117	  +384   v8
118	  +368   v7
119	  +352   v6
120	  +336   v5
121	  +320   v4
122	  +304   v3
123	  +288   v2
124	 * VMX Parms in V2-V13, V0-V1 are scratch
125	  +284   vrsave
126	  +280   free
127	  +272   fp13
128	  +264   fp12
129	  +256   fp11
130	  +248   fp10
131	  +240   fp9
132	  +232   fp8
133	  +224   fp7
134	  +216   fp6
135	  +208   fp5
136	  +200   fp4
137	  +192   fp3
138	  +184   fp2
139	  +176   fp1
140	 * FP Parms in FP1-FP13, FP0 is a scratch register
141	  +168   r10
142	  +160   r9
143	  +152   r8
144	  +144   r7
145	  +136   r6
146	  +128   r5
147	  +120   r4
148	  +112   r3
149	 * Integer parms in R3-R10, R0 is scratch, R1 SP, R2 is TOC
150	  +104   parm8
151	  +96    parm7
152	  +88    parm6
153	  +80    parm5
154	  +72    parm4
155	  +64    parm3
156	  +56    parm2
157	  +48    parm1
158	 * Parameter save area
159	 * (v1 ABI: Allocated by the call, at least 8 double words)
160	  +40    v1 ABI: TOC save area
161	  +32    v1 ABI: Reserved for linker
162	  +24    v1 ABI: Reserved for compiler / v2 ABI: TOC save area
163	  +16    LR save area
164	  +8     CR save area
165	r1+0     stack back chain
166	*/
/* Offsets from r1 of the save areas inside the _dl_profile_resolve
   frame, listed from the lowest address upwards.  The values follow
   the stack layout comment above.  */
#define INT_PARMS 112		/* r3 thru r10 on entry.  */
#define FPR_PARMS 176		/* fp1 thru fp13 on entry.  */
#define VR_VRSAVE 284		/* 32-bit VRSAVE word.  */
#define VR_PARMS 288		/* v2 thru v13 on entry.  */
#define CALLING_SP 480		/* r1 as it was on entry.  */
#define CALLING_LR 488		/* LR as it was on entry.  */
#define STACK_FRAME 496		/* Frame size word filled in by fixup.  */
#define INT_RTN 512		/* r3, r4 returned by the target.  */
#define FPR_RTN 528		/* FP registers returned by the target.  */
/* The vector return area and the total frame size depend on the ABI.  */
#if _CALL_ELF != 2
# define VR_RTN 560
# define FRAME_SIZE 592
#else
# define VR_RTN 608
# define FRAME_SIZE 752
#endif
/* TOC entry used to reach the hwcap word: the _rtld_local_ro structure
   when SHARED is defined, the _dl_hwcap variable itself otherwise.  */
	.section	".toc","aw"
.LC__dl_hwcap:
#ifndef SHARED
	.tc _dl_hwcap[TC],_dl_hwcap
#else
	.tc _rtld_local_ro[TC],_rtld_local_ro
#endif
	.section ".text"

/* The profiling trampoline below uses AltiVec instructions.  */
	.machine	"altivec"
193/* On entry r0 contains the index of the PLT entry we need to fixup
194   and r11 contains the link_map (from PLT0+16).  The link_map becomes
195   parm1 (r3) and the index (r0) needs to be converted to an offset
196   (index * 24) in parm2 (r4).  */
#ifndef PROF
/* Profiling/auditing variant of the lazy PLT resolver.

   All parameter registers (r3-r10, fp1-fp13 and, when AltiVec is
   available, v2-v13) are saved in the frame, _dl_profile_fixup is
   called, and the registers are reloaded.  If the frame size word that
   _dl_profile_fixup was given (STACK_FRAME) is not greater than zero we
   simply jump to the resolved function.  Otherwise the function is
   called from here, its return values are stored in the frame,
   _dl_audit_pltexit is called, and the return values are reloaded
   before returning to the original caller.

   r30/r31 hold reloc offset / link_map across the calls; their entry
   values are spilled just below the incoming stack pointer.  */
ENTRY (_dl_profile_resolve, 4)
/* Spill r30, r31 to preserve the link_map* and reloc_addr, in case we
   need to call _dl_audit_pltexit.  */
	std	r31,-8(r1)
	std	r30,-16(r1)
/* We need to save the registers used to pass parameters, ie. r3 thru
   r10; the registers are saved in a stack frame.  */
	stdu	r1,-FRAME_SIZE(r1)
	cfi_adjust_cfa_offset (FRAME_SIZE)
	cfi_offset(r31,-8)
	cfi_offset(r30,-16)
	std	r3,INT_PARMS+0(r1)
	mr	r3,r11		/* parm1 = link_map */
	std	r4,INT_PARMS+8(r1)
	sldi	r4,r0,1		/* index * 2 */
	std	r5,INT_PARMS+16(r1)
	add	r4,r4,r0	/* index * 3 */
	std	r6,INT_PARMS+24(r1)
	sldi	r4,r4,3		/* index * 24  == PLT offset */
	mflr	r5		/* r5 = LR on entry; still live at the fixup call */
	std	r7,INT_PARMS+32(r1)
	std	r8,INT_PARMS+40(r1)
/* Store the LR in the LR Save area.  */
	la	r8,FRAME_SIZE(r1)	/* r8 = stack pointer on entry */
	std	r5,FRAME_SIZE+FRAME_LR_SAVE(r1)
	cfi_offset (lr, FRAME_LR_SAVE)
	std	r5,CALLING_LR(r1)
	std	r9,INT_PARMS+48(r1)
	std	r10,INT_PARMS+56(r1)
	std	r8,CALLING_SP(r1)
/* Fetch the hwcap word to find out whether AltiVec is available.  */
	ld	r12,.LC__dl_hwcap@toc(r2)
#ifdef SHARED
	/* Load _rtld_local_ro._dl_hwcap.  */
	ld	r12,RTLD_GLOBAL_RO_DL_HWCAP_OFFSET(r12)
#else
	ld	r12,0(r12) /* Load extern _dl_hwcap.  */
#endif
/* r0 becomes zero when the AltiVec bit is clear; in that case the
   vector save is skipped and that zero is what gets stored as the
   saved VRSAVE word at L(saveFP).  */
	andis.  r0,r12,(PPC_FEATURE_HAS_ALTIVEC >> 16)
	beq	L(saveFP)
/* Save v2 thru v13.  r10/r9 address the even/odd numbered slots and
   r11/r12 are running byte offsets.  */
	la	r10,(VR_PARMS+0)(r1)
	la	r9,(VR_PARMS+16)(r1)
	li	r11,32
	li	r12,64
	stvx	v2,0,r10
	stvx	v3,0,r9

	stvx	v4,r11,r10
	stvx	v5,r11,r9
	addi	r11,r11,64

	stvx	v6,r12,r10
	stvx	v7,r12,r9
	addi	r12,r12,64

	stvx	v8,r11,r10
	stvx	v9,r11,r9
	addi	r11,r11,64

	stvx	v10,r12,r10
	stvx	v11,r12,r9
	mfspr	r0,VRSAVE	/* r0 = VRSAVE, stored at L(saveFP) */

	stvx	v12,r11,r10
	stvx	v13,r11,r9
L(saveFP):
	stw	r0,VR_VRSAVE(r1)
/* Save floating registers.  */
	stfd	fp1,FPR_PARMS+0(r1)
	stfd	fp2,FPR_PARMS+8(r1)
	stfd	fp3,FPR_PARMS+16(r1)
	stfd	fp4,FPR_PARMS+24(r1)
	stfd	fp5,FPR_PARMS+32(r1)
	stfd	fp6,FPR_PARMS+40(r1)
	stfd	fp7,FPR_PARMS+48(r1)
	stfd	fp8,FPR_PARMS+56(r1)
	stfd	fp9,FPR_PARMS+64(r1)
	stfd	fp10,FPR_PARMS+72(r1)
	stfd	fp11,FPR_PARMS+80(r1)
	li	r0,-1		/* initial value for the frame size word */
	stfd	fp12,FPR_PARMS+88(r1)
	stfd	fp13,FPR_PARMS+96(r1)
/* Load the extra parameters: r6 = address of the saved integer
   parameters, r7 = address of the frame size word.  */
	addi	r6,r1,INT_PARMS
	addi	r7,r1,STACK_FRAME
/* Save  link_map* and reloc_addr parms for later.  */
	mr	r31,r3
	mr	r30,r4
	std	r0,0(r7)	/* frame size word = -1 */
	bl	JUMPTARGET(_dl_profile_fixup)
#ifndef SHARED
	nop
#endif
/* Test *framesizep > 0 to see if need to do pltexit processing.  */
	ld	r0,STACK_FRAME(r1)
/* Put the registers back.  */
	lwz	r12,VR_VRSAVE(r1)
	cmpdi	cr1,r0,0	/* cr1: frame size word against zero */
	cmpdi	cr0,r12,0	/* cr0: saved VRSAVE against zero */
	bgt	cr1,L(do_pltexit)
	la	r10,(VR_PARMS+0)(r1)
	la	r9,(VR_PARMS+16)(r1)
/* VRSAVE must be non-zero if VMX is present and VRs are in use. */
	beq	L(restoreFXR)
	li	r11,32
	li	r12,64
	lvx	v2,0,r10
	lvx	v3,0,r9

	lvx	v4,r11,r10
	lvx	v5,r11,r9
	addi	r11,r11,64

	lvx	v6,r12,r10
	lvx	v7,r12,r9
	addi	r12,r12,64

	lvx	v8,r11,r10
	lvx	v9,r11,r9
	addi	r11,r11,64

	lvx	v10,r12,r10
	lvx	v11,r12,r9

	lvx	v12,r11,r10
	lvx	v13,r11,r9
L(restoreFXR):
	ld	r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
	ld	r10,INT_PARMS+56(r1)
	ld	r9,INT_PARMS+48(r1)
	ld	r8,INT_PARMS+40(r1)
	ld	r7,INT_PARMS+32(r1)
	mtlr	r0
	ld	r6,INT_PARMS+24(r1)
	ld	r5,INT_PARMS+16(r1)
	ld	r4,INT_PARMS+8(r1)
/* Prepare for calling the function returned by fixup.  */
	PPC64_LOAD_FUNCPTR r3
	ld	r3,INT_PARMS+0(r1)
#if _CALL_ELF == 2
/* Restore the caller's TOC in case we jump to a local entry point.  */
	ld	r2,FRAME_SIZE+FRAME_TOC_SAVE(r1)
#endif
/* Load the floating point registers.  */
	lfd	fp1,FPR_PARMS+0(r1)
	lfd	fp2,FPR_PARMS+8(r1)
	lfd	fp3,FPR_PARMS+16(r1)
	lfd	fp4,FPR_PARMS+24(r1)
	lfd	fp5,FPR_PARMS+32(r1)
	lfd	fp6,FPR_PARMS+40(r1)
	lfd	fp7,FPR_PARMS+48(r1)
	lfd	fp8,FPR_PARMS+56(r1)
	lfd	fp9,FPR_PARMS+64(r1)
	lfd	fp10,FPR_PARMS+72(r1)
	lfd	fp11,FPR_PARMS+80(r1)
	lfd	fp12,FPR_PARMS+88(r1)
	lfd	fp13,FPR_PARMS+96(r1)
/* Unwind the stack frame, and jump.  */
	ld	r31,FRAME_SIZE-8(r1)
	ld	r30,FRAME_SIZE-16(r1)
	addi	r1,r1,FRAME_SIZE
	bctr

/* pltexit processing is wanted: reload the parameters as above, but
   call the target from this frame so that its return values can be
   handed to _dl_audit_pltexit.  cr0 still holds the VRSAVE test.  */
L(do_pltexit):
	la	r10,(VR_PARMS+0)(r1)
	la	r9,(VR_PARMS+16)(r1)
	beq	L(restoreFXR2)
	li	r11,32
	li	r12,64
	lvx	v2,0,r10
	lvx	v3,0,r9

	lvx	v4,r11,r10
	lvx	v5,r11,r9
	addi	r11,r11,64

	lvx	v6,r12,r10
	lvx	v7,r12,r9
	addi	r12,r12,64

	lvx	v8,r11,r10
	lvx	v9,r11,r9
	addi	r11,r11,64

	lvx	v10,r12,r10
	lvx	v11,r12,r9

	lvx	v12,r11,r10
	lvx	v13,r11,r9
L(restoreFXR2):
	ld	r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
	ld	r10,INT_PARMS+56(r1)
	ld	r9,INT_PARMS+48(r1)
	ld	r8,INT_PARMS+40(r1)
	ld	r7,INT_PARMS+32(r1)
	mtlr	r0
	ld	r6,INT_PARMS+24(r1)
	ld	r5,INT_PARMS+16(r1)
	ld	r4,INT_PARMS+8(r1)
/* Prepare for calling the function returned by fixup.  Our current r2
   is kept in this frame's TOC save slot so that it can be reloaded
   once the target returns.  */
	std	r2,FRAME_TOC_SAVE(r1)
	PPC64_LOAD_FUNCPTR r3
	ld	r3,INT_PARMS+0(r1)
#if _CALL_ELF == 2
/* Restore the caller's TOC in case we call a local entry point, exactly
   as is done on the two paths that jump to the target with bctr.  */
	ld	r2,FRAME_SIZE+FRAME_TOC_SAVE(r1)
#endif
/* Load the floating point registers.  */
	lfd	fp1,FPR_PARMS+0(r1)
	lfd	fp2,FPR_PARMS+8(r1)
	lfd	fp3,FPR_PARMS+16(r1)
	lfd	fp4,FPR_PARMS+24(r1)
	lfd	fp5,FPR_PARMS+32(r1)
	lfd	fp6,FPR_PARMS+40(r1)
	lfd	fp7,FPR_PARMS+48(r1)
	lfd	fp8,FPR_PARMS+56(r1)
	lfd	fp9,FPR_PARMS+64(r1)
	lfd	fp10,FPR_PARMS+72(r1)
	lfd	fp11,FPR_PARMS+80(r1)
	lfd	fp12,FPR_PARMS+88(r1)
	lfd	fp13,FPR_PARMS+96(r1)
/* Call the target function.  */
	bctrl
/* Back from the target: reload the r2 value saved before the call.  */
	ld	r2,FRAME_TOC_SAVE(r1)
	lwz	r12,VR_VRSAVE(r1)
/* But return here and store the return values.  */
	std	r3,INT_RTN(r1)
	std	r4,INT_RTN+8(r1)
	stfd	fp1,FPR_RTN+0(r1)
	stfd	fp2,FPR_RTN+8(r1)
	cmpdi	cr0,r12,0	/* saved VRSAVE zero => skip the vector stores */
	la	r10,VR_RTN(r1)
	stfd	fp3,FPR_RTN+16(r1)
	stfd	fp4,FPR_RTN+24(r1)
#if _CALL_ELF == 2
	la	r12,VR_RTN+16(r1)
	stfd	fp5,FPR_RTN+32(r1)
	stfd	fp6,FPR_RTN+40(r1)
	li	r5,32
	li	r6,64
	stfd	fp7,FPR_RTN+48(r1)
	stfd	fp8,FPR_RTN+56(r1)
	stfd	fp9,FPR_RTN+64(r1)
	stfd	fp10,FPR_RTN+72(r1)
#endif
/* First two arguments for _dl_audit_pltexit: the link_map and reloc
   offset kept in r31/r30 since before the fixup call.  */
	mr	r3,r31
	mr	r4,r30
	beq	L(callpltexit)
	stvx	v2,0,r10
#if _CALL_ELF == 2
	stvx	v3,0,r12
	stvx	v4,r5,r10
	stvx	v5,r5,r12
	addi	r5,r5,64
	stvx	v6,r6,r10
	stvx	v7,r6,r12
	stvx	v8,r5,r10
	stvx	v9,r5,r12
#endif
L(callpltexit):
/* r5 = address of the saved input registers, r6 = address of the saved
   return values.  */
	addi	r5,r1,INT_PARMS
	addi	r6,r1,INT_RTN
	bl	JUMPTARGET(_dl_audit_pltexit)
#ifndef SHARED
	nop
#endif
/* Restore the return values from target function.  */
	lwz	r12,VR_VRSAVE(r1)
	ld	r3,INT_RTN(r1)
	ld	r4,INT_RTN+8(r1)
	lfd	fp1,FPR_RTN+0(r1)
	lfd	fp2,FPR_RTN+8(r1)
	cmpdi	cr0,r12,0	/* saved VRSAVE zero => skip the vector loads */
	la	r11,VR_RTN(r1)
	lfd	fp3,FPR_RTN+16(r1)
	lfd	fp4,FPR_RTN+24(r1)
#if _CALL_ELF == 2
	la	r12,VR_RTN+16(r1)
	lfd	fp5,FPR_RTN+32(r1)
	lfd	fp6,FPR_RTN+40(r1)
/* r30/r31 serve as index scratch here; their entry values are reloaded
   at L(pltexitreturn).  */
	li	r30,32
	li	r31,64
	lfd	fp7,FPR_RTN+48(r1)
	lfd	fp8,FPR_RTN+56(r1)
	lfd	fp9,FPR_RTN+64(r1)
	lfd	fp10,FPR_RTN+72(r1)
#endif
	beq	L(pltexitreturn)
	lvx	v2,0,r11
#if _CALL_ELF == 2
	lvx	v3,0,r12
	lvx	v4,r30,r11
	lvx	v5,r30,r12
	addi	r30,r30,64
	lvx	v6,r31,r11
	lvx	v7,r31,r12
	lvx	v8,r30,r11
	lvx	v9,r30,r12
#endif
L(pltexitreturn):
/* Reload LR and the spilled r30/r31, pop the frame through the back
   chain and return to the original caller.  */
	ld	r0,FRAME_SIZE+FRAME_LR_SAVE(r1)
	ld	r31,FRAME_SIZE-8(r1)
	ld	r30,FRAME_SIZE-16(r1)
	mtlr	r0
	ld	r1,0(r1)
	blr
END(_dl_profile_resolve)
#endif
501