/* memcpy - copy a block from source to destination.  31/64 bit S/390 version.
   Copyright (C) 2012-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */


#include <sysdep.h>
#include "asm-syntax.h"
#include <ifunc-memcpy.h>

/* INPUT PARAMETERS
     %r2 = address of destination memory area
     %r3 = address of source memory area
     %r4 = number of bytes to copy.  */

       .text

/* Width-neutral mnemonics.  The z900/g5 variant and __memcpy_mvcle are
   assembled for both 64 bit (s390x) and 31 bit (s390) and therefore use
   these macros instead of hard-coding the 64 bit or 32 bit form of an
   instruction.  They are removed again with #undef once the last user
   has been assembled.  */
#if defined __s390x__
# define LTGR	ltgr
# define CGHI	cghi
# define LGR	lgr
# define AGHI	aghi
# define BRCTG	brctg
#else
# define LTGR	ltr
# define CGHI	chi
# define LGR	lr
# define AGHI	ahi
# define BRCTG	brct
#endif /* ! defined __s390x__  */

#if HAVE_MEMCPY_Z900_G5
/* mempcpy for z900 (64 bit) / g5 (31 bit).
   In:  %r2 = dest, %r3 = src, %r4 = n
   Out: %r2 = dest + n
   Only prepares %r1 (working dest pointer) and the return value, then
   continues in the copy code of MEMCPY_Z900_G5.  */
ENTRY(MEMPCPY_Z900_G5)
# if defined __s390x__
	.machine "z900"
# else
	.machine "g5"
# endif /* ! defined __s390x__  */
	LGR     %r1,%r2             # Use as dest
	la      %r2,0(%r4,%r2)      # Return dest + n
	j	.L_Z900_G5_start
END(MEMPCPY_Z900_G5)

/* memcpy for z900 (64 bit) / g5 (31 bit).
   In:  %r2 = dest, %r3 = src, %r4 = n
   Out: %r2 = dest (left untouched by the copy code)
   Register usage inside the copy code:
     %r1 = current destination address
     %r3 = current source address
     %r4 = n - 1; its low byte is the length code of the final mvc
     %r5 = (n - 1) / 256 = number of 256 byte blocks moved by the loop,
	   later reused as base register for ex
   Copies with more than 4096 such blocks are handed to __memcpy_mvcle.  */
ENTRY(MEMCPY_Z900_G5)
# if defined __s390x__
	.machine "z900"
# else
	.machine "g5"
# endif /* ! defined __s390x__  */
	LGR     %r1,%r2             # r1: Use as dest ; r2: Return dest
.L_Z900_G5_start:
	LTGR    %r4,%r4
	je      .L_Z900_G5_4        # n == 0: nothing to copy
	AGHI    %r4,-1              # r4 = n - 1
# if defined __s390x__
	srlg	%r5,%r4,8           # r5 = (n - 1) / 256
# else
	lr	%r5,%r4
	srl	%r5,8               # r5 = (n - 1) / 256
# endif /* ! defined __s390x__  */
	LTGR    %r5,%r5
	jne     .L_Z900_G5_13       # At least one full 256 byte block
.L_Z900_G5_3:
	/* Move the remaining ((n - 1) & 0xff) + 1 bytes: execute the mvc
	   template at .L_Z900_G5_15 with the low byte of %r4 as length
	   code.  %r5 is loaded with the base address needed by ex.  */
# if defined __s390x__
	larl    %r5,.L_Z900_G5_15
#  define Z900_G5_EX_D 0
# else
	basr    %r5,0
.L_Z900_G5_14:
#  define Z900_G5_EX_D .L_Z900_G5_15-.L_Z900_G5_14
# endif /* ! defined __s390x__  */
	ex      %r4,Z900_G5_EX_D(%r5)
.L_Z900_G5_4:
	br      %r14
.L_Z900_G5_13:
	CGHI	%r5,4096            # Switch to mvcle for copies >1MB
	jh      __memcpy_mvcle
.L_Z900_G5_12:
	/* Move %r5 blocks of 256 bytes, then finish with the ex above.  */
	mvc     0(256,%r1),0(%r3)
	la      %r1,256(%r1)
	la      %r3,256(%r3)
	BRCTG   %r5,.L_Z900_G5_12
	j       .L_Z900_G5_3
.L_Z900_G5_15:
	/* Template for ex only; never reached by falling through.  */
	mvc     0(1,%r1),0(%r3)
END(MEMCPY_Z900_G5)
#endif /* HAVE_MEMCPY_Z900_G5  */

/* Common tail of the memcpy variants in this file for very large copies.
   This is NOT a standalone function: it must only be reached by a jump
   from one of those variants, after they have already decremented the
   length.  The length is incremented by 1 again here to compensate.
   In:  %r1 = destination address
	%r2 = value to return (dest for memcpy, dest + n for mempcpy)
	%r3 = source address
	%r4 = n - 1
   Out: %r2 = value passed in %r2
   The operands are moved into the register pairs used by the mvcle below:
   %r2/%r3 = destination address/length, %r4/%r5 = source address/length.
   %r0 keeps the return value meanwhile.  */
ENTRY(__memcpy_mvcle)
	LGR     %r0,%r2             # backup return dest [ + n ]
	AGHI    %r4,1               # length + 1
	LGR     %r5,%r4             # source length
	LGR     %r4,%r3             # source address
	LGR     %r2,%r1             # destination address
	LGR     %r3,%r5             # destination length = source length
.L_MVCLE_1:
	mvcle   %r2,%r4,0           # thats it, MVCLE is your friend
	jo      .L_MVCLE_1          # cc 3: not finished yet, continue
	LGR     %r2,%r0             # return destination address
	br      %r14
END(__memcpy_mvcle)

/* The width-neutral mnemonics are not used below this point.  */
#undef LTGR
#undef CGHI
#undef LGR
#undef AGHI
#undef BRCTG

#if HAVE_MEMCPY_Z10
/* mempcpy for z10.
   In:  %r2 = dest, %r3 = src, %r4 = n
   Out: %r2 = dest + n
   Only prepares %r1 (working dest pointer) and the return value, then
   continues in the copy code of MEMCPY_Z10.  */
ENTRY(MEMPCPY_Z10)
	.machine "z10"
	.machinemode "zarch_nohighgprs"
	lgr     %r1,%r2         # Use as dest
	la      %r2,0(%r4,%r2)  # Return dest + n
	j	.L_Z10_start
END(MEMPCPY_Z10)

/* memcpy for z10.
   In:  %r2 = dest, %r3 = src, %r4 = n
   Out: %r2 = dest (left untouched by the copy code)
   Register usage inside the copy code:
     %r1 = current destination address
     %r3 = current source address
     %r4 = n - 1; its low byte is the length code of the final mvc
     %r5 = (n - 1) / 256 = number of 256 byte blocks moved by the loop
   Copies with more than 65535 such blocks are handed to __memcpy_mvcle.  */
ENTRY(MEMCPY_Z10)
	.machine "z10"
	.machinemode "zarch_nohighgprs"
	lgr     %r1,%r2         # r1: Use as dest ; r2: Return dest
.L_Z10_start:
# if !defined __s390x__
	llgfr	%r4,%r4		# 31 bit: zero-extend n for the 64 bit ops
# endif /* !defined __s390x__  */
	cgije   %r4,0,.L_Z10_4	# n == 0: nothing to copy
	aghi    %r4,-1		# r4 = n - 1
	srlg    %r5,%r4,8	# r5 = (n - 1) / 256
	cgijlh  %r5,0,.L_Z10_13	# At least one full 256 byte block
.L_Z10_3:
	/* Move the remaining ((n - 1) & 0xff) + 1 bytes: execute the mvc
	   template at .L_Z10_15 with the low byte of %r4 as length code.  */
	exrl    %r4,.L_Z10_15
.L_Z10_4:
	br      %r14
.L_Z10_13:
	cgfi    %r5,65535	# Switch to mvcle for copies >16MB
	jh      __memcpy_mvcle
.L_Z10_12:
	/* Move %r5 blocks of 256 bytes.  Prefetch 768 bytes ahead: the
	   source for fetch access (1), the destination for store
	   access (2).  */
	pfd     1,768(%r3)
	pfd     2,768(%r1)
	mvc     0(256,%r1),0(%r3)
	la      %r1,256(%r1)
	la      %r3,256(%r3)
	brctg   %r5,.L_Z10_12
	j       .L_Z10_3
.L_Z10_15:
	/* Template for exrl only; never reached by falling through.  */
	mvc     0(1,%r1),0(%r3)
END(MEMCPY_Z10)
#endif /* HAVE_MEMCPY_Z10  */

#if HAVE_MEMCPY_Z196
/* mempcpy for z196.
   In:  %r2 = dest, %r3 = src, %r4 = n
   Out: %r2 = dest + n
   Only prepares %r1 (working dest pointer) and the return value, then
   continues in the copy code of MEMCPY_Z196.  */
ENTRY(MEMPCPY_Z196)
	.machine "z196"
	.machinemode "zarch_nohighgprs"
	lgr     %r1,%r2         # Use as dest
	la      %r2,0(%r4,%r2)  # Return dest + n
	j	.L_Z196_start
END(MEMPCPY_Z196)

/* memcpy for z196.
   In:  %r2 = dest, %r3 = src, %r4 = n
   Out: %r2 = dest (left untouched by the copy code)
   Register usage inside the copy code:
     %r1 = current destination address
     %r3 = current source address
     %r4 = n - 1; its low byte is the length code of the final mvc
     %r5 = (n - 1) / 256 = number of 256 byte blocks moved by the loops
   Up to 255 blocks are moved by a plain mvc loop, more blocks by a loop
   with prefetching, and more than 262144 blocks by __memcpy_mvcle.

   .L_Z196_start2 is also the entry point used by the memmove variants in
   this file for their forward case.  They jump there with %r1 = dest,
   %r2 = return value, %r3 = src and a nonzero, already zero-extended
   length in %r4.  */
ENTRY(MEMCPY_Z196)
	.machine "z196"
	.machinemode "zarch_nohighgprs"
	lgr     %r1,%r2         # r1: Use as dest ; r2: Return dest
.L_Z196_start:
# if !defined __s390x__
	llgfr	%r4,%r4		# 31 bit: zero-extend n for the 64 bit ops
# endif /* !defined __s390x__  */
	ltgr    %r4,%r4
	je      .L_Z196_4	# n == 0: nothing to copy
.L_Z196_start2:
	aghi    %r4,-1		# r4 = n - 1
	risbg	%r5,%r4,8,128+63,56 # r5 = r4 / 256 (risbg sets the cc)
	jne     .L_Z196_5	# At least one full 256 byte block
.L_Z196_3:
	/* Move the remaining ((n - 1) & 0xff) + 1 bytes: execute the mvc
	   template at .L_Z196_14 with the low byte of %r4 as length code.  */
	exrl    %r4,.L_Z196_14
.L_Z196_4:
	br      %r14
.L_Z196_5:
	cgfi	%r5,255		# Switch to loop with pfd for copies >=64kB
	jh	.L_Z196_6
.L_Z196_2:
	/* Plain loop over %r5 blocks of 256 bytes.  The jne tests the cc of
	   the aghi; the la instructions in between do not change it.  */
	mvc     0(256,%r1),0(%r3)
	aghi    %r5,-1
	la      %r1,256(%r1)
	la      %r3,256(%r3)
	jne     .L_Z196_2
	j       .L_Z196_3
.L_Z196_6:
	cgfi    %r5,262144      # Switch to mvcle for copies >64MB
	jh      __memcpy_mvcle
.L_Z196_7:
	/* Same loop, but prefetching 1024 bytes ahead: the source for fetch
	   access (1), the destination for store access (2).  */
	pfd     1,1024(%r3)
	pfd     2,1024(%r1)
	mvc     0(256,%r1),0(%r3)
	aghi    %r5,-1
	la      %r1,256(%r1)
	la      %r3,256(%r3)
	jne     .L_Z196_7
	j       .L_Z196_3
.L_Z196_14:
	/* Template for exrl only; never reached by falling through.  */
	mvc     0(1,%r1),0(%r3)
END(MEMCPY_Z196)
#endif /* HAVE_MEMCPY_Z196  */

#if HAVE_MEMMOVE_Z13
/* memmove for z13 (uses vector instructions).
   In:  %r2 = dest, %r3 = src, %r4 = len
   Out: %r2 = dest (never modified here)
   Strategy:
     len <= 16:         one vll/vstl pair; the load completes before the
			store, so overlap does not matter.
     dst - src >= len   (unsigned): a forward copy is safe, continue in
			the z196 memcpy at .L_Z196_start2.
     otherwise:         copy backwards, starting at the end of the
			buffers, in 64 byte and 16 byte vector blocks and
			finish with the vll/vstl pair for the first
			(up to 15) bytes.
   Register usage:
     %r0 = dst - src, later number of 64 byte blocks
     %r1 = dest for the memcpy code
     %r4 = len, in the backward case offset of the current block
     %r5 = highest index (length - 1) for vll/vstl, or number of
	   16 byte blocks  */
ENTRY(MEMMOVE_Z13)
	.machine "z13"
	.machinemode "zarch_nohighgprs"
# if !defined __s390x__
	/* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
	llgfr	%r4,%r4
	llgfr	%r3,%r3
	llgfr	%r2,%r2
# endif /* !defined __s390x__ */
	sgrk	%r0,%r2,%r3		/* r0 = dst - src  */
	clgijh	%r4,16,.L_MEMMOVE_Z13_LARGE
	aghik	%r5,%r4,-1		/* r5 = len - 1; cc is used below.  */
.L_MEMMOVE_Z13_SMALL:
	/* Reached with the cc of an aghik that computed r5 = length - 1.  */
	jl	.L_MEMMOVE_Z13_END	/* Jump away if len was zero.  */
	/* Store up to 16 bytes with vll/vstl which needs the index
	   instead of lengths.  */
	vll	%v16,%r5,0(%r3)
	vstl	%v16,%r5,0(%r2)
.L_MEMMOVE_Z13_END:
	br      %r14
.L_MEMMOVE_Z13_LARGE:
	lgr     %r1,%r2			/* For memcpy: r1: Use as dest ;
					   r2: Return dest  */
	/* The unsigned comparison (dst - src >= len) determines if we can
	   execute the forward case with memcpy.  */
#if ! HAVE_MEMCPY_Z196
# error The z13 variant of memmove needs the z196 variant of memcpy!
#endif
	clgrjhe %r0,%r4,.L_Z196_start2
	/* Backward case.  */
	risbgn	%r5,%r4,4,128+63,60	/* r5 = r4 / 16  */
	aghi	%r4,-16			/* r4 = offset of last 16byte block  */
	clgijhe	%r5,8,.L_MEMMOVE_Z13_LARGE_64B
.L_MEMMOVE_Z13_LARGE_16B_LOOP:
	/* Store at least 16 bytes with vl/vst. The number of 16byte blocks
	   is stored in r5.  */
	vl	%v16,0(%r4,%r3)
	vst	%v16,0(%r4,%r2)
	aghi	%r4,-16
	brctg	%r5,.L_MEMMOVE_Z13_LARGE_16B_LOOP
	/* r4 is now (remaining bytes - 16); r5 = remaining bytes - 1.  */
	aghik	%r5,%r4,15
	j	.L_MEMMOVE_Z13_SMALL
.L_MEMMOVE_Z13_LARGE_64B:
	/* Store at least 128 bytes with 4x vl/vst per iteration.  The number
	   of 64byte blocks will be stored in r0.  */
	aghi	%r4,-48			/* r4 = offset of last 64byte block  */
	srlg	%r0,%r5,2		/* r0 = r5 / 4
					   => Number of 64byte blocks.  */
.L_MEMMOVE_Z13_LARGE_64B_LOOP:
	/* All four loads are done before the first store.  */
	vl	%v20,48(%r4,%r3)
	vl	%v19,32(%r4,%r3)
	vl	%v18,16(%r4,%r3)
	vl	%v17,0(%r4,%r3)
	vst	%v20,48(%r4,%r2)
	vst	%v19,32(%r4,%r2)
	vst	%v18,16(%r4,%r2)
	vst	%v17,0(%r4,%r2)
	aghi	%r4,-64
	brctg	%r0,.L_MEMMOVE_Z13_LARGE_64B_LOOP
	aghi	%r4,48			/* Back to 16byte block offsets.  */
	/* Recalculate the number of 16byte blocks.  */
	risbg	%r5,%r5,62,128+63,0	/* r5 = r5 & 3
					   => Remaining 16byte blocks.  */
	jne	.L_MEMMOVE_Z13_LARGE_16B_LOOP
	aghik	%r5,%r4,15
	j	.L_MEMMOVE_Z13_SMALL
END(MEMMOVE_Z13)
#endif /* HAVE_MEMMOVE_Z13  */

#if HAVE_MEMMOVE_ARCH13
/* memmove for arch13 (uses mvcrl for the backward case).
   In:  %r2 = dest, %r3 = src, %r4 = len
   Out: %r2 = dest (never modified here)
   Strategy:
     len <= 16:         one vll/vstl pair.
     dst - src >= len   (unsigned): a forward copy is safe, continue in
			the z196 memcpy at .L_Z196_start2.
     otherwise:         copy backwards with mvcrl, which moves (r0 + 1)
			bytes from right to left.
   Register usage:
     %r0 = highest index (length - 1) for vll/vstl and mvcrl
     %r1 = dest for the memcpy code / current destination address
     %r3 = current source address
     %r5 = dst - src, later number of 256 byte blocks  */
ENTRY(MEMMOVE_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
# if ! defined __s390x__
	/* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
	llgfr	%r4,%r4
	llgfr	%r3,%r3
	llgfr	%r2,%r2
# endif /* ! defined __s390x__ */
	sgrk	%r5,%r2,%r3	/* r5 = dst - src  */
	aghik	%r0,%r4,-1	/* Both vstl and mvcrl needs highest index.  */
	clgijh	%r4,16,.L_MEMMOVE_ARCH13_LARGE
.L_MEMMOVE_ARCH13_SMALL:
	jl	.L_MEMMOVE_ARCH13_END /* Return if len was zero (cc of aghik).  */
	/* Store up to 16 bytes with vll/vstl (needs highest index).  */
	vll	%v16,%r0,0(%r3)
	vstl	%v16,%r0,0(%r2)
.L_MEMMOVE_ARCH13_END:
	br      %r14
.L_MEMMOVE_ARCH13_LARGE:
	lgr     %r1,%r2	/* For memcpy: r1: Use as dest ; r2: Return dest  */
	/* The unsigned comparison (dst - src >= len) determines if we can
	   execute the forward case with memcpy.  */
#if ! HAVE_MEMCPY_Z196
# error The arch13 variant of memmove needs the z196 variant of memcpy!
#endif
	/* Forward case: continue in the z196 memcpy.  */
	clgrjhe %r5,%r4,.L_Z196_start2
	/* Backward case.  */
	clgijh	%r0,255,.L_MEMMOVE_ARCH13_LARGER_256B
	/* Move up to 256bytes with mvcrl (move right to left).  */
	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
	br      %r14
.L_MEMMOVE_ARCH13_LARGER_256B:
	/* First move the "remaining" block of up to 256 bytes at the end of
	   src/dst buffers.  Then move blocks of 256bytes in a loop starting
	   with the block at the end.
	   (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers
	   passed to mvcrl instructions are aligned, too)  */
	risbgn	%r5,%r0,8,128+63,56	/* r5 = r0 / 256  */
	risbgn	%r0,%r0,56,128+63,0	/* r0 = r0 & 0xFF  */
	slgr	%r4,%r0			/* r4 = len - r0  */
	/* Point r1/r3 to the start of the last (r0 + 1) bytes.  */
	lay	%r1,-1(%r4,%r1)
	lay	%r3,-1(%r4,%r3)
	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
	lghi	%r0,255		/* Always copy 256 bytes in the loop below!  */
.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
	aghi	%r1,-256
	aghi	%r3,-256
	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
	brctg	%r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP
	br      %r14
END(MEMMOVE_ARCH13)
#endif /* HAVE_MEMMOVE_ARCH13  */

#if ! HAVE_MEMCPY_IFUNC
/* If we don't use ifunc, define an alias for mem[p]cpy here.
   Otherwise see sysdeps/s390/mem[p]cpy.c.  */
strong_alias (MEMCPY_DEFAULT, memcpy)
strong_alias (MEMPCPY_DEFAULT, __mempcpy)
weak_alias (__mempcpy, mempcpy)
#endif

#if ! HAVE_MEMMOVE_IFUNC
/* If we don't use ifunc, define an alias for memmove here.
   Otherwise see sysdeps/s390/memmove.c.  */
# if ! HAVE_MEMMOVE_C
/* If the c variant is needed, then sysdeps/s390/memmove-c.c
   defines memmove.
   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it.  */
strong_alias (MEMMOVE_DEFAULT, memmove)
# endif
#endif

#if defined SHARED && IS_IN (libc)
/* Defines the internal symbols.
   Compare to libc_hidden_[builtin_]def (mem[p]cpy) in string/mem[p]cpy.c.  */
strong_alias (MEMCPY_DEFAULT, __GI_memcpy)
strong_alias (MEMPCPY_DEFAULT, __GI_mempcpy)
strong_alias (MEMPCPY_DEFAULT, __GI___mempcpy)
# if ! HAVE_MEMMOVE_C
/* If the c variant is needed, then sysdeps/s390/memmove-c.c
   defines the internal symbol.
   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it.  */
strong_alias (MEMMOVE_DEFAULT, __GI_memmove)
# endif
#endif
