/* memcpy - copy a block from source to destination.  31/64 bit S/390 version.
   Copyright (C) 2012-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */


#include <sysdep.h>
#include "asm-syntax.h"
#include <ifunc-memcpy.h>

/* INPUT PARAMETERS
     %r2 = address of destination memory area
     %r3 = address of source memory area
     %r4 = number of bytes to copy.  */

/* Register usage shared by all mem[p]cpy variants in this file:
     %r1 = working destination pointer (advanced while copying)
     %r2 = return value (dest for memcpy, dest + n for mempcpy)
     %r3 = working source pointer
     %r4 = n - 1 once the n == 0 check has passed.  Only its low byte is
	   consumed by the executed mvc template, which therefore copies
	   ((n - 1) & 0xff) + 1 bytes.
     %r5 = number of full 256 byte blocks, i.e. (n - 1) / 256.
   The memmove variants further down jump into the z196 memcpy body
   (.L_Z196_start2) with exactly this register setup.  */

       .text

/* The z900/g5 variant and __memcpy_mvcle are built for 31 bit and 64 bit.
   These macros select the instruction matching the register width.  */
#if defined __s390x__
# define LTGR	ltgr
# define CGHI	cghi
# define LGR	lgr
# define AGHI	aghi
# define BRCTG	brctg
#else
# define LTGR	ltr
# define CGHI	chi
# define LGR	lr
# define AGHI	ahi
# define BRCTG	brct
#endif /* ! defined __s390x__  */

#if HAVE_MEMCPY_Z900_G5
/* mempcpy for z900 (64 bit) / g5 (31 bit).
   Sets the return value to dest + n and then shares the copy code of
   MEMCPY_Z900_G5 below.  */
ENTRY(MEMPCPY_Z900_G5)
# if defined __s390x__
	.machine "z900"
# else
	.machine "g5"
# endif /* ! defined __s390x__  */
	LGR	%r1,%r2			# Use as dest
	la	%r2,0(%r4,%r2)		# Return dest + n
	j	.L_Z900_G5_start
END(MEMPCPY_Z900_G5)

/* memcpy for z900 (64 bit) / g5 (31 bit).
   Copies full 256 byte blocks with mvc in a loop and the remainder with an
   executed mvc.  If more than 4096 blocks are needed, the copy is handed
   over to __memcpy_mvcle.  Returns dest in %r2.  */
ENTRY(MEMCPY_Z900_G5)
# if defined __s390x__
	.machine "z900"
# else
	.machine "g5"
# endif /* ! defined __s390x__  */
	LGR	%r1,%r2			# r1: Use as dest ; r2: Return dest
.L_Z900_G5_start:
	LTGR	%r4,%r4
	je	.L_Z900_G5_4		# Return immediately for n == 0
	AGHI	%r4,-1			# r4 = n - 1
# if defined __s390x__
	srlg	%r5,%r4,8		# r5 = (n - 1) / 256
# else
	lr	%r5,%r4
	srl	%r5,8			# r5 = (n - 1) / 256
# endif /* ! defined __s390x__  */
	LTGR	%r5,%r5
	jne	.L_Z900_G5_13		# At least one full 256 byte block
.L_Z900_G5_3:
	/* Copy the remainder by executing the mvc template at
	   .L_Z900_G5_15 with the low byte of r4 as length code.
	   64 bit: larl loads the template address, displacement is 0.
	   31 bit: basr loads the address of .L_Z900_G5_14, so the
	   displacement is the distance from there to the template.  */
# if defined __s390x__
	larl	%r5,.L_Z900_G5_15
#  define Z900_G5_EX_D 0
# else
	basr	%r5,0
.L_Z900_G5_14:
#  define Z900_G5_EX_D .L_Z900_G5_15-.L_Z900_G5_14
# endif /* ! defined __s390x__  */
	ex	%r4,Z900_G5_EX_D(%r5)
.L_Z900_G5_4:
	br	%r14
.L_Z900_G5_13:
	CGHI	%r5,4096		# Switch to mvcle for copies >1MB
	jh	__memcpy_mvcle
.L_Z900_G5_12:
	/* Copy r5 blocks of 256 bytes each.  */
	mvc	0(256,%r1),0(%r3)
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	BRCTG	%r5,.L_Z900_G5_12
	j	.L_Z900_G5_3		# Finish with the remainder
.L_Z900_G5_15:
	/* Template for ex above; never reached by fall-through.  */
	mvc	0(1,%r1),0(%r3)
END(MEMCPY_Z900_G5)
#endif /* HAVE_MEMCPY_Z900_G5  */

/* Large-copy helper shared by the mem[p]cpy variants in this file.
   It is entered by a jump, not by a call, and expects:
     %r1 = destination address
     %r2 = value to return to the original caller
     %r3 = source address
     %r4 = n - 1
   It rearranges these into the even/odd register pairs used by mvcle
   (r2/r3 = destination address/length, r4/r5 = source address/length)
   and restores the return value from r0 afterwards.  */
ENTRY(__memcpy_mvcle)
	# Using as standalone function will result in unexpected
	# results since the length field is incremented by 1 in order to
	# compensate the changes already done in the functions above.
	LGR	%r0,%r2			# backup return dest [ + n ]
	AGHI	%r4,1			# length + 1
	LGR	%r5,%r4			# source length
	LGR	%r4,%r3			# source address
	LGR	%r2,%r1			# destination address
	LGR	%r3,%r5			# destination length = source length
.L_MVCLE_1:
	mvcle	%r2,%r4,0		# Does the whole copy; padding byte 0
	jo	.L_MVCLE_1		# Repeat while mvcle reports cc 3
	LGR	%r2,%r0			# return destination address
	br	%r14
END(__memcpy_mvcle)

#undef LTGR
#undef CGHI
#undef LGR
#undef AGHI
#undef BRCTG

#if HAVE_MEMCPY_Z10
/* mempcpy for z10.
   Sets the return value to dest + n and then shares the copy code of
   MEMCPY_Z10 below.  */
ENTRY(MEMPCPY_Z10)
	.machine "z10"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			# Use as dest
	la	%r2,0(%r4,%r2)		# Return dest + n
	j	.L_Z10_start
END(MEMPCPY_Z10)

/* memcpy for z10.
   Copies full 256 byte blocks with mvc in a loop that also issues pfd for
   source and destination 768 bytes ahead, and copies the remainder with an
   executed mvc.  If more than 65535 blocks are needed, the copy is handed
   over to __memcpy_mvcle.  Returns dest in %r2.  */
ENTRY(MEMCPY_Z10)
	.machine "z10"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			# r1: Use as dest ; r2: Return dest
.L_Z10_start:
# if !defined __s390x__
	llgfr	%r4,%r4			# Zero-extend the 32 bit length
# endif /* !defined __s390x__  */
	cgije	%r4,0,.L_Z10_4		# Return immediately for n == 0
	aghi	%r4,-1			# r4 = n - 1
	srlg	%r5,%r4,8		# r5 = (n - 1) / 256
	cgijlh	%r5,0,.L_Z10_13		# At least one full 256 byte block
.L_Z10_3:
	exrl	%r4,.L_Z10_15		# Copy remainder via mvc template
.L_Z10_4:
	br	%r14
.L_Z10_13:
	cgfi	%r5,65535		# Switch to mvcle for copies >16MB
	jh	__memcpy_mvcle
.L_Z10_12:
	/* Copy r5 blocks of 256 bytes each.  */
	pfd	1,768(%r3)
	pfd	2,768(%r1)
	mvc	0(256,%r1),0(%r3)
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	brctg	%r5,.L_Z10_12
	j	.L_Z10_3		# Finish with the remainder
.L_Z10_15:
	/* Template for exrl above; never reached by fall-through.  */
	mvc	0(1,%r1),0(%r3)
END(MEMCPY_Z10)
#endif /* HAVE_MEMCPY_Z10  */

#if HAVE_MEMCPY_Z196
/* mempcpy for z196.
   Sets the return value to dest + n and then shares the copy code of
   MEMCPY_Z196 below.  */
ENTRY(MEMPCPY_Z196)
	.machine "z196"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			# Use as dest
	la	%r2,0(%r4,%r2)		# Return dest + n
	j	.L_Z196_start
END(MEMPCPY_Z196)

/* memcpy for z196.
   Three size classes, selected by the number of 256 byte blocks in r5:
     r5 <= 255:		plain mvc loop (.L_Z196_2)
     r5 <= 262144:	mvc loop with pfd 1024 bytes ahead (.L_Z196_7)
     otherwise:		__memcpy_mvcle
   The remainder is always copied with an executed mvc.  Returns dest in
   %r2.  .L_Z196_start2 is also the entry point used by the memmove
   variants below for the forward case; they enter with r1 = dest,
   r2 = return value, r3 = src and r4 = n, where n is known to be
   nonzero.  */
ENTRY(MEMCPY_Z196)
	.machine "z196"
	.machinemode "zarch_nohighgprs"
	lgr	%r1,%r2			# r1: Use as dest ; r2: Return dest
.L_Z196_start:
# if !defined __s390x__
	llgfr	%r4,%r4			# Zero-extend the 32 bit length
# endif /* !defined __s390x__  */
	ltgr	%r4,%r4
	je	.L_Z196_4		# Return immediately for n == 0
.L_Z196_start2:
	aghi	%r4,-1			# r4 = n - 1
	risbg	%r5,%r4,8,128+63,56	# r5 = r4 / 256 (sets cc for jne)
	jne	.L_Z196_5		# At least one full 256 byte block
.L_Z196_3:
	exrl	%r4,.L_Z196_14		# Copy remainder via mvc template
.L_Z196_4:
	br	%r14
.L_Z196_5:
	cgfi	%r5,255			# Switch to loop with pfd for copies >=64kB
	jh	.L_Z196_6
.L_Z196_2:
	/* Copy r5 blocks of 256 bytes each.  The loop branch uses the cc
	   of aghi; the la instructions in between leave it untouched.  */
	mvc	0(256,%r1),0(%r3)
	aghi	%r5,-1
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	jne	.L_Z196_2
	j	.L_Z196_3		# Finish with the remainder
.L_Z196_6:
	cgfi	%r5,262144		# Switch to mvcle for copies >64MB
	jh	__memcpy_mvcle
.L_Z196_7:
	/* Same loop as .L_Z196_2, with prefetches 1024 bytes ahead.  */
	pfd	1,1024(%r3)
	pfd	2,1024(%r1)
	mvc	0(256,%r1),0(%r3)
	aghi	%r5,-1
	la	%r1,256(%r1)
	la	%r3,256(%r3)
	jne	.L_Z196_7
	j	.L_Z196_3		# Finish with the remainder
.L_Z196_14:
	/* Template for exrl above; never reached by fall-through.  */
	mvc	0(1,%r1),0(%r3)
END(MEMCPY_Z196)
#endif /* HAVE_MEMCPY_Z196  */

#if HAVE_MEMMOVE_Z13
/* memmove for z13.
   Input:  %r2 = dst, %r3 = src, %r4 = len.  Returns dst in %r2.
   - len <= 16: one vll/vstl pair; all bytes are loaded before any byte is
     stored.
   - len > 16 and (dst - src) >= len as unsigned values: forward copy by
     jumping into the z196 memcpy at .L_Z196_start2.
   - otherwise: backward copy from the end of the buffers with vl/vst in
     blocks of 64 bytes and/or 16 bytes, followed by a vll/vstl pair for
     the remaining bytes at the start of the buffers.
   In the backward case r4 is used as byte offset into both buffers.  */
ENTRY(MEMMOVE_Z13)
	.machine "z13"
	.machinemode "zarch_nohighgprs"
# if !defined __s390x__
	/* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
	llgfr	%r4,%r4
	llgfr	%r3,%r3
	llgfr	%r2,%r2
# endif /* !defined __s390x__  */
	sgrk	%r0,%r2,%r3		/* r0 = dst - src  */
	clgijh	%r4,16,.L_MEMMOVE_Z13_LARGE
	aghik	%r5,%r4,-1		/* r5 = len - 1 (highest index)  */
.L_MEMMOVE_Z13_SMALL:
	jl .L_MEMMOVE_Z13_END		/* Jump away if len was zero.  */
	/* Store up to 16 bytes with vll/vstl which need the highest index
	   instead of the length.  */
	vll	%v16,%r5,0(%r3)
	vstl	%v16,%r5,0(%r2)
.L_MEMMOVE_Z13_END:
	br	%r14
.L_MEMMOVE_Z13_LARGE:
	lgr	%r1,%r2			/* For memcpy: r1: Use as dest ;
					   r2: Return dest  */
	/* The unsigned comparison (dst - src >= len) determines if we can
	   execute the forward case with memcpy.  */
#if ! HAVE_MEMCPY_Z196
# error The z13 variant of memmove needs the z196 variant of memcpy!
#endif
	clgrjhe	%r0,%r4,.L_Z196_start2
	/* Backward case.  */
	risbgn	%r5,%r4,4,128+63,60	/* r5 = r4 / 16  */
	aghi	%r4,-16			/* r4 = offset of last 16byte block  */
	clgijhe	%r5,8,.L_MEMMOVE_Z13_LARGE_64B
.L_MEMMOVE_Z13_LARGE_16B_LOOP:
	/* Store at least 16 bytes with vl/vst.  The number of 16byte blocks
	   is stored in r5.  */
	vl	%v16,0(%r4,%r3)
	vst	%v16,0(%r4,%r2)
	aghi	%r4,-16
	brctg	%r5,.L_MEMMOVE_Z13_LARGE_16B_LOOP
	/* r4 + 16 bytes are left at the start of the buffers; r5 becomes
	   their highest index and the cc of aghik tells if there are any.  */
	aghik	%r5,%r4,15
	j	.L_MEMMOVE_Z13_SMALL
.L_MEMMOVE_Z13_LARGE_64B:
	/* Store at least 128 bytes with 4x vl/vst.  The number of 64byte
	   blocks will be stored in r0.  */
	aghi	%r4,-48			/* r4 = offset of last 64byte block  */
	srlg	%r0,%r5,2		/* r0 = r5 / 4
					   => Number of 64byte blocks.  */
.L_MEMMOVE_Z13_LARGE_64B_LOOP:
	/* Load all 64 bytes before storing any of them.  */
	vl	%v20,48(%r4,%r3)
	vl	%v19,32(%r4,%r3)
	vl	%v18,16(%r4,%r3)
	vl	%v17,0(%r4,%r3)
	vst	%v20,48(%r4,%r2)
	vst	%v19,32(%r4,%r2)
	vst	%v18,16(%r4,%r2)
	vst	%v17,0(%r4,%r2)
	aghi	%r4,-64
	brctg	%r0,.L_MEMMOVE_Z13_LARGE_64B_LOOP
	aghi	%r4,48			/* Back to 16byte block offsets.  */
	/* Recalculate the number of 16byte blocks.  */
	risbg	%r5,%r5,62,128+63,0	/* r5 = r5 & 3
					   => Remaining 16byte blocks.  */
	jne	.L_MEMMOVE_Z13_LARGE_16B_LOOP
	aghik	%r5,%r4,15
	j	.L_MEMMOVE_Z13_SMALL
END(MEMMOVE_Z13)
#endif /* HAVE_MEMMOVE_Z13  */

#if HAVE_MEMMOVE_ARCH13
/* memmove for arch13.
   Input:  %r2 = dst, %r3 = src, %r4 = len.  Returns dst in %r2.
   - len <= 16: one vll/vstl pair; all bytes are loaded before any byte is
     stored.
   - len > 16 and (dst - src) >= len as unsigned values: forward copy by
     jumping into the z196 memcpy at .L_Z196_start2.
   - otherwise: backward copy with mvcrl, which takes the highest index
     (length - 1) of the block to move in r0.  */
ENTRY(MEMMOVE_ARCH13)
	.machine "arch13"
	.machinemode "zarch_nohighgprs"
# if ! defined __s390x__
	/* Note: The 31bit dst and src pointers are prefixed with zeroes.  */
	llgfr	%r4,%r4
	llgfr	%r3,%r3
	llgfr	%r2,%r2
# endif /* ! defined __s390x__  */
	sgrk	%r5,%r2,%r3	/* r5 = dst - src  */
	aghik	%r0,%r4,-1	/* Both vstl and mvcrl need the highest index.  */
	clgijh	%r4,16,.L_MEMMOVE_ARCH13_LARGE
.L_MEMMOVE_ARCH13_SMALL:
	jl .L_MEMMOVE_ARCH13_END /* Return if len was zero (cc of aghik).  */
	/* Store up to 16 bytes with vll/vstl (needs highest index).  */
	vll	%v16,%r0,0(%r3)
	vstl	%v16,%r0,0(%r2)
.L_MEMMOVE_ARCH13_END:
	br	%r14
.L_MEMMOVE_ARCH13_LARGE:
	lgr	%r1,%r2	/* For memcpy: r1: Use as dest ; r2: Return dest  */
	/* The unsigned comparison (dst - src >= len) determines if we can
	   execute the forward case with memcpy.  */
#if ! HAVE_MEMCPY_Z196
# error The arch13 variant of memmove needs the z196 variant of memcpy!
#endif
	clgrjhe	%r5,%r4,.L_Z196_start2
	/* Backward case.  */
	clgijh	%r0,255,.L_MEMMOVE_ARCH13_LARGER_256B
	/* Move up to 256bytes with mvcrl (move right to left).  */
	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
	br	%r14
.L_MEMMOVE_ARCH13_LARGER_256B:
	/* First move the "remaining" block of up to 256 bytes at the end of
	   src/dst buffers.  Then move blocks of 256bytes in a loop starting
	   with the block at the end.
	   (If src/dst pointers are aligned e.g. to 256 bytes, then the pointers
	   passed to mvcrl instructions are aligned, too)  */
	risbgn	%r5,%r0,8,128+63,56	/* r5 = r0 / 256  */
	risbgn	%r0,%r0,56,128+63,0	/* r0 = r0 & 0xFF  */
	slgr	%r4,%r0			/* r4 = len - r0  */
	lay	%r1,-1(%r4,%r1)		/* r1 = start of last block in dst  */
	lay	%r3,-1(%r4,%r3)		/* r3 = start of last block in src  */
	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
	lghi	%r0,255		/* Always copy 256 bytes in the loop below!  */
.L_MEMMOVE_ARCH13_LARGE_256B_LOOP:
	/* Step both pointers back by one block and move it; r5 counts the
	   remaining 256byte blocks.  */
	aghi	%r1,-256
	aghi	%r3,-256
	mvcrl	0(%r1),0(%r3)	/* Move (r0 + 1) bytes from r3 to r1.  */
	brctg	%r5,.L_MEMMOVE_ARCH13_LARGE_256B_LOOP
	br	%r14
END(MEMMOVE_ARCH13)
#endif /* HAVE_MEMMOVE_ARCH13  */

#if ! HAVE_MEMCPY_IFUNC
/* If we don't use ifunc, define an alias for mem[p]cpy here.
   Otherwise see sysdeps/s390/mem[p]cpy.c.  */
strong_alias (MEMCPY_DEFAULT, memcpy)
strong_alias (MEMPCPY_DEFAULT, __mempcpy)
weak_alias (__mempcpy, mempcpy)
#endif

#if ! HAVE_MEMMOVE_IFUNC
/* If we don't use ifunc, define an alias for memmove here.
   Otherwise see sysdeps/s390/memmove.c.  */
# if ! HAVE_MEMMOVE_C
/* If the c variant is needed, then sysdeps/s390/memmove-c.c
   defines memmove.
   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it.  */
strong_alias (MEMMOVE_DEFAULT, memmove)
# endif
#endif

#if defined SHARED && IS_IN (libc)
/* Defines the internal symbols.
   Compare to libc_hidden_[builtin_]def (mem[p]cpy) in string/mem[p]cpy.c.  */
strong_alias (MEMCPY_DEFAULT, __GI_memcpy)
strong_alias (MEMPCPY_DEFAULT, __GI_mempcpy)
strong_alias (MEMPCPY_DEFAULT, __GI___mempcpy)
# if ! HAVE_MEMMOVE_C
/* If the c variant is needed, then sysdeps/s390/memmove-c.c
   defines the internal symbol.
   Otherwise MEMMOVE_DEFAULT is implemented here and we have to define it.  */
strong_alias (MEMMOVE_DEFAULT, __GI_memmove)
# endif
#endif