/* strcpy/stpcpy - copy a string returning pointer to start/end.
   Copyright (C) 2013-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

/* To build as stpcpy, define BUILD_STPCPY before compiling this file.

   NOTE(review): this header used to describe a -DSTRCPY_TEST_PAGE_CROSS
   option that forced unaligned copies through a slower entry path.
   Nothing in this file tests that macro, so defining it has no effect
   on the code below.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Arguments and results.  */
#define dstin		x0
#define srcin		x1
#define result		x0

/* Scratch general registers.  Several names deliberately share one
   register because their live ranges never overlap:
   len/synd are both x4, tmp/wtmp/shift are all x5.  */
#define src		x2
#define dst		x3
#define len		x4
#define synd		x4
#define tmp		x5
#define wtmp		w5
#define shift		x5
#define data1		x6
#define dataw1		w6
#define data2		x7
#define dataw2		w7

/* Vector registers.  dataq/vdata are views of v0, vend/dend are views of
   v3, and dataq2 (q1) shares v1 with vhas_nul, so dataq2 is only used
   where the NUL mask in v1 is no longer needed.  */
#define dataq		q0
#define vdata		v0
#define vhas_nul	v1
#define vrepmask	v2
#define vend		v3
#define dend		d3
#define dataq2		q1

/* IFSTPCPY (insn, operands...) emits the instruction only in the stpcpy
   build; in the strcpy build it expands to nothing, leaving x0 alone.  */
#ifdef BUILD_STPCPY
# define STRCPY __stpcpy
# define IFSTPCPY(X,...) X,__VA_ARGS__
#else
# define STRCPY strcpy
# define IFSTPCPY(X,...)
#endif

/* Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte is NUL.
   Bits 4-7 must be zero.  Bits 4-7 are set likewise for odd bytes so that
   adjacent bytes can be merged.  Since the bits in the syndrome reflect the
   order in which things occur in the original string, counting trailing
   zeros identifies exactly which byte matched.  */

/* char *strcpy (char *dstin, const char *srcin)
   char *__stpcpy (char *dstin, const char *srcin)	[BUILD_STPCPY]

   In:	  x0 = dstin, x1 = srcin.
   Out:	  x0 = dstin for strcpy; for stpcpy, dstin + string length, i.e.
	  the address of the NUL that was written.
   Uses:  x2-x7, v0-v3 and the condition flags (subs in L(less8)).
	  No stack is used and no other function is called.

   Short strings are copied with two possibly overlapping accesses of
   16, 8 or 4 bytes (one anchored at the start, one ending on the NUL),
   so no path needs a byte loop.  */

ENTRY (STRCPY)
	/* PTR_ARG is provided by sysdep.h and is not visible here.  */
	PTR_ARG (0)
	PTR_ARG (1)

	/* Check the aligned 16-byte chunk that contains the first byte.
	   The load address is rounded down, so the access never crosses a
	   16-byte boundary.  */
	bic	src, srcin, 15
	mov	wtmp, 0xf00f
	ld1	{vdata.16b}, [src]
	dup	vrepmask.8h, wtmp	/* 0x0f in even bytes, 0xf0 in odd.  */
	cmeq	vhas_nul.16b, vdata.16b, 0
	lsl	shift, srcin, 2		/* 4 syndrome bits per byte.  */
	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	/* Drop the syndrome bits of the bytes that precede srcin.  A
	   register shift only uses the count modulo 64, which here is
	   (srcin & 15) * 4.  */
	lsr	synd, synd, shift
	cbnz	synd, L(tail)

	/* No NUL before the next 16-byte boundary: check the second chunk.  */
	ldr	dataq, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(start_loop)

	/* NUL is in the second chunk.  The bit reversal is skipped on
	   big-endian builds, where clz is applied to the syndrome as is.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	sub	tmp, src, srcin		/* Bytes consumed by the first chunk.  */
	clz	len, synd
	add	len, tmp, len, lsr 2	/* len = offset of the NUL from srcin.  */
	tbz	len, 4, L(less16)
	/* 16 <= len <= 31: copy the first 16 bytes and the 16 bytes that
	   end on the NUL; the two ranges may overlap.  */
	sub	tmp, len, 15
	ldr	dataq, [srcin]
	ldr	dataq2, [srcin, tmp]
	str	dataq, [dstin]
	str	dataq2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4,,8
L(tail):
	/* NUL found in the first chunk.  This syndrome came from the
	   ld1 {.16b} load and is bit-reversed on either endianness.  */
	rbit	synd, synd
	clz	len, synd
	lsr	len, len, 2		/* len = offset of the NUL, 0..15.  */

	.p2align 4
L(less16):
	/* len < 16 here.  */
	tbz	len, 3, L(less8)
	/* 8 <= len <= 15: two overlapping 8-byte copies.  */
	sub	tmp, len, 7
	ldr	data1, [srcin]
	ldr	data2, [srcin, tmp]
	str	data1, [dstin]
	str	data2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(less8):
	/* len < 8.  The subtraction borrows (LO) when len < 3.  */
	subs	tmp, len, 3
	b.lo	L(less4)
	/* 3 <= len <= 7: two overlapping 4-byte copies.  */
	ldr	dataw1, [srcin]
	ldr	dataw2, [srcin, tmp]
	str	dataw1, [dstin]
	str	dataw2, [dstin, tmp]
	IFSTPCPY (add result, dstin, len)
	ret

L(less4):
	/* len is 0, 1 or 2.  For 1 and 2 copy two bytes, then (for all
	   three cases) store the terminator at dstin + len.  */
	cbz	len, L(zerobyte)
	ldrh	dataw1, [srcin]
	strh	dataw1, [dstin]
L(zerobyte):
	strb	wzr, [dstin, len]
	IFSTPCPY (add result, dstin, len)
	ret

	.p2align 4
L(start_loop):
	/* No NUL in the first two chunks, so the 16 bytes at srcin are all
	   part of the string.  Copy them unaligned, then continue with
	   source-aligned chunks; dst is set so that dst - dstin equals
	   src - srcin, and dataq still holds the chunk at src.  */
	sub	len, src, srcin
	ldr	dataq2, [srcin]
	add	dst, dstin, len
	str	dataq2, [dstin]

	.p2align 5
L(loop):
	/* Store the chunk already known to be NUL-free, then fetch and
	   test the next one.  umaxp only answers "any NUL in the chunk?";
	   the exact syndrome is computed once, after the loop.  */
	str	dataq, [dst], 16
	ldr	dataq, [src, 16]!
	cmeq	vhas_nul.16b, vdata.16b, 0
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop)

	and	vhas_nul.16b, vhas_nul.16b, vrepmask.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b		/* 128->64 */
	fmov	synd, dend
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	clz	len, synd
	lsr	len, len, 2		/* Offset of the NUL within this chunk.  */
	/* Copy the 16 bytes that end on the NUL.  tmp is zero or negative,
	   so this overlaps bytes that were already stored.  */
	sub	tmp, len, 15
	ldr	dataq, [src, tmp]
	str	dataq, [dst, tmp]
	IFSTPCPY (add result, dst, len)
	ret

END (STRCPY)

#ifdef BUILD_STPCPY
weak_alias (__stpcpy, stpcpy)
libc_hidden_def (__stpcpy)
libc_hidden_builtin_def (stpcpy)
#else
libc_hidden_builtin_def (strcpy)
#endif