/* Copyright (C) 2012-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#ifndef STRLEN
# define STRLEN __strlen
#endif

/* Register roles.  The argument and the result share x0; the argument is
   last read by the SUB after the loop, before the result is formed.  */
#define str_in		x0	/* In:  address of the string.  */
#define length		x0	/* Out: number of bytes before the NUL.  */

#define chunk		x1	/* 16-byte aligned address being examined.  */
#define syndrome	x2	/* 64-bit summary of one chunk, 4 bits/byte.  */
#define scratch		x3
#define wscratch	w3	/* 32-bit view of scratch.  */
#define skip		x4	/* Shift that drops bytes before the string.  */

#define qblock		q0	/* The current chunk, as LDR sees it.  */
#define vblock		v0	/* The current chunk, as SIMD ops see it.  */
#define vis_nul		v1	/* Per byte: 0xff where the byte is NUL.  */
#define vmask		v2	/* NIBBLE_MASK in every halfword.  */
#define vfold		v3	/* 128 -> 64 bit reduction of vis_nul.  */
#define dfold		d3	/* Low 64 bits of vfold.  */

/* Replicated into each halfword of vmask, so that as a byte vector it
   keeps bits 0-3 of even bytes and bits 4-7 of odd bytes.  */
#define NIBBLE_MASK	0xf00f

/* Core algorithm:

   Every 16-byte chunk is reduced to a 64-bit syndrome that holds four bits
   per input byte.  CMEQ turns each NUL byte into 0xff and every other byte
   into 0x00.  Masking with NIBBLE_MASK leaves bits 0-3 for even bytes and
   bits 4-7 for odd bytes, so a pairwise add of adjacent bytes merges two
   input bytes into one syndrome byte without any carry.  With the lanes in
   memory order the nibbles follow the order of the bytes in the string,
   starting at bit 0, so counting trailing zero bits and dividing by four
   gives the index of the first NUL in the chunk.  */

/* size_t strlen (const char *s)

   In:   x0 = s.
   Out:  x0 = number of bytes preceding the first NUL.
   Uses: x1-x4 and v0-v3; the stack is not accessed.

   Every load is 16 bytes wide from a 16-byte aligned address, so no access
   straddles a 16-byte boundary.  The first load may therefore read bytes
   that precede S inside the same aligned chunk; they are shifted out of the
   syndrome before it is tested.  */

ENTRY (STRLEN)
	PTR_ARG (0)		/* sysdep.h macro applied to argument 0.  */

	/* First chunk: the aligned 16 bytes that contain S.  */
	bic	chunk, str_in, 15
	mov	wscratch, NIBBLE_MASK
	/* LD1 with byte elements is used here and the RBIT below is not
	   conditional on endianness, unlike the LDR path after the loop.  */
	ld1	{vblock.16b}, [chunk]
	dup	vmask.8h, wscratch
	cmeq	vis_nul.16b, vblock.16b, 0
	/* 4 * address.  The register-controlled LSR below only honours the
	   low six bits of the amount, i.e. 4 * (address & 15): one nibble per
	   byte that lies before S.  */
	lsl	skip, str_in, 2
	and	vis_nul.16b, vis_nul.16b, vmask.16b
	addp	vfold.16b, vis_nul.16b, vis_nul.16b	/* 128 -> 64 bits.  */
	fmov	syndrome, dfold
	lsr	syndrome, syndrome, skip
	cbz	syndrome, L(next_chunk)

	/* NUL in the first chunk: RBIT + CLZ counts trailing zero bits, and
	   four syndrome bits correspond to one byte.  */
	rbit	syndrome, syndrome
	clz	length, syndrome
	lsr	length, length, 2
	ret

	.p2align 5
L(next_chunk):
	/* Pre-indexed load: advance CHUNK by 16, then fetch.  */
	ldr	qblock, [chunk, 16]!
	cmeq	vis_nul.16b, vblock.16b, 0
	/* Any-NUL test only: the pairwise maximum folds the compare result
	   to 64 bits that are non-zero iff some byte of the chunk is NUL.  */
	umaxp	vfold.16b, vis_nul.16b, vis_nul.16b
	fmov	syndrome, dfold
	cbz	syndrome, L(next_chunk)

	/* A NUL is present: build the exact four-bits-per-byte syndrome.  */
	and	vis_nul.16b, vis_nul.16b, vmask.16b
	addp	vfold.16b, vis_nul.16b, vis_nul.16b	/* 128 -> 64 bits.  */
	sub	length, chunk, str_in	/* Bytes before this chunk.  */
	fmov	syndrome, dfold
	/* The big-endian build omits the bit reversal and counts leading
	   zeros directly, which implies that the LDR above leaves the first
	   byte of the chunk at the most significant end of the syndrome
	   there.  */
#ifndef __AARCH64EB__
	rbit	syndrome, syndrome
#endif
	clz	scratch, syndrome
	add	length, length, scratch, lsr 2	/* Plus index within chunk.  */
	ret

END (STRLEN)
/* The two macros below are defined outside this file.  Judging by their
   names they export STRLEN under the weak name strlen and provide the
   hidden builtin definition -- confirm against the glibc headers.  */
weak_alias (STRLEN, strlen)
libc_hidden_builtin_def (strlen)