/* strchrnul - find a character or nul in a string

   Copyright (C) 2014-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Register roles.  Note the deliberate aliasing: result shares x0 with
   srcin, and tmp1 shares register 1 with chrin.  chrin is only read by
   the initial dup, so tmp1 may overwrite it afterwards.  */
#define srcin		x0	/* In: start of the string.  */
#define chrin		w1	/* In: character to look for.  */
#define result		x0	/* Out: address of the match or of the NUL.  */

#define src		x2	/* Address of the 16-byte chunk being examined.  */
#define tmp1		x1	/* Syndrome, then bit index of the first match.  */
#define tmp2		x3	/* Shift count for the first chunk.  */
#define tmp2w		w3

#define vrepchr		v0	/* chrin replicated into all 16 byte lanes.  */
#define vdata		v1	/* Current 16 bytes of string data.  */
#define qdata		q1
#define vhas_nul	v2	/* Not referenced by the code below.  */
#define vhas_chr	v3	/* 0xff in each lane that holds chrin or NUL.  */
#define vrepmask	v4	/* 0x0f in even lanes, 0xf0 in odd lanes.  */
#define vend		v5	/* Narrowed (64-bit) syndrome.  */
#define dend		d5

/* Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte matched the
   requested character or the byte is NUL.  Bits 4-7 must be zero.  Bits 4-7 are
   set likewise for odd bytes so that adjacent bytes can be merged.  Since the
   bits in the syndrome reflect the order in which things occur in the original
   string, counting trailing zeros identifies exactly which byte matched.  */

/* __strchrnul (srcin, chrin)

   In:   x0 = srcin, w1 = chrin (only the low byte is used, via dup .16b).
   Out:  x0 = address of the first byte at or after srcin that equals
	 chrin or is NUL.
   Uses: x1, x2, x3, v0, v1, v3, v4, v5.  Leaf routine; the stack is not
	 touched.

   The first chunk is read from srcin rounded down to a 16-byte boundary
   and every later chunk is 16 bytes further on, so each 16-byte load is
   16-byte aligned.  */
ENTRY (__strchrnul)
	/* Pointer-argument fix-up macro from <sysdep.h> (definition not
	   visible here; presumably for ILP32 - confirm).  */
	PTR_ARG (0)
	bic	src, srcin, 15		/* src = srcin rounded down to 16.  */
	dup	vrepchr.16b, chrin
	ld1	{vdata.16b}, [src]	/* Byte-element load of the first chunk.  */
	mov	tmp2w, 0xf00f
	dup	vrepmask.8h, tmp2w
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* A lane holding 0xff from the cmeq is >= anything; a lane holding 0
	   is >= the data byte only when that byte is 0.  The result is
	   therefore 0xff exactly where the byte is chrin or NUL.  */
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	/* Shift count for the lsr below: four syndrome bits per byte.  A
	   register-specified shift uses the count modulo 64, so this acts
	   as (srcin & 15) * 4.  */
	lsl	tmp2, srcin, 2
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	tmp1, dend
	lsr	tmp1, tmp1, tmp2	/* Mask padding bits.  */
	cbz	tmp1, L(loop)		/* Nothing found in the first chunk.  */

	/* rbit + clz counts trailing zeros.  The syndrome was shifted so
	   that bit 0 belongs to the byte at srcin, hence the count divided
	   by four is the byte offset from srcin.  */
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	add	result, srcin, tmp1, lsr 2
	ret

	.p2align 4
L(loop):
	ldr	qdata, [src, 16]!	/* Pre-index: src += 16, then load.  */
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	/* Pairwise maximum: the low 64 bits are non-zero iff some lane
	   matched.  The exact position is worked out only after the loop.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)

	/* A match lies in the chunk at src: build the four-bits-per-byte
	   syndrome and locate the first set bit.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	tmp1, dend
	/* NOTE(review): the bit reversal is skipped on big-endian, presumably
	   because the ldr q load above (unlike the byte-element ld1 used for
	   the first chunk) already leaves the earliest string byte in the
	   most significant position - confirm against the architecture
	   manual.  */
#ifndef __AARCH64EB__
	rbit	tmp1, tmp1
#endif
	clz	tmp1, tmp1
	add	result, src, tmp1, lsr 2
	ret

END (__strchrnul)
weak_alias (__strchrnul, strchrnul)