/* memchr - find a character in a memory zone

   Copyright (C) 2015-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Built under an internal name by default; the public "memchr" symbol is
   provided by the weak_alias at the end of the file.  */
#ifndef MEMCHR
# define MEMCHR __memchr
#endif

/* Arguments and results.  */
#define srcin		x0	/* const void *s: pointer to search.  */
#define chrin		w1	/* int c: character to find (low byte used).  */
#define cntin		x2	/* size_t n: number of bytes to search.  */
#define result		x0	/* Return value; aliases srcin.  */

/* Scratch registers.  */
#define src		x3	/* 16-byte-aligned running pointer.  */
#define cntrem		x4	/* Bytes remaining in the loop.  */
#define synd		x5	/* 64-bit match syndrome (4 bits/byte).  */
#define shift		x6	/* Syndrome bits to discard for misalignment.  */
#define tmp		x7
#define wtmp		w7	/* 32-bit view of tmp.  */

/* Vector registers.  Note qdata/vdata are the q/v views of register 1,
   and dend is the low 64 bits of vend.  */
#define vrepchr		v0	/* chrin replicated into all 16 byte lanes.  */
#define qdata		q1
#define vdata		v1
#define vhas_chr	v2	/* Per-byte compare result.  */
#define vrepmask	v3	/* 0xf00f replicated per halfword.  */
#define vend		v4
#define dend		d4

/*
   Core algorithm:
   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte matched
   the requested character; bits 4-7 must be zero.  Bits 4-7 are set likewise
   for odd bytes so that adjacent bytes can be merged.  Since the bits in the
   syndrome reflect the order in which things occur in the original string,
   counting trailing zeros identifies exactly which byte matched.
*/

/* void *MEMCHR (const void *s, int c, size_t n)
   In:  srcin = s, chrin = c, cntin = n.
   Out: result = pointer to the first occurrence of (unsigned char) c in
	the first n bytes of s, or NULL (0) if there is none.  */
ENTRY (MEMCHR)
	PTR_ARG (0)
	SIZE_ARG (2)
	/* Round s down to a 16-byte boundary.  An aligned 16-byte load stays
	   within one 16-byte granule, which is what keeps this safe under MTE
	   (see assumptions above) even though bytes before s may be read.  */
	bic	src, srcin, 15
	cbz	cntin, L(nomatch)	/* n == 0: nothing to search.  */
	ld1	{vdata.16b}, [src]
	dup	vrepchr.16b, chrin
	/* 0xf00f per halfword keeps bits 0-3 of even bytes and bits 4-7 of
	   odd bytes, so addp can merge byte pairs into the 64-bit syndrome
	   described in the header comment.  */
	mov	wtmp, 0xf00f
	dup	vrepmask.8h, wtmp
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	lsl	shift, srcin, 2		/* 4 syndrome bits per misaligned byte.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift	/* Discard matches before srcin.  */
	cbz	synd, L(start_loop)

	/* Match in the first chunk.  rbit+clz counts trailing zero bits;
	   divided by 4 that is the byte offset from srcin.
	   NOTE(review): rbit is unconditional here but guarded by
	   __AARCH64EB__ at L(end) — confirm big-endian behaviour.  */
	rbit	synd, synd
	clz	synd, synd
	add	result, srcin, synd, lsr 2
	cmp	cntin, synd, lsr 2	/* Match within the first n bytes?  */
	csel	result, result, xzr, hi
	ret

L(start_loop):
	/* tmp = bytes of [s, s+n) already covered by the first chunk.  */
	sub	tmp, src, srcin
	add	tmp, tmp, 16
	subs	cntrem, cntin, tmp
	b.ls	L(nomatch)		/* All n bytes already checked.  */

	/* Make sure that it won't overread by a 16-byte chunk: if an odd
	   number of 16-byte chunks remains, enter the unrolled loop at its
	   second (count-checked) half.  */
	add	tmp, cntrem, 15
	tbnz	tmp, 4, L(loop32_2)

	.p2align 4
	/* Main loop, unrolled 2x: 32 bytes per iteration.  The cheap umaxp
	   test only detects *whether* a match exists; the exact position is
	   recomputed at L(end).  */
L(loop32):
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)

L(loop32_2):
	ldr	qdata, [src, 16]!
	subs	cntrem, cntrem, 32
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	b.ls	L(end)			/* Limit reached: do the final,
					   possibly partial, check.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)
L(end):
	/* Build the precise per-byte syndrome for the current chunk.  */
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	add	tmp, srcin, cntin
	sub	cntrem, tmp, src	/* Valid bytes left in this chunk.  */
#ifndef __AARCH64EB__
	rbit	synd, synd		/* LE: reverse so clz acts as a
					   count-trailing-zeros.  */
#endif
	clz	synd, synd
	cmp	cntrem, synd, lsr 2	/* Match inside the n-byte limit?  */
	add	result, src, synd, lsr 2
	csel	result, result, xzr, hi	/* Out of range -> NULL.  */
	ret

L(nomatch):
	mov	result, 0		/* Return NULL.  */
	ret

END (MEMCHR)
weak_alias (MEMCHR, memchr)
libc_hidden_builtin_def (memchr)