/* strnlen - calculate the length of a string with limit.

   Copyright (C) 2013-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

/* Arguments and return value.  result shares x0 with srcin, so srcin
   is dead once result has been written.  */
#define srcin		x0
#define cntin		x1
#define result		x0

/* Scratch registers.  shift, wtmp and tmp are three names for the same
   register (x4/w4); their live ranges never overlap.  */
#define src		x2
#define synd		x3
#define shift		x4
#define wtmp		w4
#define tmp		x4
#define cntrem		x5

/* Vector registers.  qdata/vdata and vend/dend are different views of
   the same two registers (v0 and v3).  */
#define qdata		q0
#define vdata		v0
#define vhas_chr	v1
#define vrepmask	v2
#define vend		v3
#define dend		d3

/*
   Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte is NUL and
   bits 4-7 are zero.  For odd bytes, bits 4-7 are set if the byte is NUL and
   bits 0-3 are zero, so that adjacent bytes can be merged with a pairwise
   add.  Since the bits in the syndrome reflect the order in which things
   occur in the original string, counting trailing zeros identifies exactly
   which byte is the first NUL.

   In:	srcin (x0) = start of the string, cntin (x1) = maximum length.
   Out:	result (x0) = offset of the first NUL byte, or cntin if no NUL
	is found within the first cntin bytes.
   Uses x2-x5, v0-v3 and the condition flags; no stack.  */

ENTRY (__strnlen)
	/* Argument fix-up macros from <sysdep.h>; presumably they
	   sanitize the pointer and size arguments for ILP32 builds --
	   confirm against sysdep.h.  */
	PTR_ARG (0)
	SIZE_ARG (1)

	/* Round the start address down to a 16-byte boundary, so the
	   first load is an aligned chunk containing the first byte.  */
	bic	src, srcin, 15
	/* 0xf00f per halfword: mask bytes alternate 0x0f (even lanes)
	   and 0xf0 (odd lanes).  */
	mov	wtmp, 0xf00f
	/* A zero limit returns 0 without touching memory.  */
	cbz	cntin, L(nomatch)
	ld1	{vdata.16b}, [src], 16
	dup	vrepmask.8h, wtmp
	/* 0xff in every lane whose byte is NUL.  */
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Four syndrome bits per byte.  The register-controlled LSR
	   below only uses the low six bits of shift, so this is in
	   effect (srcin & 15) * 4.  */
	lsl	shift, srcin, 2
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	/* Discard syndrome bits of bytes that precede srcin.  */
	lsr	synd, synd, shift
	cbz	synd, L(start_loop)

	/* NUL found in the first chunk.  */
L(finish):
	/* rbit + clz counts trailing zeros; four bits per byte, so
	   dividing by four gives the byte index.  */
	rbit	synd, synd
	clz	synd, synd
	lsr	result, synd, 2
	/* result = min (result, cntin), unsigned.  */
	cmp	cntin, result
	csel	result, cntin, result, ls
	ret

L(start_loop):
	/* src already points at the next chunk, so src - srcin is the
	   number of bytes from srcin that have been examined.  */
	sub	tmp, src, srcin
	subs	cntrem, cntin, tmp
	/* Limit reached inside the first chunk: return cntin.  */
	b.ls	L(nomatch)

	/* Make sure that it won't overread by a 16-byte chunk.  The loop
	   loads two chunks per iteration but tests the limit only after
	   the second load.  Bit 4 of cntrem + 15 is the low bit of the
	   number of chunks still needed, (cntrem + 15) / 16; when that
	   number is odd, enter at the second load so that the limit test
	   coincides with the last chunk needed.  */
	add	tmp, cntrem, 15
	tbnz	tmp, 4, L(loop32_2)

	.p2align 5
L(loop32):
	ldr	qdata, [src], 16
	cmeq	vhas_chr.16b, vdata.16b, 0
	/* Pairwise max folds the compare result into 64 bits: non-zero
	   iff some byte in the chunk is NUL.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(end)
L(loop32_2):
	ldr	qdata, [src], 16
	subs	cntrem, cntrem, 32
	/* cmeq does not alter the flags, so b.ls still tests the subs:
	   taken when at most 32 bytes remained, i.e. the limit lies
	   within the data loaded so far.  */
	cmeq	vhas_chr.16b, vdata.16b, 0
	b.ls	L(end)
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(loop32)

	/* Reached when the last loaded chunk contains a NUL or when the
	   limit has been reached.  vhas_chr holds the compare result for
	   that chunk and src points 16 bytes past its start.  */
L(end):
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	sub	src, src, 16
	mov	synd, vend.d[0]
	/* Byte offset of the last loaded chunk from the string start.  */
	sub	result, src, srcin
	/* On big-endian the bit reversal is skipped and clz is applied
	   directly; presumably the ldr q load already places the first
	   byte in the most significant lanes -- confirm.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* A zero syndrome (no NUL in the chunk) yields 64, i.e. index 16;
	   the clamp below then brings the result back to cntin.  */
	clz	synd, synd
	add	result, result, synd, lsr 2
	/* result = min (result, cntin), unsigned.  */
	cmp	cntin, result
	csel	result, cntin, result, ls
	ret

	/* No NUL within the limit: the answer is the limit itself.  */
L(nomatch):
	mov	result, cntin
	ret

END (__strnlen)
libc_hidden_def (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_def (strnlen)