1/* strnlen - calculate the length of a string with limit.
2
3   Copyright (C) 2013-2021 Free Software Foundation, Inc.
4
5   This file is part of the GNU C Library.
6
7   The GNU C Library is free software; you can redistribute it and/or
8   modify it under the terms of the GNU Lesser General Public
9   License as published by the Free Software Foundation; either
10   version 2.1 of the License, or (at your option) any later version.
11
12   The GNU C Library is distributed in the hope that it will be useful,
13   but WITHOUT ANY WARRANTY; without even the implied warranty of
14   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15   Lesser General Public License for more details.
16
17   You should have received a copy of the GNU Lesser General Public
18   License along with the GNU C Library.  If not, see
19   <https://www.gnu.org/licenses/>.  */
20
21#include <sysdep.h>
22
23/* Assumptions:
24 *
25 * ARMv8-a, AArch64, Advanced SIMD.
26 * MTE compatible.
27 */
28
/* Incoming arguments.  */
#define srcin		x0
#define cntin		x1

/* Return value; shares x0 with srcin.  */
#define result		x0

/* General-purpose scratch registers.  shift, wtmp and tmp are three names
   for the same register (x4/w4), so only one of them is live at a time.  */
#define src		x2
#define synd		x3
#define shift		x4
#define wtmp		w4
#define tmp		x4
#define cntrem		x5

/* SIMD scratch registers.  qdata/vdata are two views of register 0 and
   vend/dend are two views of register 3.  */
#define qdata		q0
#define vdata		v0
#define vhas_chr	v1
#define vrepmask	v2
#define vend		v3
#define dend		d3
46
/*
   Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte.  For even bytes, bits 0-3 are set if the relevant byte is NUL and
   bits 4-7 are zero.  For odd bytes, bits 4-7 are set likewise and bits 0-3
   are zero, so that adjacent bytes can be merged with a pairwise add.  Since
   the bits in the syndrome reflect the order in which things occur in the
   original string, counting trailing zeros identifies exactly which byte is
   the first NUL.  */
56
/* __strnlen: length of a string, limited to cntin bytes.

   In:   x0 = srcin   start of the string
	 x1 = cntin   maximum number of bytes to examine
   Out:  x0 = result  index of the first NUL byte, or cntin when no NUL
		      lies within the first cntin bytes
   Uses: x2-x5, v0-v3 and the condition flags as scratch.

   Memory is only read in 16-byte chunks aligned to 16 bytes.  No chunk is
   loaded unless it holds at least one byte below the limit, and nothing is
   loaded at all when cntin is zero.  */
ENTRY (__strnlen)
	PTR_ARG (0)
	SIZE_ARG (1)

	/* First chunk: the aligned 16 bytes that contain srcin.  */
	bic	src, srcin, #15			/* Round srcin down to 16.  */
	mov	wtmp, #0xf00f			/* Nibble mask, one per halfword.  */
	cbz	cntin, L(limit_hit)		/* Zero limit: read nothing.  */
	ld1	{vdata.16b}, [src], #16		/* src now names the next chunk.  */
	dup	vrepmask.8h, wtmp		/* Even bytes 0x0f, odd bytes 0xf0.  */
	cmeq	vhas_chr.16b, vdata.16b, #0	/* 0xff in each lane that is NUL.  */
	/* Four syndrome bits per byte.  The register shift below only uses
	   the low six bits, i.e. 4 * (srcin & 15).  This overwrites wtmp,
	   which the dup above has already consumed.  */
	lsl	shift, srcin, #2
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	lsr	synd, synd, shift		/* Discard bytes before srcin.  */
	cbz	synd, L(scan_rest)

	/* NUL found in the first chunk.  Its index is the number of trailing
	   zero bits in the syndrome divided by four.  */
	rbit	synd, synd
	clz	synd, synd
	lsr	result, synd, #2
	cmp	cntin, result
	csel	result, cntin, result, ls	/* result = min (cntin, index).  */
	ret

L(scan_rest):
	sub	tmp, src, srcin			/* Bytes the first chunk covered.  */
	subs	cntrem, cntin, tmp		/* Bytes that remain to be checked.  */
	b.ls	L(limit_hit)			/* Limit ends inside first chunk.  */

	/* The loop handles two chunks per iteration and tests the limit only
	   after the second one.  (cntrem + 15) / 16 is the number of chunks
	   left; when it is odd (bit 4 set) enter at the second half so that
	   no chunk beyond the limit is ever loaded.  */
	add	tmp, cntrem, #15
	tbnz	tmp, #4, L(pair_second)

	.p2align 5
L(pair_first):
	ldr	qdata, [src], #16
	cmeq	vhas_chr.16b, vdata.16b, #0
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbnz	synd, L(locate)			/* Some byte of the chunk is NUL.  */
L(pair_second):
	ldr	qdata, [src], #16
	subs	cntrem, cntrem, #32		/* Account for the whole pair.  */
	cmeq	vhas_chr.16b, vdata.16b, #0	/* Does not alter the flags.  */
	b.ls	L(locate)			/* Limit ends within this chunk.  */
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	fmov	synd, dend
	cbz	synd, L(pair_first)

	/* Here vhas_chr describes the chunk just loaded: it either holds a
	   NUL or is the last chunk within the limit.  Build the
	   four-bits-per-byte syndrome to find the position.  */
L(locate):
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b	/* 128->64 */
	sub	src, src, #16			/* Start of the current chunk.  */
	mov	synd, vend.d[0]
	sub	result, src, srcin		/* Offset of that chunk.  */
	/* On big-endian the bit reversal is skipped (loop chunks are loaded
	   with ldr rather than ld1), so clz alone finds the first NUL.  */
#ifndef __AARCH64EB__
	rbit	synd, synd
#endif
	/* A zero syndrome gives clz = 64, i.e. 16 bytes, which the clamp
	   below reduces to cntin.  */
	clz	synd, synd
	add	result, result, synd, lsr #2
	cmp	cntin, result
	csel	result, cntin, result, ls	/* result = min (cntin, index).  */
	ret

	/* No NUL within the limit: the answer is the limit itself.  */
L(limit_hit):
	mov	result, cntin
	ret

END (__strnlen)
libc_hidden_def (__strnlen)
weak_alias (__strnlen, strnlen)
libc_hidden_def (strnlen)
128