/* strchrnul - find a character or nul in a string

   Copyright (C) 2014-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD.
 * MTE compatible.
 */

#define srcin		x0
#define chrin		w1
#define result		x0

#define src		x2
#define tmp1		x1
#define tmp2		x3
#define tmp2w		w3

#define vrepchr		v0
#define vdata		v1
#define qdata		q1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask	v4
#define vend		v5
#define dend		d5

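/* vrepmask holds the repeated 0xf00f pattern that keeps the low nibble of
   even bytes and the high nibble of odd bytes; vend/dend is the reduced
   syndrome viewed as a 64-bit scalar.  vhas_nul is defined but not used in
   this routine.  */
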
/* Core algorithm:

   For each 16-byte chunk we calculate a 64-bit syndrome value with four bits
   per byte. For even bytes, bits 0-3 are set if the relevant byte matched the
   requested character or the byte is NUL. Bits 4-7 must be zero. Bits 4-7 are
   set likewise for odd bytes so that adjacent bytes can be merged. Since the
   bits in the syndrome reflect the order in which things occur in the original
   string, counting trailing zeros identifies exactly which byte matched.  */
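
/* Illustrative only, not part of the build: a rough C sketch of the syndrome
   idea above.  Here "chunk" stands for the current 16-byte block and "c" for
   the character being searched for; both are stand-ins rather than symbols
   used below.

     uint64_t syndrome = 0;
     for (int i = 0; i < 16; i++)
       if (chunk[i] == c || chunk[i] == '\0')
         syndrome |= (uint64_t) 0xf << (4 * i);
     if (syndrome != 0)
       return chunk + (__builtin_ctzll (syndrome) >> 2);

   The code below builds the syndrome with cmeq/cmhs, a nibble mask and addp
   rather than a byte loop, but decodes it the same way: rbit+clz is the
   counterpart of counting trailing zeros.  */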

ENTRY (__strchrnul)
	PTR_ARG (0)
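	/* Work on 16-byte aligned chunks: the first load starts at the
	   enclosing 16-byte boundary, so on MTE it stays within the same tag
	   granule as srcin, and the bytes before srcin are discarded by the
	   shift of the syndrome below.  */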
	bic	src, srcin, 15
	dup	vrepchr.16b, chrin
	ld1	{vdata.16b}, [src]
	mov	tmp2w, 0xf00f
	dup	vrepmask.8h, tmp2w
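	/* cmeq leaves 0xff in bytes equal to the search character and 0
	   elsewhere; cmhs (unsigned >=) against the data then also sets any
	   byte whose data value is 0, so vhas_chr flags "match or NUL" with
	   a single extra compare.  */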
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	lsl	tmp2, srcin, 2
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	tmp1, dend
	lsr	tmp1, tmp1, tmp2	/* Mask padding bits.  */
	cbz	tmp1, L(loop)

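	/* Hit in the first chunk: the lowest set bit of the syndrome is four
	   times the offset of the first matching or NUL byte, so rbit+clz
	   recovers the byte index relative to srcin.  */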
	rbit	tmp1, tmp1
	clz	tmp1, tmp1
	add	result, srcin, tmp1, lsr 2
	ret

	.p2align 4
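	/* Main loop: one 16-byte chunk per iteration.  umaxp folds the
	   comparison vector to 64 bits so a single cbz can test "no match or
	   NUL in this chunk" without building the full syndrome.  */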
L(loop):
	ldr	qdata, [src, 16]!
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	cmhs	vhas_chr.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_chr.16b, vhas_chr.16b
	fmov	tmp1, dend
	cbz	tmp1, L(loop)

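	/* A match or NUL is somewhere in this chunk: rebuild the masked
	   4-bit-per-byte syndrome and convert the position of the first hit
	   into a byte offset from the chunk base in src.  */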
	and	vhas_chr.16b, vhas_chr.16b, vrepmask.16b
	addp	vend.16b, vhas_chr.16b, vhas_chr.16b		/* 128->64 */
	fmov	tmp1, dend
#ifndef __AARCH64EB__
	rbit	tmp1, tmp1
#endif
	clz	tmp1, tmp1
	add	result, src, tmp1, lsr 2
	ret

END (__strchrnul)
weak_alias (__strchrnul, strchrnul)
