/* strrchr: find the last instance of a character in a string.

   Copyright (C) 2014-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 * MTE compatible.
 */

/* Arguments and results.  The result is returned in the same register
   that carries the incoming string pointer.  */
#define srcin		x0
#define chrin		w1
#define result		x0

/* General-purpose scratch registers.  Several names deliberately share
   one register:
     - tmp, wtmp and synd are all x3/w3, so writing any one of them
       destroys the other two;
     - shift and src_match are both x4; shift is only needed while the
       first, possibly unaligned, chunk is handled, before src_match is
       first written.  */
#define src		x2
#define tmp		x3
#define wtmp		w3
#define synd		x3
#define shift		x4
#define src_match	x4
#define nul_match	x5
#define chr_match	x6

/* Vector registers.  vrepmask2, vend and dend all name v5/d5: the
   second mask is consumed before the packed syndrome is written over
   it.  */
#define vrepchr		v0
#define vdata		v1
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask	v4
#define vrepmask2	v5
#define vend		v5
#define dend		d5

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value, with
   four bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  Within each four-bit group, bits 0-1
   are set if the relevant byte matched the requested character; bits
   2-3 are set if the relevant byte matched the NUL end of string.  */

/* char *strrchr (const char *s, int c)

   In:    x0 (srcin)  = start of the NUL-terminated string.
          w1 (chrin)  = character to look for; only its low byte is
                        used (it is replicated into byte lanes).
   Out:   x0 (result) = address of the last byte equal to chrin that
                        lies at or before the terminating NUL, or 0 if
                        there is none.  When chrin is 0 the terminating
                        NUL itself is the match.
   Uses:  x2-x6, v0-v5 and the condition flags.  No stack is used and
          no other function is called.

   Memory is only ever read as whole 16-byte chunks at 16-byte aligned
   addresses, starting with the chunk that contains srcin.

   Structure:
     first chunk - only when srcin is unaligned; bytes before srcin are
                   removed from the syndrome.
     loop1       - no match seen yet: skip chunks that contain neither
                   the character nor a NUL.
     loop2       - a match has been seen: remember the most recent
                   matching chunk until the chunk holding the NUL.
     tail        - the NUL is in the current chunk and no earlier chunk
                   matched.  */
ENTRY(strrchr)
	/* PTR_ARG is a macro defined outside this file.
	   NOTE(review): by its name it prepares pointer argument 0 (x0);
	   confirm against its definition.  */
	PTR_ARG (0)
	bic	src, srcin, 15			/* src = srcin rounded down to 16.  */
	dup	vrepchr.16b, chrin		/* Search byte in all 16 lanes.  */
	mov	wtmp, 0x3003
	dup	vrepmask.8h, wtmp		/* Even bytes 0x03, odd bytes 0x30.  */
	tst	srcin, 15
	beq	L(loop1)			/* Aligned: no partial first chunk.  */

	/* Unaligned start: examine the aligned chunk that contains srcin.  */
	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	mov	wtmp, 0xf00f
	dup	vrepmask2.8h, wtmp		/* Even bytes 0x0f, odd bytes 0xf0.  */
	/* Insert the character-match bits selected by vrepmask into the
	   NUL-match vector, then keep the low nibble of every even byte
	   and the high nibble of every odd byte.  */
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	/* Adding each even/odd byte pair packs two nibbles into one byte;
	   the low 64 bits of vend then hold the syndrome.  */
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	/* A register-specified shift uses the amount modulo 64, so this is
	   effectively 4 * (srcin & 15): four bits per byte to discard.  */
	lsl	shift, srcin, 2
	fmov	synd, dend
	lsr	synd, synd, shift		/* Clear the nibbles of the bytes...  */
	lsl	synd, synd, shift		/* ...that precede srcin.  */
	ands	nul_match, synd, 0xcccccccccccccccc
	bne	L(tail)				/* String ends in this chunk.  */
	cbnz	synd, L(loop2)			/* Match, no NUL: go and record it.  */

	/* No match seen so far: find the first chunk that holds either the
	   character or a NUL.  */
	.p2align 5
L(loop1):
	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Unsigned vhas_chr >= vdata is true when the byte matched
	   (0xff >= anything) or when the byte is NUL (anything >= 0), so a
	   single compare flags both conditions.  */
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop1)

	/* Something was found: build the exact syndrome for this chunk.  */
	cmeq	vhas_nul.16b, vdata.16b, 0
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	/* Only the low nibble of each odd byte is cleared.  The high nibble
	   of an even byte is non-zero only when that byte is NUL; in the
	   addition it can then only disturb the nibble of the odd byte
	   that follows, which lies beyond the first NUL and is masked off
	   before the result is used.  */
	bic	vhas_nul.8h, 0x0f, lsl 8
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	ands	nul_match, synd, 0xcccccccccccccccc
	beq	L(loop2)			/* Match without NUL: keep going.  */

	/* The NUL is in the current chunk and no earlier chunk matched.  */
L(tail):
	/* nul_match - 1 has every bit below the lowest NUL bit set.  That
	   includes the character bits of the NUL byte's own nibble, so a
	   search for NUL finds the terminator.  */
	sub	nul_match, nul_match, 1
	and	chr_match, synd, 0x3333333333333333
	ands	chr_match, chr_match, nul_match	/* Z set: nothing matched.  */
	sub	result, src, 1			/* Last byte of the current chunk.  */
	clz	tmp, chr_match
	sub	result, result, tmp, lsr 2	/* Back one byte per empty nibble.  */
	csel	result, result, xzr, ne		/* No match: return 0.  */
	ret

	/* A match has been seen.  Each iteration first records the previous
	   chunk's syndrome, if it contained a match, together with the
	   address just past that chunk, then examines the next chunk.  */
	.p2align 4
L(loop2):
	cmp	synd, 0
	csel	src_match, src, src_match, ne
	csel	chr_match, synd, chr_match, ne
	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	/* A pairwise maximum is sufficient while looping.  Without a NUL a
	   pair can only hold 0x03 (even byte) and 0x30 (odd byte), so the
	   maximum keeps the later match.  A NUL sets bits 6-7 of its byte,
	   which survive the maximum and are caught by the test below.  */
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	tst	synd, 0xcccccccccccccccc
	beq	L(loop2)

	/* The string ends in this chunk: compute its exact syndrome.  */
	bic	vhas_nul.8h, 0x0f, lsl 8
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	and	nul_match, synd, 0xcccccccccccccccc
	sub	nul_match, nul_match, 1
	and	tmp, synd, 0x3333333333333333
	ands	tmp, tmp, nul_match		/* Matches not beyond the NUL.  */
	csel	chr_match, tmp, chr_match, ne	/* Use a match in this chunk...  */
	csel	src_match, src, src_match, ne	/* ...else the remembered one.  */
	sub	src_match, src_match, 1		/* Last byte of the chosen chunk.  */
	clz	tmp, chr_match
	sub	result, src_match, tmp, lsr 2	/* Back one byte per empty nibble.  */
	ret

END(strrchr)
/* Both macros are defined outside this file.
   NOTE(review): by name and arguments, weak_alias publishes rindex as
   a weak alias of strrchr, and libc_hidden_builtin_def emits the
   hidden definition used for calls from inside the library; confirm
   against their definitions.  */
weak_alias (strrchr, rindex)
libc_hidden_builtin_def (strrchr)