1/* Optimized strchr implementation for PowerPC64.
2   Copyright (C) 1997-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21/* See strlen.S for comments on how the zero-byte detection works.  */
22
23/* char * [r3] strchr (const char *s [r3] , int c [r4] )  */
24
/* STRCHR is the symbol name handed to ENTRY_TOCLESS and END below.  It
   stays strchr unless STRCHR was already defined before this point.  */
25#ifndef STRCHR
26# define STRCHR strchr
27#endif
28
/* strchr for 64-bit PowerPC, scanning one aligned doubleword per iteration.

   In:   r3 = s, r4 = c (only the low 8 bits are used).
   Out:  r3 = address of the first byte equal to c that occurs no later
	 than the first zero byte of s, or 0 if the zero byte comes first.
	 Searching for c == 0 returns the address of the terminator.
   Uses: r0 and r4-r12 as scratch, plus CR0.

   Two byte-wise tests are run on every doubleword x:
     zero test:  (x - 0x01..01) & ~x & 0x80..80      on x itself
     match test: the same expression applied to x XOR (c replicated)
   Each test is nonzero iff some byte is zero, and its least significant
   set bit is exact.  More significant bytes can show false hits caused by
   borrows, which is why the big-endian tails recompute the masks.  */
29ENTRY_TOCLESS (STRCHR)
30	CALL_MCOUNT 2	/* Macro from outside this file; presumably the profiling hook -- confirm.  */
31
32#define rTMP1	r0	/* scratch: first operand of the pending and.  */
33#define rRTN	r3	/* incoming s, outgoing result */
34#define rSTR	r8	/* current word pointer */
35#define rCHR	r4	/* byte we're looking for, spread over the whole word */
36#define rWORD	r5	/* the current word */
37#define rCLZB	rCHR	/* leading zero count, then byte offset; reuses rCHR after the search */
38#define rFEFE	r6	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
39#define r7F7F	r7	/* constant 0x7f7f7f7f7f7f7f7f */
40#define rTMP2	r9	/* scratch: second operand of the pending and.  */
41#define rIGN	r10	/* number of bits we should ignore in the first word */
42#define rMASK	r11	/* mask with the bits to ignore set to 0 */
43#define rTMP3	r12	/* current word XOR rCHR: matching bytes become 0 */
44#define rTMP4	rIGN	/* zero test result; reuses rIGN once the mask is built */
45#define rTMP5	rMASK	/* match test result; reuses rMASK after the first word */
46
/* Set-up: replicate c, build both constants, compute the aligned start and
   the number of leading bits to ignore.  */
47	dcbt	0,rRTN	/* Cache-touch hint for the start of s.  */
48	insrdi	rCHR, rCHR, 8, 48	/* Low 16 bits = c,c.  */
49	li	rMASK, -1	/* All ones, shifted into the ignore mask below.  */
50	insrdi	rCHR, rCHR, 16, 32	/* Low 32 bits = c,c,c,c.  */
51	rlwinm	rIGN, rRTN, 3, 26, 28	/* rIGN = (s & 7) * 8.  */
52	insrdi	rCHR, rCHR, 32, 0	/* All eight bytes = c.  */
53	lis	rFEFE, -0x101
54	lis	r7F7F, 0x7f7f
55	clrrdi	rSTR, rRTN, 3	/* rSTR = s rounded down to a multiple of 8.  */
56	addi	rFEFE, rFEFE, -0x101	/* Sign-extended 0xfefefeff.  */
57	addi	r7F7F, r7F7F, 0x7f7f	/* 0x7f7f7f7f.  */
58	sldi	rTMP1, rFEFE, 32
59	insrdi	r7F7F, r7F7F, 32, 0	/* Copy the low half into the high half.  */
60	add	rFEFE, rFEFE, rTMP1	/* 0xfefefefefefefeff.  */
61/* Test the first doubleword.  It is loaded from the aligned address, so it
   may contain bytes that precede s.  Those bytes are forced to 0xff in the
   word (so they cannot look like the terminator) and in the XOR result (so
   they cannot look like c, which matters when c is 0xff).  */
62	ld	rWORD, 0(rSTR)
63#ifdef __LITTLE_ENDIAN__
64	sld	rMASK, rMASK, rIGN	/* Bytes before s are the low-order ones.  */
65#else
66	srd	rMASK, rMASK, rIGN	/* Bytes before s are the high-order ones.  */
67#endif
68	orc	rWORD, rWORD, rMASK	/* rWORD |= ~rMASK.  */
69	add	rTMP1, rFEFE, rWORD
70	nor	rTMP2, r7F7F, rWORD
71	and.	rTMP4, rTMP1, rTMP2	/* Zero test; CR0 is consumed by beq at L(loopentry).  */
72	xor	rTMP3, rCHR, rWORD
73	orc	rTMP3, rTMP3, rMASK	/* Ignored bytes can never match c.  */
74	b	L(loopentry)
75
76/* The loop.  It is software-pipelined: on entry rTMP1/rTMP2 hold the
   operands of the match test for the doubleword loaded one iteration
   earlier, a doubleword already known to contain no zero byte.  */
77
78L(loop):
79	ldu	rWORD, 8(rSTR)	/* Next doubleword; rSTR is advanced by 8.  */
80	and.	rTMP5, rTMP1, rTMP2	/* Match test of the PREVIOUS doubleword.  */
81/* Test for 0.  The add and nor below leave CR0 untouched.  */
82	add	rTMP1, rFEFE, rWORD /* x - 0x0101010101010101.  */
83	nor	rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f7f7f7f7f) == ~x & 0x8080808080808080.  */
84	bne	L(foundit)	/* Previous doubleword contains c.  */
85	and.	rTMP4, rTMP1, rTMP2 /* (x - 0x0101010101010101) & ~x & 0x8080808080808080.  */
86/* Start test for the bytes we're looking for.  */
87	xor	rTMP3, rCHR, rWORD
88L(loopentry):
89	add	rTMP1, rFEFE, rTMP3
90	nor	rTMP2, r7F7F, rTMP3
91	beq	L(loop)	/* CR0 is from the zero test: no zero byte, keep going.  */
92
93/* There is a zero byte in the word, but may also be a matching byte (either
94   before or after the zero byte).  In fact, we may be looking for a
95   zero byte, in which case we return a match.  */
96	and.	rTMP5, rTMP1, rTMP2	/* Match test of the current doubleword.  */
97	li	rRTN, 0	/* Preset the NULL result; li leaves CR0 alone.  */
98	beqlr	/* No byte equal to c before the end: return NULL.  */
99/* At this point:
100   rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
101   rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
102   But there may be false matches in the next most significant byte from
103   a true match due to carries.  This means we need to recalculate the
104   matches using a longer method for big-endian.  */
105#ifdef __LITTLE_ENDIAN__
/* Little-endian: the earliest byte is the least significant one, where the
   quick test is exact.  (m - 1) & ~m yields ones strictly below the lowest
   set bit of m, so comparing the two results orders the first match against
   the first zero byte.  */
106	addi	rTMP1, rTMP5, -1
107	andc	rTMP1, rTMP1, rTMP5	/* Ones below the first match bit.  */
108	cntlzd	rCLZB, rTMP1
109	addi	rTMP2, rTMP4, -1
110	andc	rTMP2, rTMP2, rTMP4	/* Ones below the first zero-byte bit.  */
111	cmpld	rTMP1, rTMP2
112	bgtlr	/* Match lies after the terminator: return NULL.  */
113	subfic	rCLZB, rCLZB, 64-7	/* 57 - clz = 8 * byte index of the match.  */
114#else
115/* I think we could reduce this by two instructions by keeping the "nor"
116   results from the loop for reuse here.  See strlen.S tail.  Similarly
117   one instruction could be pruned from L(foundit).  */
/* Big-endian: the earliest byte is the most significant one, so exact masks
   are rebuilt.  (b & 0x7f) + 0x7f sets bit 7 iff the low seven bits of b are
   nonzero and never carries out of the byte.  OR-ing with b | 0x7f and
   inverting leaves 0x80 exactly where b == 0.  */
118	and	rFEFE, r7F7F, rWORD	/* rFEFE serves as scratch from here on.  */
119	or	rTMP5, r7F7F, rWORD
120	and	rTMP1, r7F7F, rTMP3
121	or	rTMP4, r7F7F, rTMP3
122	add	rFEFE, rFEFE, r7F7F
123	add	rTMP1, rTMP1, r7F7F
124	nor	rWORD, rTMP5, rFEFE	/* 0x80 in every zero byte of the word.  */
125	nor	rTMP2, rTMP4, rTMP1	/* 0x80 in every byte equal to c.  */
126	cntlzd	rCLZB, rTMP2	/* 8 * byte index of the first match.  */
127	cmpld	rWORD, rTMP2
128	bgtlr	/* Terminator is at a more significant (earlier) byte: return NULL.  */
129#endif
130	srdi	rCLZB, rCLZB, 3	/* Bit offset to byte offset.  */
131	add	rRTN, rSTR, rCLZB
132	blr
133
/* A match was found in the doubleword loaded one iteration ago.  rSTR has
   already been advanced past it by the ldu, so 8 must be taken off again.
   rTMP3 still holds the XOR of that earlier doubleword because the xor for
   the new one has not run yet.  */
134L(foundit):
135#ifdef __LITTLE_ENDIAN__
136	addi	rTMP1, rTMP5, -1
137	andc	rTMP1, rTMP1, rTMP5	/* Ones below the first match bit.  */
138	cntlzd	rCLZB, rTMP1
139	subfic	rCLZB, rCLZB, 64-7-64	/* 8 * byte index - 64: folds in the -8 bytes.  */
140	sradi	rCLZB, rCLZB, 3	/* Arithmetic shift keeps the negative offset.  */
141#else
142	and	rTMP1, r7F7F, rTMP3
143	or	rTMP4, r7F7F, rTMP3
144	add	rTMP1, rTMP1, r7F7F
145	nor	rTMP2, rTMP4, rTMP1	/* 0x80 in every byte equal to c.  */
146	cntlzd	rCLZB, rTMP2
147	subi	rSTR, rSTR, 8	/* Step back to the doubleword that matched.  */
148	srdi	rCLZB, rCLZB, 3
149#endif
150	add	rRTN, rSTR, rCLZB
151	blr
152END (STRCHR)
153
/* Both macros are defined outside this file.  They name strchr literally
   rather than STRCHR.  NOTE(review): presumably these publish index as a
   weak alias of strchr and emit the library-internal hidden definition --
   confirm against the macro definitions, and check what happens when STRCHR
   is overridden.  */
154weak_alias (strchr, index)
155libc_hidden_builtin_def (strchr)
156