1/* strchr with SSE2 without bsf
2   Copyright (C) 2011-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#if IS_IN (libc)
20
21# include <sysdep.h>
22# include "asm-syntax.h"
23
24	atom_text_section
/* __strchr_sse2_no_bsf: strchr built on SSE2 byte compares.  The first
   hit inside a block is located with test/jnz chains instead of the
   bsf instruction.

   As used below:
     %rdi  in:  address of the string to scan.
     %esi  in:  character to find; only its low byte is compared.
     %rax  out: address of the first byte equal to that character, or 0
		when a NUL byte is reached first.  When the character is
		itself 0 the address of the terminating NUL is returned,
		because at every position the match bit is tested before
		the NUL bit.
   Overwrites %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3 and the flags.

   The string is read in aligned 16-byte blocks.  For each block two
   16-bit masks are built, bit i describing byte i of the block:
     %eax  bytes equal to the character,
     %edx  bytes equal to NUL.
   ENTRY, END and L() come from the included headers, whose definitions
   are not visible in this file.  */
25ENTRY (__strchr_sse2_no_bsf)
	/* First block.  The instructions of several independent steps are
	   interleaved here:
	   - broadcast the low byte of %esi to all 16 bytes of %xmm1;
	   - round %rdi down to a 16-byte boundary so movdqa can be used;
	   - build in %esi a mask that discards the bytes of the block
	     that lie before the start of the string.  */
26	movd	%esi, %xmm1		/* Low dword of xmm1 = c.  */
27	movq	%rdi, %rcx		/* Keep the unaligned start address.  */
28	punpcklbw %xmm1, %xmm1		/* Bytes 0-1 of xmm1 = low byte of c.  */
29	andq	$~15, %rdi		/* rdi = start rounded down to 16.  */
30	pxor	%xmm2, %xmm2		/* xmm2 = 0, used for the NUL compare.  */
31	punpcklbw %xmm1, %xmm1		/* Bytes 0-3 of xmm1 = low byte of c.  */
32	orl	$0xffffffff, %esi	/* esi = all ones; c is already in xmm1.  */
33	movdqa	(%rdi), %xmm0		/* Load the block containing the start.  */
34	pshufd	$0, %xmm1, %xmm1	/* Copy dword 0 to all four dwords.  */
35	subq	%rdi, %rcx		/* rcx = start - aligned start (0..15).  */
36	movdqa	%xmm0, %xmm3		/* Second copy for the NUL compare.  */
37	leaq	16(%rdi), %rdi		/* rdi = address of the next block.  */
38	pcmpeqb	%xmm1, %xmm0		/* 0xff in every byte equal to c.  */
39	pcmpeqb	%xmm2, %xmm3		/* 0xff in every byte equal to NUL.  */
40	shl	%cl, %esi		/* Clear the low (start & 15) mask bits.  */
41	pmovmskb %xmm0, %eax		/* eax = match mask.  */
42	pmovmskb %xmm3, %edx		/* edx = NUL mask.  */
43	andl	%esi, %eax		/* Ignore matches before the start.  */
44	andl	%esi, %edx		/* Ignore NUL bytes before the start.  */
	/* A match goes to L(matches), which still has the NUL mask in %edx
	   and decides which of the two comes first.  A NUL without any
	   match ends the search.  */
45	test	%eax, %eax
46	jnz	L(matches)
47	test	%edx, %edx
48	jnz	L(return_null)
49
	/* Main loop: one aligned 16-byte block per iteration.  %rdi is
	   advanced before the masks are examined, so on exit it points
	   16 bytes past the block just tested.  */
50L(loop):
51	movdqa	(%rdi), %xmm0
52	leaq	16(%rdi), %rdi
53	movdqa	%xmm0, %xmm3
54	pcmpeqb	%xmm1, %xmm0
55	pcmpeqb	%xmm2, %xmm3
56	pmovmskb %xmm0, %eax
57	pmovmskb %xmm3, %edx
	/* Merge both masks so a single branch covers "nothing found".
	   This destroys the NUL mask in %edx.  */
58	or	%eax, %edx
59	jz	L(loop)
60
	/* Something was found.  Rebuild the NUL mask from %xmm3, which
	   still holds the compare result.  With no match in this block
	   the NUL came first, and execution falls through the alignment
	   padding into L(return_null).  */
61	pmovmskb %xmm3, %edx
62	test	%eax, %eax
63	jnz	L(matches)
64
65/* Return NULL: the terminator was reached before any match.  */
66	.p2align 4
67L(return_null):
	/* NOTE(review): xor %eax, %eax would also clear all of %rax and
	   has a shorter encoding; left unchanged here.  */
68	xor	%rax, %rax
69	ret
70
71L(matches):
72	/* There is a match.  First find where NULL is.  %rdi is moved back
	   to the base of the block the masks describe.  With an empty NUL
	   mask only the match mask has to be scanned (match_case1).  */
73	leaq	-16(%rdi), %rdi
74	test	%edx, %edx
75	jz	L(match_case1)
76
	/* Case 2: the block holds both a match and a NUL.  Walk the bytes
	   in address order, testing the match bit before the NUL bit at
	   each position.  Both bits can be set for the same byte only
	   when the character searched for is 0.  */
77	.p2align 4
78L(match_case2):
	/* No match in bytes 0-7: continue with the high half.  */
79	test	%al, %al
80	jz	L(match_high_case2)
81
	/* Match somewhere in bytes 0-3: resolve it in match_case2_4.  */
82	mov	%al, %cl
83	and	$15, %cl
84	jnz	L(match_case2_4)
85
	/* The match is in bytes 4-7, so a NUL in bytes 0-3 precedes it.  */
86	mov	%dl, %ch
87	and	$15, %ch
88	jnz	L(return_null)
89
	/* Bytes 4, 5 and 6 in turn.  */
90	test	$0x10, %al
91	jnz	L(Exit5)
92	test	$0x10, %dl
93	jnz	L(return_null)
94	test	$0x20, %al
95	jnz	L(Exit6)
96	test	$0x20, %dl
97	jnz	L(return_null)
98	test	$0x40, %al
99	jnz	L(Exit7)
100	test	$0x40, %dl
101	jnz	L(return_null)
	/* Only bit 7 of %al can still be set, and no NUL precedes it:
	   return the address of byte 7.  */
102	lea	7(%rdi), %rax
103	ret
104
	/* Case 2, match known to be in bytes 0-3: test bytes 0, 1 and 2.  */
105	.p2align 4
106L(match_case2_4):
107	test	$0x01, %al
108	jnz	L(Exit1)
109	test	$0x01, %dl
110	jnz	L(return_null)
111	test	$0x02, %al
112	jnz	L(Exit2)
113	test	$0x02, %dl
114	jnz	L(return_null)
115	test	$0x04, %al
116	jnz	L(Exit3)
117	test	$0x04, %dl
118	jnz	L(return_null)
	/* The match must be byte 3, with no NUL in bytes 0-2.  */
119	lea	3(%rdi), %rax
120	ret
121
	/* Case 2, no match in bytes 0-7.  Any NUL in bytes 0-7 therefore
	   comes first.  Otherwise bytes 8-15 are handled like the low
	   half, using %ah and %dh.  */
122	.p2align 4
123L(match_high_case2):
124	test	%dl, %dl
125	jnz	L(return_null)
126
	/* Match somewhere in bytes 8-11: resolve it in match_case2_12.  */
127	mov	%ah, %cl
128	and	$15, %cl
129	jnz	L(match_case2_12)
130
	/* The match is in bytes 12-15, so a NUL in bytes 8-11 precedes it.  */
131	mov	%dh, %ch
132	and	$15, %ch
133	jnz	L(return_null)
134
	/* Bytes 12, 13 and 14 in turn.  */
135	test	$0x10, %ah
136	jnz	L(Exit13)
137	test	$0x10, %dh
138	jnz	L(return_null)
139	test	$0x20, %ah
140	jnz	L(Exit14)
141	test	$0x20, %dh
142	jnz	L(return_null)
143	test	$0x40, %ah
144	jnz	L(Exit15)
145	test	$0x40, %dh
146	jnz	L(return_null)
	/* Only bit 7 of %ah can still be set: return the address of
	   byte 15.  */
147	lea	15(%rdi), %rax
148	ret
149
	/* Case 2, match known to be in bytes 8-11: test bytes 8, 9 and 10.  */
150	.p2align 4
151L(match_case2_12):
152	test	$0x01, %ah
153	jnz	L(Exit9)
154	test	$0x01, %dh
155	jnz	L(return_null)
156	test	$0x02, %ah
157	jnz	L(Exit10)
158	test	$0x02, %dh
159	jnz	L(return_null)
160	test	$0x04, %ah
161	jnz	L(Exit11)
162	test	$0x04, %dh
163	jnz	L(return_null)
	/* The match must be byte 11, with no NUL in bytes 8-10.  */
164	lea	11(%rdi), %rax
165	ret
166
	/* Case 1: the block holds a match and no NUL, so only the lowest
	   set bit of the match mask has to be found.  */
167	.p2align 4
168L(match_case1):
	/* No match in bytes 0-7: scan the high half instead.  */
169	test	%al, %al
170	jz	L(match_high_case1)
171
172	test	$0x01, %al
173	jnz	L(Exit1)
174	test	$0x02, %al
175	jnz	L(Exit2)
176	test	$0x04, %al
177	jnz	L(Exit3)
178	test	$0x08, %al
179	jnz	L(Exit4)
180	test	$0x10, %al
181	jnz	L(Exit5)
182	test	$0x20, %al
183	jnz	L(Exit6)
184	test	$0x40, %al
185	jnz	L(Exit7)
	/* %al is nonzero and bits 0-6 are clear, so the match is byte 7.  */
186	lea	7(%rdi), %rax
187	ret
188
	/* Case 1, match in bytes 8-15: the same scan on %ah.  */
189	.p2align 4
190L(match_high_case1):
191	test	$0x01, %ah
192	jnz	L(Exit9)
193	test	$0x02, %ah
194	jnz	L(Exit10)
195	test	$0x04, %ah
196	jnz	L(Exit11)
197	test	$0x08, %ah
198	jnz	L(Exit12)
199	test	$0x10, %ah
200	jnz	L(Exit13)
201	test	$0x20, %ah
202	jnz	L(Exit14)
203	test	$0x40, %ah
204	jnz	L(Exit15)
	/* Bits 0-6 of %ah are clear, so the match is byte 15.  */
205	lea	15(%rdi), %rax
206	ret
207
	/* Exit stubs: L(ExitN) returns the address of byte N-1 of the
	   current block.  There is no Exit8 or Exit16; bytes 7 and 15 are
	   returned by the fall-through lea instructions above.  */
208	.p2align 4
209L(Exit1):
210	lea	(%rdi), %rax
211	ret
212
213	.p2align 4
214L(Exit2):
215	lea	1(%rdi), %rax
216	ret
217
218	.p2align 4
219L(Exit3):
220	lea	2(%rdi), %rax
221	ret
222
223	.p2align 4
224L(Exit4):
225	lea	3(%rdi), %rax
226	ret
227
228	.p2align 4
229L(Exit5):
230	lea	4(%rdi), %rax
231	ret
232
233	.p2align 4
234L(Exit6):
235	lea	5(%rdi), %rax
236	ret
237
238	.p2align 4
239L(Exit7):
240	lea	6(%rdi), %rax
241	ret
242
243	.p2align 4
244L(Exit9):
245	lea	8(%rdi), %rax
246	ret
247
248	.p2align 4
249L(Exit10):
250	lea	9(%rdi), %rax
251	ret
252
253	.p2align 4
254L(Exit11):
255	lea	10(%rdi), %rax
256	ret
257
258	.p2align 4
259L(Exit12):
260	lea	11(%rdi), %rax
261	ret
262
263	.p2align 4
264L(Exit13):
265	lea	12(%rdi), %rax
266	ret
267
268	.p2align 4
269L(Exit14):
270	lea	13(%rdi), %rax
271	ret
272
273	.p2align 4
274L(Exit15):
275	lea	14(%rdi), %rax
276	ret
277
278END (__strchr_sse2_no_bsf)
279#endif
280