/* wcschr with SSE2
2   Copyright (C) 2011-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21	.text
/* wchar_t *__wcschr (const wchar_t *wcs, wchar_t wc)

   Return the address of the first 32-bit element of WCS equal to WC, or
   0 when the terminating zero element is reached first.  Searching for
   zero itself yields the address of the terminator.

   Register roles:
	%rdi	scan pointer (first argument on entry)
	%esi	WC (second argument; only the low 32 bits take part)
	%xmm1	WC replicated into all four dword lanes
	%xmm2	zero before each compare, terminator lanes after it
	%xmm0	the 16 bytes under test, then the WC lanes
	%eax	byte mask of WC lanes; %rax is the result on return
	%edx	byte mask of terminator lanes
	%ecx	misalignment of the start address, later scratch

   Once the pointer has been rounded down, lanes are compared as 4-byte
   units on a 16-byte grid, so WCS is expected to be 4-byte aligned.  */
ENTRY (__wcschr)

	/* Broadcast WC into every dword of %xmm1 and clear %xmm2 for the
	   terminator compare.  */
	movd	%esi, %xmm1
	pxor	%xmm2, %xmm2
	punpckldq %xmm1, %xmm1
	punpckldq %xmm1, %xmm1

	/* A 16-byte load that starts beyond offset 48 of a 64-byte line
	   would run into the next line; such starts are handled with an
	   aligned load instead.  */
	mov	%edi, %ecx
	and	$63, %ecx
	cmp	$48, %ecx
	ja	L(split_start)

	/* First 16 bytes, read from the unaligned start.  */
	movdqu	(%rdi), %xmm0
	add	$16, %rdi
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(found)

	/* Round down onto the 16-byte grid.  The next block may overlap
	   bytes that were already examined above.  */
	and	$-16, %rdi

	movdqa	(%rdi), %xmm0
	add	$16, %rdi
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(found)

	jmp	L(aligned_loop)

L(split_start):
	/* %ecx = offset of the start inside its 16-byte block; test the
	   whole aligned block that contains the start.  */
	and	$15, %ecx
	and	$-16, %rdi
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax

	/* Shift out the mask bits of the bytes that precede the start.  */
	sar	%cl, %edx
	sar	%cl, %eax
	test	%eax, %eax
	jz	L(split_no_wc)

	/* WC is present; it only counts if no terminator comes first.  */
	bsf	%eax, %eax
	test	%edx, %edx
	jz	L(split_hit)
	bsf	%edx, %edx
	cmp	%edx, %eax
	ja	L(not_found)

L(split_hit):
	/* Result = aligned base + start offset + byte index of the hit.  */
	add	%rcx, %rdi
	add	%rdi, %rax
	ret

	.p2align 4
L(split_no_wc):
	test	%edx, %edx
	jnz	L(not_found)
	/* Lanes in front of the start may have matched zero, so %xmm2
	   has to be cleared again before the loop reuses it.  */
	pxor	%xmm2, %xmm2

	add	$16, %rdi

	.p2align 4
/* Main loop over 16-byte aligned blocks, four blocks per iteration.  */
L(aligned_loop):
	.rept	4
	movdqa	(%rdi), %xmm0
	add	$16, %rdi
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(found)
	.endr
	jmp	L(aligned_loop)

	.p2align 4
L(found):
	/* %edx was merged with %eax by the OR above, so fetch the pure
	   terminator mask once more.  */
	pmovmskb %xmm2, %edx
	test	%eax, %eax
	jz	L(not_found)
	bsf	%eax, %eax
	test	%edx, %edx
	jz	L(hit)
	bsf	%edx, %ecx
	cmp	%ecx, %eax
	ja	L(not_found)
L(hit):
	/* %rdi was already advanced past the block that holds the hit.  */
	lea	-16(%rdi, %rax), %rax
	ret

	.p2align 4
L(not_found):
	xor	%eax, %eax
	ret

END (__wcschr)
152
/* Symbol bookkeeping for the routine above.  These macros are not
   defined in this file (presumably they come in through <sysdep.h> or a
   header it pulls in -- confirm).  Going by their names and arguments:
   weak_alias appears to publish `wcschr' as a weak alias of `__wcschr',
   and the libc_hidden_* pair appears to set up the library-internal
   (hidden) references for both names.  */
libc_hidden_def(__wcschr)
weak_alias (__wcschr, wcschr)
libc_hidden_weak (wcschr)
156