1/* Copyright (C) 2014-2021 Free Software Foundation, Inc.
2   This file is part of the GNU C Library.
3
4   The GNU C Library is free software; you can redistribute it and/or
5   modify it under the terms of the GNU Lesser General Public
6   License as published by the Free Software Foundation; either
7   version 2.1 of the License, or (at your option) any later version.
8
9   The GNU C Library is distributed in the hope that it will be useful,
10   but WITHOUT ANY WARRANTY; without even the implied warranty of
11   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
12   Lesser General Public License for more details.
13
14   You should have received a copy of the GNU Lesser General Public
15   License along with the GNU C Library.  If not, see
16   <https://www.gnu.org/licenses/>.  */
17
18#ifdef ANDROID_CHANGES
19# include "machine/asm.h"
20# include "machine/regdef.h"
21#elif _LIBC
22# include <sysdep.h>
23# include <regdef.h>
24# include <sys/asm.h>
25#elif defined _COMPILING_NEWLIB
26# include "machine/asm.h"
27# include "machine/regdef.h"
28#else
29# include <regdef.h>
30# include <sys/asm.h>
31#endif
32
33/* Technically strcmp should not read past the end of the strings being
34   compared.  We will read a full word that may contain excess bits beyond
35   the NUL string terminator but unless ENABLE_READAHEAD is set, we will not
36   read the next word after the end of string.  Setting ENABLE_READAHEAD will
37   improve performance but is technically illegal based on the definition of
38   strcmp.

   DELAY_READ is written after each STRCMP32 in the word loop, so under
   ".set noreorder" it occupies the delay slot of that macro's final
   branch to L(returnzero).  When it expands to nop, the next word is not
   loaded if that branch is taken.  When it expands to nothing, the first
   lw of the following STRCMP32 lands in the delay slot and is executed
   even when the branch is taken.  */
39#ifdef ENABLE_READAHEAD
40# define DELAY_READ
41#else
42# define DELAY_READ nop
43#endif
44
45/* Testing on a little endian machine showed using CLZ was a
46   performance loss, so we are not turning it on by default.

   USE_CLZ is defined only when ENABLE_CLZ is defined and __mips_isa_rev
   is greater than 1.  It selects the clz/wsbh/rotr/rotrv based exit code
   in L(worddiff) instead of the byte-by-byte comparison.  (Presumably the
   revision check guards the availability of those instructions.)  */
47#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1)
48# define USE_CLZ
49#endif
50
51/* Some asm.h files do not have the L macro definition.
   Fallback: L(label) pastes a local-label prefix onto LABEL, "$L" when
   _MIPS_SIM is _ABIO32 and ".L" otherwise.  */
52#ifndef L
53# if _MIPS_SIM == _ABIO32
54#  define L(label) $L ## label
55# else
56#  define L(label) .L ## label
57# endif
58#endif
59
60/* Some asm.h files do not have the PTR_ADDIU macro definition.
   Fallback: daddiu when USE_DOUBLE is defined, addiu otherwise.  This
   file uses PTR_ADDIU to advance the string pointers a0 and a1.  */
61#ifndef PTR_ADDIU
62# ifdef USE_DOUBLE
63#  define PTR_ADDIU       daddiu
64# else
65#  define PTR_ADDIU       addiu
66# endif
67#endif
68
69/* Allow the routine to be named something else if desired.
   STRCMP_NAME is the symbol passed to LEAF, END and (in the _LIBC build)
   libc_hidden_builtin_def below; it defaults to strcmp.  */
70#ifndef STRCMP_NAME
71# define STRCMP_NAME strcmp
72#endif
73
/* STRCMP_NAME entry, alignment dispatch and word-at-a-time loop.

   In:   a0, a1 = addresses of the two strings.
   Out:  v0 = result.  This part returns 0 through L(returnzero); all
         other results are produced by L(worddiff) and the byte loop.
   Registers written here: v0, v1, t0, t1, t8, t9, a0, a1.

   The whole routine is assembled with ".set noreorder", so the
   instruction written after each branch or jump is its delay slot and
   is executed whether or not the branch is taken.  */
74#ifdef ANDROID_CHANGES
75LEAF(STRCMP_NAME, 0)
76#else
77LEAF(STRCMP_NAME)
78#endif
79	.set	nomips16
80	.set	noreorder
81
/* If either pointer has one of its low two bits set, compare byte by
   byte.  The first lui below is the delay slot of this bne; it only
   writes t8, which the byte loop reloads before reading.  */
82	or	t0, a0, a1
83	andi	t0,0x3
84	bne	t0, zero, L(byteloop)
85
86/* Both strings are 4 byte aligned at this point.  */
87
/* Constants for the zero-byte test: t8 = 0x01010101, t9 = 0x7f7f7f7f.  */
88	lui	t8, 0x0101
89	ori	t8, t8, 0x0101
90	lui	t9, 0x7f7f
91	ori	t9, 0x7f7f
92
/* STRCMP32(OFFSET): load one word from each string at OFFSET into v0
   (from a0) and v1 (from a1).  If the words differ, branch to
   L(worddiff); the nor is that branch's delay slot.  Otherwise compute
   t0 = (v0 - t8) & ~(v0 | t9), which is nonzero exactly when v0 contains
   a zero byte, and branch to L(returnzero) in that case (the words are
   equal and the string ends inside them).  The macro ends on that branch,
   so whatever instruction follows each use becomes its delay slot.  */
93#define STRCMP32(OFFSET) \
94	lw	v0, OFFSET(a0); \
95	lw	v1, OFFSET(a1); \
96	subu	t0, v0, t8; \
97	bne	v0, v1, L(worddiff); \
98	nor	t1, v0, t9; \
99	and	t0, t0, t1; \
100	bne	t0, zero, L(returnzero)
101
/* Compare eight words (32 bytes) per iteration.  DELAY_READ fills the
   delay slot after each of the first seven comparisons.  After the last
   one the a0 increment is the delay slot of the macro's final bne, and
   the a1 increment is the delay slot of the loop-back branch.  */
102L(wordloop):
103	STRCMP32(0)
104	DELAY_READ
105	STRCMP32(4)
106	DELAY_READ
107	STRCMP32(8)
108	DELAY_READ
109	STRCMP32(12)
110	DELAY_READ
111	STRCMP32(16)
112	DELAY_READ
113	STRCMP32(20)
114	DELAY_READ
115	STRCMP32(24)
116	DELAY_READ
117	STRCMP32(28)
118	PTR_ADDIU a0, a0, 32
119	b	L(wordloop)
120	PTR_ADDIU a1, a1, 32
121
/* Equal words containing a zero byte were found: return 0.  The move is
   the delay slot of the jump.  */
122L(returnzero):
123	j	ra
124	move	v0, zero
125
/* L(worddiff): entered from STRCMP32 when the words loaded from the two
   strings differ.  On entry v0 and v1 hold the words read from a0 and a1,
   t8 = 0x01010101 and t9 = 0x7f7f7f7f.

   Finds the first byte position, in memory order, at which v0 holds a
   zero byte or the two words differ, and returns in v0 the byte of v0
   minus the byte of v1 at that position (both zero-extended).  Because
   the words differ such a position always exists.

   Still assembled under ".set noreorder": the instruction written after
   each branch or jump is its delay slot.  */
126L(worddiff):
127#ifdef USE_CLZ
/* t1 = mask with the top bit set in bytes of v0 that are zero.  */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* The subtract-based test used in the word loop is only exact as a
   whole-word "is there a zero byte" test: the borrow out of a zero byte
   also flags a 0x01 byte in the next more significant position.  On big
   endian that is the byte immediately BEFORE the terminator in memory,
   so clz would select it and, for example, "\001" vs "\001\005" would
   compare equal.  Use the carry-free form
   ~(((v & 0x7f7f7f7f) + 0x7f7f7f7f) | v | 0x7f7f7f7f), which flags
   exactly the zero bytes.  */
	and	t1, v0, t9
	addu	t1, t1, t9
	or	t1, t1, v0
	nor	t1, t1, t9
# else
/* On little endian a spurious flag can only appear in a byte that comes
   after the real zero byte in memory, so the cheaper test is safe.  */
128	subu	t0, v0, t8
129	nor	t1, v0, t9
130	and	t1, t0, t1
# endif
/* t0 = bits that differ between the words, plus the zero-byte flags.  */
131	xor	t0, v0, v1
132	or	t0, t0, t1
133# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
/* Reverse the byte order (swap bytes within halfwords, then swap the
   halfwords) so the first byte in memory becomes the most significant
   one for clz.  */
134	wsbh	t0, t0
135	rotr	t0, t0, 16
136# endif
/* t1 = 8 * (memory index of the first flagged byte): the leading-zero
   count rounded down to a multiple of 8.  */
137	clz	t1, t0
138	and	t1, 0xf8
139# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
/* Byte i sits 24 - 8*i bits above the bottom of a big endian word.  */
140	neg	t1
141	addu	t1, 24
142# endif
/* Rotate the selected byte of each word into the low 8 bits, isolate it
   and return the difference (subu is the delay slot of the jump).  */
143	rotrv	v0, v0, t1
144	rotrv	v1, v1, t1
145	and	v0, v0, 0xff
146	and	v1, v1, 0xff
147	j	ra
148	subu	v0, v0, v1
149#else /* USE_CLZ */
/* Byte-by-byte path.  Bytes are examined in memory order, alternating
   between the register pairs t0/t1 and t8/t9 so that the first shift of
   the next pair can sit in the delay slot of the preceding bne without
   disturbing the values the taken branch subtracts.  t8 and t9 no longer
   hold the word-loop constants afterwards, which is fine because every
   path from here returns.  For each byte: exit if the byte from v0 is
   zero (the masking/shift of v1's byte is in that branch's delay slot),
   then exit if the two bytes differ.  The last pair needs no test since
   the words are known to differ.  */
150# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
151	andi	t0, v0, 0xff
152	beq	t0, zero, L(wexit01)
153	andi	t1, v1, 0xff
154	bne	t0, t1, L(wexit01)
155
156	srl	t8, v0, 8
157	srl	t9, v1, 8
158	andi	t8, t8, 0xff
159	beq	t8, zero, L(wexit89)
160	andi	t9, t9, 0xff
161	bne	t8, t9, L(wexit89)
162
163	srl	t0, v0, 16
164	srl	t1, v1, 16
165	andi	t0, t0, 0xff
166	beq	t0, zero, L(wexit01)
167	andi	t1, t1, 0xff
168	bne	t0, t1, L(wexit01)
169
170	srl	t8, v0, 24
171	srl	t9, v1, 24
172# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
173	srl	t0, v0, 24
174	beq	t0, zero, L(wexit01)
175	srl	t1, v1, 24
176	bne	t0, t1, L(wexit01)
177
178	srl	t8, v0, 16
179	srl	t9, v1, 16
180	andi	t8, t8, 0xff
181	beq	t8, zero, L(wexit89)
182	andi	t9, t9, 0xff
183	bne	t8, t9, L(wexit89)
184
185	srl	t0, v0, 8
186	srl	t1, v1, 8
187	andi	t0, t0, 0xff
188	beq	t0, zero, L(wexit01)
189	andi	t1, t1, 0xff
190	bne	t0, t1, L(wexit01)
191
192	andi	t8, v0, 0xff
193	andi	t9, v1, 0xff
194# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
195
/* Exits: return the difference of the byte pair held in t8/t9 or t0/t1.
   The subu is the delay slot of each jump.  */
196L(wexit89):
197	j	ra
198	subu	v0, t8, t9
199L(wexit01):
200	j	ra
201	subu	v0, t0, t1
202#endif /* USE_CLZ */
203
204/* It might seem better to do the 'beq' instruction between the two 'lbu'
205   instructions so that the nop is not needed but testing showed that this
206   code is actually faster (based on glibc strcmp test).

   BYTECMP01(OFFSET) loads the bytes at OFFSET from a0 and a1 into v0 and
   v1 (zero-extended by lbu) and branches to L(bexit01) if the byte from
   a0 is zero or the two bytes differ.  BYTECMP89 does the same using t8
   and t9 and L(bexit89).  Each macro ends on its bne, so the instruction
   that follows a use is that branch's delay slot.  */
207#define BYTECMP01(OFFSET) \
208	lbu	v0, OFFSET(a0); \
209	lbu	v1, OFFSET(a1); \
210	beq	v0, zero, L(bexit01); \
211	nop; \
212	bne	v0, v1, L(bexit01)
213
214#define BYTECMP89(OFFSET) \
215	lbu	t8, OFFSET(a0); \
216	lbu	t9, OFFSET(a1); \
217	beq	t8, zero, L(bexit89); \
218	nop;	\
219	bne	t8, t9, L(bexit89)
220
/* Byte-at-a-time loop, entered when the pointers are not both 4 byte
   aligned.  Eight bytes are compared per iteration.  The two macros
   alternate so that the first lbu of the next comparison, which executes
   in the delay slot of the previous bne, writes a register the taken
   branch's exit code does not read.  After the eighth comparison the a0
   increment is the delay slot of the final bne and the a1 increment is
   the delay slot of the loop-back branch.  */
221L(byteloop):
222	BYTECMP01(0)
223	BYTECMP89(1)
224	BYTECMP01(2)
225	BYTECMP89(3)
226	BYTECMP01(4)
227	BYTECMP89(5)
228	BYTECMP01(6)
229	BYTECMP89(7)
230	PTR_ADDIU a0, a0, 8
231	b	L(byteloop)
232	PTR_ADDIU a1, a1, 8
233
/* Exits: return the difference of the byte pair that stopped the loop.
   The subu is the delay slot of each jump.  */
234L(bexit01):
235	j	ra
236	subu	v0, v0, v1
237L(bexit89):
238	j	ra
239	subu	v0, t8, t9
240
/* Turn the "at" and "reorder" assembler modes back on after the
   hand-scheduled body, then close the function opened by LEAF.  */
241	.set	at
242	.set	reorder
243
244END(STRCMP_NAME)
/* Outside the Android build, and only when building as part of libc,
   also invoke libc_hidden_builtin_def on the routine's name.  */
245#ifndef ANDROID_CHANGES
246# ifdef _LIBC
247libc_hidden_builtin_def (STRCMP_NAME)
248# endif
249#endif
250