1/* Optimized strncmp implementation for PowerPC32.
2   Copyright (C) 2003-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#include <sysdep.h>
20
21/* See strlen.s for comments on how the end-of-string testing works.  */
22
23/* int [r3] strncmp (const char *s1 [r3], const char *s2 [r4], size_t size [r5])  */
24
/* strncmp: compare at most `size' bytes of two strings.
   In:   r3 = s1, r4 = s2, r5 = size (see the rSTR1/rSTR2/rN defines).
   Out:  r3 (rRTN) = 0 when no difference is found within the compared
	 bytes; otherwise a non-zero value derived from the first
	 differing bytes or words (see the individual exits below).
   May modify r0, r3-r12, cr0, cr1 and CTR.
   Shape: if (s1 | s2) has either of its two low bits set, every byte
   is handled by the byte loop at L(uz).  Otherwise size / 4 words are
   compared in the loop at L(g0)/L(g1) and the remaining size % 4 bytes
   go through the byte loop.
   EALIGN, END, L() and libc_hidden_builtin_def are macros defined
   outside this file.  */
25EALIGN (strncmp, 4, 0)
26
/* Symbolic register names.  rRTN and rSTR1 both name r3: the register
   holding the first argument is also where the result is returned.
   rTMP2 (r0) and rTMP (r12) are scratch registers.  */
27#define rTMP2	r0
28#define rRTN	r3
29#define rSTR1	r3	/* first string arg */
30#define rSTR2	r4	/* second string arg */
31#define rN	r5	/* max string length */
32#define rWORD1	r6	/* current word in s1 */
33#define rWORD2	r7	/* current word in s2 */
34#define rFEFE	r8	/* constant 0xfefefeff (-0x01010101) */
35#define r7F7F	r9	/* constant 0x7f7f7f7f */
36#define rNEG	r10	/* ~(word in s1 | 0x7f7f7f7f) */
37#define rBITDIF	r11	/* bits that differ in s1 & s2 words */
38#define rTMP	r12
39
/* Entry: touch both strings with dcbt (cache prefetch hint), load the
   upper halves of the two word constants and test pointer alignment.
   After the clrlwi. below, cr0 reflects whether the low two bits of
   (s1 | s2) are zero; cr1 holds size compared (unsigned) with zero and
   is consumed at L(unaligned).  */
40	dcbt	0,rSTR1
41	or	rTMP, rSTR2, rSTR1
42	lis	r7F7F, 0x7f7f
43	dcbt	0,rSTR2
44	clrlwi.	rTMP, rTMP, 30
45	cmplwi	cr1, rN, 0
46	lis	rFEFE, -0x101
47	bne	L(unaligned)
48/* We are word aligned so set up for two loops.  first a word
49   loop, then fall into the byte loop if any residual.  */
/* rTMP = size / 4 (whole words, also recorded in cr0 by srwi.);
   rN = size % 4 (residual bytes).  The two addi complete
   rFEFE = 0xfefefeff and r7F7F = 0x7f7f7f7f.  cr1 is refreshed for the
   residual count.  The beq still tests cr0 from srwi.: with no whole
   word to compare, rN equals the original size and the byte loop does
   all the work.  */
50	srwi.	rTMP, rN, 2
51	clrlwi	rN, rN, 30
52	addi	rFEFE, rFEFE, -0x101
53	addi	r7F7F, r7F7F, 0x7f7f
54	cmplwi	cr1, rN, 0
55	beq	L(unaligned)
56
/* CTR = number of whole words; preload the first word of each string
   and enter the loop at the test (L(g1)), skipping the loads.  */
57	mtctr	rTMP	/* Power4 wants mtctr 1st in dispatch group.  */
58	lwz	rWORD1, 0(rSTR1)
59	lwz	rWORD2, 0(rSTR2)
60	b	L(g1)
61
/* Word loop.  L(g1) starts the zero-byte test on the current s1 word
   (rTMP = rWORD1 + rFEFE, rNEG = ~(rWORD1 | r7F7F)); bdz then
   decrements CTR and leaves for L(tail) when this is the last whole
   word.  Otherwise `and.' finishes the test (a non-zero result in cr0
   means a zero byte was flagged in rWORD1) and cr1 records whether
   rWORD1 equals rWORD2.  With no zero byte flagged the loop continues
   at L(g0), which starts loading the next s1 word before branching on
   cr1; that is why L(different) reloads rWORD1 from -4(rSTR1).  When a
   zero byte is flagged, execution falls through to L(endstring).  */
62L(g0):
63	lwzu	rWORD1, 4(rSTR1)
64	bne-	cr1, L(different)
65	lwzu	rWORD2, 4(rSTR2)
66L(g1):	add	rTMP, rFEFE, rWORD1
67	nor	rNEG, r7F7F, rWORD1
68	bdz	L(tail)
69	and.	rTMP, rTMP, rNEG
70	cmpw	cr1, rWORD1, rWORD2
71	beq+	L(g0)
72
73/* OK. We've hit the end of the string. We need to be careful that
74   we don't compare two strings as different because of gunk beyond
75   the end of the strings...  */
76
77#ifdef __LITTLE_ENDIAN__
/* Little-endian variant.  Entered with rTMP holding the zero-byte test
   result.  Taking (rTMP << 1), subtracting one and and-complementing
   with (rTMP << 1) yields a mask of all bits below the lowest set bit
   of the shifted value; both words are and-ed with it so that bytes
   after the flagged zero byte are ignored.  The rlwinm/rlwimi triples
   then byte-reverse each word so the byte at the lowest address
   becomes the most significant.  xor. sets cr0 from the XOR of the
   reversed words: if it is not negative (top bits agree) the
   difference rTMP - rTMP2 is returned; otherwise the reversed s2 word
   or-ed with 1 is returned.  */
78L(endstring):
79	slwi	rTMP, rTMP, 1
80	addi    rTMP2, rTMP, -1
81	andc    rTMP2, rTMP2, rTMP
82	and	rWORD2, rWORD2, rTMP2		/* Mask off gunk.  */
83	and	rWORD1, rWORD1, rTMP2
84	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
85	rlwinm	rTMP, rWORD1, 8, 0xffffffff
86	rlwimi	rTMP2, rWORD2, 24, 0, 7
87	rlwimi	rTMP, rWORD1, 24, 0, 7
88	rlwimi	rTMP2, rWORD2, 24, 16, 23
89	rlwimi	rTMP, rWORD1, 24, 16, 23
90	xor.	rBITDIF, rTMP, rTMP2
91	sub	rRTN, rTMP, rTMP2
92	bgelr+
93	ori	rRTN, rTMP2, 1
94	blr
95
/* Words differ and no zero byte was flagged.  rWORD1 is reloaded from
   -4(rSTR1) because rSTR1 (and, on the loop path, rWORD1) has already
   moved on to the next word.  The words are byte-reversed and compared
   exactly as at the end of L(endstring), without any masking.  */
96L(different):
97	lwz	rWORD1, -4(rSTR1)
98	rlwinm	rTMP2, rWORD2, 8, 0xffffffff	/* Byte reverse word.  */
99	rlwinm	rTMP, rWORD1, 8, 0xffffffff
100	rlwimi	rTMP2, rWORD2, 24, 0, 7
101	rlwimi	rTMP, rWORD1, 24, 0, 7
102	rlwimi	rTMP2, rWORD2, 24, 16, 23
103	rlwimi	rTMP, rWORD1, 24, 16, 23
104	xor.	rBITDIF, rTMP, rTMP2
105	sub	rRTN, rTMP, rTMP2
106	bgelr+
107	ori	rRTN, rTMP2, 1
108	blr
109
110#else
/* Big-endian variant.  cr1 still holds the rWORD1/rWORD2 comparison
   from the cmpw executed just before control arrived here, so equal
   words return 0 at once.  Otherwise (rWORD1 & r7F7F) + r7F7F is
   and-complemented out of rNEG, leaving bit 0x80 set only in the bytes
   of rWORD1 that are zero.  If the words differ in their top bit
   (xor. result negative) the result comes from L(highbit).  Else
   cntlzw locates the first differing bit (rBITDIF) and the last bit of
   the first zero byte (rNEG + 7): when the difference does not lie
   after that zero byte, rWORD1 - rWORD2 is returned; when it does, the
   strings are equal and 0 is returned via L(equal).  */
111L(endstring):
112	and	rTMP, r7F7F, rWORD1
113	beq	cr1, L(equal)
114	add	rTMP, rTMP, r7F7F
115	xor.	rBITDIF, rWORD1, rWORD2
116	andc	rNEG, rNEG, rTMP
117	blt-	L(highbit)
118	cntlzw	rBITDIF, rBITDIF
119	cntlzw	rNEG, rNEG
120	addi	rNEG, rNEG, 7
121	cmpw	cr1, rNEG, rBITDIF
122	sub	rRTN, rWORD1, rWORD2
123	bgelr+	cr1
124L(equal):
125	li	rRTN, 0
126	blr
127
/* Words differ and no zero byte was flagged.  rWORD1 is reloaded from
   -4(rSTR1) because rSTR1 (and, on the loop path, rWORD1) has already
   moved on to the next word.  If the top bits agree (xor. result not
   negative) rWORD1 - rWORD2 is returned; otherwise L(highbit) returns
   rWORD2 or-ed with 1.  */
128L(different):
129	lwz	rWORD1, -4(rSTR1)
130	xor.	rBITDIF, rWORD1, rWORD2
131	sub	rRTN, rWORD1, rWORD2
132	bgelr+
133L(highbit):
134	ori	rRTN, rWORD2, 1
135	blr
136#endif
137
138/* Oh well.  In this case, we just do a byte-by-byte comparison.  */
/* L(tail) is entered from the word loop when CTR reaches zero, i.e.
   for the last whole word.  It finishes the zero-byte test and the
   word compare for that word: a flagged zero byte goes to
   L(endstring), a mismatch to L(different) (rSTR1 has already been
   advanced by 4 to match the -4 displacement used there).  Otherwise
   both pointers are stepped past the word, cr1 is set from the
   residual byte count and execution falls into the byte-loop set-up at
   L(unaligned).  */
139	.align 4
140L(tail):
141	and.	rTMP, rTMP, rNEG
142	cmpw	cr1, rWORD1, rWORD2
143	bne-	L(endstring)
144	addi	rSTR1, rSTR1, 4
145	bne-	cr1, L(different)
146	addi	rSTR2, rSTR2, 4
147	cmplwi	cr1, rN, 0
/* Byte-loop set-up: CTR = number of bytes left to compare.  cr1 was
   set by a cmplwi of rN against 0 on every path that reaches this
   label; a count of zero returns 0 through L(ux).  */
148L(unaligned):
149	mtctr   rN	/* Power4 wants mtctr 1st in dispatch group */
150	bgt	cr1, L(uz)
151L(ux):
152	li	rRTN, 0
153	blr
154	.align 4
/* Byte loop, unrolled twice.  Each bdz decrements CTR and exits to
   L(u3) once the byte count is used up, returning the difference of
   the bytes currently in rWORD1 and rWORD2.  cr1 tracks whether the s1
   byte is zero and cr0 whether the two bytes match.  The next s1 byte
   is loaded before the pending condition is branched on, so L(u2)
   steps rSTR1 back by one and reloads the byte that belongs with
   rWORD2 before the subtraction.
   The nop after the first two loads presumably pads for instruction
   grouping -- TODO confirm.
   NOTE(review): in the second half the lbzu of the next s1 byte is
   issued before the zero test in cr1 is branched on, so when both
   bytes are NUL and CTR has not run out, one byte past the s1
   terminator is read (its value is then discarded by L(u2)) -- confirm
   this over-read is acceptable.  */
155L(uz):
156	lbz	rWORD1, 0(rSTR1)
157	lbz	rWORD2, 0(rSTR2)
158	nop
159	b	L(u1)
160L(u0):
161	lbzu	rWORD2, 1(rSTR2)
162L(u1):
163	bdz	L(u3)
164	cmpwi	cr1, rWORD1, 0
165	cmpw	rWORD1, rWORD2
166	beq-	cr1, L(u3)
167	lbzu	rWORD1, 1(rSTR1)
168	bne-	L(u2)
169	lbzu	rWORD2, 1(rSTR2)
170	bdz	L(u3)
171	cmpwi	cr1, rWORD1, 0
172	cmpw	rWORD1, rWORD2
173	bne-	L(u3)
174	lbzu	rWORD1, 1(rSTR1)
175	bne+	cr1, L(u0)
176
/* L(u2): undo the early load of the next s1 byte.  L(u3): return the
   byte difference rWORD1 - rWORD2.  */
177L(u2):	lbzu	rWORD1, -1(rSTR1)
178L(u3):	sub	rRTN, rWORD1, rWORD2
179	blr
180END (strncmp)
181libc_hidden_builtin_def (strncmp)
182