1/* Set a block of memory to some byte value.
2   For UltraSPARC.
3   Copyright (C) 1996-2021 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21#include <asm/asi.h>
22#ifndef XCC
23#define XCC xcc
24#define USE_BPR
25#endif
26#define FPRS_FEF	4
27
/* SET_BLOCKS(base, offset, source): expand to four stx instructions that
   store register SOURCE to the doublewords at base - offset - 0x18,
   - 0x10, - 0x08 and - 0x00, in that order (32 bytes, lowest address
   first).

   memset enters a run of four of these expansions through a computed
   jmpl that counts back from the label following the run in units of one
   instruction per doubleword.  The number of instructions per expansion
   and their ascending address order therefore must not change.  */
28#define SET_BLOCKS(base, offset, source)		\
29	stx		source, [base - offset - 0x18];	\
30	stx		source, [base - offset - 0x10];	\
31	stx		source, [base - offset - 0x08];	\
32	stx		source, [base - offset - 0x00];
33
34	/* Well, memset is a lot easier to get right than bcopy... */
35	.text
36	.align		32
/* memset: fill a block of memory with a byte value.

   Register usage, as read from the code below:
     %o0	destination pointer on entry; advanced as a store cursor
     %o1	fill value; masked to its low byte, then replicated to 64 bits
     %o2	byte count; reduced as the head of the block is aligned
     %o5	copy of the original %o0, moved back into %o0 at each retl
     %g1, %g5, %o3, %o4	scratch
     %f0-%f14	loaded with the fill pattern on the large-block path

   Outline:
     - a zero fill byte is handed to label 50 inside __bzero, with the
       count moved to %o1;
     - counts of 7 or less use the byte loop at label 17;
     - otherwise the head is aligned to 4 and then 8 bytes;
     - with at least 128 bytes left the cursor is aligned to 64 bytes and
       the bulk is written with stda through %asi = ASI_BLK_P;
     - the remainder is finished with stx, stw, sth and stb.

   Numeric local labels are reused; Nf and Nb name the nearest definition
   forward and backward.  An instruction indented by one extra space is
   in the delay slot of the branch above it.  */
37ENTRY(memset)
	/* Keep only the low byte of the fill value; the condition codes
	   record whether it is zero.  Save the return value in %o5.  */
38	andcc		%o1, 0xff, %o1
39	mov		%o0, %o5
	/* Zero fill: continue at label 50 inside __bzero.  The annulled
	   delay slot runs only when the branch is taken and moves the
	   count into %o1, where that code reads it (the srl form also
	   clears the upper 32 bits).  */
40	be,a,pt		%icc, 50f
41#ifndef USE_BPR
42	 srl		%o2, 0, %o1
43#else
44	 mov		%o2, %o1
45#endif
	/* Counts of 7 or less go to the byte loop at 17.  The delay slot
	   leaves the low two bits of the destination in %g5 and sets the
	   codes for the branch that follows.  */
46	cmp		%o2, 7
47#ifndef USE_BPR
48	srl		%o2, 0, %o2
49#endif
50	bleu,pn		%XCC, 17f
51	 andcc		%o0, 3, %g5
	/* Already 4-byte aligned: go build the pattern at 4.  The delay
	   slot repeats the mask applied by the first instruction.  */
52	be,pt		%xcc, 4f
53	 and		%o1, 0xff, %o1
	/* Store the 4 - %g5 bytes needed to reach 4-byte alignment.  The
	   stb in each (non-annulled) delay slot executes whether or not
	   its branch is taken, so %g5 == 3 stores one byte, %g5 == 2 two
	   bytes and %g5 == 1 three bytes.  */
54	cmp		%g5, 3
55	be,pn		%xcc, 2f
56	 stb		%o1, [%o0 + 0x00]
57	cmp		%g5, 2
58	be,pt		%xcc, 2f
59	 stb		%o1, [%o0 + 0x01]
60	stb		%o1, [%o0 + 0x02]
	/* %g5 becomes minus the number of bytes just stored; advance the
	   cursor and shrink the count by that amount.  */
612:	sub		%g5, 4, %g5
62	sub		%o0, %g5, %o0
63	add		%o2, %g5, %o2
	/* Replicate the fill byte to 16 and then 32 bits in %o1.  The
	   upper 32 bits are prepared in %g1 by the delay slot and merged
	   by the "or" after the next label 2.  In between, test bit 2 of
	   the cursor: clear means it is already 8-byte aligned.  */
644:	sllx		%o1, 8, %g1
65	andcc		%o0, 4, %g0
66	or		%o1, %g1, %o1
67	sllx		%o1, 16, %g1
68	or		%o1, %g1, %o1
69	be,pt		%xcc, 2f
70	 sllx		%o1, 32, %g1
	/* Not 8-byte aligned: store one word and step past it.  */
71	stw		%o1, [%o0]
72	sub		%o2, 4, %o2
73	add		%o0, 4, %o0
	/* Complete the 64-bit pattern.  With fewer than 128 bytes left go
	   to the doubleword tail at 9.  The delay slot puts bits 3-5 of
	   the cursor (its offset within a 64-byte line) in %g5.  */
742:	cmp		%o2, 128
75	or		%o1, %g1, %o1
76	blu,pn		%xcc, 9f
77	 andcc		%o0, 0x38, %g5
	/* Already 64-byte aligned: use the entry at 6.  */
78	be,pn		%icc, 6f
79	 mov		64, %o4
	/* Align to 64 bytes.  %o4 = 64 - %g5 is the distance to the
	   boundary; an 8-byte, a 16-byte and a 32-byte piece are stored
	   as needed.  The 8-byte test looks at the cursor itself, the
	   other two at %o4.  */
80	andcc		%o0, 8, %g0
81	be,pn		%icc, 1f
82	 sub		%o4, %g5, %o4
83	stx		%o1, [%o0]
84	add		%o0, 8, %o0
	/* The delay slot charges the whole of %o4 against the count.  */
851:	andcc		%o4, 16, %g0
86	be,pn		%icc, 1f
87	 sub		%o2, %o4, %o2
88	stx		%o1, [%o0]
89	stx		%o1, [%o0 + 8]
90	add		%o0, 16, %o0
	/* The delay slot sets %o3 to the count rounded down to a multiple
	   of 64.  The codes it produces are the ones tested at label 7;
	   the stw and add instructions in between do not write them.  */
911:	andcc		%o4, 32, %g0
92	be,pn		%icc, 7f
93	 andncc		%o2, 0x3f, %o3
94	stw		%o1, [%o0]
95	stw		%o1, [%o0 + 4]
96	stw		%o1, [%o0 + 8]
97	stw		%o1, [%o0 + 12]
98	stw		%o1, [%o0 + 16]
99	stw		%o1, [%o0 + 20]
100	stw		%o1, [%o0 + 24]
101	stw		%o1, [%o0 + 28]
102	add		%o0, 32, %o0
	/* %o3 == 0 (no whole 64-byte block): tail at 9.  Otherwise load
	   the 8 pattern bytes stored just below the cursor into %f0.  */
1037:	be,pn		%xcc, 9f
104	 nop
105	ldd		[%o0 - 8], %f0
	/* Large-block path.  Put ASI_BLK_P in %asi for the stda stores
	   (ASI_BLK_P is not defined in this file; presumably it comes
	   from the asm/asi.h include) and copy %f0 into %f2-%f14 with
	   fsrc2.  Interleaved with the copies:
	     %g5 = %o3 & 0xc0	0, 64, 128 or 192 bytes for the lead-in
	     %o2 = %o2 & 0x3f	bytes left for the tail at 9
	     %o3 = %o3 & ~0xff	bytes for the 256-byte loop at 10
	   NOTE(review): the 0x40 stride between stda offsets and the
	   eight double registers filled match a 64-byte store per stda;
	   confirm against the UltraSPARC block-store description.  */
10618:	wr		%g0, ASI_BLK_P, %asi
107	membar		#StoreStore | #LoadStore
108	andcc		%o3, 0xc0, %g5
109	and		%o2, 0x3f, %o2
110	fsrc2		%f0, %f2
111	fsrc2		%f0, %f4
112	andn		%o3, 0xff, %o3
113	fsrc2		%f0, %f6
114	cmp		%g5, 64
115	fsrc2		%f0, %f8
116	fsrc2		%f0, %f10
117	fsrc2		%f0, %f12
	/* %g5 == 0: straight to the loop at 10.  Otherwise issue one, two
	   or three stda stores for %g5 == 64, 128 or 192; each stda in a
	   delay slot executes whether or not its branch is taken.  */
118	brz,pn		%g5, 10f
119	 fsrc2		%f0, %f14
120	be,pn		%icc, 2f
121	 stda		%f0, [%o0 + 0x00] %asi
122	cmp		%g5, 128
123	be,pn		%icc, 2f
124	 stda		%f0, [%o0 + 0x40] %asi
125	stda		%f0, [%o0 + 0x80] %asi
	/* Skip the loop when no 256-byte chunk remains.  The delay slot
	   advances the cursor past the lead-in stores.  */
1262:	brz,pn		%o3, 12f
127	 add		%o0, %g5, %o0
	/* Main loop: four stda stores at a 0x40 stride per pass, with
	   %o3 counted down by 256 and the cursor advanced by 256.  */
12810:	stda		%f0, [%o0 + 0x00] %asi
129	stda		%f0, [%o0 + 0x40] %asi
130	stda		%f0, [%o0 + 0x80] %asi
131	stda		%f0, [%o0 + 0xc0] %asi
13211:	subcc		%o3, 256, %o3
133	bne,pt		%xcc, 10b
134	 add		%o0, 256, %o0
	/* Write the constant FPRS_FEF (4) to %fprs, then a membar before
	   the ordinary stores of the tail.
	   NOTE(review): presumably this leaves the FPU enabled with its
	   dirty bits cleared; confirm against the FPRS definition.  */
13512:	wr		%g0, FPRS_FEF, %fprs
136	membar		#StoreLoad | #StoreStore
	/* Tail.  %g5 = bytes still to be stored as whole doublewords (a
	   multiple of 8, at most 0x78).  The delay slot leaves the final
	   0-7 bytes in %o2 and sets the codes tested at label 13.  */
1379:	andcc		%o2, 0x78, %g5
138	be,pn		%xcc, 13f
139	 andcc		%o2, 7, %o2
	/* Computed jump into the stx run below.  Every stx is 4 bytes of
	   code and stores 8 bytes of data, so %g5 >> 1 is the amount of
	   code to execute; the target is label 13 minus that amount,
	   formed from the address of label 14 read with rd %pc.  The
	   delay slot moves the cursor to the end of the doubleword area,
	   which is why the SET_BLOCKS stores use negative offsets.  */
14014:	rd		%pc, %o4
141	srl		%g5, 1, %o3
142	sub		%o4, %o3, %o4
143	jmpl		%o4 + (13f - 14b), %g0
144	 add		%o0, %g5, %o0
	/* Sixteen stx instructions covering %o0 - 0x80 up to %o0 - 0x08.
	   Do not add, remove or reorder instructions here.  */
14512:	SET_BLOCKS	(%o0, 0x68, %o1)
146	SET_BLOCKS	(%o0, 0x48, %o1)
147	SET_BLOCKS	(%o0, 0x28, %o1)
148	SET_BLOCKS	(%o0, 0x08, %o1)
	/* Last 0-7 bytes.  The codes are still those of "andcc %o2, 7"
	   above; none of the instructions executed since then write
	   them.  Zero means done.  Bits 2, 1 and 0 of %o2 then select a
	   word, a halfword and a byte store; each test is placed in the
	   delay slot of the previous branch.  */
14913:	be,pn		%xcc, 8f
150	 andcc		%o2, 4, %g0
151	be,pn		%xcc, 1f
152	 andcc		%o2, 2, %g0
153	stw		%o1, [%o0]
154	add		%o0, 4, %o0
1551:	be,pn		%xcc, 1f
156	 andcc		%o2, 1, %g0
157	sth		%o1, [%o0]
158	add		%o0, 2, %o0
	/* Annulled delay slot: the stb runs only when bit 0 was set.  */
1591:	bne,a,pn	%xcc, 8f
160	 stb		%o1, [%o0]
	/* Return the original destination pointer.  */
1618:	retl
162	 mov		%o5, %o0
	/* Byte loop for counts of 7 or less; returns at once for a zero
	   count.  The add at label 8 is both the delay slot of the brz
	   and the first instruction of the loop (when the brz is taken
	   its effect is discarded, because %o0 is reloaded from %o5).
	   The stb in the loop branch's delay slot writes the byte the
	   cursor has just stepped over.  */
16317:	brz,pn		%o2, 0f
1648:	 add		%o0, 1, %o0
165	subcc		%o2, 1, %o2
166	bne,pt		%xcc, 8b
167	 stb		%o1, [%o0 - 1]
1680:	retl
169	 mov		%o5, %o0
170
	/* Entry to the large-block path when the cursor was already
	   64-byte aligned.  No pattern has necessarily been stored below
	   the cursor on this path, so store it once at the cursor and
	   load %f0 from there in the delay slot of the jump to 18.  %o3
	   is computed exactly as in the delay slot ahead of label 7.  */
1716:	stx		%o1, [%o0]
172	andncc		%o2, 0x3f, %o3
173	be,pn		%xcc, 9b
174	 nop
175	ba,pt		%xcc, 18b
176	 ldd		[%o0], %f0
177END(memset)
178libc_hidden_builtin_def (memset)
179
/* ZERO_BLOCKS(base, offset, source): expand to eight stx instructions
   that store register SOURCE to the doublewords at base - offset - 0x38
   through base - offset - 0x00, in that order (64 bytes, lowest address
   first).

   __bzero enters a run of four of these expansions through a computed
   jmpl that counts back from the label following the run in units of one
   instruction per doubleword.  The number of instructions per expansion
   and their ascending address order therefore must not change.  */
180#define ZERO_BLOCKS(base, offset, source)		\
181	stx		source, [base - offset - 0x38];	\
182	stx		source, [base - offset - 0x30];	\
183	stx		source, [base - offset - 0x28];	\
184	stx		source, [base - offset - 0x20];	\
185	stx		source, [base - offset - 0x18];	\
186	stx		source, [base - offset - 0x10];	\
187	stx		source, [base - offset - 0x08];	\
188	stx		source, [base - offset - 0x00];
189
190	.text
191	.align		32
/* __bzero (weak alias: bzero): clear a block of memory.

   Register usage, as read from the code below:
     %o0	destination pointer on entry; advanced as a store cursor
     %o1	byte count; reduced as the head of the block is aligned
     %o5	copy of the original %o0, moved back into %o0 at each retl
     %o2, %o3, %o4	scratch
     %f0-%f14	zeroed on the large-block path

   Label 50 is a second entry point: memset branches to it for a zero
   fill byte, with the count already in %o1 and the original destination
   already in %o5.

   Outline: counts of 7 or less are handled at label 17; otherwise the
   head is aligned to 4 and then 8 bytes; large blocks are aligned to 64
   bytes and written with stda through %asi = ASI_BLK_P; the remainder is
   finished with stx, stw, sth and stb of %g0.

   Numeric local labels are reused; Nf and Nb name the nearest definition
   forward and backward.  An instruction indented by one extra space is
   in the delay slot of the branch above it.  */
192ENTRY(__bzero)
	/* Without USE_BPR, clear the upper 32 bits of the count.  */
193#ifndef USE_BPR
194	srl		%o1, 0, %o1
195#endif
196	mov		%o0, %o5
	/* Counts of 7 or less go to 17.  The delay slot leaves the low
	   two bits of the destination in %o2 and sets the codes used by
	   the next branch and by the first branch at 17.  */
19750:	cmp		%o1, 7
198	bleu,pn		%xcc, 17f
199	 andcc		%o0, 3, %o2
	/* 4-byte aligned: go to 4.  The annulled delay slot runs only on
	   that path and sets the codes the branch at 4 tests.  */
200	be,a,pt		%xcc, 4f
201	 andcc		%o0, 4, %g0
	/* Store the 4 - %o2 zero bytes needed to reach 4-byte alignment.
	   The stb in each (non-annulled) delay slot executes whether or
	   not its branch is taken, so %o2 == 3 stores one byte, %o2 == 2
	   two bytes and %o2 == 1 three bytes.  */
202	cmp		%o2, 3
203	be,pn		%xcc, 2f
204	 stb		%g0, [%o0 + 0x00]
205	cmp		%o2, 2
206	be,pt		%xcc, 2f
207	 stb		%g0, [%o0 + 0x01]
208	stb		%g0, [%o0 + 0x02]
	/* %o2 becomes minus the number of bytes just stored; advance the
	   cursor, shrink the count, then test bit 2 of the new cursor.  */
2092:	sub		%o2, 4, %o2
210	sub		%o0, %o2, %o0
211	add		%o1, %o2, %o1
212	andcc		%o0, 4, %g0
	/* Bit 2 clear: already 8-byte aligned.  The cmp in the delay slot
	   executes either way, so on the fall-through path the count is
	   compared with 128 before the 4 bytes stored here are taken off
	   it; the stw, sub and add below do not write the codes.  */
2134:	be,pt		%xcc, 2f
214	 cmp		%o1, 128
215	stw		%g0, [%o0]
216	sub		%o1, 4, %o1
217	add		%o0, 4, %o0
	/* Below 128 according to that cmp: tail at 9.  The delay slot
	   puts bits 3-5 of the cursor (its offset within a 64-byte line)
	   in %o2.  */
2182:	blu,pn		%xcc, 9f
219	 andcc		%o0, 0x38, %o2
	/* Already 64-byte aligned: go to 6.  */
220	be,pn		%icc, 6f
221	 mov		64, %o4
	/* Align to 64 bytes.  %o4 = 64 - %o2 is the distance to the
	   boundary; an 8-byte, a 16-byte and a 32-byte piece are stored
	   as needed.  The 8-byte test looks at the cursor itself, the
	   other two at %o4.  */
222	andcc		%o0, 8, %g0
223	be,pn		%icc, 1f
224	 sub		%o4, %o2, %o4
225	stx		%g0, [%o0]
226	add		%o0, 8, %o0
	/* The delay slot charges the whole of %o4 against the count.  */
2271:	andcc		%o4, 16, %g0
228	be,pn		%icc, 1f
229	 sub		%o1, %o4, %o1
230	stx		%g0, [%o0]
231	stx		%g0, [%o0 + 8]
232	add		%o0, 16, %o0
	/* The delay slot sets %o3 to the count rounded down to a multiple
	   of 64 and sets the codes tested at label 7.  */
2331:	andcc		%o4, 32, %g0
234	be,pn		%icc, 7f
235	 andncc		%o1, 0x3f, %o3
236	stx		%g0, [%o0]
237	stx		%g0, [%o0 + 8]
238	stx		%g0, [%o0 + 16]
239	stx		%g0, [%o0 + 24]
240	add		%o0, 32, %o0
	/* Label 6 computes %o3 for the path that skipped the alignment
	   stores; on the fall-through it repeats the andncc above with
	   the same operands.  */
2416:	andncc		%o1, 0x3f, %o3
	/* %o3 == 0 (no whole 64-byte block): tail at 9.  The wr in the
	   delay slot executes either way (ASI_BLK_P is not defined in
	   this file; presumably it comes from the asm/asi.h include).  */
2427:	be,pn		%xcc, 9f
243	 wr		%g0, ASI_BLK_P, %asi
244	membar		#StoreLoad | #StoreStore | #LoadStore
	/* Large-block path.  Zero %f0 and %f2 with fzero and derive
	   %f4-%f14 from them with faddd and fmuld (0 + 0 and 0 * 0).
	   Interleaved with that:
	     %o2 = %o3 & 0xc0	0, 64, 128 or 192 bytes for the lead-in
	     %o1 = %o1 & 0x3f	bytes left for the tail at 9
	     %o3 = %o3 & ~0xff	bytes for the 256-byte loop at 10
	   NOTE(review): alternating faddd and fmuld presumably spreads
	   the work over separate FP units; confirm.  The 0x40 stride
	   between stda offsets and the eight double registers filled
	   match a 64-byte store per stda; confirm against the UltraSPARC
	   block-store description.  */
245	fzero		%f0
246	andcc		%o3, 0xc0, %o2
247	and		%o1, 0x3f, %o1
248	fzero		%f2
249	andn		%o3, 0xff, %o3
250	faddd		%f0, %f2, %f4
251	fmuld		%f0, %f2, %f6
252	cmp		%o2, 64
253	faddd		%f0, %f2, %f8
254	fmuld		%f0, %f2, %f10
255	faddd		%f0, %f2, %f12
	/* %o2 == 0: straight to the loop at 10.  Otherwise issue one, two
	   or three stda stores for %o2 == 64, 128 or 192; each stda in a
	   delay slot executes whether or not its branch is taken.  */
256	brz,pn		%o2, 10f
257	 fmuld		%f0, %f2, %f14
258	be,pn		%icc, 2f
259	 stda		%f0, [%o0 + 0x00] %asi
260	cmp		%o2, 128
261	be,pn		%icc, 2f
262	 stda		%f0, [%o0 + 0x40] %asi
263	stda		%f0, [%o0 + 0x80] %asi
	/* Skip the loop when no 256-byte chunk remains.  The delay slot
	   advances the cursor past the lead-in stores.  */
2642:	brz,pn		%o3, 12f
265	 add		%o0, %o2, %o0
	/* Main loop: four stda stores at a 0x40 stride per pass, with
	   %o3 counted down by 256 and the cursor advanced by 256.  */
26610:	stda		%f0, [%o0 + 0x00] %asi
267	stda		%f0, [%o0 + 0x40] %asi
268	stda		%f0, [%o0 + 0x80] %asi
269	stda		%f0, [%o0 + 0xc0] %asi
27011:	subcc		%o3, 256, %o3
271	bne,pt		%xcc, 10b
272	 add		%o0, 256, %o0
	/* Write the constant FPRS_FEF (4) to %fprs, then a membar before
	   the ordinary stores of the tail.
	   NOTE(review): presumably this leaves the FPU enabled with its
	   dirty bits cleared; confirm against the FPRS definition.  */
27312:	wr		%g0, FPRS_FEF, %fprs
274	membar		#StoreLoad | #StoreStore
	/* Tail.  %o2 = bytes still to be stored as whole doublewords (a
	   multiple of 8, at most 0xf8).  The delay slot leaves the final
	   0-7 bytes in %o1 and sets the codes tested at label 13.  */
2759:	andcc		%o1, 0xf8, %o2
276	be,pn		%xcc, 13f
277	 andcc		%o1, 7, %o1
	/* Computed jump into the stx run below.  Every stx is 4 bytes of
	   code and stores 8 bytes of data, so %o2 >> 1 is the amount of
	   code to execute; the target is label 13 minus that amount,
	   formed from the address of label 14 read with rd %pc.  The
	   delay slot moves the cursor to the end of the doubleword area,
	   which is why the ZERO_BLOCKS stores use negative offsets.  */
27814:	rd		%pc, %o4
279	srl		%o2, 1, %o3
280	sub		%o4, %o3, %o4
281	jmpl		%o4 + (13f - 14b), %g0
282	 add		%o0, %o2, %o0
	/* Thirty-two stx instructions covering %o0 - 0x100 up to
	   %o0 - 0x08.  Do not add, remove or reorder instructions here.  */
28312:	ZERO_BLOCKS	(%o0, 0xc8, %g0)
284	ZERO_BLOCKS	(%o0, 0x88, %g0)
285	ZERO_BLOCKS	(%o0, 0x48, %g0)
286	ZERO_BLOCKS	(%o0, 0x08, %g0)
	/* Last 0-7 bytes.  On entry the codes reflect the remaining
	   count (from "andcc %o1, 7" above, or from the orcc at 17);
	   zero means done.  Bits 2, 1 and 0 of %o1 then select a word, a
	   halfword and a byte store; each test is placed in the delay
	   slot of the previous branch.  */
28713:	be,pn		%xcc, 8f
288	 andcc		%o1, 4, %g0
289	be,pn		%xcc, 1f
290	 andcc		%o1, 2, %g0
291	stw		%g0, [%o0]
292	add		%o0, 4, %o0
2931:	be,pn		%xcc, 1f
294	 andcc		%o1, 1, %g0
295	sth		%g0, [%o0]
296	add		%o0, 2, %o0
	/* Annulled delay slot: the stb runs only when bit 0 was set.  */
2971:	bne,a,pn	%xcc, 8f
298	 stb		%g0, [%o0]
	/* Move the saved original destination back into %o0.  */
2998:	retl
300	 mov		%o5, %o0
	/* Counts of 7 or less.  The codes here are still those of
	   "andcc %o0, 3" in the delay slot after label 50: a 4-byte
	   aligned destination reuses the word/halfword/byte tail at 13.
	   The orcc executes either way and sets the codes from the
	   count, which both label 13 and the next branch test.  */
30117:	be,pn		%xcc, 13b
302	 orcc		%o1, 0, %g0
	/* Unaligned destination: return at once for a zero count, else
	   store byte by byte.  The add at label 8 is both the delay slot
	   of this branch and the first instruction of the loop; the stb
	   in the loop branch's delay slot writes the byte the cursor has
	   just stepped over.  */
303	be,pn		%xcc, 0f
3048:	 add		%o0, 1, %o0
305	subcc		%o1, 1, %o1
306	bne,pt		%xcc, 8b
307	 stb		%g0, [%o0 - 1]
3080:	retl
309	 mov		%o5, %o0
310END(__bzero)
311
312weak_alias (__bzero, bzero)
313