1/* Optimized memset for Huawei Kunpeng processor.
2   Copyright (C) 2012-2021 Free Software Foundation, Inc.
3
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library.  If not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21#include <sysdeps/aarch64/memset-reg.h>
22
23#if IS_IN (libc)
24# define MEMSET __memset_kunpeng
25
/* Assumptions:
 *
 * ARMv8-a, AArch64, Advanced SIMD, unaligned accesses
 *
 */
31
/* MEMSET: store the byte replicated from VALW into the COUNT bytes
   that start at DSTIN.

   The register aliases used below (dstin, valw, count, dst, dstend)
   are not defined in this file; presumably they come from
   memset-reg.h.  DSTIN is never written by this routine, so it holds
   the same value on return as it did on entry.

   Strategy by size:
     0..15    overlapping scalar stores of 8, 4, 2 and 1 bytes
     16..63   two or four 16-byte stores, paired from both ends
     64..127  64 bytes from the start plus 64 bytes from the end
     128..    16 bytes at DSTIN, then 64 bytes per step starting at
	      the next 16-byte boundary, finished by the last 64 bytes
	      of the buffer.

   Stores are allowed to overlap; that is what lets each size class
   be handled without a byte-exact loop.  */
ENTRY_ALIGN (MEMSET, 6)

	PTR_ARG (0)
	SIZE_ARG (2)

	dup	v0.16b, valw		/* All 16 byte lanes = low byte of valw.  */
	add	dstend, dstin, count	/* One past the last byte to write.  */

	cmp	count, #128
	b.hs	L(set_long)

	cmp	count, #16
	b.lo	L(less16)

	/* 16..127 bytes.  The leading 16 bytes are needed in every case,
	   so store them before looking at the size bits.  */
	str	q0, [dstin]
	tbnz	count, #6, L(set127)		/* Bit 6 set: 64..127.  */
	str	q0, [dstend, #-16]
	tbz	count, #5, L(set_medium_done)	/* Bit 5 clear: 16..31.  */
	/* 32..63 bytes: one more 16-byte store from each end.  */
	str	q0, [dstin, #16]
	str	q0, [dstend, #-32]
L(set_medium_done):
	ret

	.p2align 4
	/* 64..127 bytes.  Bytes 0..15 are already written; complete the
	   first 64 bytes, then write the final 64 bytes.  */
L(set127):
	stp	q0, q0, [dstin, #16]
	str	q0, [dstin, #48]
	stp	q0, q0, [dstend, #-64]
	stp	q0, q0, [dstend, #-32]
	ret

	.p2align 4
	/* 0..15 bytes.  Pick the store width from the highest set bit of
	   count and write it once from each end.  */
L(less16):
	tbz	count, #3, L(less8)
	/* 8..15 bytes.  */
	str	d0, [dstin]
	str	d0, [dstend, #-8]
	ret
L(less8):
	tbz	count, #2, L(less4)
	/* 4..7 bytes.  */
	str	s0, [dstin]
	str	s0, [dstend, #-4]
	ret
L(less4):
	/* 0..3 bytes.  */
	cbz	count, L(set_small_done)	/* Nothing to store.  */
	str	b0, [dstin]
	tbz	count, #1, L(set_small_done)	/* count == 1: done.  */
	str	h0, [dstend, #-2]		/* count is 2 or 3.  */
L(set_small_done):
	ret

	.p2align 4
	/* 128 bytes or more.  */
L(set_long):
	bic	dst, dstin, #15		/* dst = dstin rounded down to 16.  */
	str	q0, [dstin]		/* Covers up to the next 16-byte boundary.  */
	/* count becomes the distance from the rounded-down dst to dstend,
	   which still includes the 16 bytes just handled above.  */
	sub	count, dstend, dst
	/* Each loop step stores at dst + 32 and dst + 64, so dst is kept
	   32 bytes below the next address to be written.  */
	sub	dst, dst, #16
	/* Remove the 16 already-handled bytes and the 64 bytes the tail
	   always writes.  The extra 1 makes the unsigned b.lo test below
	   leave the loop as soon as the tail alone can finish.  */
	sub	count, count, #(64 + 16 + 1)

	/* Main loop, unrolled four times.  Every step stores 64 bytes and
	   advances dst by 64 through the pre-index writeback.  */
L(loop64):
	stp	q0, q0, [dst, #32]
	stp	q0, q0, [dst, #64]!
	subs	count, count, #64
	b.lo	L(tail64)
	stp	q0, q0, [dst, #32]
	stp	q0, q0, [dst, #64]!
	subs	count, count, #64
	b.lo	L(tail64)
	stp	q0, q0, [dst, #32]
	stp	q0, q0, [dst, #64]!
	subs	count, count, #64
	b.lo	L(tail64)
	stp	q0, q0, [dst, #32]
	stp	q0, q0, [dst, #64]!
	subs	count, count, #64
	b.hs	L(loop64)

	/* Final 64 bytes, addressed back from dstend; they may overlap
	   bytes the loop has already written.  */
L(tail64):
	stp	q0, q0, [dstend, #-64]
	stp	q0, q0, [dstend, #-32]
	ret

END (MEMSET)
libc_hidden_builtin_def (MEMSET)
113#endif
114