1/* Copyright (C) 2006-2021 Free Software Foundation, Inc.
2   This file is part of the GNU C Library.
3
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library.  If not, see
17   <https://www.gnu.org/licenses/>.  */
18
/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above)
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
/* PLD(insn) emits the wrapped instruction (a pld cache-preload hint at
   every use site below) on cores that support it, and nothing at all on
   the pre-v5TE architectures excluded above.  */
#define PLD(code...)    code
#else
#define PLD(code...)
#endif

/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on tested architectures (StrongARM and XScale) didn't show
 * this a worthwhile thing to do.  That might be different in the future.
 */
//#define CALGN(code...)        code
#define CALGN(code...)

/*
 * Endian independent macros for shifting bytes within registers.
 */
/* In the misaligned-source copy loop each destination word is built as
   (prev_word PULL #n) | (next_word PUSH #(32-n)).  PULL moves a word's
   still-unconsumed bytes toward the end they are emitted from; PUSH
   moves the following word's bytes to the opposite end.  The shift
   directions swap between little- and big-endian so the same code
   works for both byte orders.  */
#ifndef __ARMEB__
#define PULL            lsr
#define PUSH            lsl
#else
#define PULL            lsl
#define PUSH            lsr
#endif
54
		.text
		.syntax unified

/* Prototype: void *memcpy(void *dest, const void *src, size_t n); */

/* Register usage:
     r0 = dest (post-incremented by the stores)
     r1 = src  (post-incremented by the loads)
     r2 = bytes remaining (biased by -4 for most of the code)
     r3-r8, r10, ip, lr = bulk-copy data / scratch.
   The original r0 is pushed on entry and popped at exit so that the
   function returns dest as required.  */

ENTRY(memcpy)

		push	{r0, r4, lr}	@ save dest for the return value
		cfi_adjust_cfa_offset (12)
		cfi_rel_offset (r4, 4)
		cfi_rel_offset (lr, 8)

		cfi_remember_state

		subs	r2, r2, #4		@ r2 = n - 4; fewer than 4 bytes?
		blo	8f			@ yes: just the 0-3 byte tail
		ands	ip, r0, #3		@ ip = dest & 3
	PLD(	pld	[r1, #0]		)
		bne	9f			@ word-align dest first
		ands	ip, r1, #3		@ ip = src & 3
		bne	10f			@ src misaligned: shifted copy

/* Here both src and dest are word aligned and r2 = n - 4 >= 0.  */
1:		subs	r2, r2, #(28)		@ at least 32 bytes to copy?
		push	{r5 - r8}
		cfi_adjust_cfa_offset (16)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		blo	5f			@ no: only whole-word tail left

		/* Disabled cache-line alignment of src (see CALGN above):
		   would consume 0-28 bytes via the ldr/str ladders at 6:/66:
		   so the main loop runs 32-byte aligned on the source.  */
	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	r3, ip, #32		)
	CALGN(	sbcsne	r4, r3, r2		)  @ C is always set here
	CALGN(	bcs	2f			)
	CALGN(	adr	r4, 6f			)
	CALGN(	subs	r2, r2, r3		)  @ C gets set
#ifndef ARM_ALWAYS_BX
	CALGN(	add	pc, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
#else
	CALGN(	add	r4, r4, ip, lsl	#(ARM_BX_ALIGN_LOG2 - 2))
	CALGN(	bx	r4			)
#endif

	PLD(	pld	[r1, #0]		)
2:	PLD(	cmp	r2, #96			)	@ enough ahead to deep-prefetch?
	PLD(	pld	[r1, #28]		)
	PLD(	blo	4f			)
	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)

/* Main loop: 8 words (32 bytes) per iteration, prefetching one
   iteration (124 = 92 + 32 bytes) ahead.  */
3:	PLD(	pld	[r1, #124]		)
4:		ldmia	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
		subs	r2, r2, #32
		stmia	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
		bhs	3b			@ loop while >= 32 bytes remain

/* 0-28 bytes of whole words remain (r2 bits 4:2).  Copy them by
   jumping into the ldr ladder at 6: so that exactly (r2 & 28)/4 of
   its slots execute; ip = 32 - (r2 & 28) is the byte count of ladder
   to skip, scaled below to ARM_BX_ALIGN_LOG2-sized slots.  */
5:		ands	ip, r2, #28
		rsb	ip, ip, #32
#ifndef ARM_ALWAYS_BX
		/* C is always clear here.  */
		/* In ARM mode pc reads as '.' + 8, i.e. just past the
		   'b 7f' below, which is the base of the ladder.  */
		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		b	7f			@ no whole words left at all
#else
		beq	7f
		push	{r10}
		cfi_adjust_cfa_offset (4)
		cfi_rel_offset (r10, 0)
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 6.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(6f - (0b + PC_OFS))
		bx	r10
#endif
		/* Load ladder: each slot padded to 1 << ARM_BX_ALIGN_LOG2
		   bytes so the computed jump above can index it.  */
		.p2align ARM_BX_ALIGN_LOG2
6:		nop
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r3, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r4, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r5, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r6, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r7, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r8, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	lr, [r1], #4

/* Same computed-jump trick for the matching store ladder at 66:.  */
#ifndef ARM_ALWAYS_BX
		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		nop
#else
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 66.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(66f - (0b + PC_OFS))
		bx	r10
#endif
		.p2align ARM_BX_ALIGN_LOG2
66:		nop
		.p2align ARM_BX_ALIGN_LOG2
		str	r3, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r4, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r5, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r6, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r7, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r8, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	lr, [r0], #4

#ifdef ARM_ALWAYS_BX
		pop	{r10}
		cfi_adjust_cfa_offset (-4)
		cfi_restore (r10)
#endif

	CALGN(	bcs	2b			)  @ back to main loop if more to do

7:		pop	{r5 - r8}
		cfi_adjust_cfa_offset (-16)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)

/* Final 0-3 bytes.  The shift moves bit 1 of r2 into C and leaves the
   result nonzero (NE) iff bit 0 was set: NE copies one odd byte, CS
   copies a further two.  */
8:		movs	r2, r2, lsl #31
		ldrbne	r3, [r1], #1
		ldrbcs	r4, [r1], #1
		ldrbcs	ip, [r1]
		strbne	r3, [r0], #1
		strbcs	r4, [r0], #1
		strbcs	ip, [r0]

/* Return the saved dest in r0.  Targets that need interworking (or
   must always return via BX) pop lr and bx; others pop straight
   into pc.  */
#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
		pop	{r0, r4, lr}
		cfi_adjust_cfa_offset (-12)
		cfi_restore (r4)
		cfi_restore (lr)
		bx      lr
#else
		pop	{r0, r4, pc}
#endif

		cfi_restore_state

/* Dest not word aligned: copy the 1-3 bytes needed to align it.
   On entry ip = dest & 3; afterwards fall back into the aligned or
   shifted paths depending on what src alignment is left.  */
9:		rsb	ip, ip, #4		@ ip = bytes to reach alignment (1-3)
		cmp	ip, #2
		ldrbgt	r3, [r1], #1		@ third byte (ip == 3)
		ldrbge	r4, [r1], #1		@ second byte (ip >= 2)
		ldrb	lr, [r1], #1		@ always at least one byte
		strbgt	r3, [r0], #1
		strbge	r4, [r0], #1
		subs	r2, r2, ip
		strb	lr, [r0], #1
		blo	8b			@ fewer than 4 bytes remain
		ands	ip, r1, #3
		beq	1b			@ src also aligned now: fast path

/* Dest word aligned but src offset by ip = 1, 2 or 3 bytes.  Round
   src down to a word, prime lr with that first word, and dispatch to
   the shifted-copy variant matching the offset.  */
10:		bic	r1, r1, #3
		cmp	ip, #2
		ldr	lr, [r1], #4
		beq	17f			@ offset 2: pull=16/push=16
		bgt	18f			@ offset 3: pull=24/push=8
		@ fall through for offset 1: pull=8/push=24


/* Copy from a word-misaligned source.  lr always holds the most
   recently loaded (word-aligned) source word; each destination word is
   (lr PULL #pull) | (next PUSH #push) with pull + push == 32.  The
   loop structure mirrors the aligned path: a 32-byte main loop, a
   whole-word tail, then a jump back to 8b for the final bytes.  */
		.macro	forward_copy_shift pull push

		subs	r2, r2, #28		@ at least 32 bytes to go?
		blo	14f			@ no: word tail only

	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	ip, ip, #32		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	subcc	r2, r2, ip		)
	CALGN(	bcc	15f			)

11:		push	{r5 - r8, r10}
		cfi_adjust_cfa_offset (20)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		cfi_rel_offset (r10, 16)

	PLD(	pld	[r1, #0]		)
	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #28]		)
	PLD(	blo	13f			)
	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)

/* Main loop: load 8 source words, merge each adjacent pair into an
   output word, store 8 words (32 bytes) per iteration.  */
12:	PLD(	pld	[r1, #124]		)
13:		ldmia	r1!, {r4, r5, r6, r7}
		mov	r3, lr, PULL #\pull	@ unconsumed bytes of previous word
		subs	r2, r2, #32
		ldmia	r1!, {r8, r10, ip, lr}
		orr	r3, r3, r4, PUSH #\push	@ complete word 0 from word 1
		mov	r4, r4, PULL #\pull
		orr	r4, r4, r5, PUSH #\push
		mov	r5, r5, PULL #\pull
		orr	r5, r5, r6, PUSH #\push
		mov	r6, r6, PULL #\pull
		orr	r6, r6, r7, PUSH #\push
		mov	r7, r7, PULL #\pull
		orr	r7, r7, r8, PUSH #\push
		mov	r8, r8, PULL #\pull
		orr	r8, r8, r10, PUSH #\push
		mov	r10, r10, PULL #\pull
		orr	r10, r10, ip, PUSH #\push
		mov	ip, ip, PULL #\pull
		orr	ip, ip, lr, PUSH #\push	@ lr carries over to next iteration
		stmia	r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
		bhs	12b			@ loop while >= 32 bytes remain

		pop	{r5 - r8, r10}
		cfi_adjust_cfa_offset (-20)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)
		cfi_restore (r10)

/* 0-28 bytes of whole words remain.  */
14:		ands	ip, r2, #28
		beq	16f

/* One shifted word per iteration.  */
15:		mov	r3, lr, PULL #\pull
		ldr	lr, [r1], #4
		subs	ip, ip, #4
		orr	r3, r3, lr, PUSH #\push
		str	r3, [r0], #4
		bgt	15b
	CALGN(	cmp	r2, #0			)
	CALGN(	bge	11b			)

/* Step src back over the push/8 bytes of lr not yet consumed, so the
   byte-tail code at 8b reads from the right place.  */
16:		sub	r1, r1, #(\push / 8)
		b	8b

		.endm


		forward_copy_shift	pull=8	push=24		@ src offset 1

17:		forward_copy_shift	pull=16	push=16		@ src offset 2

18:		forward_copy_shift	pull=24	push=8		@ src offset 3

END(memcpy)
libc_hidden_builtin_def (memcpy)
316