/* Copyright (C) 2006-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

/* Thumb requires excessive IT insns here.  */
#define NO_THUMB
#include <sysdep.h>
#include <arm-features.h>

/*
 * Data preload for architectures that support it (ARM V5TE and above).
 * On older architectures PLD(...) expands to nothing.
 */
#if (!defined (__ARM_ARCH_2__) && !defined (__ARM_ARCH_3__) \
     && !defined (__ARM_ARCH_3M__) && !defined (__ARM_ARCH_4__) \
     && !defined (__ARM_ARCH_4T__) && !defined (__ARM_ARCH_5__) \
     && !defined (__ARM_ARCH_5T__))
#define PLD(code...) code
#else
#define PLD(code...)
#endif

/*
 * This can be used to enable code to cacheline align the source pointer.
 * Experiments on tested architectures (StrongARM and XScale) didn't show
 * this a worthwhile thing to do.  That might be different in the future.
 */
//#define CALGN(code...)        code
#define CALGN(code...)

/*
 * Endian independent macros for shifting bytes within registers.
 * PULL extracts the bytes of a word that land at lower dest addresses;
 * PUSH extracts the bytes that land at higher ones.  On little-endian
 * that is lsr/lsl, on big-endian the reverse.
 */
#ifndef __ARMEB__
#define PULL            lsr
#define PUSH            lsl
#else
#define PULL            lsl
#define PUSH            lsr
#endif

		.text
		.syntax unified

/* Prototype: void *memcpy(void *dest, const void *src, size_t n);

   Register usage:
     r0  = dest, advances through the copy (the original value is kept
	   on the stack and popped back into r0 on return)
     r1  = src, advances
     r2  = byte count, kept biased by -4 (and by -32 inside the bulk
	   loops) so that the condition flags from subs steer the exit
     r3, r4-r8, r10, ip, lr = data / scratch for the copy loops.  */

ENTRY(memcpy)

		push	{r0, r4, lr}
		cfi_adjust_cfa_offset (12)
		cfi_rel_offset (r4, 4)
		cfi_rel_offset (lr, 8)

		cfi_remember_state

		subs	r2, r2, #4		@ bias count; < 4 bytes total?
		blo	8f			@ yes: only the 0-3 byte tail
		ands	ip, r0, #3
	PLD(	pld	[r1, #0]		)
		bne	9f			@ dest not word-aligned: fix at 9
		ands	ip, r1, #3
		bne	10f			@ src misaligned vs. dest: shifted copy

/* Here both dest and src are word-aligned.  Bulk-copy 32 bytes per
   iteration; r2 is now biased by -32.  */
1:		subs	r2, r2, #(28)
		push	{r5 - r8}
		cfi_adjust_cfa_offset (16)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		blo	5f			@ fewer than 32 bytes left

	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	r3, ip, #32		)
	CALGN(	sbcsne	r4, r3, r2		)  @ C is always set here
	CALGN(	bcs	2f			)
	CALGN(	adr	r4, 6f			)
	CALGN(	subs	r2, r2, r3		)  @ C gets set
#ifndef ARM_ALWAYS_BX
	CALGN(	add	pc, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
#else
	CALGN(	add	r4, r4, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2))
	CALGN(	bx	r4			)
#endif

	PLD(	pld	[r1, #0]		)
2:	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #28]		)
	PLD(	blo	4f			)
	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)

/* Main loop: 8 words in, 8 words out per pass, while >= 32 bytes remain.  */
3:	PLD(	pld	[r1, #124]		)
4:		ldmia	r1!, {r3, r4, r5, r6, r7, r8, ip, lr}
		subs	r2, r2, #32
		stmia	r0!, {r3, r4, r5, r6, r7, r8, ip, lr}
		bhs	3b

/* 0-28 bytes of whole words remain (r2 & 28).  Compute a jump into the
   ldr ladder below so that exactly (r2 & 28) / 4 loads execute; the
   matching str ladder is entered the same way.  Each ladder slot is
   padded to ARM_BX_ALIGN bytes, hence the lsl #(ARM_BX_ALIGN_LOG2 - 2)
   scaling of the word count.  */
5:		ands	ip, r2, #28
		rsb	ip, ip, #32		@ ip = slots to skip, scaled below
#ifndef ARM_ALWAYS_BX
		/* C is always clear here.  */
		addne	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		b	7f			@ no words left at all
#else
		beq	7f
		push	{r10}
		cfi_adjust_cfa_offset (4)
		cfi_rel_offset (r10, 0)
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 6.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(6f - (0b + PC_OFS))
		bx	r10
#endif
		.p2align ARM_BX_ALIGN_LOG2
6:		nop
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r3, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r4, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r5, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r6, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r7, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	r8, [r1], #4
		.p2align ARM_BX_ALIGN_LOG2
		ldr	lr, [r1], #4

#ifndef ARM_ALWAYS_BX
		add	pc, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		nop
#else
0:		add	r10, pc, ip, lsl #(ARM_BX_ALIGN_LOG2 - 2)
		/* If alignment is not perfect, then there will be some
		   padding (nop) instructions between this BX and label 66.
		   The computation above assumed that two instructions
		   later is exactly the right spot.  */
		add	r10, #(66f - (0b + PC_OFS))
		bx	r10
#endif
		.p2align ARM_BX_ALIGN_LOG2
66:		nop
		.p2align ARM_BX_ALIGN_LOG2
		str	r3, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r4, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r5, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r6, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r7, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	r8, [r0], #4
		.p2align ARM_BX_ALIGN_LOG2
		str	lr, [r0], #4

#ifdef ARM_ALWAYS_BX
		pop	{r10}
		cfi_adjust_cfa_offset (-4)
		cfi_restore (r10)
#endif

	CALGN(	bcs	2b			)

7:		pop	{r5 - r8}
		cfi_adjust_cfa_offset (-16)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)

/* Copy the trailing 0-3 bytes.  lsl #31 moves bit 1 of r2 into C and
   bit 0 into the sign/zero flags: ne => one byte, cs => two bytes.  */
8:		movs	r2, r2, lsl #31
		ldrbne	r3, [r1], #1
		ldrbcs	r4, [r1], #1
		ldrbcs	ip, [r1]
		strbne	r3, [r0], #1
		strbcs	r4, [r0], #1
		strbcs	ip, [r0]

#if ((defined (__ARM_ARCH_4T__) && defined(__THUMB_INTERWORK__)) \
     || defined (ARM_ALWAYS_BX))
		pop	{r0, r4, lr}
		cfi_adjust_cfa_offset (-12)
		cfi_restore (r4)
		cfi_restore (lr)
		bx	lr
#else
		pop	{r0, r4, pc}
#endif

		cfi_restore_state

/* dest is not word-aligned: copy 4 - (dest & 3) = 1..3 leading bytes
   (cmp ip, #2 makes gt select the 3-byte and ge the 2-byte case), then
   retry with an aligned dest.  */
9:		rsb	ip, ip, #4
		cmp	ip, #2
		ldrbgt	r3, [r1], #1
		ldrbge	r4, [r1], #1
		ldrb	lr, [r1], #1
		strbgt	r3, [r0], #1
		strbge	r4, [r0], #1
		subs	r2, r2, ip
		strb	lr, [r0], #1
		blo	8b			@ fewer than 4 bytes remain
		ands	ip, r1, #3
		beq	1b			@ src now aligned too: bulk copy

/* dest is word-aligned but src = 4*k + ip with ip in {1, 2, 3}.  Round
   src down to a word boundary, preload the first word into lr, and
   dispatch to the forward_copy_shift instance for this misalignment
   (fall through: ip == 1; 17: ip == 2; 18: ip == 3).  */
10:		bic	r1, r1, #3
		cmp	ip, #2
		ldr	lr, [r1], #4
		beq	17f
		bgt	18f


/* Copy from a src that is \pull/8 bytes past a word boundary into the
   word-aligned dest.  On entry lr holds the word last fetched from the
   rounded-down src.  Each output word is assembled from two adjacent
   input words: the in-scope bytes of the previous word (PULL #\pull)
   merged with the leading bytes of the next (PUSH #\push).  */
		.macro	forward_copy_shift pull push

		subs	r2, r2, #28		@ bias to -32; < 32 bytes?
		blo	14f

	CALGN(	ands	ip, r1, #31		)
	CALGN(	rsb	ip, ip, #32		)
	CALGN(	sbcsne	r4, ip, r2		)  @ C is always set here
	CALGN(	subcc	r2, r2, ip		)
	CALGN(	bcc	15f			)

11:		push	{r5 - r8, r10}
		cfi_adjust_cfa_offset (20)
		cfi_rel_offset (r5, 0)
		cfi_rel_offset (r6, 4)
		cfi_rel_offset (r7, 8)
		cfi_rel_offset (r8, 12)
		cfi_rel_offset (r10, 16)

	PLD(	pld	[r1, #0]		)
	PLD(	cmp	r2, #96			)
	PLD(	pld	[r1, #28]		)
	PLD(	blo	13f			)
	PLD(	pld	[r1, #60]		)
	PLD(	pld	[r1, #92]		)

/* Bulk loop: read 8 words, emit 8 realigned words, while >= 32 bytes
   remain.  lr always carries the partial word over to the next pass.  */
12:	PLD(	pld	[r1, #124]		)
13:		ldmia	r1!, {r4, r5, r6, r7}
		mov	r3, lr, PULL #\pull
		subs	r2, r2, #32
		ldmia	r1!, {r8, r10, ip, lr}
		orr	r3, r3, r4, PUSH #\push
		mov	r4, r4, PULL #\pull
		orr	r4, r4, r5, PUSH #\push
		mov	r5, r5, PULL #\pull
		orr	r5, r5, r6, PUSH #\push
		mov	r6, r6, PULL #\pull
		orr	r6, r6, r7, PUSH #\push
		mov	r7, r7, PULL #\pull
		orr	r7, r7, r8, PUSH #\push
		mov	r8, r8, PULL #\pull
		orr	r8, r8, r10, PUSH #\push
		mov	r10, r10, PULL #\pull
		orr	r10, r10, ip, PUSH #\push
		mov	ip, ip, PULL #\pull
		orr	ip, ip, lr, PUSH #\push
		stmia	r0!, {r3, r4, r5, r6, r7, r8, r10, ip}
		bhs	12b

		pop	{r5 - r8, r10}
		cfi_adjust_cfa_offset (-20)
		cfi_restore (r5)
		cfi_restore (r6)
		cfi_restore (r7)
		cfi_restore (r8)
		cfi_restore (r10)

/* 0-28 bytes of whole words left.  */
14:		ands	ip, r2, #28
		beq	16f

/* One realigned word per iteration until the word count is exhausted.  */
15:		mov	r3, lr, PULL #\pull
		ldr	lr, [r1], #4
		subs	ip, ip, #4
		orr	r3, r3, lr, PUSH #\push
		str	r3, [r0], #4
		bgt	15b
	CALGN(	cmp	r2, #0			)
	CALGN(	bge	11b			)

/* Rewind src past the read-ahead so it points at the first uncopied
   byte, then share the common 0-3 byte tail at 8b.  */
16:		sub	r1, r1, #(\push / 8)
		b	8b

		.endm


		forward_copy_shift	pull=8	push=24	@ src & 3 == 1

17:		forward_copy_shift	pull=16	push=16	@ src & 3 == 2

18:		forward_copy_shift	pull=24	push=8	@ src & 3 == 3

END(memcpy)
libc_hidden_builtin_def (memcpy)