/* Optimized memset for Huawei Kunpeng processor.
   Copyright (C) 2012-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include <sysdeps/aarch64/memset-reg.h>

#if IS_IN (libc)
# define MEMSET __memset_kunpeng

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses
 *
 */

/* Overview.

   The register names used below (dstin, valw, count, dst, dstend) are
   not defined in this file; presumably they are aliases provided by
   memset-reg.h -- confirm there.

   v0 holds the fill byte replicated into all 16 lanes and dstend
   points one past the last byte to write.  The size then selects one
   of four paths:

     count >= 128   L(set_long): one unaligned 16-byte store, then
		    64 bytes per step from the next 16-byte boundary,
		    then the last 64 bytes relative to dstend.
     64..127	    L(set127): first 64 and last 64 bytes.
     16..63	    first/last 16 bytes, plus the second/second-to-last
		    16 bytes when count >= 32.
     0..15	    L(less16): pairs of 8-, 4- or 1/2-byte stores.

   All short paths rely on overlapping stores from both ends instead
   of branching on the exact length.  dstin is never written.  */

ENTRY_ALIGN (MEMSET, 6)

	PTR_ARG (0)
	SIZE_ARG (2)

	/* Broadcast the fill byte and compute the end pointer.  */
	dup	v0.16B, valw
	add	dstend, dstin, count

	/* Unsigned compares: 128 and above goes to the loop, below 16
	   goes to the tiny-size path.  */
	cmp	count, 128
	b.hs	L(set_long)

	cmp	count, 16
	b.lo	L(less16)

	/* 16..127 bytes.  The first 16 bytes are always written here;
	   bit 6 of count separates 64..127 from 16..63.  */
	str	q0, [dstin]
	tbnz	count, 6, L(set127)
	/* 16..63 bytes: the last 16 bytes, and when bit 5 is set
	   (32..63) also bytes 16..31 and the 16 bytes before the last
	   16.  */
	str	q0, [dstend, -16]
	tbz	count, 5, L(medium_done)
	str	q0, [dstin, 16]
	str	q0, [dstend, -32]
L(medium_done):
	ret

	.p2align 4
	/* 64..127 bytes.  Bytes 0..15 were stored before the branch;
	   finish the first 64 bytes, then write the final 64 bytes.  */
L(set127):
	stp	q0, q0, [dstin, 16]
	str	q0, [dstin, 48]
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

	.p2align 4
	/* 0..15 bytes.  Bit 3 set means 8..15: two 8-byte stores, one
	   from each end.  */
L(less16):
	tbz	count, 3, L(less8)
	str	d0, [dstin]
	str	d0, [dstend, -8]
	ret
	/* Bit 2 set means 4..7: two 4-byte stores, one from each end.  */
L(less8):
	tbz	count, 2, L(less4)
	str	s0, [dstin]
	str	s0, [dstend, -4]
	ret
	/* 0..3 bytes: nothing for 0; otherwise one byte at the start,
	   and for 2 or 3 (bit 1 set) a halfword ending at dstend.  */
L(less4):
	cbz	count, L(less4_done)
	str	b0, [dstin]
	tbz	count, 1, L(less4_done)
	str	h0, [dstend, -2]
L(less4_done):
	ret

	.p2align 4
	/* 128 bytes or more.  */
L(set_long):
	bic	dst, dstin, 15		/* dstin rounded down to 16.  */
	str	q0, [dstin]		/* Unaligned head, 16 bytes.  */
	sub	count, dstend, dst	/* Count is 16 too large.  */
	sub	dst, dst, 16		/* Dst is biased by -32.  */
	/* With this bias the loop continues only while the next
	   64-byte step would still end before dstend, so at most 64
	   bytes remain for the tail stores.  */
	sub	count, count, 64 + 16 + 1
	/* Four 64-byte steps per trip.  Each step stores at dst + 32
	   and dst + 64 and advances dst by 64 through the writeback.
	   The first three steps leave early; the fourth closes the
	   loop.  */
L(loop64):
	.rept	3
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.lo	L(tail64)
	.endr
	stp	q0, q0, [dst, 32]
	stp	q0, q0, [dst, 64]!
	subs	count, count, 64
	b.hs	L(loop64)

	/* Final 64 bytes, addressed from the end; they may overlap
	   bytes the loop has already written.  */
L(tail64):
	stp	q0, q0, [dstend, -64]
	stp	q0, q0, [dstend, -32]
	ret

END (MEMSET)
libc_hidden_builtin_def (MEMSET)
#endif