/* Copyright (C) 2012-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include <sysdep.h>

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Internal variables.  Note that diff/off1 share x5 and syndrome/tmp
   share x6, so neither pair may be live at the same time.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */

/* strcmp: compare the NUL-terminated strings at src1 (x0) and src2 (x1).

   The value left in x0 is negative, zero or positive according to
   whether the string at src1 compares less than, equal to or greater
   than the string at src2, with bytes treated as unsigned.

   Uses x0-x10 and the condition flags; no stack accesses are made.

   Three paths are taken, selected by the low three bits of the pointers:
     - both 8-byte aligned:           L(loop_aligned) directly;
     - same misalignment:             L(mutual_align), then the aligned loop;
     - different misalignment:        L(misaligned8), which byte-steps src1
				      up to alignment and then runs
				      L(loop_unaligned).  */

ENTRY(strcmp)
	/* PTR_ARG comes from sysdep.h; presumably it normalises pointer
	   arguments for ILP32 - confirm against that header.  */
	PTR_ARG (0)
	PTR_ARG (1)
	/* off2 lets src2 be addressed as [src1, off2], so only src1 has
	   to be advanced in the word loops.  */
	sub	off2, src2, src1
	mov	zeroones, REP8_01
	and	tmp, src1, 7
	tst	off2, 7
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If no NUL was seen compare the words; otherwise force NZCV = 0
	   (reads as "ne") so the loop is left.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
L(end):
	/* Common exit: data1/data2 hold the words to compare and syndrome
	   has a bit set at the first difference or NUL.  On little-endian,
	   byte-reverse so that the earliest byte is the most significant.  */
#ifndef __AARCH64EB__
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	/* src2 has the same low three bits as src1 on this path, so it can
	   be used to derive the shift count.  */
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift
	/* The same mask is ORed into both words, so the masked bytes are
	   identical in data1 and data2.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	/* Leave for L(done) if the src1 byte is NUL or the bytes differ.  */
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* src1 is now 8-byte aligned and src2 is not.  Load the aligned
	   word containing src2 and check it for a NUL before any
	   unaligned src2 load is made.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Set the low bit of each byte that precedes the real start of
	   src2 so that those bytes cannot be mistaken for a NUL.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	/* off1 addresses the aligned src2 word that lies ahead of the
	   unaligned [src1, off2] load.  */
	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	/* Continue only while data3 holds no NUL and data1 == data2.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Shift the NUL markers found in data3 by the src2 misalignment
	   before merging them with the data1/data2 difference bits.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	/* Compare the next src1 word against the remaining bytes of data3,
	   shifted down into place, instead of loading from src2 again.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Byte-loop exit: data1 and data2 each hold one zero-extended byte.  */
	sub	result, data1, data2
	ret

END(strcmp)
libc_hidden_builtin_def (strcmp)