/* strrchr: find the last instance of a character in a string.

   Copyright (C) 2014-2021 Free Software Foundation, Inc.

   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library.  If not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Assumptions:
 *
 * ARMv8-a, AArch64
 * Neon Available.
 * MTE compatible.
 */

/* Arguments and results.  */
#define srcin		x0	/* In: start of the string.  */
#define chrin		w1	/* In: character to find (low byte is used).  */
#define result		x0	/* Out: address of last match, or 0.  */

/* Scalar temporaries.  Several names deliberately share a register:
   tmp/wtmp/synd are all x3 (w3 is its 32-bit view) and shift/src_match
   are both x4.  Each pair is never live under both names at once.  */
#define src		x2	/* Read cursor; always 16-byte aligned.  */
#define tmp		x3
#define wtmp		w3
#define synd		x3	/* Syndrome of the current chunk.  */
#define shift		x4	/* First chunk only: bit offset of srcin.  */
#define src_match	x4	/* End address of the last chunk with a match.  */
#define nul_match	x5	/* NUL bits of the syndrome / mask below them.  */
#define chr_match	x6	/* Character-match bits of the remembered chunk.  */

/* Vector temporaries.  vrepmask2 and vend are both v5: vrepmask2 is only
   needed for the first (unaligned) chunk and is destroyed by the first
   write to vend.  */
#define vrepchr		v0	/* chrin replicated into all 16 bytes.  */
#define vdata		v1	/* Current 16-byte chunk.  */
#define vhas_nul	v2
#define vhas_chr	v3
#define vrepmask	v4	/* 0x3003 per halfword: bytes 0x03, 0x30.  */
#define vrepmask2	v5	/* 0xf00f per halfword: bytes 0x0f, 0xf0.  */
#define vend		v5
#define dend		d5	/* Low 64 bits of vend.  */

/* Core algorithm.

   For each 16-byte chunk we calculate a 64-bit syndrome value, with
   four bits per byte (LSB is always in bits 0 and 1, for both big
   and little-endian systems).  For each tuple, bits 0-1 are set if
   the relevant byte matched the requested character; bits 2-3 are set
   if the relevant byte matched the NUL end of string.

   The syndrome is built by giving even-numbered bytes their flags in the
   low nibble and odd-numbered bytes their flags in the high nibble, then
   adding adjacent bytes pairwise so each result byte describes two
   source bytes.  Masking with 0x3333333333333333 isolates the character
   matches and 0xcccccccccccccccc isolates the NUL matches.

   Every load is a 16-byte load from a 16-byte aligned address: src is
   rounded down on entry and only ever advanced by 16.

   Control flow:
     first chunk - only when srcin is unaligned; discards the bytes that
		   precede srcin.
     L(loop1)	 - fast scan while neither the character nor NUL has been
		   seen.
     L(tail)	 - NUL found and no match was recorded in an earlier chunk;
		   the answer is in this chunk or is a null pointer.
     L(loop2)	 - at least one match has been seen; remember the most
		   recent matching chunk until NUL is found.  */

/* char *strrchr (const char *s, int c)

   In:	 x0 (srcin) = s, w1 (chrin) = c.
   Out:	 x0 (result) = address of the last byte equal to (char) c at or
	 before the terminating NUL, or 0 if there is none.
   Writes x2-x6, v0-v5 and the condition flags.  Leaf routine; the stack
   is not touched.  */
ENTRY(strrchr)
	/* PTR_ARG comes from sysdep.h; presumably it normalises pointer
	   argument 0 for ILP32 builds -- confirm against that header.  */
	PTR_ARG (0)
	bic	src, srcin, 15		/* Round down to a 16-byte boundary.  */
	dup	vrepchr.16b, chrin
	mov	wtmp, 0x3003
	dup	vrepmask.8h, wtmp
	tst	srcin, 15
	beq	L(loop1)		/* Already aligned: no partial chunk.  */

	/* Unaligned start.  Load the aligned chunk containing srcin and
	   build its full syndrome.  */
	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	mov	wtmp, 0xf00f
	dup	vrepmask2.8h, wtmp
	/* Insert the character flags into bits 0-1 (even bytes) or bits 4-5
	   (odd bytes) of the NUL flags, then keep one nibble per byte.  */
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	and	vhas_nul.16b, vhas_nul.16b, vrepmask2.16b
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	/* Register shifts use the count modulo 64, so srcin * 4 acts as
	   (srcin & 15) * 4: four syndrome bits per byte before srcin.  */
	lsl	shift, srcin, 2
	fmov	synd, dend
	/* Shift right then left to zero the nibbles of bytes that lie
	   before the start of the string.  */
	lsr	synd, synd, shift
	lsl	synd, synd, shift
	ands	nul_match, synd, 0xcccccccccccccccc
	bne	L(tail)			/* String ends in this chunk.  */
	cbnz	synd, L(loop2)		/* No NUL, but a match to remember.  */
	/* Nothing of interest: fall through into the fast scan.  */

	.p2align 5
L(loop1):
	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	/* Unsigned vhas_chr >= vdata is true where the byte matched (0xff
	   >= anything) or where the byte is NUL (0 >= 0), so one compare
	   detects both events.  */
	cmhs	vhas_nul.16b, vhas_chr.16b, vdata.16b
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	cbz	synd, L(loop1)

	/* Something was found; now build the exact syndrome.  vrepmask2 is
	   no longer available (vend overwrote it), so only the low nibble
	   of odd bytes is cleared.  An even byte can carry stray high-nibble
	   bits only when that byte is NUL, and bits above the first NUL are
	   never used to form the result.  */
	cmeq	vhas_nul.16b, vdata.16b, 0
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	bic	vhas_nul.8h, 0x0f, lsl 8
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	ands	nul_match, synd, 0xcccccccccccccccc
	beq	L(loop2)		/* No NUL, so it was a match.  */

L(tail):
	/* nul_match - 1 sets every bit below the first NUL flag.  Any higher
	   bits it retains are NUL-flag positions, which the 0x33.. mask has
	   already excluded, so the AND keeps only the character matches that
	   precede the first NUL.  When chrin is NUL the terminator's own
	   bits 0-1 sit below its bit 2 and are kept.  */
	sub	nul_match, nul_match, 1
	and	chr_match, synd, 0x3333333333333333
	ands	chr_match, chr_match, nul_match
	/* src is one chunk past the data just examined.  The highest set
	   bit marks the last match; clz / 4 is its distance in bytes from
	   the final byte of the chunk.  */
	sub	result, src, 1
	clz	tmp, chr_match
	sub	result, result, tmp, lsr 2
	/* Flags are still those of the ands above: no match gives 0.  */
	csel	result, result, xzr, ne
	ret

	.p2align 4
L(loop2):
	/* On entry synd describes the chunk that ends at src and contains
	   no NUL.  If it holds a match, it becomes the remembered chunk.
	   synd is non-zero on every entry from outside the loop, so
	   src_match and chr_match are always initialised here.  */
	cmp	synd, 0
	csel	src_match, src, src_match, ne
	csel	chr_match, synd, chr_match, ne
	ld1	{vdata.16b}, [src], 16
	cmeq	vhas_nul.16b, vdata.16b, 0
	cmeq	vhas_chr.16b, vdata.16b, vrepchr.16b
	bit	vhas_nul.16b, vhas_chr.16b, vrepmask.16b
	/* A pairwise maximum is enough inside the loop: it shows NUL flags
	   whenever either byte of a pair is NUL and otherwise keeps the
	   higher match of the pair, which is the one clz will select.  */
	umaxp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	tst	synd, 0xcccccccccccccccc
	beq	L(loop2)

	/* NUL found: build the exact syndrome for the final chunk.  */
	bic	vhas_nul.8h, 0x0f, lsl 8
	addp	vend.16b, vhas_nul.16b, vhas_nul.16b
	fmov	synd, dend
	and	nul_match, synd, 0xcccccccccccccccc
	sub	nul_match, nul_match, 1
	and	tmp, synd, 0x3333333333333333
	ands	tmp, tmp, nul_match
	/* Prefer a match in the final chunk that precedes the NUL;
	   otherwise fall back to the remembered chunk.  */
	csel	chr_match, tmp, chr_match, ne
	csel	src_match, src, src_match, ne
	sub	src_match, src_match, 1
	clz	tmp, chr_match
	sub	result, src_match, tmp, lsr 2
	ret

END(strrchr)
/* The two macros below are defined outside this file; rindex is exported
   as an alias of strrchr.  */
weak_alias (strrchr, rindex)
libc_hidden_builtin_def (strrchr)