/* Optimized strchr implementation for PowerPC64.
   Copyright (C) 1997-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* See strlen.S for comments on how this works.  */

/* char * [r3] strchr (const char *s [r3] , int c [r4] )

   The string is scanned one aligned doubleword at a time.  For a
   doubleword x the expression

     (x + 0xfefefefefefefeff) & ~x & 0x8080808080808080

   is nonzero exactly when some byte of x is zero (individual flag bytes
   above a true hit may be spurious because of carries).  The loop applies
   it to x itself to find the terminator and to x ^ rCHR to find C.

   Returns a pointer to the first byte equal to the low byte of C, or 0 if
   the terminating zero byte is reached first.  If the low byte of C is
   zero, the address of the terminator is returned.  */

#ifndef STRCHR
# define STRCHR strchr
#endif

ENTRY_TOCLESS (STRCHR)
	CALL_MCOUNT 2

#define rTMP1	r0
#define rRTN	r3	/* outgoing result */
#define rSTR	r8	/* current word pointer */
#define rCHR	r4	/* byte we're looking for, spread over the whole word */
#define rWORD	r5	/* the current word */
#define rCLZB	rCHR	/* leading zero byte count */
#define rFEFE	r6	/* constant 0xfefefefefefefeff (-0x0101010101010101) */
#define r7F7F	r7	/* constant 0x7f7f7f7f7f7f7f7f */
#define rTMP2	r9
#define rIGN	r10	/* number of bits we should ignore in the first word */
#define rMASK	r11	/* mask with the bits to ignore set to 0 */
#define rTMP3	r12
#define rTMP4	rIGN
#define rTMP5	rMASK

/* Setup.  Three independent computations are interleaved below:
     - the low byte of rCHR is replicated into all eight bytes of rCHR
       (8 -> 16 -> 32 -> 64 bits via the three insrdi instructions);
     - rFEFE and r7F7F are built from 16-bit immediates and then widened
       to 64 bits;
     - S is split into the address of its enclosing aligned doubleword
       (rSTR) and the bit offset of S inside it, rIGN = (S & 7) * 8.  */
	dcbt	0,rRTN
	insrdi	rCHR, rCHR, 8, 48
	li	rMASK, -1
	insrdi	rCHR, rCHR, 16, 32
	rlwinm	rIGN, rRTN, 3, 26, 28
	insrdi	rCHR, rCHR, 32, 0
	lis	rFEFE, -0x101
	lis	r7F7F, 0x7f7f
	clrrdi	rSTR, rRTN, 3
	addi	rFEFE, rFEFE, -0x101
	addi	r7F7F, r7F7F, 0x7f7f
	sldi	rTMP1, rFEFE, 32
	insrdi	r7F7F, r7F7F, 32, 0
	add	rFEFE, rFEFE, rTMP1
/* Test the first (partial?) word.  The bytes of the aligned doubleword
   that lie before S are forced to 0xff in rWORD, so they cannot look like
   the terminator, and to 0xff in rTMP3 (the XOR with rCHR), so they cannot
   look like a match for C.  */
	ld	rWORD, 0(rSTR)
#ifdef __LITTLE_ENDIAN__
	sld	rMASK, rMASK, rIGN
#else
	srd	rMASK, rMASK, rIGN
#endif
	orc	rWORD, rWORD, rMASK
	add	rTMP1, rFEFE, rWORD
	nor	rTMP2, r7F7F, rWORD
	/* rTMP4 aliases rIGN, which is no longer needed once rMASK is built.
	   CR0 set here is consumed by the beq at L(loopentry).  */
	and.	rTMP4, rTMP1, rTMP2
	xor	rTMP3, rCHR, rWORD
	orc	rTMP3, rTMP3, rMASK
	b	L(loopentry)

/* The loop.  It is software-pipelined: each iteration loads the next
   doubleword, finishes the test for C on the previous one, and performs
   the test for 0 on the new one.  It is only entered when the previous
   doubleword contained no zero byte.  */

L(loop):
	ldu	rWORD, 8(rSTR)
	/* Finish the match test on the previous word; rTMP1/rTMP2 were
	   computed from its rTMP3 at L(loopentry).  */
	and.	rTMP5, rTMP1, rTMP2
/* Test for 0.  */
	add	rTMP1, rFEFE, rWORD /* x - 0x0101010101010101.  */
	nor	rTMP2, r7F7F, rWORD /* ~(x | 0x7f7f7f7f7f7f7f7f)
				       == ~x & 0x8080808080808080.  */
	bne	L(foundit)
	and.	rTMP4, rTMP1, rTMP2 /* (x - 0x0101010101010101) & ~x
				       & 0x8080808080808080.  */
/* Start test for the bytes we're looking for.  */
	xor	rTMP3, rCHR, rWORD
L(loopentry):
	add	rTMP1, rFEFE, rTMP3
	nor	rTMP2, r7F7F, rTMP3
	/* CR0 still holds the result of the zero test (and. rTMP4 above);
	   the add/nor in between do not modify it.  */
	beq	L(loop)

/* There is a zero byte in the word, but may also be a matching byte (either
   before or after the zero byte).  In fact, we may be looking for a
   zero byte, in which case we return a match.  */
	and.	rTMP5, rTMP1, rTMP2
	li	rRTN, 0
	beqlr			/* No match in this word: return 0.  */
/* At this point:
   rTMP5 bytes are 0x80 for each match of c, 0 otherwise.
   rTMP4 bytes are 0x80 for each match of 0, 0 otherwise.
   But there may be false matches in the next most significant byte from
   a true match due to carries.  This means we need to recalculate the
   matches using a longer method for big-endian.  */
#ifdef __LITTLE_ENDIAN__
	/* (m - 1) & ~m is a mask of the bits below the lowest set bit of m.
	   Comparing the two masks tells which of the first match (rTMP1)
	   and the first zero byte (rTMP2) comes first in memory.  */
	addi	rTMP1, rTMP5, -1
	andc	rTMP1, rTMP1, rTMP5
	cntlzd	rCLZB, rTMP1
	addi	rTMP2, rTMP4, -1
	andc	rTMP2, rTMP2, rTMP4
	cmpld	rTMP1, rTMP2
	bgtlr			/* Terminator precedes the match: return 0.  */
	/* Convert the leading-zero count into the bit offset of the matching
	   byte (8 * byte index).  */
	subfic	rCLZB, rCLZB, 64-7
#else
/* I think we could reduce this by two instructions by keeping the "nor"
   results from the loop for reuse here.  See strlen.S tail.  Similarly
   one instruction could be pruned from L(foundit).  */
	/* Carry-free recomputation: for y = rWORD (zero test) and y = rTMP3
	   (match test), ~(((y & 0x7f..7f) + 0x7f..7f) | y | 0x7f..7f) has 0x80
	   in exactly the bytes of y that are zero.  rFEFE is reused as a
	   scratch register; its constant is not needed any more.  */
	and	rFEFE, r7F7F, rWORD
	or	rTMP5, r7F7F, rWORD
	and	rTMP1, r7F7F, rTMP3
	or	rTMP4, r7F7F, rTMP3
	add	rFEFE, rFEFE, r7F7F
	add	rTMP1, rTMP1, r7F7F
	nor	rWORD, rTMP5, rFEFE
	nor	rTMP2, rTMP4, rTMP1
	cntlzd	rCLZB, rTMP2
	cmpld	rWORD, rTMP2
	bgtlr			/* Terminator precedes the match: return 0.  */
#endif
	/* Bit offset -> byte offset, then add the address of this word.  */
	srdi	rCLZB, rCLZB, 3
	add	rRTN, rSTR, rCLZB
	blr

/* A match was found in the previous doubleword, which contained no zero
   byte.  rSTR has already been advanced by the ldu in L(loop), so the
   result must be taken relative to rSTR - 8.  rTMP3 still holds the XOR
   for the previous doubleword.  */
L(foundit):
#ifdef __LITTLE_ENDIAN__
	addi	rTMP1, rTMP5, -1
	andc	rTMP1, rTMP1, rTMP5
	cntlzd	rCLZB, rTMP1
	/* The extra -64 folds the "- 8 bytes" adjustment into the offset;
	   the result is negative, hence the arithmetic shift.  */
	subfic	rCLZB, rCLZB, 64-7-64
	sradi	rCLZB, rCLZB, 3
#else
	and	rTMP1, r7F7F, rTMP3
	or	rTMP4, r7F7F, rTMP3
	add	rTMP1, rTMP1, r7F7F
	nor	rTMP2, rTMP4, rTMP1
	cntlzd	rCLZB, rTMP2
	subi	rSTR, rSTR, 8
	srdi	rCLZB, rCLZB, 3
#endif
	add	rRTN, rSTR, rCLZB
	blr
END (STRCHR)

weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)