1/* memchr (str, chr, len) -- Return pointer to first occurrence of CHR in STR 2 less than LEN. For Intel 80x86, x>=3. 3 Copyright (C) 1994-2021 Free Software Foundation, Inc. 4 This file is part of the GNU C Library. 5 6 The GNU C Library is free software; you can redistribute it and/or 7 modify it under the terms of the GNU Lesser General Public 8 License as published by the Free Software Foundation; either 9 version 2.1 of the License, or (at your option) any later version. 10 11 The GNU C Library is distributed in the hope that it will be useful, 12 but WITHOUT ANY WARRANTY; without even the implied warranty of 13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 Lesser General Public License for more details. 15 16 You should have received a copy of the GNU Lesser General Public 17 License along with the GNU C Library; if not, see 18 <https://www.gnu.org/licenses/>. */ 19 20#include <sysdep.h> 21#include "asm-syntax.h" 22 23#define PARMS 4+8 /* space for 2 saved regs */ 24#define RTN PARMS 25#define STR RTN 26#define CHR STR+4 27#define LEN CHR+4 28 29 .text 30ENTRY (__memchr) 31 32 /* Save callee-safe registers used in this function. */ 33 pushl %esi 34 cfi_adjust_cfa_offset (4) 35 pushl %edi 36 cfi_adjust_cfa_offset (4) 37 cfi_rel_offset (edi, 0) 38 39 /* Load parameters into registers. */ 40 movl STR(%esp), %eax /* str: pointer to memory block. */ 41 movl CHR(%esp), %edx /* c: byte we are looking for. */ 42 movl LEN(%esp), %esi /* len: length of memory block. */ 43 cfi_rel_offset (esi, 4) 44 45 /* If my must not test more than three characters test 46 them one by one. This is especially true for 0. */ 47 cmpl $4, %esi 48 jb L(3) 49 50 /* At the moment %edx contains CHR. What we need for the 51 algorithm is CHR in all bytes of the dword. Avoid 52 operations on 16 bit words because these require an 53 prefix byte (and one more cycle). */ 54 movb %dl, %dh /* Now it is 0|0|c|c */ 55 movl %edx, %ecx 56 shll $16, %edx /* Now c|c|0|0 */ 57 movw %cx, %dx /* And finally c|c|c|c */ 58 59 /* Better performance can be achieved if the word (32 60 bit) memory access is aligned on a four-byte-boundary. 61 So process first bytes one by one until boundary is 62 reached. Don't use a loop for better performance. */ 63 64 testb $3, %al /* correctly aligned ? */ 65 je L(2) /* yes => begin loop */ 66 cmpb %dl, (%eax) /* compare byte */ 67 je L(9) /* target found => return */ 68 incl %eax /* increment source pointer */ 69 decl %esi /* decrement length counter */ 70 je L(4) /* len==0 => return NULL */ 71 72 testb $3, %al /* correctly aligned ? */ 73 je L(2) /* yes => begin loop */ 74 cmpb %dl, (%eax) /* compare byte */ 75 je L(9) /* target found => return */ 76 incl %eax /* increment source pointer */ 77 decl %esi /* decrement length counter */ 78 je L(4) /* len==0 => return NULL */ 79 80 testb $3, %al /* correctly aligned ? */ 81 je L(2) /* yes => begin loop */ 82 cmpb %dl, (%eax) /* compare byte */ 83 je L(9) /* target found => return */ 84 incl %eax /* increment source pointer */ 85 decl %esi /* decrement length counter */ 86 /* no test for len==0 here, because this is done in the 87 loop head */ 88 jmp L(2) 89 90 /* We exit the loop if adding MAGIC_BITS to LONGWORD fails to 91 change any of the hole bits of LONGWORD. 92 93 1) Is this safe? Will it catch all the zero bytes? 94 Suppose there is a byte with all zeros. Any carry bits 95 propagating from its left will fall into the hole at its 96 least significant bit and stop. Since there will be no 97 carry from its most significant bit, the LSB of the 98 byte to the left will be unchanged, and the zero will be 99 detected. 100 101 2) Is this worthwhile? Will it ignore everything except 102 zero bytes? Suppose every byte of LONGWORD has a bit set 103 somewhere. There will be a carry into bit 8. If bit 8 104 is set, this will carry into bit 16. If bit 8 is clear, 105 one of bits 9-15 must be set, so there will be a carry 106 into bit 16. Similarly, there will be a carry into bit 107 24. If one of bits 24-31 is set, there will be a carry 108 into bit 32 (=carry flag), so all of the hole bits will 109 be changed. 110 111 3) But wait! Aren't we looking for CHR, not zero? 112 Good point. So what we do is XOR LONGWORD with a longword, 113 each of whose bytes is CHR. This turns each byte that is CHR 114 into a zero. */ 115 116 117 /* Each round the main loop processes 16 bytes. */ 118 119 ALIGN (4) 120 121L(1): movl (%eax), %ecx /* get word (= 4 bytes) in question */ 122 movl $0xfefefeff, %edi /* magic value */ 123 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c 124 are now 0 */ 125 addl %ecx, %edi /* add the magic value to the word. We get 126 carry bits reported for each byte which 127 is *not* 0 */ 128 129 /* According to the algorithm we had to reverse the effect of the 130 XOR first and then test the overflow bits. But because the 131 following XOR would destroy the carry flag and it would (in a 132 representation with more than 32 bits) not alter then last 133 overflow, we can now test this condition. If no carry is signaled 134 no overflow must have occurred in the last byte => it was 0. */ 135 jnc L(8) 136 137 /* We are only interested in carry bits that change due to the 138 previous add, so remove original bits */ 139 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ 140 141 /* Now test for the other three overflow bits. */ 142 orl $0xfefefeff, %edi /* set all non-carry bits */ 143 incl %edi /* add 1: if one carry bit was *not* set 144 the addition will not result in 0. */ 145 146 /* If at least one byte of the word is CHR we don't get 0 in %edi. */ 147 jnz L(8) /* found it => return pointer */ 148 149 /* This process is unfolded four times for better performance. 150 we don't increment the source pointer each time. Instead we 151 use offsets and increment by 16 in each run of the loop. But 152 before probing for the matching byte we need some extra code 153 (following LL(13) below). Even the len can be compared with 154 constants instead of decrementing each time. */ 155 156 movl 4(%eax), %ecx /* get word (= 4 bytes) in question */ 157 movl $0xfefefeff, %edi /* magic value */ 158 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c 159 are now 0 */ 160 addl %ecx, %edi /* add the magic value to the word. We get 161 carry bits reported for each byte which 162 is *not* 0 */ 163 jnc L(7) /* highest byte is CHR => return pointer */ 164 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ 165 orl $0xfefefeff, %edi /* set all non-carry bits */ 166 incl %edi /* add 1: if one carry bit was *not* set 167 the addition will not result in 0. */ 168 jnz L(7) /* found it => return pointer */ 169 170 movl 8(%eax), %ecx /* get word (= 4 bytes) in question */ 171 movl $0xfefefeff, %edi /* magic value */ 172 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c 173 are now 0 */ 174 addl %ecx, %edi /* add the magic value to the word. We get 175 carry bits reported for each byte which 176 is *not* 0 */ 177 jnc L(6) /* highest byte is CHR => return pointer */ 178 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ 179 orl $0xfefefeff, %edi /* set all non-carry bits */ 180 incl %edi /* add 1: if one carry bit was *not* set 181 the addition will not result in 0. */ 182 jnz L(6) /* found it => return pointer */ 183 184 movl 12(%eax), %ecx /* get word (= 4 bytes) in question */ 185 movl $0xfefefeff, %edi /* magic value */ 186 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c 187 are now 0 */ 188 addl %ecx, %edi /* add the magic value to the word. We get 189 carry bits reported for each byte which 190 is *not* 0 */ 191 jnc L(5) /* highest byte is CHR => return pointer */ 192 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ 193 orl $0xfefefeff, %edi /* set all non-carry bits */ 194 incl %edi /* add 1: if one carry bit was *not* set 195 the addition will not result in 0. */ 196 jnz L(5) /* found it => return pointer */ 197 198 /* Adjust both counters for a full round, i.e. 16 bytes. */ 199 addl $16, %eax 200L(2): subl $16, %esi 201 jae L(1) /* Still more than 16 bytes remaining */ 202 203 /* Process remaining bytes separately. */ 204 cmpl $4-16, %esi /* rest < 4 bytes? */ 205 jb L(3) /* yes, than test byte by byte */ 206 207 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 208 movl $0xfefefeff, %edi /* magic value */ 209 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c 210 are now 0 */ 211 addl %ecx, %edi /* add the magic value to the word. We get 212 carry bits reported for each byte which 213 is *not* 0 */ 214 jnc L(8) /* highest byte is CHR => return pointer */ 215 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ 216 orl $0xfefefeff, %edi /* set all non-carry bits */ 217 incl %edi /* add 1: if one carry bit was *not* set 218 the addition will not result in 0. */ 219 jne L(8) /* found it => return pointer */ 220 addl $4, %eax /* adjust source pointer */ 221 222 cmpl $8-16, %esi /* rest < 8 bytes? */ 223 jb L(3) /* yes, than test byte by byte */ 224 225 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 226 movl $0xfefefeff, %edi /* magic value */ 227 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c 228 are now 0 */ 229 addl %ecx, %edi /* add the magic value to the word. We get 230 carry bits reported for each byte which 231 is *not* 0 */ 232 jnc L(8) /* highest byte is CHR => return pointer */ 233 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ 234 orl $0xfefefeff, %edi /* set all non-carry bits */ 235 incl %edi /* add 1: if one carry bit was *not* set 236 the addition will not result in 0. */ 237 jne L(8) /* found it => return pointer */ 238 addl $4, %eax /* adjust source pointer */ 239 240 cmpl $12-16, %esi /* rest < 12 bytes? */ 241 jb L(3) /* yes, than test byte by byte */ 242 243 movl (%eax), %ecx /* get word (= 4 bytes) in question */ 244 movl $0xfefefeff, %edi /* magic value */ 245 xorl %edx, %ecx /* XOR with word c|c|c|c => bytes of str == c 246 are now 0 */ 247 addl %ecx, %edi /* add the magic value to the word. We get 248 carry bits reported for each byte which 249 is *not* 0 */ 250 jnc L(8) /* highest byte is CHR => return pointer */ 251 xorl %ecx, %edi /* ((word^charmask)+magic)^(word^charmask) */ 252 orl $0xfefefeff, %edi /* set all non-carry bits */ 253 incl %edi /* add 1: if one carry bit was *not* set 254 the addition will not result in 0. */ 255 jne L(8) /* found it => return pointer */ 256 addl $4, %eax /* adjust source pointer */ 257 258 /* Check the remaining bytes one by one. */ 259L(3): andl $3, %esi /* mask out uninteresting bytes */ 260 jz L(4) /* no remaining bytes => return NULL */ 261 262 cmpb %dl, (%eax) /* compare byte with CHR */ 263 je L(9) /* equal, than return pointer */ 264 incl %eax /* increment source pointer */ 265 decl %esi /* decrement length */ 266 jz L(4) /* no remaining bytes => return NULL */ 267 268 cmpb %dl, (%eax) /* compare byte with CHR */ 269 je L(9) /* equal, than return pointer */ 270 incl %eax /* increment source pointer */ 271 decl %esi /* decrement length */ 272 jz L(4) /* no remaining bytes => return NULL */ 273 274 cmpb %dl, (%eax) /* compare byte with CHR */ 275 je L(9) /* equal, than return pointer */ 276 277L(4): /* no byte found => return NULL */ 278 xorl %eax, %eax 279 jmp L(9) 280 281 /* add missing source pointer increments */ 282L(5): addl $4, %eax 283L(6): addl $4, %eax 284L(7): addl $4, %eax 285 286 /* Test for the matching byte in the word. %ecx contains a NUL 287 char in the byte which originally was the byte we are looking 288 at. */ 289L(8): testb %cl, %cl /* test first byte in dword */ 290 jz L(9) /* if zero => return pointer */ 291 incl %eax /* increment source pointer */ 292 293 testb %ch, %ch /* test second byte in dword */ 294 jz L(9) /* if zero => return pointer */ 295 incl %eax /* increment source pointer */ 296 297 testl $0xff0000, %ecx /* test third byte in dword */ 298 jz L(9) /* if zero => return pointer */ 299 incl %eax /* increment source pointer */ 300 301 /* No further test needed we we know it is one of the four bytes. */ 302L(9): popl %edi /* pop saved registers */ 303 cfi_adjust_cfa_offset (-4) 304 cfi_restore (edi) 305 popl %esi 306 cfi_adjust_cfa_offset (-4) 307 cfi_restore (esi) 308 309 ret 310END (__memchr) 311 312weak_alias (__memchr, memchr) 313libc_hidden_builtin_def (memchr) 314