/* strchr with SSE2 without bsf
   Copyright (C) 2011-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>
# include "asm-syntax.h"

/* __strchr_sse2_no_bsf: find the first occurrence of a byte in a
   NUL-terminated string, scanning 16 bytes at a time with SSE2 and
   locating the hit with a chain of single-bit tests instead of a
   bit-scan instruction.

   In:	%rdi = address of the string.
	%esi = byte to look for; only its low 8 bits are used.
   Out:	%rax = address of the first matching byte, or 0 when the NUL
	terminator is reached first.  At every byte position the match
	bit is examined before the NUL bit, so searching for byte 0
	yields the address of the terminator.
   Clobbers: %rcx, %rdx, %rsi, %rdi, %xmm0-%xmm3, flags.

   Register roles inside the function:
	%xmm1 = searched byte replicated into all 16 lanes.
	%xmm2 = all zero, used to detect the terminator.
	%eax  = 16-bit mask, bit i set when byte i of the block matches.
	%edx  = 16-bit mask, bit i set when byte i of the block is NUL.
	%rdi  = 16-byte aligned cursor; it already points one block past
		the block described by %eax/%edx until L(matches) steps
		it back.  */

	atom_text_section
ENTRY (__strchr_sse2_no_bsf)
	/* Broadcast the low byte of %esi to every lane of %xmm1 (two
	   punpcklbw steps plus pshufd), interleaved with the set-up of
	   the first, aligned, block.  */
	movd	%esi, %xmm1
	movq	%rdi, %rcx
	punpcklbw %xmm1, %xmm1
	andq	$~15, %rdi		/* Round the cursor down to 16.  */
	pxor	%xmm2, %xmm2
	punpcklbw %xmm1, %xmm1
	orl	$0xffffffff, %esi	/* All ones, shifted into a mask below.  */
	movdqa	(%rdi), %xmm0
	pshufd	$0, %xmm1, %xmm1
	subq	%rdi, %rcx		/* %rcx = offset of the string in the block.  */
	movdqa	%xmm0, %xmm3
	leaq	16(%rdi), %rdi
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	shl	%cl, %esi		/* Clear the bits of bytes before the string.  */
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	/* The aligned load also fetched bytes that precede the string;
	   drop their bits from both masks.  */
	andl	%esi, %eax
	andl	%esi, %edx
	test	%eax, %eax
	jnz	L(matches)
	test	%edx, %edx
	jnz	L(return_null)

L(loop):
	movdqa	(%rdi), %xmm0
	leaq	16(%rdi), %rdi
	movdqa	%xmm0, %xmm3
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm2, %xmm3
	pmovmskb %xmm0, %eax
	pmovmskb %xmm3, %edx
	or	%eax, %edx		/* Neither a match nor a NUL: next block.  */
	jz	L(loop)

	/* The OR above overwrote the NUL mask; rebuild it from %xmm3.  */
	pmovmskb %xmm3, %edx
	test	%eax, %eax
	jnz	L(matches)
	/* No match bit is set, so the block holds only the terminator:
	   fall through and return NULL.  */

/* Return NULL.  */
	.p2align 4
L(return_null):
	/* A 32-bit write zero-extends, so this clears all of %rax.  */
	xor	%eax, %eax
	ret

L(matches):
	/* There is a match.  First find out whether the block also holds
	   the NUL terminator.  */
	leaq	-16(%rdi), %rdi		/* Back to the start of the block.  */
	test	%edx, %edx
	jz	L(match_case1)

	/* Case 2: the block holds both a match and a NUL.  Walk the bytes
	   in order and return NULL as soon as a NUL bit precedes the
	   first match bit.  */
	.p2align 4
L(match_case2):
	test	%al, %al
	jz	L(match_high_case2)	/* No match in bytes 0-7.  */

	mov	%al, %cl
	and	$15, %cl
	jnz	L(match_case2_4)	/* Match in bytes 0-3.  */

	/* The first match is in bytes 4-7; a NUL in bytes 0-3 wins.  */
	mov	%dl, %ch
	and	$15, %ch
	jnz	L(return_null)

	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x10, %dl
	jnz	L(return_null)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x20, %dl
	jnz	L(return_null)
	test	$0x40, %al
	jnz	L(Exit7)
	test	$0x40, %dl
	jnz	L(return_null)
	/* %al is non-zero with bits 0-6 clear, so the match is byte 7.  */
	lea	7(%rdi), %rax
	ret

	.p2align 4
L(match_case2_4):
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x01, %dl
	jnz	L(return_null)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x02, %dl
	jnz	L(return_null)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x04, %dl
	jnz	L(return_null)
	/* Bits 0-2 are clear but the low nibble is not: byte 3.  */
	lea	3(%rdi), %rax
	ret

	.p2align 4
L(match_high_case2):
	/* The match is in bytes 8-15; any NUL in bytes 0-7 wins.  */
	test	%dl, %dl
	jnz	L(return_null)

	mov	%ah, %cl
	and	$15, %cl
	jnz	L(match_case2_12)	/* Match in bytes 8-11.  */

	/* The first match is in bytes 12-15; a NUL in bytes 8-11 wins.  */
	mov	%dh, %ch
	and	$15, %ch
	jnz	L(return_null)

	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x10, %dh
	jnz	L(return_null)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x20, %dh
	jnz	L(return_null)
	test	$0x40, %ah
	jnz	L(Exit15)
	test	$0x40, %dh
	jnz	L(return_null)
	lea	15(%rdi), %rax
	ret

	.p2align 4
L(match_case2_12):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x01, %dh
	jnz	L(return_null)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x02, %dh
	jnz	L(return_null)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x04, %dh
	jnz	L(return_null)
	lea	11(%rdi), %rax
	ret

	/* Case 1: the block holds a match and no NUL, so only the lowest
	   set bit of the match mask has to be located.  */
	.p2align 4
L(match_case1):
	test	%al, %al
	jz	L(match_high_case1)

	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	lea	7(%rdi), %rax
	ret

	.p2align 4
L(match_high_case1):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	lea	15(%rdi), %rax
	ret

	/* L(ExitN) returns the address of the N-th byte of the block,
	   i.e. offset N - 1 from %rdi.  Offsets 7 and 15 are returned
	   inline by the chains above, so they have no label here.  */
	.p2align 4
L(Exit1):
	lea	(%rdi), %rax
	ret

	.p2align 4
L(Exit2):
	lea	1(%rdi), %rax
	ret

	.p2align 4
L(Exit3):
	lea	2(%rdi), %rax
	ret

	.p2align 4
L(Exit4):
	lea	3(%rdi), %rax
	ret

	.p2align 4
L(Exit5):
	lea	4(%rdi), %rax
	ret

	.p2align 4
L(Exit6):
	lea	5(%rdi), %rax
	ret

	.p2align 4
L(Exit7):
	lea	6(%rdi), %rax
	ret

	.p2align 4
L(Exit9):
	lea	8(%rdi), %rax
	ret

	.p2align 4
L(Exit10):
	lea	9(%rdi), %rax
	ret

	.p2align 4
L(Exit11):
	lea	10(%rdi), %rax
	ret

	.p2align 4
L(Exit12):
	lea	11(%rdi), %rax
	ret

	.p2align 4
L(Exit13):
	lea	12(%rdi), %rax
	ret

	.p2align 4
L(Exit14):
	lea	13(%rdi), %rax
	ret

	.p2align 4
L(Exit15):
	lea	14(%rdi), %rax
	ret

END (__strchr_sse2_no_bsf)
#endif