/* strchr (str, ch) -- Return pointer to first occurrence of CH in STR.
   For AMD x86-64.
   Copyright (C) 2009-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

	.text
ENTRY (strchr)
	/* Broadcast the search character CH to all 16 bytes of %xmm1.
	   If fewer than 64 bytes remain in STR's 4096-byte page, an
	   unaligned 64-byte read could fault, so take the aligned
	   L(cross_page) path instead.  */
	movd	%esi, %xmm1
	movl	%edi, %eax
	andl	$4095, %eax
	punpcklbw %xmm1, %xmm1
	cmpl	$4032, %eax
	punpcklwd %xmm1, %xmm1
	pshufd	$0, %xmm1, %xmm1
	jg	L(cross_page)
	/* Check the first 16 bytes: build a bitmask of positions that
	   hold either CH or the terminating NUL.  */
	movdqu	(%rdi), %xmm0
	pxor	%xmm3, %xmm3
	movdqa	%xmm0, %xmm4
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm3, %xmm4
	por	%xmm4, %xmm0
	pmovmskb %xmm0, %eax
	test	%eax, %eax
	je	L(next_48_bytes)
	bsf	%eax, %eax
#ifdef AS_STRCHRNUL
	leaq	(%rdi,%rax), %rax
#else
	/* strchr must return NULL if the first hit is the NUL
	   terminator rather than CH.  */
	movl	$0, %edx
	leaq	(%rdi,%rax), %rax
	cmpb	%sil, (%rax)
	cmovne	%rdx, %rax
#endif
	ret

	.p2align 3
L(next_48_bytes):
	/* Check bytes 16..63, merging the three 16-bit masks into a
	   single 64-bit mask in %rax.  */
	movdqu	16(%rdi), %xmm0
	movdqa	%xmm0, %xmm4
	pcmpeqb	%xmm1, %xmm0
	pcmpeqb	%xmm3, %xmm4
	por	%xmm4, %xmm0
	pmovmskb %xmm0, %ecx
	movdqu	32(%rdi), %xmm0
	movdqa	%xmm0, %xmm4
	pcmpeqb	%xmm1, %xmm0
	salq	$16, %rcx
	pcmpeqb	%xmm3, %xmm4
	por	%xmm4, %xmm0
	pmovmskb %xmm0, %eax
	movdqu	48(%rdi), %xmm0
	pcmpeqb	%xmm0, %xmm3
	salq	$32, %rax
	pcmpeqb	%xmm1, %xmm0
	orq	%rcx, %rax
	por	%xmm3, %xmm0
	pmovmskb %xmm0, %ecx
	salq	$48, %rcx
	orq	%rcx, %rax
	testq	%rax, %rax
	jne	L(return)
L(loop_start):
	/* We use this alignment to force the loop to be aligned to 8
	   but not 16 bytes.  This gives better scheduling on AMD
	   processors.  */
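	/* The main loop reads four aligned 16-byte vectors per
	   iteration.  XORing a vector with %xmm1 yields a zero byte
	   wherever the data byte equals CH, and taking the unsigned
	   byte minimum (pminub) of that with the original data also
	   yields a zero byte wherever the data byte is NUL.  Folding
	   the four results together with pminub lets one
	   pcmpeqb/pmovmskb against zero decide whether any of the 64
	   bytes is CH or NUL.  */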
	.p2align 4
	pxor	%xmm6, %xmm6
	/* Align %rdi down to a 64-byte boundary; the loop adds 64
	   before each check, so at worst a few already-checked bytes
	   are re-examined and none are skipped.  */
	andq	$-64, %rdi
	.p2align 3
L(loop64):
	addq	$64, %rdi
	movdqa	(%rdi), %xmm5
	movdqa	16(%rdi), %xmm2
	movdqa	32(%rdi), %xmm3
	pxor	%xmm1, %xmm5
	movdqa	48(%rdi), %xmm4
	pxor	%xmm1, %xmm2
	pxor	%xmm1, %xmm3
	pminub	(%rdi), %xmm5
	pxor	%xmm1, %xmm4
	pminub	16(%rdi), %xmm2
	pminub	32(%rdi), %xmm3
	pminub	%xmm2, %xmm5
	pminub	48(%rdi), %xmm4
	pminub	%xmm3, %xmm5
	pminub	%xmm4, %xmm5
	pcmpeqb	%xmm6, %xmm5
	pmovmskb %xmm5, %eax

	testl	%eax, %eax
	je	L(loop64)

	/* A CH or NUL byte lies somewhere in this 64-byte block;
	   rebuild the per-16-byte masks and combine them into %rax.  */
	movdqa	(%rdi), %xmm5
	movdqa	%xmm5, %xmm0
	pcmpeqb	%xmm1, %xmm5
	pcmpeqb	%xmm6, %xmm0
	por	%xmm0, %xmm5
	pcmpeqb	%xmm6, %xmm2
	pcmpeqb	%xmm6, %xmm3
	pcmpeqb	%xmm6, %xmm4

	pmovmskb %xmm5, %ecx
	pmovmskb %xmm2, %eax
	salq	$16, %rax
	pmovmskb %xmm3, %r8d
	pmovmskb %xmm4, %edx
	salq	$32, %r8
	orq	%r8, %rax
	orq	%rcx, %rax
	salq	$48, %rdx
	orq	%rdx, %rax
	.p2align 3
L(return):
	/* %rax holds a bitmask of CH/NUL positions relative to %rdi;
	   the lowest set bit is the answer.  */
	bsfq	%rax, %rax
#ifdef AS_STRCHRNUL
	leaq	(%rdi,%rax), %rax
#else
	/* strchr must return NULL if the first hit is the NUL
	   terminator rather than CH.  */
	movl	$0, %edx
	leaq	(%rdi,%rax), %rax
	cmpb	%sil, (%rax)
	cmovne	%rdx, %rax
#endif
	ret
	.p2align 4

L(cross_page):
	/* STR is near a page boundary.  Scan the aligned 64-byte block
	   containing STR (which cannot cross the page), then shift the
	   mask right so that bit 0 corresponds to STR itself.  */
	movq	%rdi, %rdx
	pxor	%xmm2, %xmm2
	andq	$-64, %rdx
	movdqa	%xmm1, %xmm0
	movdqa	(%rdx), %xmm3
	movdqa	%xmm3, %xmm4
	pcmpeqb	%xmm1, %xmm3
	pcmpeqb	%xmm2, %xmm4
	por	%xmm4, %xmm3
	pmovmskb %xmm3, %r8d
	movdqa	16(%rdx), %xmm3
	movdqa	%xmm3, %xmm4
	pcmpeqb	%xmm1, %xmm3
	pcmpeqb	%xmm2, %xmm4
	por	%xmm4, %xmm3
	pmovmskb %xmm3, %eax
	movdqa	32(%rdx), %xmm3
	movdqa	%xmm3, %xmm4
	pcmpeqb	%xmm1, %xmm3
	salq	$16, %rax
	pcmpeqb	%xmm2, %xmm4
	por	%xmm4, %xmm3
	pmovmskb %xmm3, %r9d
	movdqa	48(%rdx), %xmm3
	pcmpeqb	%xmm3, %xmm2
	salq	$32, %r9
	pcmpeqb	%xmm3, %xmm0
	orq	%r9, %rax
	orq	%r8, %rax
	por	%xmm2, %xmm0
	pmovmskb %xmm0, %ecx
	salq	$48, %rcx
	orq	%rcx, %rax
	movl	%edi, %ecx
	subb	%dl, %cl
	/* Discard mask bits for bytes that precede STR.  */
	shrq	%cl, %rax
	testq	%rax, %rax
	jne	L(return)
	jmp	L(loop_start)

END (strchr)

#ifndef AS_STRCHRNUL
/* index is the traditional BSD name for strchr.  */
weak_alias (strchr, index)
libc_hidden_builtin_def (strchr)
#endif