/* strrchr/wcsrchr optimized with AVX2.
   Copyright (C) 2017-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

# include <sysdep.h>

# ifndef STRRCHR
#  define STRRCHR	__strrchr_avx2
# endif

# ifdef USE_AS_WCSRCHR
/* For wcsrchr, CHAR is a 32-bit wchar_t: broadcast/compare dwords.  */
#  define VPBROADCAST	vpbroadcastd
#  define VPCMPEQ	vpcmpeqd
# else
#  define VPBROADCAST	vpbroadcastb
#  define VPCMPEQ	vpcmpeqb
# endif

# ifndef VZEROUPPER
#  define VZEROUPPER	vzeroupper
# endif

# ifndef SECTION
#  define SECTION(p)	p##.avx
# endif

# define VEC_SIZE	32

/* char *strrchr (const char *s, int c)  — SysV AMD64.
   In:   rdi = s, esi = c.  Out: rax = last occurrence of c, or NULL.
   Register roles throughout:
     ymm0 = all-zero (nul detector), ymm4 = c broadcast to every lane,
     rdi  = current read position (advanced past each vector),
     eax  = CHAR-match bitmask of current vector,
     ecx/edx = nul-match bitmask of current vector (path-dependent),
     edx  = remembered CHAR-match mask of the last vector that matched,
     rsi  = read position just past that remembered vector.
   The main loop is 4x unrolled over aligned 32-byte vectors; a match
   position is only resolved (bsr) once a nul is seen, so the LAST
   match wins as strrchr requires.  */

	.section SECTION(.text),"ax",@progbits
ENTRY (STRRCHR)
	movd	%esi, %xmm4
	movl	%edi, %ecx
	/* Broadcast CHAR to YMM4.  */
	VPBROADCAST %xmm4, %ymm4
	vpxor	%xmm0, %xmm0, %xmm0

	/* Check if we may cross page boundary with one vector load.
	   Only the low bits of the address matter; if s mod 64 is in
	   (VEC_SIZE, 2*VEC_SIZE-1] an unaligned 32-byte load could touch
	   the next page, so take the aligned-load path instead.  */
	andl	$(2 * VEC_SIZE - 1), %ecx
	cmpl	$VEC_SIZE, %ecx
	ja	L(cross_page_boundary)

	/* Safe to read one unaligned vector at s.  */
	vmovdqu	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2	/* ymm2: nul matches.  */
	VPCMPEQ	%ymm1, %ymm4, %ymm3	/* ymm3: CHAR matches.  */
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	addq	$VEC_SIZE, %rdi

	testl	%eax, %eax
	jnz	L(first_vec)

	/* No CHAR in the first vector; if it holds a nul, c never
	   occurs in s.  */
	testl	%ecx, %ecx
	jnz	L(return_null)

	/* Neither CHAR nor nul yet: start the aligned loop with no
	   remembered match (edx = 0).  */
	andq	$-VEC_SIZE, %rdi
	xorl	%edx, %edx
	jmp	L(aligned_loop)

	.p2align 4
L(first_vec):
	/* Check if there is a nul CHAR.  */
	testl	%ecx, %ecx
	jnz	L(char_and_nul_in_first_vec)

	/* Remember the match and keep searching.  */
	movl	%eax, %edx
	movq	%rdi, %rsi
	andq	$-VEC_SIZE, %rdi
	jmp	L(aligned_loop)

	.p2align 4
L(cross_page_boundary):
	/* Read the aligned vector containing s and shift both masks
	   right by the misalignment (cl) so bit 0 corresponds to s[0];
	   bytes before s are thereby discarded.  */
	andl	$(VEC_SIZE - 1), %ecx
	andq	$-VEC_SIZE, %rdi
	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %edx
	vpmovmskb %ymm3, %eax
	shrl	%cl, %edx
	shrl	%cl, %eax
	addq	$VEC_SIZE, %rdi

	/* Check if there is a CHAR.  */
	testl	%eax, %eax
	jnz	L(found_char)

	testl	%edx, %edx
	jnz	L(return_null)

	jmp	L(aligned_loop)

	.p2align 4
L(found_char):
	testl	%edx, %edx
	jnz	L(char_and_nul)

	/* Remember the match and keep searching.  rsi = rdi + rcx so
	   that -VEC_SIZE(%rsi, bsr) lands on s's byte for the shifted
	   mask.  */
	movl	%eax, %edx
	leaq	(%rdi, %rcx), %rsi

	.p2align 4
L(aligned_loop):
	/* 4x unrolled: scan one aligned vector per copy, exit as soon
	   as it contains a CHAR or a nul.  */
	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jnz	L(char_nor_null)

	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jnz	L(char_nor_null)

	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jnz	L(char_nor_null)

	vmovdqa	(%rdi), %ymm1
	VPCMPEQ	%ymm1, %ymm0, %ymm2
	addq	$VEC_SIZE, %rdi
	VPCMPEQ	%ymm1, %ymm4, %ymm3
	vpmovmskb %ymm2, %ecx
	vpmovmskb %ymm3, %eax
	orl	%eax, %ecx
	jz	L(aligned_loop)

	.p2align 4
L(char_nor_null):
	/* Find a CHAR or a nul CHAR in a loop.  */
	testl	%eax, %eax
	jnz	L(match)
L(return_value):
	/* Only a nul here: return the remembered match, if any.  */
	testl	%edx, %edx
	jz	L(return_null)
	movl	%edx, %eax
	movq	%rsi, %rdi

# ifdef USE_AS_WCSRCHR
	/* Keep the first bit for each matching CHAR for bsr.  A dword
	   compare sets 4 mask bits per wchar; keep only bit 0 of each
	   so bsr yields the wchar's byte offset.  */
	andl	$0x11111111, %eax
# endif
	bsrl	%eax, %eax
	/* rdi was advanced past the vector, so step back VEC_SIZE.  */
	leaq	-VEC_SIZE(%rdi, %rax), %rax
L(return_vzeroupper):
	ZERO_UPPER_VEC_REGISTERS_RETURN

	.p2align 4
L(match):
	/* Find a CHAR.  Check if there is a nul CHAR.  */
	vpmovmskb %ymm2, %ecx
	testl	%ecx, %ecx
	jnz	L(find_nul)

	/* Remember the match and keep searching.  */
	movl	%eax, %edx
	movq	%rdi, %rsi
	jmp	L(aligned_loop)

	.p2align 4
L(find_nul):
# ifdef USE_AS_WCSRCHR
	/* Keep the first bit for each matching CHAR for bsr.  */
	andl	$0x11111111, %ecx
	andl	$0x11111111, %eax
# endif
	/* Mask out any matching bits after the nul CHAR:
	   (ecx - 1) ^ ecx = all bits up to and including the lowest
	   set nul bit; AND keeps only matches at or before the nul.  */
	movl	%ecx, %r8d
	subl	$1, %r8d
	xorl	%ecx, %r8d
	andl	%r8d, %eax
	testl	%eax, %eax
	/* If there is no CHAR here, return the remembered one.  */
	jz	L(return_value)
	bsrl	%eax, %eax
	leaq	-VEC_SIZE(%rdi, %rax), %rax
	VZEROUPPER_RETURN

	.p2align 4
L(char_and_nul):
	/* Find both a CHAR and a nul CHAR in the page-cross vector.
	   Re-base rdi by the misalignment so the shifted masks index
	   correctly, then fall through to the shared resolution.  */
	addq	%rcx, %rdi
	movl	%edx, %ecx
L(char_and_nul_in_first_vec):
# ifdef USE_AS_WCSRCHR
	/* Keep the first bit for each matching CHAR for bsr.  */
	andl	$0x11111111, %ecx
	andl	$0x11111111, %eax
# endif
	/* Mask out any matching bits after the nul CHAR.  */
	movl	%ecx, %r8d
	subl	$1, %r8d
	xorl	%ecx, %r8d
	andl	%r8d, %eax
	testl	%eax, %eax
	/* Return null pointer if the nul CHAR comes first.  */
	jz	L(return_null)
	bsrl	%eax, %eax
	leaq	-VEC_SIZE(%rdi, %rax), %rax
	VZEROUPPER_RETURN

	.p2align 4
L(return_null):
	xorl	%eax, %eax
	VZEROUPPER_RETURN

END (STRRCHR)
#endif