/* wcschr with SSE2
   Copyright (C) 2011-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* wchar_t *wcschr (const wchar_t *s, wchar_t wc)

   Return a pointer to the first element of S equal to WC, or NULL if
   the terminating null element is reached first.  Searching for the
   null element itself returns a pointer to the terminator.

   Syntax: AT&T (GAS), run through the C preprocessor.

   In:	%rdi = s
	%esi = wc
   Out:	%rax = address of the first match, or 0
   Clobbers: %rcx, %rdx, %rdi, %xmm0, %xmm1, %xmm2, flags.

   Register roles inside the function:
	%xmm1	WC replicated into all four 32-bit lanes.
	%xmm2	All zeroes on entry to every chunk test; after the test
		it holds the "lane == 0" mask.  While no terminator has
		been seen that mask is itself all zeroes, so the register
		can be reused without being cleared again.
	%xmm0	Current 16-byte chunk, then its "lane == WC" mask.
	%rax	Byte mask of the lanes equal to WC.
	%rdx	Byte mask of the lanes equal to 0.

   NOTE(review): the aligned loads compare 32-bit lanes in place, so
   the code presumably relies on S being 4-byte aligned -- confirm.  */

	.text
ENTRY (__wcschr)

	/* Only the low 32 bits of the argument are broadcast below, so
	   load just those.  */
	movd	%esi, %xmm1
	pxor	%xmm2, %xmm2
	mov	%rdi, %rcx
	/* Two interleaves with itself copy lane 0 into all four lanes.  */
	punpckldq %xmm1, %xmm1
	punpckldq %xmm1, %xmm1

	/* With an offset of at most 48 inside its 64-byte line, a 16-byte
	   unaligned load from S stays inside that line.  Anything closer
	   to the end of the line takes the aligned path instead.  */
	and	$63, %rcx
	cmp	$48, %rcx
	ja	L(cross_cache)

	/* First 16 bytes, read at S's own alignment.  */
	movdqu	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	/* Round down to a 16-byte boundary.  The aligned chunk may overlap
	   elements of the chunk just tested; those were found to be
	   neither WC nor the terminator, so looking at them again cannot
	   change the result.  */
	and	$-16, %rdi

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	jmp	L(loop)

L(cross_cache):
	/* %rcx = offset of S inside its 16-byte block (1..15 here, because
	   the offset inside the 64-byte line is 49..63).  Load the whole
	   aligned block and discard the mask bits that belong to the bytes
	   in front of S.  */
	and	$15, %rcx
	and	$-16, %rdi
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax

	/* The masks are zero-extended 16-bit values, so a logical shift is
	   the right operation; bit 0 now corresponds to the byte at S.  */
	shr	%cl, %rdx
	shr	%cl, %rax
	test	%rax, %rax
	je	L(unaligned_no_match)

	/* There is a match.  It only counts if no terminator precedes it;
	   equal positions mean WC is the terminator, which is a hit.  */
	bsf	%rax, %rax
	test	%rdx, %rdx
	je	L(unaligned_match)
	bsf	%rdx, %rdx
	cmp	%rdx, %rax
	ja	L(return_null)

L(unaligned_match):
	/* Result = aligned block base + offset of S + offset of match.  */
	add	%rdi, %rax
	add	%rcx, %rax
	ret

	.p2align 4
L(unaligned_no_match):
	/* No match in the first block: a terminator there ends the
	   search.  */
	test	%rdx, %rdx
	jne	L(return_null)
	/* %xmm2 may hold hits from the bytes in front of S; clear it
	   before it is reused as the zero operand in the loop.  */
	pxor	%xmm2, %xmm2

	add	$16, %rdi

	.p2align 4
/* Loop start on aligned string.  Four 16-byte chunks are tested per
   iteration; %rdi always points just past the chunk being tested.  */
L(loop):
	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)

	movdqa	(%rdi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	add	$16, %rdi
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %rdx
	pmovmskb %xmm0, %rax
	or	%rax, %rdx
	jnz	L(matches)
	jmp	L(loop)

	.p2align 4
L(matches):
	/* The OR that led here overwrote the terminator mask in %rdx;
	   %xmm2 still holds the compare result, so extract it again.  */
	pmovmskb %xmm2, %rdx
	/* No lane equals WC, so what was found is the terminator.  */
	test	%rax, %rax
	jz	L(return_null)
	bsf	%rax, %rax
	test	%rdx, %rdx
	je	L(match)
	/* Both are present: a match located after the terminator is
	   outside the string.  */
	bsf	%rdx, %rcx
	cmp	%rcx, %rax
	ja	L(return_null)
L(match):
	/* %rdi was already advanced past this chunk; step back to its
	   start before adding the byte offset of the match.  */
	sub	$16, %rdi
	add	%rdi, %rax
	ret

	.p2align 4
L(return_null):
	xor	%eax, %eax
	ret

END (__wcschr)

libc_hidden_def(__wcschr)
weak_alias (__wcschr, wcschr)
libc_hidden_weak (wcschr)