/* Optimized wcslen for x86-64 with SSE2.
   Copyright (C) 2011-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* size_t wcslen (const wchar_t *s)

   Syntax: AT&T / GAS, run through the C preprocessor.

   In:    %rdi = s, an array of 32-bit elements terminated by a zero
	  element.
   Out:   %rax = number of elements that precede the terminator.
   Clobb: %rcx, %rdx, %xmm0-%xmm3, %xmm6, flags.  No stack is used.

   NOTE(review): the result is computed as a byte distance shifted
   right by 2, and the vector compares work on 4-byte lanes of 16-byte
   aligned blocks, so S is assumed to be 4-byte aligned -- confirm
   against callers.

   Outline:
     1. Test the first eight elements one at a time.
     2. Test twelve consecutive 16-byte aligned blocks, starting at the
	first aligned address above s + 16.
     3. Round the cursor down to a 64-byte boundary and scan 64 bytes
	per iteration.

   While scanning, %rax is kept one block (16 bytes, or 64 inside the
   loop) past the block under test and %rcx carries the matching bias,
   so L(exit) recovers the byte offset of the block holding the
   terminator as %rax - %rcx.  */

	.text
ENTRY (__wcslen)
	/* Element-wise probe of s[0..7].  Each load is executed only
	   after every earlier element was found to be non-zero.  */
	cmpl	$0, (%rdi)
	jz	L(exit_tail0)
	cmpl	$0, 4(%rdi)
	jz	L(exit_tail1)
	cmpl	$0, 8(%rdi)
	jz	L(exit_tail2)
	cmpl	$0, 12(%rdi)
	jz	L(exit_tail3)
	cmpl	$0, 16(%rdi)
	jz	L(exit_tail4)
	cmpl	$0, 20(%rdi)
	jz	L(exit_tail5)
	cmpl	$0, 24(%rdi)
	jz	L(exit_tail6)
	cmpl	$0, 28(%rdi)
	jz	L(exit_tail7)

	pxor	%xmm0, %xmm0

	/* %rax = first 16-byte aligned block to test; it lies in
	   (s + 16, s + 32], so any overlap is with elements already
	   known to be non-zero.  %rcx = s + 16 is the bias that pairs
	   with the "lea 16(%rax), %rax" performed before each exit.  */
	lea	32(%rdi), %rax
	lea	16(%rdi), %rcx
	and	$-16, %rax

	/* Twelve single-block probes.  pcmpeqd against a zeroed register
	   sets a lane to all-ones where the element is zero; pmovmskb
	   then yields four mask bits per lane.  A register whose mask
	   was zero is itself all-zero again, which is why %xmm0-%xmm3
	   are reused below without being cleared a second time.  */
	pcmpeqd	(%rax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm0
	pmovmskb %xmm0, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm1
	pmovmskb %xmm1, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	pcmpeqd	(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	16(%rax), %rax
	jnz	L(exit)

	/* Round down to a 64-byte boundary.  This can step back over at
	   most three blocks, all of which were just found to contain no
	   terminator.  */
	and	$-0x40, %rax

	.p2align 4
L(aligned_64_loop):
	movaps	(%rax), %xmm0
	movaps	16(%rax), %xmm1
	movaps	32(%rax), %xmm2
	movaps	48(%rax), %xmm6

	/* Byte-wise minimum of the four blocks.  A zero element in any
	   block gives a zero lane here, but a lane can also become zero
	   from bytes of different blocks, so a hit is only a candidate
	   and is re-checked block by block below.  %xmm3 is all-zero on
	   every entry to the loop.  */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	test	%edx, %edx
	lea	64(%rax), %rax
	jz	L(aligned_64_loop)

	/* Candidate hit: %rax already points 64 bytes past the group.
	   Test each block in turn, stepping the bias in %rcx so that
	   %rax - %rcx is the byte offset of the block being tested.  */
	pcmpeqd	-64(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	48(%rcx), %rcx
	jnz	L(exit)

	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%rcx), %rcx
	jnz	L(exit)

	pcmpeqd	-32(%rax), %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%rcx), %rcx
	jnz	L(exit)

	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	test	%edx, %edx
	lea	-16(%rcx), %rcx
	jnz	L(exit)

	/* False positive from pminub.  %rcx is back at s + 16 and %xmm3
	   is all-zero again, so the loop can simply continue.  */
	jmp	L(aligned_64_loop)

	/* A block with a zero element was found.  %edx holds its 16-bit
	   mask, four bits per element: bits 0-3 element 0, bits 4-7
	   element 1, bits 8-11 element 2, bits 12-15 element 3.  */
	.p2align 4
L(exit):
	sub	%rcx, %rax		/* byte offset of the block from s */
	shr	$2, %rax		/* bytes -> elements */
	test	%dl, %dl
	jz	L(exit_high)		/* elements 0 and 1 are non-zero */

	/* Flags-only nibble test; %rcx is dead here, so nothing needs to
	   be copied into %cl.  */
	testb	$0x0f, %dl
	jz	L(exit_1)
	ret				/* element 0 is the terminator */

	.p2align 4
L(exit_high):
	/* %dl is zero and %edx is not, so %dh has a bit set.  */
	testb	$0x0f, %dh
	jz	L(exit_3)
	add	$2, %rax
	ret

	.p2align 4
L(exit_1):
	add	$1, %rax
	ret

	.p2align 4
L(exit_3):
	add	$3, %rax
	ret

	/* Results for a terminator among the first eight elements.  The
	   32-bit register forms zero-extend into %rax.  */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	ret

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	ret

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	ret

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	ret

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	ret

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	ret

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	ret

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	ret

END (__wcslen)

weak_alias(__wcslen, wcslen)