/* Optimized wcscmp for x86-64 with SSE2.
   Copyright (C) 2011-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

/* Note: wcscmp uses signed comparison, not unsigned as in strcmp function. */

/* __wcscmp: compare two null-terminated strings of 32-bit elements.

   Syntax:   AT&T, preprocessed by cpp (L, ENTRY and END come from
	     sysdep.h).
   In:       %rdi = first string (s1), %rsi = second string (s2).
   Out:      %eax = 0 when the strings are equal, 1 when the first
	     differing element of s1 is greater than that of s2 as a
	     signed 32-bit value, -1 otherwise.
   Clobbers: %eax, %ecx, %edx, %rdi, %rsi, %xmm0, %xmm1, %xmm2, flags.
	     No stack is used.

   Strategy.  The offset of each pointer inside its 64-byte line is
   classified once at entry.  Every path is named
   L(continue_<s1>_<s2>), where each half is either
	00		the pointer is 16-byte aligned, or
	0/16/32/48	the 16-byte quarter of the line holding the
			(unaligned) pointer.
   A 16-byte load is only issued where it cannot run past the end of
   the current 64-byte line; the remaining positions are compared one
   element at a time.  NOTE(review): presumably this is what keeps the
   code from reading across a page boundary beyond the terminator --
   confirm before relying on it.  A few paths are entered with the
   roles of s1 and s2 swapped; this is fine because their access
   pattern is symmetric in the two pointers.

   Invariant: %xmm0 is zero whenever a compare step starts.  The vector
   steps overwrite it with the "element is null" mask, and they only
   fall through when that mask is all zero again.  */

/* Compare the element at OFF in both strings.  Leaves through
   L(nequal) with the flags of s1[OFF] - s2[OFF], or through L(equal)
   when both elements are the terminating null.  Clobbers %ecx.  */
	.macro WCSCMP_SCALAR off
	mov	\off(%rsi), %ecx
	cmp	%ecx, \off(%rdi)
	jne	L(nequal)
	test	%ecx, %ecx
	jz	L(equal)
	.endm

/* Like WCSCMP_SCALAR but without the null test; only valid after one
   of the two 16-byte blocks is known to hold no null, so that a null
   in the other string shows up as a mismatch.  Clobbers %eax.  */
	.macro WCSCMP_SCALAR_NE off
	mov	\off(%rdi), %eax
	cmp	\off(%rsi), %eax
	jne	L(nequal)
	.endm

/* First 16 bytes of a path where ALIGNED (either %rdi or %rsi) is
   16-byte aligned and the other pointer lies in quarter 48.  The
   aligned block is tested for a null with one vector compare; the
   elements themselves are compared with scalar loads.
   L(less4_double_words1) expects %eax = first element of s1.  */
	.macro WCSCMP_HEAD_CHECKED aligned
	pcmpeqd	(\aligned), %xmm0	/* Any null double_word?  */
	mov	(%rdi), %eax
	pmovmskb %xmm0, %ecx
	test	%ecx, %ecx
	jnz	L(less4_double_words1)
	cmp	(%rsi), %eax
	jne	L(nequal)
	WCSCMP_SCALAR_NE 4
	WCSCMP_SCALAR_NE 8
	WCSCMP_SCALAR_NE 12
	.endm

/* Compare the four elements at OFF using unaligned loads from both
   strings.  After psubb every byte of an element has its top bit set
   only if the element is equal in both strings and not null, so
   pmovmskb yields 0xffff exactly when all four elements may be
   skipped.  Otherwise jump to FAIL with %edx = mask - 0xffff.  */
	.macro WCSCMP_VEC off, fail
	movdqu	\off(%rdi), %xmm1
	movdqu	\off(%rsi), %xmm2
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word?  */
	pcmpeqd	%xmm2, %xmm1		/* Element-wise equality.  */
	psubb	%xmm0, %xmm1		/* equal AND NOT null.  */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* Zero iff all four may be skipped.  */
	jnz	\fail
	.endm

/* Same test as WCSCMP_VEC for paths where OFF(ALIGNED) is 16-byte
   aligned and can therefore be used directly as a memory operand.
   LOAD (movdqu or movdqa) fetches the block at OFF(SRC); the null test
   is done on that loaded block.  */
	.macro WCSCMP_VEC_MEM load, off, src, aligned, fail
	\load	\off(\src), %xmm1
	pcmpeqd	%xmm1, %xmm0		/* Any null double_word?  */
	pcmpeqd	\off(\aligned), %xmm1	/* Element-wise equality.  */
	psubb	%xmm0, %xmm1		/* equal AND NOT null.  */
	pmovmskb %xmm1, %edx
	sub	$0xffff, %edx		/* Zero iff all four may be skipped.  */
	jnz	\fail
	.endm

/* Second-level dispatch on the %rsi position (%ch = low byte of %rsi,
   %eax = %rsi & 63).  Falls through when %rsi lies in quarter 48.  */
	.macro WCSCMP_DISPATCH aligned, q0, q16, q32
	and	$15, %ch
	jz	\aligned
	cmp	$16, %eax
	jb	\q0
	cmp	$32, %eax
	jb	\q16
	cmp	$48, %eax
	jb	\q32
	.endm

/* Final decision on the element at OFF, which a vector step flagged as
   the first one that differs or is null.  If the two values are equal
   the element is the shared terminator, so the value just loaded into
   %eax is already the 0 to return.  */
	.macro WCSCMP_RETURN off
	mov	\off(%rdi), %eax
	cmp	\off(%rsi), %eax
	jne	L(nequal)
	ret
	.endm

	.text
ENTRY (__wcscmp)
/*
 * This implementation uses SSE to compare up to 16 bytes at a time.
*/
	mov	%esi, %eax
	mov	%edi, %edx
	pxor	%xmm0, %xmm0		/* clear %xmm0 for null char checks */
	mov	%al, %ch		/* %ch = low byte of rsi */
	mov	%dl, %cl		/* %cl = low byte of rdi */
	and	$63, %eax		/* rsi alignment in cache line */
	and	$63, %edx		/* rdi alignment in cache line */
	and	$15, %cl
	jz	L(continue_00)		/* rdi is 16-byte aligned */
	cmp	$16, %edx
	jb	L(continue_0)
	cmp	$32, %edx
	jb	L(continue_16)
	cmp	$48, %edx
	jb	L(continue_32)

L(continue_48):
	WCSCMP_DISPATCH L(continue_48_00), L(continue_0_48), L(continue_16_48), L(continue_32_48)

	.p2align 4
L(continue_48_48):
	WCSCMP_SCALAR 0
	WCSCMP_SCALAR 4
	WCSCMP_SCALAR 8
	WCSCMP_SCALAR 12
	WCSCMP_VEC 16, L(less4_double_words_16)
	WCSCMP_VEC 32, L(less4_double_words_32)
	WCSCMP_VEC 48, L(less4_double_words_48)

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_48_48)

L(continue_0):
	WCSCMP_DISPATCH L(continue_0_00), L(continue_0_0), L(continue_0_16), L(continue_0_32)

	.p2align 4
L(continue_0_48):
	WCSCMP_SCALAR 0
	WCSCMP_SCALAR 4
	WCSCMP_SCALAR 8
	WCSCMP_SCALAR 12
	WCSCMP_VEC 16, L(less4_double_words_16)
	WCSCMP_VEC 32, L(less4_double_words_32)
	WCSCMP_SCALAR 48
	WCSCMP_SCALAR 52
	WCSCMP_SCALAR 56
	WCSCMP_SCALAR 60

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_0_48)

	.p2align 4
L(continue_00):
	WCSCMP_DISPATCH L(continue_00_00), L(continue_00_0), L(continue_00_16), L(continue_00_32)

	.p2align 4
L(continue_00_48):
	WCSCMP_HEAD_CHECKED %rdi
	WCSCMP_VEC_MEM movdqu, 16, %rsi, %rdi, L(less4_double_words_16)
	WCSCMP_VEC_MEM movdqu, 32, %rsi, %rdi, L(less4_double_words_32)
	WCSCMP_VEC_MEM movdqu, 48, %rsi, %rdi, L(less4_double_words_48)

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_32):
	WCSCMP_DISPATCH L(continue_32_00), L(continue_0_32), L(continue_16_32), L(continue_32_32)

	.p2align 4
L(continue_32_48):
	WCSCMP_SCALAR 0
	WCSCMP_SCALAR 4
	WCSCMP_SCALAR 8
	WCSCMP_SCALAR 12
	WCSCMP_SCALAR 16
	WCSCMP_SCALAR 20
	WCSCMP_SCALAR 24
	WCSCMP_SCALAR 28
	WCSCMP_VEC 32, L(less4_double_words_32)
	WCSCMP_VEC 48, L(less4_double_words_48)

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_16):
	WCSCMP_DISPATCH L(continue_16_00), L(continue_0_16), L(continue_16_16), L(continue_16_32)

	.p2align 4
L(continue_16_48):
	WCSCMP_SCALAR 0
	WCSCMP_SCALAR 4
	WCSCMP_SCALAR 8
	WCSCMP_SCALAR 12
	WCSCMP_VEC 16, L(less4_double_words_16)
	WCSCMP_SCALAR 32
	WCSCMP_SCALAR 36
	WCSCMP_SCALAR 40
	WCSCMP_SCALAR 44
	WCSCMP_VEC 48, L(less4_double_words_48)

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_16_48)

	.p2align 4
L(continue_00_00):
	WCSCMP_VEC_MEM movdqa, 0, %rdi, %rsi, L(less4_double_words)
	WCSCMP_VEC_MEM movdqa, 16, %rdi, %rsi, L(less4_double_words_16)
	WCSCMP_VEC_MEM movdqa, 32, %rdi, %rsi, L(less4_double_words_32)
	WCSCMP_VEC_MEM movdqa, 48, %rdi, %rsi, L(less4_double_words_48)

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_00_00)

/* The paths below run once: they consume vector blocks until the
   pointer that started in a lower quarter reaches quarter 48, then
   hand over to the matching steady-state loop above.  */

	.p2align 4
L(continue_00_32):
	WCSCMP_VEC_MEM movdqu, 0, %rsi, %rdi, L(less4_double_words)

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_16):
	WCSCMP_VEC_MEM movdqu, 0, %rsi, %rdi, L(less4_double_words)
	WCSCMP_VEC_MEM movdqu, 16, %rsi, %rdi, L(less4_double_words_16)

	add	$32, %rsi
	add	$32, %rdi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_00_0):
	WCSCMP_VEC_MEM movdqu, 0, %rsi, %rdi, L(less4_double_words)
	WCSCMP_VEC_MEM movdqu, 16, %rsi, %rdi, L(less4_double_words_16)
	WCSCMP_VEC_MEM movdqu, 32, %rsi, %rdi, L(less4_double_words_32)

	add	$48, %rsi
	add	$48, %rdi
	jmp	L(continue_00_48)

	.p2align 4
L(continue_48_00):
	WCSCMP_HEAD_CHECKED %rsi
	WCSCMP_VEC_MEM movdqu, 16, %rdi, %rsi, L(less4_double_words_16)
	WCSCMP_VEC_MEM movdqu, 32, %rdi, %rsi, L(less4_double_words_32)
	WCSCMP_VEC_MEM movdqu, 48, %rdi, %rsi, L(less4_double_words_48)

	add	$64, %rsi
	add	$64, %rdi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_32_00):
	WCSCMP_VEC_MEM movdqu, 0, %rdi, %rsi, L(less4_double_words)

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_16_00):
	WCSCMP_VEC_MEM movdqu, 0, %rdi, %rsi, L(less4_double_words)
	WCSCMP_VEC_MEM movdqu, 16, %rdi, %rsi, L(less4_double_words_16)

	add	$32, %rsi
	add	$32, %rdi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_0_00):
	WCSCMP_VEC_MEM movdqu, 0, %rdi, %rsi, L(less4_double_words)
	WCSCMP_VEC_MEM movdqu, 16, %rdi, %rsi, L(less4_double_words_16)
	WCSCMP_VEC_MEM movdqu, 32, %rdi, %rsi, L(less4_double_words_32)

	add	$48, %rsi
	add	$48, %rdi
	jmp	L(continue_48_00)

	.p2align 4
L(continue_32_32):
	WCSCMP_VEC 0, L(less4_double_words)

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_16_16):
	WCSCMP_VEC 0, L(less4_double_words)
	WCSCMP_VEC 16, L(less4_double_words_16)

	add	$32, %rsi
	add	$32, %rdi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_0):
	WCSCMP_VEC 0, L(less4_double_words)
	WCSCMP_VEC 16, L(less4_double_words_16)
	WCSCMP_VEC 32, L(less4_double_words_32)

	add	$48, %rsi
	add	$48, %rdi
	jmp	L(continue_48_48)

	.p2align 4
L(continue_0_16):
	WCSCMP_VEC 0, L(less4_double_words)
	WCSCMP_VEC 16, L(less4_double_words_16)

	add	$32, %rsi
	add	$32, %rdi
	jmp	L(continue_32_48)

	.p2align 4
L(continue_0_32):
	WCSCMP_VEC 0, L(less4_double_words)

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_16_48)

	.p2align 4
L(continue_16_32):
	WCSCMP_VEC 0, L(less4_double_words)

	add	$16, %rsi
	add	$16, %rdi
	jmp	L(continue_32_48)

/* Reached from WCSCMP_HEAD_CHECKED when the aligned 16-byte block
   holds a null.  %eax = first element of s1.  If the first three
   elements are equal and not null, the null is the fourth element of
   the aligned block, so equality there means both strings end.  */
	.p2align 4
L(less4_double_words1):
	cmp	(%rsi), %eax
	jne	L(nequal)
	test	%eax, %eax
	jz	L(equal)

	WCSCMP_SCALAR 4
	WCSCMP_SCALAR 8

	mov	12(%rsi), %ecx
	cmp	%ecx, 12(%rdi)
	jne	L(nequal)
	xor	%eax, %eax
	ret

/* L(less4_double_words*) are reached from a vector step with
   %edx = mask - 0xffff (non-zero).  Each element owns one nibble of
   the mask, and the lowest non-zero nibble of %dx marks the first
   element that differs or is null: %dl covers elements 0 and 1, %dh
   elements 2 and 3.  */
	.p2align 4
L(less4_double_words):
	test	%dl, %dl
	jz	L(next_two_double_words)
	and	$15, %dl
	jz	L(second_double_word)
	WCSCMP_RETURN 0

	.p2align 4
L(second_double_word):
	WCSCMP_RETURN 4

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)
	WCSCMP_RETURN 8

	.p2align 4
L(fourth_double_word):
	WCSCMP_RETURN 12

	.p2align 4
L(less4_double_words_16):
	test	%dl, %dl
	jz	L(next_two_double_words_16)
	and	$15, %dl
	jz	L(second_double_word_16)
	WCSCMP_RETURN 16

	.p2align 4
L(second_double_word_16):
	WCSCMP_RETURN 20

	.p2align 4
L(next_two_double_words_16):
	and	$15, %dh
	jz	L(fourth_double_word_16)
	WCSCMP_RETURN 24

	.p2align 4
L(fourth_double_word_16):
	WCSCMP_RETURN 28

	.p2align 4
L(less4_double_words_32):
	test	%dl, %dl
	jz	L(next_two_double_words_32)
	and	$15, %dl
	jz	L(second_double_word_32)
	WCSCMP_RETURN 32

	.p2align 4
L(second_double_word_32):
	WCSCMP_RETURN 36

	.p2align 4
L(next_two_double_words_32):
	and	$15, %dh
	jz	L(fourth_double_word_32)
	WCSCMP_RETURN 40

	.p2align 4
L(fourth_double_word_32):
	WCSCMP_RETURN 44

	.p2align 4
L(less4_double_words_48):
	test	%dl, %dl
	jz	L(next_two_double_words_48)
	and	$15, %dl
	jz	L(second_double_word_48)
	WCSCMP_RETURN 48

	.p2align 4
L(second_double_word_48):
	WCSCMP_RETURN 52

	.p2align 4
L(next_two_double_words_48):
	and	$15, %dh
	jz	L(fourth_double_word_48)
	WCSCMP_RETURN 56

	.p2align 4
L(fourth_double_word_48):
	WCSCMP_RETURN 60

/* Entered with the flags of a cmp that computed s1[i] - s2[i].  mov
   does not modify the flags, so jg still tests that signed result.  */
	.p2align 4
L(nequal):
	mov	$1, %eax
	jg	L(nequal_bigger)
	neg	%eax

L(nequal_bigger):
	ret

	.p2align 4
L(equal):
	xor	%eax, %eax		/* Writing %eax also clears the upper half of %rax.  */
	ret

END (__wcscmp)

/* Keep the helper macros private to this file.  */
	.purgem WCSCMP_SCALAR
	.purgem WCSCMP_SCALAR_NE
	.purgem WCSCMP_HEAD_CHECKED
	.purgem WCSCMP_VEC
	.purgem WCSCMP_VEC_MEM
	.purgem WCSCMP_DISPATCH
	.purgem WCSCMP_RETURN

#ifndef __wcscmp
libc_hidden_def (__wcscmp)
weak_alias (__wcscmp, wcscmp)
#endif