/* memcpy with SSSE3 and REP string.
   Copyright (C) 2010-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>

#if IS_IN (libc) \
    && (defined SHARED \
	|| defined USE_AS_MEMMOVE \
	|| !defined USE_MULTIARCH)

#include "asm-syntax.h"

#ifndef MEMCPY
# define MEMCPY		__memcpy_ssse3_rep
# define MEMCPY_CHK	__memcpy_chk_ssse3_rep
#endif

#ifdef USE_AS_BCOPY
# define SRC		PARMS
# define DEST		SRC+4
# define LEN		DEST+4
#else
# define DEST		PARMS
# define SRC		DEST+4
# define LEN		SRC+4
#endif

#define CFI_PUSH(REG) \
	cfi_adjust_cfa_offset (4); \
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG) \
	cfi_adjust_cfa_offset (-4); \
	cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

#ifdef PIC
# define PARMS		8	/* Preserve EBX.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  INDEX is a register containing the
   index into the jump table.  SCALE is the scale of INDEX.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
	/* We first load PC into EBX.  */ \
	SETUP_PIC_REG(bx); \
	/* Get the address of the jump table.  */ \
	addl	$(TABLE - .), %ebx; \
	/* Get the entry and convert the relative offset to the \
	   absolute address.  */ \
	addl	(%ebx,INDEX,SCALE), %ebx; \
	/* We loaded the jump table.  Go.  */ \
	_CET_NOTRACK jmp *%ebx

# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE) \
	addl	$(TABLE - .), %ebx

# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
	addl	(%ebx,INDEX,SCALE), %ebx; \
	/* We loaded the jump table.  Go.  */ \
	_CET_NOTRACK jmp *%ebx
#else
# define PARMS		4
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  INDEX is a register containing the index into the
   jump table.  SCALE is the scale of INDEX.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE, INDEX, SCALE) \
	_CET_NOTRACK jmp *TABLE(,INDEX,SCALE)

# define BRANCH_TO_JMPTBL_ENTRY_VALUE(TABLE)

# define BRANCH_TO_JMPTBL_ENTRY_TAIL(TABLE, INDEX, SCALE) \
	_CET_NOTRACK jmp *TABLE(,INDEX,SCALE)
#endif

	.section .text.ssse3,"ax",@progbits
#if !defined USE_AS_BCOPY && defined SHARED
ENTRY (MEMCPY_CHK)
	movl	12(%esp), %eax
	cmpl	%eax, 16(%esp)
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (MEMCPY_CHK)
#endif
ENTRY (MEMCPY)
	ENTRANCE
	movl	LEN(%esp), %ecx
	movl	SRC(%esp), %eax
	movl	DEST(%esp), %edx

#ifdef USE_AS_MEMMOVE
	cmp	%eax, %edx
	jb	L(copy_forward)
	je	L(fwd_write_0bytes)
	cmp	$48, %ecx
	jb	L(bk_write_less48bytes)
	add	%ecx, %eax
	cmp	%eax, %edx
	movl	SRC(%esp), %eax
	jb	L(copy_backward)

L(copy_forward):
#endif
	cmp	$48, %ecx
	jae	L(48bytesormore)

L(fwd_write_less32bytes):
#ifndef USE_AS_MEMMOVE
	cmp	%dl, %al
	jb	L(bk_write)
#endif
	add	%ecx, %edx
	add	%ecx, %eax
	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)
#ifndef USE_AS_MEMMOVE
L(bk_write):
	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)
#endif

	ALIGN (4)
/* ECX > 32 and EDX is 4 byte aligned.  */
L(48bytesormore):
	movdqu	(%eax), %xmm0
	PUSH (%edi)
	movl	%edx, %edi
	and	$-16, %edx
	PUSH (%esi)
	cfi_remember_state
	add	$16, %edx
	movl	%edi, %esi
	sub	%edx, %edi
	add	%edi, %ecx
	sub	%edi, %eax

#ifdef SHARED_CACHE_SIZE_HALF
	cmp	$SHARED_CACHE_SIZE_HALF, %ecx
#else
# ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_shared_cache_size_half@GOTOFF(%ebx), %ecx
# else
	cmp	__x86_shared_cache_size_half, %ecx
# endif
#endif

	mov	%eax, %edi
	jae	L(large_page)
	and	$0xf, %edi
	jz	L(shl_0)

	BRANCH_TO_JMPTBL_ENTRY (L(shl_table), %edi, 4)

	ALIGN (4)
L(shl_0):
	movdqu	%xmm0, (%esi)
	xor	%edi, %edi
	cmp	$127, %ecx
	ja	L(shl_0_gobble)
	lea	-32(%ecx), %ecx
L(shl_0_loop):
	movdqa	(%eax, %edi), %xmm0
	movdqa	16(%eax, %edi), %xmm1
	sub	$32, %ecx
	movdqa	%xmm0, (%edx, %edi)
	movdqa	%xmm1, 16(%edx, %edi)
	lea	32(%edi), %edi
	jb	L(shl_0_end)

	movdqa	(%eax, %edi), %xmm0
	movdqa	16(%eax, %edi), %xmm1
	sub	$32, %ecx
	movdqa	%xmm0, (%edx, %edi)
	movdqa	%xmm1, 16(%edx, %edi)
	lea	32(%edi), %edi
	jb	L(shl_0_end)

	movdqa	(%eax, %edi), %xmm0
	movdqa	16(%eax, %edi), %xmm1
	sub	$32, %ecx
	movdqa	%xmm0, (%edx, %edi)
	movdqa	%xmm1, 16(%edx, %edi)
	lea	32(%edi), %edi
	jb	L(shl_0_end)

	movdqa	(%eax, %edi), %xmm0
	movdqa	16(%eax, %edi), %xmm1
	sub	$32, %ecx
	movdqa	%xmm0, (%edx, %edi)
	movdqa	%xmm1, 16(%edx, %edi)
	lea	32(%edi), %edi
L(shl_0_end):
	lea	32(%ecx), %ecx
	add	%ecx, %edi
	add	%edi, %edx
	add	%edi, %eax
	POP (%esi)
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
L(shl_0_gobble):

#ifdef DATA_CACHE_SIZE_HALF
	cmp	$DATA_CACHE_SIZE_HALF, %ecx
#else
# ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_data_cache_size_half@GOTOFF(%ebx), %edi
# else
	mov	__x86_data_cache_size_half, %edi
# endif
#endif
	mov	%edi, %esi
	shr	$3, %esi
	sub	%esi, %edi
	cmp	%edi, %ecx
	jae	L(shl_0_gobble_mem_start)
	sub	$128, %ecx
	ALIGN (4)
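/* In-cache bulk copy for the aligned (shift 0) case: each iteration of the
   loop below moves 0x80 bytes with eight 16-byte aligned loads followed by
   eight aligned stores.  ECX was biased by -128 above, so the closing "jae"
   repeats the loop while at least another full 128-byte block remains.  */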
L(shl_0_gobble_cache_loop):
	movdqa	(%eax), %xmm0
	movaps	0x10(%eax), %xmm1
	movaps	0x20(%eax), %xmm2
	movaps	0x30(%eax), %xmm3
	movaps	0x40(%eax), %xmm4
	movaps	0x50(%eax), %xmm5
	movaps	0x60(%eax), %xmm6
	movaps	0x70(%eax), %xmm7
	lea	0x80(%eax), %eax
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm1, 0x10(%edx)
	movaps	%xmm2, 0x20(%edx)
	movaps	%xmm3, 0x30(%edx)
	movaps	%xmm4, 0x40(%edx)
	movaps	%xmm5, 0x50(%edx)
	movaps	%xmm6, 0x60(%edx)
	movaps	%xmm7, 0x70(%edx)
	lea	0x80(%edx), %edx

	jae	L(shl_0_gobble_cache_loop)
	add	$0x80, %ecx
	cmp	$0x40, %ecx
	jb	L(shl_0_cache_less_64bytes)

	movdqa	(%eax), %xmm0
	sub	$0x40, %ecx
	movdqa	0x10(%eax), %xmm1

	movdqa	%xmm0, (%edx)
	movdqa	%xmm1, 0x10(%edx)

	movdqa	0x20(%eax), %xmm0
	movdqa	0x30(%eax), %xmm1
	add	$0x40, %eax

	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm1, 0x30(%edx)
	add	$0x40, %edx
L(shl_0_cache_less_64bytes):
	cmp	$0x20, %ecx
	jb	L(shl_0_cache_less_32bytes)
	movdqa	(%eax), %xmm0
	sub	$0x20, %ecx
	movdqa	0x10(%eax), %xmm1
	add	$0x20, %eax
	movdqa	%xmm0, (%edx)
	movdqa	%xmm1, 0x10(%edx)
	add	$0x20, %edx
L(shl_0_cache_less_32bytes):
	cmp	$0x10, %ecx
	jb	L(shl_0_cache_less_16bytes)
	sub	$0x10, %ecx
	movdqa	(%eax), %xmm0
	add	$0x10, %eax
	movdqa	%xmm0, (%edx)
	add	$0x10, %edx
L(shl_0_cache_less_16bytes):
	add	%ecx, %edx
	add	%ecx, %eax
	POP (%esi)
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_0_gobble_mem_start):
	cmp	%al, %dl
	je	L(copy_page_by_rep)
	sub	$128, %ecx
L(shl_0_gobble_mem_loop):
	prefetchnta	0x1c0(%eax)
	prefetchnta	0x280(%eax)
	prefetchnta	0x1c0(%edx)
	prefetchnta	0x280(%edx)

	movdqa	(%eax), %xmm0
	movaps	0x10(%eax), %xmm1
	movaps	0x20(%eax), %xmm2
	movaps	0x30(%eax), %xmm3
	movaps	0x40(%eax), %xmm4
	movaps	0x50(%eax), %xmm5
	movaps	0x60(%eax), %xmm6
	movaps	0x70(%eax), %xmm7
	lea	0x80(%eax), %eax
	sub	$0x80, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm1, 0x10(%edx)
	movaps	%xmm2, 0x20(%edx)
	movaps	%xmm3, 0x30(%edx)
	movaps	%xmm4, 0x40(%edx)
	movaps	%xmm5, 0x50(%edx)
	movaps	%xmm6, 0x60(%edx)
	movaps	%xmm7, 0x70(%edx)
	lea	0x80(%edx), %edx

	jae	L(shl_0_gobble_mem_loop)
	add	$0x80, %ecx
	cmp	$0x40, %ecx
	jb	L(shl_0_mem_less_64bytes)

	movdqa	(%eax), %xmm0
	sub	$0x40, %ecx
	movdqa	0x10(%eax), %xmm1

	movdqa	%xmm0, (%edx)
	movdqa	%xmm1, 0x10(%edx)

	movdqa	0x20(%eax), %xmm0
	movdqa	0x30(%eax), %xmm1
	add	$0x40, %eax

	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm1, 0x30(%edx)
	add	$0x40, %edx
L(shl_0_mem_less_64bytes):
	cmp	$0x20, %ecx
	jb	L(shl_0_mem_less_32bytes)
	movdqa	(%eax), %xmm0
	sub	$0x20, %ecx
	movdqa	0x10(%eax), %xmm1
	add	$0x20, %eax
	movdqa	%xmm0, (%edx)
	movdqa	%xmm1, 0x10(%edx)
	add	$0x20, %edx
L(shl_0_mem_less_32bytes):
	cmp	$0x10, %ecx
	jb	L(shl_0_mem_less_16bytes)
	sub	$0x10, %ecx
	movdqa	(%eax), %xmm0
	add	$0x10, %eax
	movdqa	%xmm0, (%edx)
	add	$0x10, %edx
L(shl_0_mem_less_16bytes):
	add	%ecx, %edx
	add	%ecx, %eax
	POP (%esi)
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_1):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
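	/* L(shl_1) through L(shl_15) share one pattern: round the source
	   pointer down to a 16-byte boundary, then produce each aligned
	   16-byte store by merging two consecutive aligned loads with the
	   SSSE3 palignr instruction, using the misalignment (here 1) as
	   the shift count.  */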
	sub	$1, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_1_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$1, %xmm2, %xmm3
	palignr	$1, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_1_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$1, %xmm2, %xmm3
	palignr	$1, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_1_loop)

L(shl_1_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	1(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_2):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$2, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_2_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$2, %xmm2, %xmm3
	palignr	$2, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_2_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$2, %xmm2, %xmm3
	palignr	$2, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_2_loop)

L(shl_2_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	2(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_3):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$3, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_3_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$3, %xmm2, %xmm3
	palignr	$3, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_3_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$3, %xmm2, %xmm3
	palignr	$3, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_3_loop)

L(shl_3_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	3(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_4):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$4, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_4_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$4, %xmm2, %xmm3
	palignr	$4, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_4_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$4, %xmm2, %xmm3
	palignr	$4, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_4_loop)

L(shl_4_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	4(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_5):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$5, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_5_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$5, %xmm2, %xmm3
	palignr	$5, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_5_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$5, %xmm2, %xmm3
	palignr	$5, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_5_loop)

L(shl_5_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	5(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_6):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$6, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_6_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$6, %xmm2, %xmm3
	palignr	$6, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_6_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$6, %xmm2, %xmm3
	palignr	$6, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_6_loop)

L(shl_6_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	6(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_7):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$7, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_7_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$7, %xmm2, %xmm3
	palignr	$7, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_7_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$7, %xmm2, %xmm3
	palignr	$7, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_7_loop)

L(shl_7_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	7(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_8):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$8, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_8_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$8, %xmm2, %xmm3
	palignr	$8, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_8_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$8, %xmm2, %xmm3
	palignr	$8, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_8_loop)

L(shl_8_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	8(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_9):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$9, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_9_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$9, %xmm2, %xmm3
	palignr	$9, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_9_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$9, %xmm2, %xmm3
	palignr	$9, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_9_loop)

L(shl_9_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	9(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_10):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$10, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_10_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$10, %xmm2, %xmm3
	palignr	$10, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_10_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$10, %xmm2, %xmm3
	palignr	$10, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_10_loop)

L(shl_10_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	10(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_11):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$11, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_11_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$11, %xmm2, %xmm3
	palignr	$11, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_11_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$11, %xmm2, %xmm3
	palignr	$11, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_11_loop)

L(shl_11_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	11(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_12):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$12, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_12_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$12, %xmm2, %xmm3
	palignr	$12, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_12_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$12, %xmm2, %xmm3
	palignr	$12, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_12_loop)

L(shl_12_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	12(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_13):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$13, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_13_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$13, %xmm2, %xmm3
	palignr	$13, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_13_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$13, %xmm2, %xmm3
	palignr	$13, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_13_loop)

L(shl_13_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	13(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_14):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$14, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_14_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$14, %xmm2, %xmm3
	palignr	$14, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_14_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$14, %xmm2, %xmm3
	palignr	$14, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_14_loop)

L(shl_14_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	14(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(shl_15):
	BRANCH_TO_JMPTBL_ENTRY_VALUE(L(table_48bytes_fwd))
	sub	$15, %eax
	movaps	(%eax), %xmm1
	xor	%edi, %edi
	sub	$32, %ecx
	movdqu	%xmm0, (%esi)
	POP (%esi)
L(shl_15_loop):

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm4
	palignr	$15, %xmm2, %xmm3
	palignr	$15, %xmm1, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jb	L(shl_15_end)

	movdqa	16(%eax, %edi), %xmm2
	sub	$32, %ecx
	movdqa	32(%eax, %edi), %xmm3
	movdqa	%xmm3, %xmm1
	palignr	$15, %xmm2, %xmm3
	palignr	$15, %xmm4, %xmm2
	lea	32(%edi), %edi
	movdqa	%xmm2, -32(%edx, %edi)
	movdqa	%xmm3, -16(%edx, %edi)

	jae	L(shl_15_loop)

L(shl_15_end):
	add	$32, %ecx
	add	%ecx, %edi
	add	%edi, %edx
	lea	15(%edi, %eax), %eax
	POP (%edi)
	BRANCH_TO_JMPTBL_ENTRY_TAIL(L(table_48bytes_fwd), %ecx, 4)


	ALIGN (4)
L(fwd_write_44bytes):
	movl	-44(%eax), %ecx
	movl	%ecx, -44(%edx)
L(fwd_write_40bytes):
	movl	-40(%eax), %ecx
	movl	%ecx, -40(%edx)
L(fwd_write_36bytes):
	movl	-36(%eax), %ecx
	movl	%ecx, -36(%edx)
L(fwd_write_32bytes):
	movl	-32(%eax), %ecx
	movl	%ecx, -32(%edx)
L(fwd_write_28bytes):
	movl	-28(%eax), %ecx
	movl	%ecx, -28(%edx)
L(fwd_write_24bytes):
	movl	-24(%eax), %ecx
	movl	%ecx, -24(%edx)
L(fwd_write_20bytes):
	movl	-20(%eax), %ecx
	movl	%ecx, -20(%edx)
L(fwd_write_16bytes):
	movl	-16(%eax), %ecx
	movl	%ecx, -16(%edx)
L(fwd_write_12bytes):
	movl	-12(%eax), %ecx
	movl	%ecx, -12(%edx)
L(fwd_write_8bytes):
	movl	-8(%eax), %ecx
	movl	%ecx, -8(%edx)
L(fwd_write_4bytes):
	movl	-4(%eax), %ecx
	movl	%ecx, -4(%edx)
L(fwd_write_0bytes):
#ifndef USE_AS_BCOPY
# ifdef USE_AS_MEMPCPY
	movl	%edx, %eax
# else
	movl	DEST(%esp), %eax
# endif
#endif
	RETURN

	ALIGN (4)
L(fwd_write_5bytes):
	movl	-5(%eax), %ecx
	movl	-4(%eax), %eax
	movl	%ecx, -5(%edx)
	movl	%eax, -4(%edx)
#ifndef USE_AS_BCOPY
# ifdef USE_AS_MEMPCPY
	movl	%edx, %eax
# else
	movl	DEST(%esp), %eax
# endif
#endif
	RETURN

	ALIGN (4)
L(fwd_write_45bytes):
	movl	-45(%eax), %ecx
	movl	%ecx, -45(%edx)
L(fwd_write_41bytes):
	movl	-41(%eax), %ecx
	movl	%ecx, -41(%edx)
L(fwd_write_37bytes):
	movl	-37(%eax), %ecx
	movl	%ecx, -37(%edx)
L(fwd_write_33bytes):
	movl	-33(%eax), %ecx
	movl	%ecx, -33(%edx)
L(fwd_write_29bytes):
	movl	-29(%eax), %ecx
	movl	%ecx, -29(%edx)
L(fwd_write_25bytes):
	movl	-25(%eax), %ecx
	movl	%ecx, -25(%edx)
L(fwd_write_21bytes):
	movl	-21(%eax), %ecx
	movl	%ecx, -21(%edx)
L(fwd_write_17bytes):
	movl	-17(%eax), %ecx
	movl	%ecx, -17(%edx)
L(fwd_write_13bytes):
	movl	-13(%eax), %ecx
	movl	%ecx, -13(%edx)
L(fwd_write_9bytes):
	movl	-9(%eax), %ecx
	movl	%ecx, -9(%edx)
	movl	-5(%eax), %ecx
	movl	%ecx, -5(%edx)
L(fwd_write_1bytes):
	movzbl	-1(%eax), %ecx
	movb	%cl, -1(%edx)
#ifndef USE_AS_BCOPY
# ifdef USE_AS_MEMPCPY
	movl	%edx, %eax
# else
	movl	DEST(%esp), %eax
# endif
#endif
	RETURN

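/* The two chains below mirror the L(fwd_write_*) chains above: EAX and EDX
   point one byte past the end of the copy, each label copies a 4-byte word
   at its own offset and falls through to the next smaller case, and the
   final entry handles the sub-4-byte tail.  */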
	ALIGN (4)
L(fwd_write_46bytes):
	movl	-46(%eax), %ecx
	movl	%ecx, -46(%edx)
L(fwd_write_42bytes):
	movl	-42(%eax), %ecx
	movl	%ecx, -42(%edx)
L(fwd_write_38bytes):
	movl	-38(%eax), %ecx
	movl	%ecx, -38(%edx)
L(fwd_write_34bytes):
	movl	-34(%eax), %ecx
	movl	%ecx, -34(%edx)
L(fwd_write_30bytes):
	movl	-30(%eax), %ecx
	movl	%ecx, -30(%edx)
L(fwd_write_26bytes):
	movl	-26(%eax), %ecx
	movl	%ecx, -26(%edx)
L(fwd_write_22bytes):
	movl	-22(%eax), %ecx
	movl	%ecx, -22(%edx)
L(fwd_write_18bytes):
	movl	-18(%eax), %ecx
	movl	%ecx, -18(%edx)
L(fwd_write_14bytes):
	movl	-14(%eax), %ecx
	movl	%ecx, -14(%edx)
L(fwd_write_10bytes):
	movl	-10(%eax), %ecx
	movl	%ecx, -10(%edx)
L(fwd_write_6bytes):
	movl	-6(%eax), %ecx
	movl	%ecx, -6(%edx)
L(fwd_write_2bytes):
	movzwl	-2(%eax), %ecx
	movw	%cx, -2(%edx)
#ifndef USE_AS_BCOPY
# ifdef USE_AS_MEMPCPY
	movl	%edx, %eax
# else
	movl	DEST(%esp), %eax
# endif
#endif
	RETURN

	ALIGN (4)
L(fwd_write_47bytes):
	movl	-47(%eax), %ecx
	movl	%ecx, -47(%edx)
L(fwd_write_43bytes):
	movl	-43(%eax), %ecx
	movl	%ecx, -43(%edx)
L(fwd_write_39bytes):
	movl	-39(%eax), %ecx
	movl	%ecx, -39(%edx)
L(fwd_write_35bytes):
	movl	-35(%eax), %ecx
	movl	%ecx, -35(%edx)
L(fwd_write_31bytes):
	movl	-31(%eax), %ecx
	movl	%ecx, -31(%edx)
L(fwd_write_27bytes):
	movl	-27(%eax), %ecx
	movl	%ecx, -27(%edx)
L(fwd_write_23bytes):
	movl	-23(%eax), %ecx
	movl	%ecx, -23(%edx)
L(fwd_write_19bytes):
	movl	-19(%eax), %ecx
	movl	%ecx, -19(%edx)
L(fwd_write_15bytes):
	movl	-15(%eax), %ecx
	movl	%ecx, -15(%edx)
L(fwd_write_11bytes):
	movl	-11(%eax), %ecx
	movl	%ecx, -11(%edx)
L(fwd_write_7bytes):
	movl	-7(%eax), %ecx
	movl	%ecx, -7(%edx)
L(fwd_write_3bytes):
	movzwl	-3(%eax), %ecx
	movzbl	-1(%eax), %eax
	movw	%cx, -3(%edx)
	movb	%al, -1(%edx)
#ifndef USE_AS_BCOPY
# ifdef USE_AS_MEMPCPY
	movl	%edx, %eax
# else
	movl	DEST(%esp), %eax
# endif
#endif
	RETURN_END

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(large_page):
	movdqu	(%eax), %xmm1
	movdqu	%xmm0, (%esi)
	movntdq	%xmm1, (%edx)
	add	$0x10, %eax
	add	$0x10, %edx
	sub	$0x10, %ecx
	cmp	%al, %dl
	je	L(copy_page_by_rep)
L(large_page_loop_init):
	POP (%esi)
	sub	$0x80, %ecx
	POP (%edi)
L(large_page_loop):
	prefetchnta	0x1c0(%eax)
	prefetchnta	0x280(%eax)
	movdqu	(%eax), %xmm0
	movdqu	0x10(%eax), %xmm1
	movdqu	0x20(%eax), %xmm2
	movdqu	0x30(%eax), %xmm3
	movdqu	0x40(%eax), %xmm4
	movdqu	0x50(%eax), %xmm5
	movdqu	0x60(%eax), %xmm6
	movdqu	0x70(%eax), %xmm7
	lea	0x80(%eax), %eax
	lfence
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm1, 0x10(%edx)
	movntdq	%xmm2, 0x20(%edx)
	movntdq	%xmm3, 0x30(%edx)
	movntdq	%xmm4, 0x40(%edx)
	movntdq	%xmm5, 0x50(%edx)
	movntdq	%xmm6, 0x60(%edx)
	movntdq	%xmm7, 0x70(%edx)
	lea	0x80(%edx), %edx
	jae	L(large_page_loop)
	add	$0x80, %ecx
	cmp	$0x40, %ecx
	jb	L(large_page_less_64bytes)

	movdqu	(%eax), %xmm0
	movdqu	0x10(%eax), %xmm1
	movdqu	0x20(%eax), %xmm2
	movdqu	0x30(%eax), %xmm3
	lea	0x40(%eax), %eax

	movntdq	%xmm0, (%edx)
	movntdq	%xmm1, 0x10(%edx)
	movntdq	%xmm2, 0x20(%edx)
	movntdq	%xmm3, 0x30(%edx)
	lea	0x40(%edx), %edx
	sub	$0x40, %ecx
L(large_page_less_64bytes):
	cmp	$32, %ecx
	jb	L(large_page_less_32bytes)
	movdqu	(%eax), %xmm0
	movdqu	0x10(%eax), %xmm1
	lea	0x20(%eax), %eax
	movntdq	%xmm0, (%edx)
	movntdq	%xmm1, 0x10(%edx)
	lea	0x20(%edx), %edx
	sub	$0x20, %ecx
L(large_page_less_32bytes):
	add	%ecx, %edx
	add	%ecx, %eax
	sfence
	BRANCH_TO_JMPTBL_ENTRY (L(table_48bytes_fwd), %ecx, 4)

	cfi_restore_state
	cfi_remember_state
	ALIGN (4)
L(copy_page_by_rep):
	mov	%eax, %esi
	mov	%edx, %edi
	mov	%ecx, %edx
	shr	$2, %ecx
	and	$3, %edx
	rep	movsl
	jz	L(copy_page_by_rep_exit)
	cmp	$2, %edx
	jb	L(copy_page_by_rep_left_1)
	movzwl	(%esi), %eax
	movw	%ax, (%edi)
	add	$2, %esi
	add	$2, %edi
	sub	$2, %edx
	jz	L(copy_page_by_rep_exit)
L(copy_page_by_rep_left_1):
	movzbl	(%esi), %eax
	movb	%al, (%edi)
L(copy_page_by_rep_exit):
	POP (%esi)
	POP (%edi)
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN

	ALIGN (4)
L(bk_write_44bytes):
	movl	40(%eax), %ecx
	movl	%ecx, 40(%edx)
L(bk_write_40bytes):
	movl	36(%eax), %ecx
	movl	%ecx, 36(%edx)
L(bk_write_36bytes):
	movl	32(%eax), %ecx
	movl	%ecx, 32(%edx)
L(bk_write_32bytes):
	movl	28(%eax), %ecx
	movl	%ecx, 28(%edx)
L(bk_write_28bytes):
	movl	24(%eax), %ecx
	movl	%ecx, 24(%edx)
L(bk_write_24bytes):
	movl	20(%eax), %ecx
	movl	%ecx, 20(%edx)
L(bk_write_20bytes):
	movl	16(%eax), %ecx
	movl	%ecx, 16(%edx)
L(bk_write_16bytes):
	movl	12(%eax), %ecx
	movl	%ecx, 12(%edx)
L(bk_write_12bytes):
	movl	8(%eax), %ecx
	movl	%ecx, 8(%edx)
L(bk_write_8bytes):
	movl	4(%eax), %ecx
	movl	%ecx, 4(%edx)
L(bk_write_4bytes):
	movl	(%eax), %ecx
	movl	%ecx, (%edx)
L(bk_write_0bytes):
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN

	ALIGN (4)
L(bk_write_45bytes):
	movl	41(%eax), %ecx
	movl	%ecx, 41(%edx)
L(bk_write_41bytes):
	movl	37(%eax), %ecx
	movl	%ecx, 37(%edx)
L(bk_write_37bytes):
	movl	33(%eax), %ecx
	movl	%ecx, 33(%edx)
L(bk_write_33bytes):
	movl	29(%eax), %ecx
	movl	%ecx, 29(%edx)
L(bk_write_29bytes):
	movl	25(%eax), %ecx
	movl	%ecx, 25(%edx)
L(bk_write_25bytes):
	movl	21(%eax), %ecx
	movl	%ecx, 21(%edx)
L(bk_write_21bytes):
	movl	17(%eax), %ecx
	movl	%ecx, 17(%edx)
L(bk_write_17bytes):
	movl	13(%eax), %ecx
	movl	%ecx, 13(%edx)
L(bk_write_13bytes):
	movl	9(%eax), %ecx
	movl	%ecx, 9(%edx)
L(bk_write_9bytes):
	movl	5(%eax), %ecx
	movl	%ecx, 5(%edx)
L(bk_write_5bytes):
	movl	1(%eax), %ecx
	movl	%ecx, 1(%edx)
L(bk_write_1bytes):
	movzbl	(%eax), %ecx
	movb	%cl, (%edx)
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN

	ALIGN (4)
L(bk_write_46bytes):
	movl	42(%eax), %ecx
	movl	%ecx, 42(%edx)
L(bk_write_42bytes):
	movl	38(%eax), %ecx
	movl	%ecx, 38(%edx)
L(bk_write_38bytes):
	movl	34(%eax), %ecx
	movl	%ecx, 34(%edx)
L(bk_write_34bytes):
	movl	30(%eax), %ecx
	movl	%ecx, 30(%edx)
L(bk_write_30bytes):
	movl	26(%eax), %ecx
	movl	%ecx, 26(%edx)
L(bk_write_26bytes):
	movl	22(%eax), %ecx
	movl	%ecx, 22(%edx)
L(bk_write_22bytes):
	movl	18(%eax), %ecx
	movl	%ecx, 18(%edx)
L(bk_write_18bytes):
	movl	14(%eax), %ecx
	movl	%ecx, 14(%edx)
L(bk_write_14bytes):
	movl	10(%eax), %ecx
	movl	%ecx, 10(%edx)
L(bk_write_10bytes):
	movl	6(%eax), %ecx
	movl	%ecx, 6(%edx)
L(bk_write_6bytes):
	movl	2(%eax), %ecx
	movl	%ecx, 2(%edx)
L(bk_write_2bytes):
	movzwl	(%eax), %ecx
	movw	%cx, (%edx)
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN

	ALIGN (4)
L(bk_write_47bytes):
	movl	43(%eax), %ecx
	movl	%ecx, 43(%edx)
L(bk_write_43bytes):
	movl	39(%eax), %ecx
	movl	%ecx, 39(%edx)
L(bk_write_39bytes):
	movl	35(%eax), %ecx
	movl	%ecx, 35(%edx)
L(bk_write_35bytes):
	movl	31(%eax), %ecx
	movl	%ecx, 31(%edx)
L(bk_write_31bytes):
	movl	27(%eax), %ecx
	movl	%ecx, 27(%edx)
L(bk_write_27bytes):
	movl	23(%eax), %ecx
	movl	%ecx, 23(%edx)
L(bk_write_23bytes):
	movl	19(%eax), %ecx
	movl	%ecx, 19(%edx)
L(bk_write_19bytes):
	movl	15(%eax), %ecx
	movl	%ecx, 15(%edx)
L(bk_write_15bytes):
	movl	11(%eax), %ecx
	movl	%ecx, 11(%edx)
L(bk_write_11bytes):
	movl	7(%eax), %ecx
	movl	%ecx, 7(%edx)
L(bk_write_7bytes):
	movl	3(%eax), %ecx
	movl	%ecx, 3(%edx)
L(bk_write_3bytes):
	movzwl	1(%eax), %ecx
	movw	%cx, 1(%edx)
	movzbl	(%eax), %eax
	movb	%al, (%edx)
#ifndef USE_AS_BCOPY
	movl	DEST(%esp), %eax
# ifdef USE_AS_MEMPCPY
	movl	LEN(%esp), %ecx
	add	%ecx, %eax
# endif
#endif
	RETURN_END


	.pushsection .rodata.ssse3,"a",@progbits
	ALIGN (2)
L(table_48bytes_fwd):
	.int	JMPTBL (L(fwd_write_0bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_1bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_2bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_3bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_4bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_5bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_6bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_7bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_8bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_9bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_10bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_11bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_12bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_13bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_14bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_15bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_16bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_17bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_18bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_19bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_20bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_21bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_22bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_23bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_24bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_25bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_26bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_27bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_28bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_29bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_30bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_31bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_32bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_33bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_34bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_35bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_36bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_37bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_38bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_39bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_40bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_41bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_42bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_43bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_44bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_45bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_46bytes), L(table_48bytes_fwd))
	.int	JMPTBL (L(fwd_write_47bytes), L(table_48bytes_fwd))

	ALIGN (2)
L(shl_table):
	.int	JMPTBL (L(shl_0), L(shl_table))
	.int	JMPTBL (L(shl_1), L(shl_table))
	.int	JMPTBL (L(shl_2), L(shl_table))
	.int	JMPTBL (L(shl_3), L(shl_table))
	.int	JMPTBL (L(shl_4), L(shl_table))
	.int	JMPTBL (L(shl_5), L(shl_table))
	.int	JMPTBL (L(shl_6), L(shl_table))
	.int	JMPTBL (L(shl_7), L(shl_table))
	.int	JMPTBL (L(shl_8), L(shl_table))
	.int	JMPTBL (L(shl_9), L(shl_table))
	.int	JMPTBL (L(shl_10), L(shl_table))
	.int	JMPTBL (L(shl_11), L(shl_table))
	.int	JMPTBL (L(shl_12), L(shl_table))
	.int	JMPTBL (L(shl_13), L(shl_table))
	.int	JMPTBL (L(shl_14), L(shl_table))
	.int	JMPTBL (L(shl_15), L(shl_table))

	ALIGN (2)
L(table_48_bytes_bwd):
	.int	JMPTBL (L(bk_write_0bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_1bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_2bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_3bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_4bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_5bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_6bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_7bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_8bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_9bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_10bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_11bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_12bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_13bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_14bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_15bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_16bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_17bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_18bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_19bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_20bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_21bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_22bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_23bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_24bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_25bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_26bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_27bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_28bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_29bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_30bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_31bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_32bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_33bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_34bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_35bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_36bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_37bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_38bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_39bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_40bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_41bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_42bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_43bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_44bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_45bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_46bytes), L(table_48_bytes_bwd))
	.int	JMPTBL (L(bk_write_47bytes), L(table_48_bytes_bwd))

	.popsection

#ifdef USE_AS_MEMMOVE
	ALIGN (4)
L(copy_backward):
	PUSH (%esi)
	movl	%eax, %esi
	add	%ecx, %edx
	add	%ecx, %esi
	testl	$0x3, %edx
	jnz	L(bk_align)

L(bk_aligned_4):
	cmp	$64, %ecx
	jae	L(bk_write_more64bytes)

L(bk_write_64bytesless):
	cmp	$32, %ecx
	jb	L(bk_write_less32bytes)

L(bk_write_more32bytes):
	/* Copy 32 bytes at a time.  */
	sub	$32, %ecx
	movl	-4(%esi), %eax
	movl	%eax, -4(%edx)
	movl	-8(%esi), %eax
	movl	%eax, -8(%edx)
	movl	-12(%esi), %eax
	movl	%eax, -12(%edx)
	movl	-16(%esi), %eax
	movl	%eax, -16(%edx)
	movl	-20(%esi), %eax
	movl	%eax, -20(%edx)
	movl	-24(%esi), %eax
	movl	%eax, -24(%edx)
	movl	-28(%esi), %eax
	movl	%eax, -28(%edx)
	movl	-32(%esi), %eax
	movl	%eax, -32(%edx)
	sub	$32, %edx
	sub	$32, %esi

L(bk_write_less32bytes):
	movl	%esi, %eax
	sub	%ecx, %edx
	sub	%ecx, %eax
	POP (%esi)
L(bk_write_less48bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_48_bytes_bwd), %ecx, 4)

	CFI_PUSH (%esi)
	ALIGN (4)
L(bk_align):
	cmp	$8, %ecx
	jbe	L(bk_write_less32bytes)
	testl	$1, %edx
	/* We get here only if (EDX & 3) != 0, so if (EDX & 1) == 0,
	   then (EDX & 2) must be != 0.  */
	jz	L(bk_got2)
	sub	$1, %esi
	sub	$1, %ecx
	sub	$1, %edx
	movzbl	(%esi), %eax
	movb	%al, (%edx)

	testl	$2, %edx
	jz	L(bk_aligned_4)

L(bk_got2):
	sub	$2, %esi
	sub	$2, %ecx
	sub	$2, %edx
	movzwl	(%esi), %eax
	movw	%ax, (%edx)
	jmp	L(bk_aligned_4)

	ALIGN (4)
L(bk_write_more64bytes):
	/* Check alignment of last byte.  */
	testl	$15, %edx
	jz	L(bk_ssse3_cpy_pre)

/* EDX is 4-byte aligned but not 16-byte aligned.  */
L(bk_ssse3_align):
	sub	$4, %esi
	sub	$4, %ecx
	sub	$4, %edx
	movl	(%esi), %eax
	movl	%eax, (%edx)

	testl	$15, %edx
	jz	L(bk_ssse3_cpy_pre)

	sub	$4, %esi
	sub	$4, %ecx
	sub	$4, %edx
	movl	(%esi), %eax
	movl	%eax, (%edx)

	testl	$15, %edx
	jz	L(bk_ssse3_cpy_pre)

	sub	$4, %esi
	sub	$4, %ecx
	sub	$4, %edx
	movl	(%esi), %eax
	movl	%eax, (%edx)

L(bk_ssse3_cpy_pre):
	cmp	$64, %ecx
	jb	L(bk_write_more32bytes)

L(bk_ssse3_cpy):
	sub	$64, %esi
	sub	$64, %ecx
	sub	$64, %edx
	movdqu	0x30(%esi), %xmm3
	movdqa	%xmm3, 0x30(%edx)
	movdqu	0x20(%esi), %xmm2
	movdqa	%xmm2, 0x20(%edx)
	movdqu	0x10(%esi), %xmm1
	movdqa	%xmm1, 0x10(%edx)
	movdqu	(%esi), %xmm0
	movdqa	%xmm0, (%edx)
	cmp	$64, %ecx
	jae	L(bk_ssse3_cpy)
	jmp	L(bk_write_64bytesless)

#endif

END (MEMCPY)

#endif