/* memset with SSE2
   Copyright (C) 2010-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#if IS_IN (libc)

#include <sysdep.h>
#include "asm-syntax.h"

/* Unwind-info bookkeeping that goes with a 4-byte push of REG.  */
#define CFI_PUSH(REG)						\
  cfi_adjust_cfa_offset (4);					\
  cfi_rel_offset (REG, 0)

/* Unwind-info bookkeeping that goes with a 4-byte pop of REG.  */
#define CFI_POP(REG)						\
  cfi_adjust_cfa_offset (-4);					\
  cfi_restore (REG)

#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Offsets of the stack arguments relative to ESP once ENTRANCE has run.
   The bzero variant has no fill-byte argument and sets no return value;
   the memset variant reloads the destination pointer into EAX right
   before returning.  */
#ifdef USE_AS_BZERO
# define DEST		PARMS
# define LEN		DEST+4
# define SETRTNVAL
#else
# define DEST		PARMS
# define CHR		DEST+4
# define LEN		CHR+4
# define SETRTNVAL	movl DEST(%esp), %eax
#endif

#ifdef PIC
/* EBX is clobbered by the PIC jump-table dispatch, so it is saved on
   entry and restored by every return.  RETURN re-establishes the
   "EBX pushed" unwind state for the code that follows it in the file;
   RETURN_END is for the very last return and does not.  */
# define ENTRANCE	PUSH (%ebx);
# define RETURN_END	POP (%ebx); ret
# define RETURN		RETURN_END; CFI_PUSH (%ebx)
# define PARMS		8		/* Preserve EBX.  */
# define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the index; it is also added
   to EDX so that EDX points one past the last byte to store.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    SETUP_PIC_REG(bx);						\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX. Go.  */	\
    _CET_NOTRACK jmp *%ebx
#else
# define ENTRANCE
# define RETURN_END	ret
# define RETURN		RETURN_END
# define PARMS		4
# define JMPTBL(I, B)	I

/* Branch to an entry in a jump table.  TABLE is a jump table with
   absolute offsets.  ECX is the index; it is also added to EDX so that
   EDX points one past the last byte to store.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    add		%ecx, %edx;					\
    _CET_NOTRACK jmp *TABLE(,%ecx,4)
#endif

/* L(128bytesormore) pushes EBX a second time exactly when the shared
   cache size is a compile-time constant or the code is not PIC.  (In
   the remaining case, PIC with a run-time size, EBX is only reloaded
   because ENTRANCE already saved it.)  Every later pop of that extra
   slot, and the matching unwind-state restore, must be keyed on this
   same condition.  Keying them on DATA_CACHE_SIZE instead unbalances
   the stack in PIC builds that define only one of SHARED_CACHE_SIZE
   and DATA_CACHE_SIZE.  */
#if defined SHARED_CACHE_SIZE || !defined PIC
# define EBX_SAVED_FOR_CACHE_SIZE 1
#endif

	.section .text.sse2,"ax",@progbits
#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
/* Checked entry point: branch to __chk_fail when the fourth stack
   argument is below the third (the length), unsigned.  Otherwise fall
   through into __memset_sse2, which follows immediately.  */
ENTRY (__memset_chk_sse2)
	movl	12(%esp), %eax
	cmpl	%eax, 16(%esp)
	jb	HIDDEN_JUMPTARGET (__chk_fail)
END (__memset_chk_sse2)
#endif

/* __memset_sse2 (dest, chr, len), or (dest, len) when built with
   USE_AS_BZERO.  All arguments are read from the stack.

   Register roles after the prologue:
     EAX   fill byte replicated into all four bytes (zero for bzero)
     ECX   number of bytes still to store
     EDX   store cursor; BRANCH_TO_JMPTBL_ENTRY moves it to one past
	   the end, so the tail code stores at negative offsets
     XMM0  fill byte replicated into all 16 bytes (32 bytes or more)
     EBX   scratch: jump-table address in PIC code, cache-size limit
	   in the large-size paths

   The memset variant returns DEST in EAX.  */
ENTRY (__memset_sse2)
	ENTRANCE

	movl	LEN(%esp), %ecx
#ifdef USE_AS_BZERO
	xor	%eax, %eax
#else
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
#endif
	movl	DEST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: dispatch straight to the unrolled tail for that length.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_less_32bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_less_32bytes):
	.int	JMPTBL (L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL (L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Tails for fewer than 32 bytes.  EDX points one past the end.  Each
   chain is entered at the label matching the length and falls through
   4-byte stores down to a 0..3 byte remainder.  */
	ALIGN (4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
/* ECX >= 32.  No alignment of EDX has been established yet.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
#ifdef USE_AS_BZERO
	pxor	%xmm0, %xmm0
#else
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
#endif
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned 16-byte store,
	   then round EDX up to the next 16-byte boundary.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	/* EAX = old EDX - new EDX, i.e. minus the number of head bytes
	   skipped; adding it shrinks the remaining length.  */
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the 4-byte pattern.  */
	movd	%xmm0, %eax

	ALIGN (4)
/* EDX is 16 byte aligned from here on.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* ECX >= 128.  Load the shared cache size into EBX and pick a store
   strategy by comparing the length with the cache sizes.  */
L(128bytesormore):
#ifdef SHARED_CACHE_SIZE
	PUSH (%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
#else
# ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	mov	__x86_shared_cache_size@GOTOFF(%ebx), %ebx
# else
	PUSH (%ebx)
	mov	__x86_shared_cache_size, %ebx
# endif
#endif
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)

	/* Below the shared cache size: EBX is no longer needed as a
	   limit, so undo the extra push if there was one.
	   RESTORE_EBX_STATE re-creates the pushed unwind state just
	   before L(128bytesormore_nt_start), which is reached with the
	   extra slot still on the stack.  */
#ifdef EBX_SAVED_FOR_CACHE_SIZE
	POP (%ebx)
# define RESTORE_EBX_STATE CFI_PUSH (%ebx)
#else
# define RESTORE_EBX_STATE
#endif
#ifdef DATA_CACHE_SIZE
	cmp	$DATA_CACHE_SIZE, %ecx
#else
# ifdef PIC
	SETUP_PIC_REG(bx)
	add	$_GLOBAL_OFFSET_TABLE_, %ebx
	cmp	__x86_data_cache_size@GOTOFF(%ebx), %ecx
# else
	cmp	__x86_data_cache_size, %ecx
# endif
#endif

	jae	L(128bytes_L2_normal)
	/* Bias ECX by -128 so that the carry out of each "sub" below
	   tells whether another full 128-byte block follows the one
	   being stored.  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	/* lea, unlike add, keeps the flags of the "sub" above alive
	   for the conditional jump.  */
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Remove the bias: ECX is again the 0..127 bytes left.  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	ALIGN (4)
/* Length at least the data cache size but below the shared cache size:
   same 128-byte blocks, with prefetches ahead of the cursor.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))

	RESTORE_EBX_STATE
/* Length at least the shared cache size (EBX).  Store the first EBX
   bytes with ordinary stores, then the rest, now in ECX, with
   non-temporal stores.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	ALIGN (4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN (4)
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Fence after the non-temporal stores.  */
	sfence
L(shared_cache_loop_end):
	/* Drop the extra EBX slot pushed at L(128bytesormore), if any.  */
#ifdef EBX_SAVED_FOR_CACHE_SIZE
	POP (%ebx)
#endif
	BRANCH_TO_JMPTBL_ENTRY (L(table_16_128bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN (2)
L(table_16_128bytes):
	.int	JMPTBL (L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL (L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Tails for 0..127 bytes starting at a 16-byte aligned address.  EDX
   points one past the end.  Each chain is entered at the label
   matching the length and falls through 16-byte stores; because the
   start is aligned, every movdqa address is aligned too.  The final
   0..15 bytes are stored from XMM0 (8 bytes) and EAX (4, 2, 1).  */
	ALIGN (4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN (4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the file: no unwind state to re-establish.  */
	RETURN_END

END (__memset_sse2)

#endif