1/* memset/bzero -- set memory area to CH/0
2   Highly optimized version for ix86, x>=5.
3   Copyright (C) 1996-2021 Free Software Foundation, Inc.
4   This file is part of the GNU C Library.
5
6   The GNU C Library is free software; you can redistribute it and/or
7   modify it under the terms of the GNU Lesser General Public
8   License as published by the Free Software Foundation; either
9   version 2.1 of the License, or (at your option) any later version.
10
11   The GNU C Library is distributed in the hope that it will be useful,
12   but WITHOUT ANY WARRANTY; without even the implied warranty of
13   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14   Lesser General Public License for more details.
15
16   You should have received a copy of the GNU Lesser General Public
17   License along with the GNU C Library; if not, see
18   <https://www.gnu.org/licenses/>.  */
19
20#include <sysdep.h>
21#include "asm-syntax.h"
22
/* Stack layout inside the function body: "pushl %edi" adds 4 bytes on
   top of the 4-byte return address, so the first argument is found
   8 bytes above %esp.  RTN/DEST name the first argument (the pointer
   that is also memset's return value).  */
#define PARMS	4+4	/* space for 1 saved reg */
#define RTN	PARMS		/* offset of first argument */
#define DEST	RTN		/* void *s -- start of the area to fill */
#ifdef USE_AS_BZERO
# define LEN	DEST+4		/* size_t n -- bzero takes no fill byte */
#else
# define CHR	DEST+4		/* int c -- fill byte (low 8 bits used) */
# define LEN	CHR+4		/* size_t n -- number of bytes to fill */
#endif
32
        .text
#if defined SHARED && IS_IN (libc) && !defined USE_AS_BZERO
/* void *__memset_chk (void *s, int c, size_t n, size_t dstlen)
   _FORTIFY_SOURCE entry point: verify that the destination buffer
   (dstlen at 16(%esp)) can hold n (12(%esp)) bytes and abort through
   __chk_fail if it cannot.  No register is pushed yet, so arguments
   start at 4(%esp).  On success control falls straight through into
   ENTRY (memset) below -- there is deliberately no ret here.  */
ENTRY (__memset_chk)
	movl	12(%esp), %eax	/* %eax = n, the requested fill length */
	cmpl	%eax, 16(%esp)	/* dstlen < n?  */
	jb	HIDDEN_JUMPTARGET (__chk_fail)	/* yes: buffer overflow */
END (__memset_chk)
#endif
/* void *memset (void *s, int c, size_t n)
   (or void bzero (void *s, size_t n) when built with USE_AS_BZERO).

   Strategy, tuned for the original Pentium: align the destination to a
   4-byte boundary, then fill one 32-byte cache line per iteration of
   the unrolled loop, touching each line with a read first so it is
   allocated in the cache before the eight stores hit it.  Registers:
   %edi = destination cursor, %edx = byte count (later: count - 32),
   %eax = fill pattern replicated into all four bytes, %ecx = scratch /
   rep counter.  */
ENTRY (memset)

	pushl	%edi		/* %edi is callee-saved; preserve it */
	cfi_adjust_cfa_offset (4)

	movl	DEST(%esp), %edi	/* %edi = destination pointer */
	cfi_rel_offset (edi, 0)
	movl	LEN(%esp), %edx		/* %edx = byte count */
#ifdef USE_AS_BZERO
	xorl	%eax, %eax	/* we fill with 0 */
#else
	/* Replicate the fill byte into all four bytes of %eax:
	   al -> ah makes the pattern fill %ax, a copy is parked in %ecx,
	   then the shift + movw rebuild %eax as CH * 0x01010101.  */
	movb	CHR(%esp), %al
	movb	%al, %ah
	movl	%eax, %ecx
	shll	$16, %eax
	movw	%cx, %ax
#endif
	cld			/* stos must advance %edi upwards */

/* If less than 36 bytes to write, skip tricky code (it wouldn't work).
   Note the compare is signed (jl): a count with the top bit set also
   takes the short path, which still fills correctly via rep stosl +
   rep stosb, just without the cache-line optimization.  */
	cmpl	$36, %edx
	movl	%edx, %ecx	/* needed when branch is taken! */
	jl	L(2)

/* First write 0-3 bytes to make the pointer 32-bit aligned.
   (-ptr) & 3 is exactly the distance up to the next 4-byte boundary.  */
	movl	%edi, %ecx	/* Copy ptr to ecx... */
	negl	%ecx		/* ...and negate that and... */
	andl	$3, %ecx	/* ...mask to get byte count.  */
	subl	%ecx, %edx	/* adjust global byte count */
	rep
	stosb

	/* Bias the count by -32 so the loop can test "another full
	   32-byte line remains" with a plain sign check (jge).  */
	subl	$32, %edx	/* offset count for unrolled loop */
	movl	(%edi), %ecx	/* Fetch destination cache line */

	.align	2, 0x90		/* supply 0x90 for broken assemblers */
L(1):	movl	28(%edi), %ecx	/* allocate cache line for destination */
	subl	$32, %edx	/* decr loop count */
	movl	%eax, 0(%edi)	/* store words pairwise */
	movl	%eax, 4(%edi)
	movl	%eax, 8(%edi)
	movl	%eax, 12(%edi)
	movl	%eax, 16(%edi)
	movl	%eax, 20(%edi)
	movl	%eax, 24(%edi)
	movl	%eax, 28(%edi)
	leal	32(%edi), %edi	/* update destination pointer */
	jge	L(1)

	/* Here %edx is in [-32, -1]; adding 32 back yields the number of
	   trailing bytes (0-31) still to be written.  */
	leal	32(%edx), %ecx	/* reset offset count */

/* Write last 0-7 full 32-bit words (up to 8 words if loop was skipped).  */
L(2):	shrl	$2, %ecx	/* convert byte count to longword count */
	rep
	stosl

/* Finally write the last 0-3 bytes.  The low two bits of %edx were
   never changed by the -32 steps above, so they still hold the
   remaining-byte count modulo 4.  */
	movl	%edx, %ecx
	andl	$3, %ecx
	rep
	stosb

#ifndef USE_AS_BZERO
	/* Load result (only if used as memset).  */
	movl DEST(%esp), %eax	/* start address of destination is result */
#endif
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
END (memset)
libc_hidden_builtin_def (memset)
114