/* strcpy/stpcpy implementation for x86-64.
   Copyright (C) 2002-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include <sysdep.h>
#include "asm-syntax.h"

/* Name of the routine assembled below.  A plain build gets `strcpy'.
   When USE_AS_STPCPY is defined this file does not define STRCPY, so
   whoever defines USE_AS_STPCPY must also supply STRCPY.  */
#ifndef USE_AS_STPCPY
# define STRCPY strcpy
#endif

	.text
/* STRCPY: copy the NUL-terminated string at %rsi to the buffer at %rdi,
   including the terminating NUL.

   In:	%rdi = destination buffer
	%rsi = source string
   Out:	%rax = %rdi (the destination) in the plain build;
	%rax = address of the NUL written to the destination when
	       USE_AS_STPCPY is defined.
   Clobbers: %rax, %rcx, %rdx, %rsi, %r8, %r9, flags.
   No stack is used and no callee-saved register is touched.

   Register roles:
	%rsi  source read pointer
	%rdx  destination write pointer (%rdi itself is left untouched)
	%ecx  number of bytes left until the source is 8-byte aligned
	%r8   the constant 0xfefefefefefefeff used for NUL detection
	%r9   scratch for NUL detection
	%rax  the byte / 8-byte word currently being copied

   Outline:
     1. Copy byte by byte until the source pointer is 8-byte aligned.
     2. Copy 8 bytes at a time (loop unrolled four times) as long as
	the word read contains no NUL byte.  The source is aligned, so
	a load never straddles a page boundary and the bytes read
	behind the terminator lie in the terminator's page.
     3. Store the word that contains the NUL one byte at a time, so
	nothing behind the terminator is written to the destination.

   NUL detection: 0xfefefefefefefeff is -0x0101010101010101 modulo
   2^64, i.e. 0xff in the lowest byte and 0xfe in every other byte.
   Adding it to a word makes every byte produce a carry into the next
   one as long as that byte and all lower bytes are nonzero; the first
   NUL byte breaks the carry chain.  The carry out of the top byte
   ends up in CF, the carries into bytes 1..7 end up in bits 8, 16,
   ..., 56 of the sum, where the constant has zero bits.  */
ENTRY (STRCPY)
	movq %rsi, %rcx		/* Copy the source address ...  */
	andl $7, %ecx		/* ... and keep its low three bits; ZF is
				   set if the source is 8-byte aligned.  */
	movq %rdi, %rdx		/* %rdx is the write pointer.  movq does
				   not modify the flags set by andl.  */

	jz 5f			/* Source already aligned => word loop.  */

	neg %ecx		/* %ecx = 8 - (src & 7): number of bytes  */
	addl $8,%ecx		/* up to the next 8-byte boundary.  */

	/* Copy the leading unaligned bytes one at a time.  */
0:
	movb	(%rsi), %al	/* Fetch a byte.  */
	testb	%al, %al	/* Is it NUL?  */
	movb	%al, (%rdx)	/* Store it (movb keeps the flags).  */
	jz	4f		/* NUL stored; %rdx points at it.  Done.  */
	incq	%rsi
	incq	%rdx
	decl	%ecx
	jnz	0b		/* Until the source is aligned.  */

5:
	movq $0xfefefefefefefeff,%r8	/* NUL detection constant.  */

	/* Now the source is aligned.  Unfortunately we cannot force
	   both source and destination to be aligned, so the alignment
	   of the destination is ignored.  */
	.p2align 4
1:
	/* 1st unroll.  */
	movq	(%rsi), %rax	/* Read a quadword (8 bytes).  */
	addq	$8, %rsi	/* Advance the source pointer.  */
	movq	%rax, %r9	/* Work on a copy for NUL finding.  */
	addq	%r8, %r9	/* Add the magic value.  Every byte which
				   is *not* NUL (and has no NUL below it)
				   carries into the next byte.  */
	jnc	3f		/* No carry out of the top byte => the
				   word contains a NUL; copy bytewise.  */
	xorq	%rax, %r9	/* (word+magic)^word: bits 8,16,...,56
				   now hold the carries into bytes 1..7.  */
	orq	%r8, %r9	/* Set all non-carry bits.  */
	incq	%r9		/* All ones + 1 == 0.  If any carry bit
				   was *not* set the result is nonzero.  */

	jnz	3f		/* A carry is missing => NUL in the word;
				   copy bytewise.  */

	movq	%rax, (%rdx)	/* NUL-free word: store all 8 bytes.  */
	addq	$8, %rdx	/* Advance the destination pointer.  */

	/* 2nd unroll.  Same steps as the 1st.  */
	movq	(%rsi), %rax	/* Read a quadword (8 bytes).  */
	addq	$8, %rsi	/* Advance the source pointer.  */
	movq	%rax, %r9	/* Work on a copy for NUL finding.  */
	addq	%r8, %r9	/* Add the magic value.  */
	jnc	3f		/* No carry out => NUL in the word.  */
	xorq	%rax, %r9	/* Isolate the carries into bytes 1..7.  */
	orq	%r8, %r9	/* Set all non-carry bits.  */
	incq	%r9		/* Zero only if every carry was set.  */

	jnz	3f		/* A carry is missing => NUL in the word.  */

	movq	%rax, (%rdx)	/* NUL-free word: store all 8 bytes.  */
	addq	$8, %rdx	/* Advance the destination pointer.  */

	/* 3rd unroll.  Same steps as the 1st.  */
	movq	(%rsi), %rax	/* Read a quadword (8 bytes).  */
	addq	$8, %rsi	/* Advance the source pointer.  */
	movq	%rax, %r9	/* Work on a copy for NUL finding.  */
	addq	%r8, %r9	/* Add the magic value.  */
	jnc	3f		/* No carry out => NUL in the word.  */
	xorq	%rax, %r9	/* Isolate the carries into bytes 1..7.  */
	orq	%r8, %r9	/* Set all non-carry bits.  */
	incq	%r9		/* Zero only if every carry was set.  */

	jnz	3f		/* A carry is missing => NUL in the word.  */

	movq	%rax, (%rdx)	/* NUL-free word: store all 8 bytes.  */
	addq	$8, %rdx	/* Advance the destination pointer.  */

	/* 4th unroll.  Same steps as the 1st.  */
	movq	(%rsi), %rax	/* Read a quadword (8 bytes).  */
	addq	$8, %rsi	/* Advance the source pointer.  */
	movq	%rax, %r9	/* Work on a copy for NUL finding.  */
	addq	%r8, %r9	/* Add the magic value.  */
	jnc	3f		/* No carry out => NUL in the word.  */
	xorq	%rax, %r9	/* Isolate the carries into bytes 1..7.  */
	orq	%r8, %r9	/* Set all non-carry bits.  */
	incq	%r9		/* Zero only if every carry was set.  */

	jnz	3f		/* A carry is missing => NUL in the word.  */

	movq	%rax, (%rdx)	/* NUL-free word: store all 8 bytes.  */
	addq	$8, %rdx	/* Advance the destination pointer.  */
	jmp	1b		/* Next iteration.  */

	/* Do the last few bytes.  %rax holds the final source word,
	   which contains at least one NUL byte; %rdx points at the
	   destination of its lowest byte.  Two bytes (%al, %ah) are
	   handled per iteration, then %rax is shifted down by 16 bits.
	   The NUL inside the word terminates the loop.  */
	.p2align 4
3:
	/* Note that stpcpy needs to return the address of the NUL
	   byte, so %rdx is only advanced past non-NUL bytes.  */
	movb	%al, (%rdx)	/* 1st byte.  */
	testb	%al, %al	/* Is it NUL?  */
	jz	4f		/* Yes, finish.  */
	incq	%rdx		/* Increment destination.  */
	movb	%ah, (%rdx)	/* 2nd byte.  */
	testb	%ah, %ah	/* Is it NUL?  */
	jz	4f		/* Yes, finish.  */
	incq	%rdx		/* Increment destination.  */
	shrq	$16, %rax	/* Move the next two bytes into %al/%ah ...  */
	jmp	3b		/* ... and look at them.  */

	/* Here %rdx points at the NUL just stored in the destination.  */
4:
#ifdef USE_AS_STPCPY
	movq	%rdx, %rax	/* Return the address of the stored NUL.  */
#else
	movq	%rdi, %rax	/* Return the original destination.  */
#endif
	retq
END (STRCPY)
/* Emitted only when this file is not built with USE_AS_STPCPY, i.e.
   when the routine above was assembled under the name strcpy.
   NOTE(review): libc_hidden_builtin_def is defined outside this file;
   presumably it provides the library-internal hidden definition of
   strcpy -- confirm against its definition.  */
#ifndef USE_AS_STPCPY
libc_hidden_builtin_def (strcpy)
#endif
156