1/* strcat with SSSE3
2   Copyright (C) 2011-2021 Free Software Foundation, Inc.
3   This file is part of the GNU C Library.
4
5   The GNU C Library is free software; you can redistribute it and/or
6   modify it under the terms of the GNU Lesser General Public
7   License as published by the Free Software Foundation; either
8   version 2.1 of the License, or (at your option) any later version.
9
10   The GNU C Library is distributed in the hope that it will be useful,
11   but WITHOUT ANY WARRANTY; without even the implied warranty of
12   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13   Lesser General Public License for more details.
14
15   You should have received a copy of the GNU Lesser General Public
16   License along with the GNU C Library; if not, see
17   <https://www.gnu.org/licenses/>.  */
18
19#if IS_IN (libc)
20
21# include <sysdep.h>
22
23# ifndef STRCAT
24#  define STRCAT  __strcat_ssse3
25# endif
26
27# define USE_AS_STRCAT
28
29.text
/* STRCAT: locate the terminating NUL of the string at %rdi, then copy
   the string at %rsi there (see L(StartStrcpyPart)).  Every exit that
   is visible in this file returns the original %rdi in %rax.  */
30ENTRY (STRCAT)
31# ifdef USE_AS_STRNCAT
	/* Keep the third argument in %r8; %rdx is reused as the
	   destination cursor from L(StartStrcpyPart) onwards.  */
32	mov	%rdx, %r8
33# endif
34
35
36/* Inline corresponding strlen file, temporary until new strcpy
37   implementation gets merged.  */
38
	/* %rax starts at zero.  The first 16 destination bytes are tested
	   one at a time, in order; L(exit_tailN) adds N to %rax so that it
	   ends up holding the offset of the terminating NUL.  */
39	xor	%eax, %eax
40	cmpb	$0, (%rdi)
41	jz	L(exit_tail0)
42	cmpb	$0, 1(%rdi)
43	jz	L(exit_tail1)
44	cmpb	$0, 2(%rdi)
45	jz	L(exit_tail2)
46	cmpb	$0, 3(%rdi)
47	jz	L(exit_tail3)
48
49	cmpb	$0, 4(%rdi)
50	jz	L(exit_tail4)
51	cmpb	$0, 5(%rdi)
52	jz	L(exit_tail5)
53	cmpb	$0, 6(%rdi)
54	jz	L(exit_tail6)
55	cmpb	$0, 7(%rdi)
56	jz	L(exit_tail7)
57
58	cmpb	$0, 8(%rdi)
59	jz	L(exit_tail8)
60	cmpb	$0, 9(%rdi)
61	jz	L(exit_tail9)
62	cmpb	$0, 10(%rdi)
63	jz	L(exit_tail10)
64	cmpb	$0, 11(%rdi)
65	jz	L(exit_tail11)
66
67	cmpb	$0, 12(%rdi)
68	jz	L(exit_tail12)
69	cmpb	$0, 13(%rdi)
70	jz	L(exit_tail13)
71	cmpb	$0, 14(%rdi)
72	jz	L(exit_tail14)
73	cmpb	$0, 15(%rdi)
74	jz	L(exit_tail15)
/* Destination bytes 0..15 contain no NUL.  %rcx = %rdi + 16 is the bias
   that L(exit) subtracts again; %rax is that address rounded down to a
   16-byte boundary, so the aligned compares below may look again at
   some of bytes 1..15, which are already known to be non-zero.  */
75	pxor	%xmm0, %xmm0
76	lea	16(%rdi), %rcx
77	lea	16(%rdi), %rax
78	and	$-16, %rax
79
	/* One step: compare 16 bytes with a zeroed register, move the
	   per-byte result mask into %edx and advance %rax by 16.  lea
	   does not modify the flags, so jnz still sees the result of
	   test.  L(exit) is therefore entered with %rax = address of the
	   matching block + 16.  The pxor in between prepares the zero
	   register for the following step.  */
80	pcmpeqb	(%rax), %xmm0
81	pmovmskb %xmm0, %edx
82	pxor	%xmm1, %xmm1
83	test	%edx, %edx
84	lea	16(%rax), %rax
85	jnz	L(exit)
86
87	pcmpeqb	(%rax), %xmm1
88	pmovmskb %xmm1, %edx
89	pxor	%xmm2, %xmm2
90	test	%edx, %edx
91	lea	16(%rax), %rax
92	jnz	L(exit)
93
94	pcmpeqb	(%rax), %xmm2
95	pmovmskb %xmm2, %edx
96	pxor	%xmm3, %xmm3
97	test	%edx, %edx
98	lea	16(%rax), %rax
99	jnz	L(exit)
100
101	pcmpeqb	(%rax), %xmm3
102	pmovmskb %xmm3, %edx
103	test	%edx, %edx
104	lea	16(%rax), %rax
105	jnz	L(exit)
106
	/* From here on %xmm0-%xmm3 are reused without being cleared:
	   execution only gets this far when the previous compare on the
	   register produced a zero mask, which leaves the register
	   all-zero.  */
107	pcmpeqb	(%rax), %xmm0
108	pmovmskb %xmm0, %edx
109	test	%edx, %edx
110	lea	16(%rax), %rax
111	jnz	L(exit)
112
113	pcmpeqb	(%rax), %xmm1
114	pmovmskb %xmm1, %edx
115	test	%edx, %edx
116	lea	16(%rax), %rax
117	jnz	L(exit)
118
119	pcmpeqb	(%rax), %xmm2
120	pmovmskb %xmm2, %edx
121	test	%edx, %edx
122	lea	16(%rax), %rax
123	jnz	L(exit)
124
125	pcmpeqb	(%rax), %xmm3
126	pmovmskb %xmm3, %edx
127	test	%edx, %edx
128	lea	16(%rax), %rax
129	jnz	L(exit)
130
131	pcmpeqb	(%rax), %xmm0
132	pmovmskb %xmm0, %edx
133	test	%edx, %edx
134	lea	16(%rax), %rax
135	jnz	L(exit)
136
137	pcmpeqb	(%rax), %xmm1
138	pmovmskb %xmm1, %edx
139	test	%edx, %edx
140	lea	16(%rax), %rax
141	jnz	L(exit)
142
143	pcmpeqb	(%rax), %xmm2
144	pmovmskb %xmm2, %edx
145	test	%edx, %edx
146	lea	16(%rax), %rax
147	jnz	L(exit)
148
149	pcmpeqb	(%rax), %xmm3
150	pmovmskb %xmm3, %edx
151	test	%edx, %edx
152	lea	16(%rax), %rax
153	jnz	L(exit)
154
155	pcmpeqb	(%rax), %xmm0
156	pmovmskb %xmm0, %edx
157	test	%edx, %edx
158	lea	16(%rax), %rax
159	jnz	L(exit)
160
161	pcmpeqb	(%rax), %xmm1
162	pmovmskb %xmm1, %edx
163	test	%edx, %edx
164	lea	16(%rax), %rax
165	jnz	L(exit)
166
167	pcmpeqb	(%rax), %xmm2
168	pmovmskb %xmm2, %edx
169	test	%edx, %edx
170	lea	16(%rax), %rax
171	jnz	L(exit)
172
173	pcmpeqb	(%rax), %xmm3
174	pmovmskb %xmm3, %edx
175	test	%edx, %edx
176	lea	16(%rax), %rax
177	jnz	L(exit)
178
	/* Sixteen 16-byte blocks were NUL-free.  Round %rax down to a
	   64-byte boundary for the main loop; this can step back by at
	   most 48 bytes, i.e. only over blocks that were just checked.  */
179	and	$-0x40, %rax
180
181	.p2align 4
	/* Main loop: compare four 16-byte blocks per iteration and OR the
	   four masks together in %r9d.  The flags of the last `or' survive
	   the lea, so jz loops while none of the 64 bytes is NUL.  The
	   registers are still all-zero whenever the loop repeats.  */
182L(aligned_64):
183	pcmpeqb	(%rax), %xmm0
184	pcmpeqb	16(%rax), %xmm1
185	pcmpeqb	32(%rax), %xmm2
186	pcmpeqb	48(%rax), %xmm3
187	pmovmskb %xmm0, %edx
188	pmovmskb %xmm1, %r11d
189	pmovmskb %xmm2, %r10d
190	pmovmskb %xmm3, %r9d
191	or	%edx, %r9d
192	or	%r11d, %r9d
193	or	%r10d, %r9d
194	lea	64(%rax), %rax
195	jz	L(aligned_64)
196
	/* A NUL is somewhere in the 64 bytes just examined and %rax is
	   already 64 past their start.  Pick the first block with a
	   non-zero mask, put that mask in %edx and adjust %rax to
	   "block + 16", which is what L(exit) expects.  */
197	test	%edx, %edx
198	jnz	L(aligned_64_exit_16)
199	test	%r11d, %r11d
200	jnz	L(aligned_64_exit_32)
201	test	%r10d, %r10d
202	jnz	L(aligned_64_exit_48)
203
	/* Fourth block: %r9d was overwritten by the ORs above, so fetch
	   the mask from %xmm3 again.  %rax needs no adjustment.  */
204L(aligned_64_exit_64):
205	pmovmskb %xmm3, %edx
206	jmp	L(exit)
207
	/* Third block.  */
208L(aligned_64_exit_48):
209	lea	-16(%rax), %rax
210	mov	%r10d, %edx
211	jmp	L(exit)
212
	/* Second block.  */
213L(aligned_64_exit_32):
214	lea	-32(%rax), %rax
215	mov	%r11d, %edx
216	jmp	L(exit)
217
	/* First block: %edx already holds its mask.  Control continues
	   with the code that follows this label.  */
218L(aligned_64_exit_16):
219	lea	-48(%rax), %rax
220
/* Turn the result of the vector scan into the length of the destination
   string.

   On entry to L(exit):
     %rax = address of the 16-byte block holding the first NUL, plus 16
     %edx = byte mask of that block (bit i set <=> byte i is NUL)
     %rcx = %rdi + 16
   so %rax - %rcx is the offset of the block from %rdi, and the index of
   the lowest set bit of %edx still has to be added.

   L(exit_tailN) adds N to %rax and continues at L(StartStrcpyPart).
   The labels are also entered directly from the bytewise scan at the
   top of the function, where %rax is zero.

   All additions operate on the full 64-bit %rax.  A write to %eax
   zero-extends into %rax and would discard bits 32-63 of the offset
   computed by the sub below, i.e. the append position would wrap for a
   destination string of 4 GiB or more.  */
L(exit):
	sub	%rcx, %rax
	test	%dl, %dl
	jz	L(exit_high)		/* NUL is in bytes 8..15.  */
	test	$0x01, %dl
	jnz	L(exit_tail0)

	test	$0x02, %dl
	jnz	L(exit_tail1)

	test	$0x04, %dl
	jnz	L(exit_tail2)

	test	$0x08, %dl
	jnz	L(exit_tail3)

	test	$0x10, %dl
	jnz	L(exit_tail4)

	test	$0x20, %dl
	jnz	L(exit_tail5)

	test	$0x40, %dl
	jnz	L(exit_tail6)
	/* %dl is non-zero and bits 0..6 are clear: the NUL is byte 7.  */
	add	$7, %rax
L(exit_tail0):
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_high):
	/* Low mask byte was zero: skip 8 bytes and decode bits 8..15 with
	   the same exit_tail labels.  */
	add	$8, %rax
	test	$0x01, %dh
	jnz	L(exit_tail0)

	test	$0x02, %dh
	jnz	L(exit_tail1)

	test	$0x04, %dh
	jnz	L(exit_tail2)

	test	$0x08, %dh
	jnz	L(exit_tail3)

	test	$0x10, %dh
	jnz	L(exit_tail4)

	test	$0x20, %dh
	jnz	L(exit_tail5)

	test	$0x40, %dh
	jnz	L(exit_tail6)
	/* Only bit 15 is left: the NUL is byte 15 of the block.  */
	add	$7, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail1):
	add	$1, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail2):
	add	$2, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail3):
	add	$3, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail4):
	add	$4, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail5):
	add	$5, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail6):
	add	$6, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail7):
	add	$7, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail8):
	add	$8, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail9):
	add	$9, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail10):
	add	$10, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail11):
	add	$11, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail12):
	add	$12, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail13):
	add	$13, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail14):
	add	$14, %rax
	jmp	L(StartStrcpyPart)

	.p2align 4
L(exit_tail15):
	/* Continues with the code that follows this block.  */
	add	$15, %rax
348
/* Start of the copy.  %rax holds the offset of the destination's
   terminating NUL.  Set %rcx = source and %rdx = %rdi + %rax, the
   address the first source byte is written to.  */
349	.p2align 4
350L(StartStrcpyPart):
351	mov	%rsi, %rcx
352	lea	(%rdi, %rax), %rdx
353# ifdef USE_AS_STRNCAT
	/* %r8 is the length limit saved at entry.  Zero: nothing is
	   appended.  1..8: take the path that checks the limit before
	   each further source byte is read.  */
354	test	%r8, %r8
355	jz	L(StrncatExit0)
356	cmp	$8, %r8
357	jbe	L(StrncatExit8Bytes)
358# endif
	/* Look for the source's NUL in its first 16 bytes.  L(ExitN)
	   copies N bytes, the last of which is that NUL.  */
359	cmpb	$0, (%rcx)
360	jz	L(Exit1)
361	cmpb	$0, 1(%rcx)
362	jz	L(Exit2)
363	cmpb	$0, 2(%rcx)
364	jz	L(Exit3)
365	cmpb	$0, 3(%rcx)
366	jz	L(Exit4)
367	cmpb	$0, 4(%rcx)
368	jz	L(Exit5)
369	cmpb	$0, 5(%rcx)
370	jz	L(Exit6)
371	cmpb	$0, 6(%rcx)
372	jz	L(Exit7)
373	cmpb	$0, 7(%rcx)
374	jz	L(Exit8)
375	cmpb	$0, 8(%rcx)
376	jz	L(Exit9)
377# ifdef USE_AS_STRNCAT
	/* Source bytes 0..8 are non-NUL and the limit is at least 9
	   here.  For a limit of 9..15 continue on the bounded path.  */
378	cmp	$16, %r8
379	jb	L(StrncatExit15Bytes)
380# endif
381	cmpb	$0, 9(%rcx)
382	jz	L(Exit10)
383	cmpb	$0, 10(%rcx)
384	jz	L(Exit11)
385	cmpb	$0, 11(%rcx)
386	jz	L(Exit12)
387	cmpb	$0, 12(%rcx)
388	jz	L(Exit13)
389	cmpb	$0, 13(%rcx)
390	jz	L(Exit14)
391	cmpb	$0, 14(%rcx)
392	jz	L(Exit15)
393	cmpb	$0, 15(%rcx)
394	jz	L(Exit16)
395# ifdef USE_AS_STRNCAT
	/* No NUL in the first 16 source bytes and the limit is exactly
	   16: copy 16 bytes and append the terminator.  */
396	cmp	$16, %r8
397	je	L(StrncatExit16)
	/* USE_AS_STRNCPY enables the length-limited labels at the end of
	   this file; presumably it also selects the length-limited
	   variant of the included copy loop -- confirm in
	   strcpy-ssse3.S.  */
398#  define USE_AS_STRNCPY
399# endif
400
/* The copy loop for longer sources comes from strcpy-ssse3.S, which is
   not part of this file.  NOTE(review): presumably it branches to the
   L(CopyFrom1To16Bytes*) labels defined below -- verify there.  */
401# include "strcpy-ssse3.S"
402
/* Copy the final 1..16 bytes of the source.  Not referenced from the
   code visible in this file; presumably entered from the included
   strcpy-ssse3.S.  As used below: %rsi is an offset that is added to
   both the destination cursor %rdx and the source cursor %rcx, and
   %ax is a byte mask whose lowest set bit k selects L(Exit<k+1>),
   i.e. a copy of k + 1 bytes.  NOTE(review): the exact meaning of
   %rsi and %eax is set by the caller -- confirm in strcpy-ssse3.S.  */
403	.p2align 4
404L(CopyFrom1To16Bytes):
405	add	%rsi, %rdx
406	add	%rsi, %rcx
407
408	test	%al, %al
409	jz	L(ExitHigh)
410	test	$0x01, %al
411	jnz	L(Exit1)
412	test	$0x02, %al
413	jnz	L(Exit2)
414	test	$0x04, %al
415	jnz	L(Exit3)
416	test	$0x08, %al
417	jnz	L(Exit4)
418	test	$0x10, %al
419	jnz	L(Exit5)
420	test	$0x20, %al
421	jnz	L(Exit6)
422	test	$0x40, %al
423	jnz	L(Exit7)
	/* %al is non-zero with bits 0..6 clear, so bit 7 is set: copy
	   8 bytes.  */
424	movlpd	(%rcx), %xmm0
425	movlpd	%xmm0, (%rdx)
426	mov	%rdi, %rax
427	ret
428
429	.p2align 4
	/* Low mask byte is zero: decode bits 8..14 from %ah.  */
430L(ExitHigh):
431	test	$0x01, %ah
432	jnz	L(Exit9)
433	test	$0x02, %ah
434	jnz	L(Exit10)
435	test	$0x04, %ah
436	jnz	L(Exit11)
437	test	$0x08, %ah
438	jnz	L(Exit12)
439	test	$0x10, %ah
440	jnz	L(Exit13)
441	test	$0x20, %ah
442	jnz	L(Exit14)
443	test	$0x40, %ah
444	jnz	L(Exit15)
	/* None of bits 0..14 is set: copy all 16 bytes.  */
445	movlpd	(%rcx), %xmm0
446	movlpd	8(%rcx), %xmm1
447	movlpd	%xmm0, (%rdx)
448	movlpd	%xmm1, 8(%rdx)
449	mov	%rdi, %rax
450	ret
451
/* Tail copies.  At every label below %rcx is the source cursor, %rdx
   the destination cursor and %rdi the value handed back in %rax.

   L(ExitN) copies exactly N bytes from (%rcx) to (%rdx) and returns.
   L(StrncatExitN) first stores a zero byte at offset N of the
   destination and then runs into L(ExitN).

   Sizes that are not a power of two are copied with two moves; where
   noted the second move overlaps the first.  %rax and %xmm0/%xmm1 are
   used as scratch.  */

	.p2align 4
L(StrncatExit1):
	movb	$0, 1(%rdx)
L(Exit1):
	movzbl	(%rcx), %eax
	movb	%al, (%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit2):
	movb	$0, 2(%rdx)
L(Exit2):
	movzwl	(%rcx), %eax
	movw	%ax, (%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit3):
	movb	$0, 3(%rdx)
L(Exit3):
	/* 2 + 1 bytes.  */
	movzwl	(%rcx), %eax
	movw	%ax, (%rdx)
	movzbl	2(%rcx), %eax
	movb	%al, 2(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit4):
	movb	$0, 4(%rdx)
L(Exit4):
	movl	(%rcx), %eax
	movl	%eax, (%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit5):
	movb	$0, 5(%rdx)
L(Exit5):
	/* 4 + 1 bytes.  */
	movl	(%rcx), %eax
	movl	%eax, (%rdx)
	movzbl	4(%rcx), %eax
	movb	%al, 4(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit6):
	movb	$0, 6(%rdx)
L(Exit6):
	/* 4 + 2 bytes.  */
	movl	(%rcx), %eax
	movl	%eax, (%rdx)
	movzwl	4(%rcx), %eax
	movw	%ax, 4(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit7):
	movb	$0, 7(%rdx)
L(Exit7):
	/* Bytes 0..3, then bytes 3..6 (one byte of overlap).  */
	movl	(%rcx), %eax
	movl	%eax, (%rdx)
	movl	3(%rcx), %eax
	movl	%eax, 3(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit8):
	movb	$0, 8(%rdx)
L(Exit8):
	movq	(%rcx), %xmm0
	movq	%xmm0, (%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit9):
	movb	$0, 9(%rdx)
L(Exit9):
	/* 8 + 1 bytes.  */
	movq	(%rcx), %xmm0
	movq	%xmm0, (%rdx)
	movzbl	8(%rcx), %eax
	movb	%al, 8(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit10):
	movb	$0, 10(%rdx)
L(Exit10):
	/* 8 + 2 bytes.  */
	movq	(%rcx), %xmm0
	movq	%xmm0, (%rdx)
	movzwl	8(%rcx), %eax
	movw	%ax, 8(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit11):
	movb	$0, 11(%rdx)
L(Exit11):
	/* Bytes 0..7, then bytes 7..10 (one byte of overlap).  */
	movq	(%rcx), %xmm0
	movq	%xmm0, (%rdx)
	movl	7(%rcx), %eax
	movl	%eax, 7(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit12):
	movb	$0, 12(%rdx)
L(Exit12):
	/* 8 + 4 bytes.  */
	movq	(%rcx), %xmm0
	movq	%xmm0, (%rdx)
	movl	8(%rcx), %eax
	movl	%eax, 8(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit13):
	movb	$0, 13(%rdx)
L(Exit13):
	/* Bytes 0..7, then bytes 5..12 (three bytes of overlap).  */
	movq	(%rcx), %xmm0
	movq	%xmm0, (%rdx)
	movq	5(%rcx), %xmm1
	movq	%xmm1, 5(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit14):
	movb	$0, 14(%rdx)
L(Exit14):
	/* Bytes 0..7, then bytes 6..13 (two bytes of overlap).  */
	movq	(%rcx), %xmm0
	movq	%xmm0, (%rdx)
	movq	6(%rcx), %xmm1
	movq	%xmm1, 6(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit15):
	movb	$0, 15(%rdx)
L(Exit15):
	/* Bytes 0..7, then bytes 7..14 (one byte of overlap).  */
	movq	(%rcx), %xmm0
	movq	%xmm0, (%rdx)
	movq	7(%rcx), %xmm1
	movq	%xmm1, 7(%rdx)
	movq	%rdi, %rax
	ret

	.p2align 4
L(StrncatExit16):
	movb	$0, 16(%rdx)
L(Exit16):
	/* Both halves are loaded before either is stored.  */
	movq	(%rcx), %xmm0
	movq	8(%rcx), %xmm1
	movq	%xmm0, (%rdx)
	movq	%xmm1, 8(%rdx)
	movq	%rdi, %rax
	ret
635
636# ifdef USE_AS_STRNCPY
637
/* Length-limited tail where the mask in %eax has a NUL bit set (the
   only reference in this file, L(CopyFrom1To16BytesCase2OrCase3),
   jumps here when %rax is non-zero).  After the `add $16' %r8 is
   compared against 1..15 as the number of bytes that may still be
   copied.  NOTE(review): the -16 bias of %r8 and the meaning of %rsi
   on entry come from code outside this file -- confirm in
   strcpy-ssse3.S.  */
638	.p2align 4
639L(CopyFrom1To16BytesCase2):
640	add	$16, %r8
641	add	%rsi, %rcx
	/* Park the destination cursor (%rsi + %rdx) in %rsi so that %rdx
	   can be used as scratch for the flag computation.  */
642	lea	(%rsi, %rdx), %rsi
	/* %dh = bit 15 of (%r8 - 9), which is set when %r8 < 9 for the
	   small limits handled here, OR-ed with the low mask byte.  %dh
	   is therefore zero only if the NUL is in bytes 8..15 and the
	   limit is at least 9.  The lea that restores %rdx leaves the
	   flags of the test untouched.  */
643	lea	-9(%r8), %rdx
644	and	$1<<7, %dh
645	or	%al, %dh
646	test	%dh, %dh
647	lea	(%rsi), %rdx
648	jz	L(ExitHighCase2)
649
	/* For k = 1..7 in turn: a NUL at byte k - 1 ends the copy via
	   L(Exitk); otherwise a limit of exactly k ends it via
	   L(StrncatExitk), which also stores the terminator.  */
650	test	$0x01, %al
651	jnz	L(Exit1)
652	cmp	$1, %r8
653	je	L(StrncatExit1)
654	test	$0x02, %al
655	jnz	L(Exit2)
656	cmp	$2, %r8
657	je	L(StrncatExit2)
658	test	$0x04, %al
659	jnz	L(Exit3)
660	cmp	$3, %r8
661	je	L(StrncatExit3)
662	test	$0x08, %al
663	jnz	L(Exit4)
664	cmp	$4, %r8
665	je	L(StrncatExit4)
666	test	$0x10, %al
667	jnz	L(Exit5)
668	cmp	$5, %r8
669	je	L(StrncatExit5)
670	test	$0x20, %al
671	jnz	L(Exit6)
672	cmp	$6, %r8
673	je	L(StrncatExit6)
674	test	$0x40, %al
675	jnz	L(Exit7)
676	cmp	$7, %r8
677	je	L(StrncatExit7)
	/* Copy 8 bytes, then store a zero at offset 7 if the copied byte
	   there is already NUL, else at offset 8: cmpb $1 sets the carry
	   flag only for a zero byte and sbb $-1 adds 1 minus that
	   carry.  */
678	movlpd	(%rcx), %xmm0
679	movlpd	%xmm0, (%rdx)
680	lea	7(%rdx), %rax
681	cmpb	$1, (%rax)
682	sbb	$-1, %rax
683	xor	%cl, %cl
684	movb	%cl, (%rax)
685	mov	%rdi, %rax
686	ret
687
688	.p2align 4
	/* Same scheme for k = 9..15, using mask bits 8..14 in %ah.  */
689L(ExitHighCase2):
690	test	$0x01, %ah
691	jnz	L(Exit9)
692	cmp	$9, %r8
693	je	L(StrncatExit9)
694	test	$0x02, %ah
695	jnz	L(Exit10)
696	cmp	$10, %r8
697	je	L(StrncatExit10)
698	test	$0x04, %ah
699	jnz	L(Exit11)
700	cmp	$11, %r8
701	je	L(StrncatExit11)
702	test	$0x8, %ah
703	jnz	L(Exit12)
704	cmp	$12, %r8
705	je	L(StrncatExit12)
706	test	$0x10, %ah
707	jnz	L(Exit13)
708	cmp	$13, %r8
709	je	L(StrncatExit13)
710	test	$0x20, %ah
711	jnz	L(Exit14)
712	cmp	$14, %r8
713	je	L(StrncatExit14)
714	test	$0x40, %ah
715	jnz	L(Exit15)
716	cmp	$15, %r8
717	je	L(StrncatExit15)
	/* Copy all 16 bytes.  No separate terminator is stored on this
	   path, so it relies on source byte 15 being the NUL.
	   NOTE(review): holds if every caller enters Case2 with a
	   non-zero mask, as the one visible caller does -- confirm for
	   callers in strcpy-ssse3.S.  */
718	movlpd	(%rcx), %xmm0
719	movlpd	%xmm0, (%rdx)
720	movlpd	8(%rcx), %xmm1
721	movlpd	%xmm1, 8(%rdx)
722	mov	%rdi, %rax
723	ret
724
/* Dispatch on the mask in %rax: non-zero goes to Case2 (handles both
   a NUL and the length limit); zero continues with Case3 below, which
   is driven by the length limit alone.  */
725L(CopyFrom1To16BytesCase2OrCase3):
726	test	%rax, %rax
727	jnz	L(CopyFrom1To16BytesCase2)
728
729	.p2align 4
	/* Case3: only %r8 decides how many bytes are copied; the mask in
	   %eax is not consulted.  %rsi is added to both cursors.
	   NOTE(review): the -16 bias undone by `add $16, %r8' is
	   established outside this file -- confirm in strcpy-ssse3.S.  */
730L(CopyFrom1To16BytesCase3):
731	add	$16, %r8
732	add	%rsi, %rdx
733	add	%rsi, %rcx
734
735	cmp	$8, %r8
736	ja	L(ExitHighCase3)
737	cmp	$1, %r8
738	je	L(StrncatExit1)
739	cmp	$2, %r8
740	je	L(StrncatExit2)
741	cmp	$3, %r8
742	je	L(StrncatExit3)
743	cmp	$4, %r8
744	je	L(StrncatExit4)
745	cmp	$5, %r8
746	je	L(StrncatExit5)
747	cmp	$6, %r8
748	je	L(StrncatExit6)
749	cmp	$7, %r8
750	je	L(StrncatExit7)
	/* Remaining value not above 8, i.e. 8: copy 8 bytes and store
	   the terminator at offset 8.  (A value of 0 would also end up
	   here; presumably callers never pass it -- verify.)  */
751	movlpd	(%rcx), %xmm0
752	movlpd	%xmm0, (%rdx)
753	xor	%ah, %ah
754	movb	%ah, 8(%rdx)
755	mov	%rdi, %rax
756	ret
757
758	.p2align 4
	/* Limit above 8: 9..15 go to the matching L(StrncatExitN).  */
759L(ExitHighCase3):
760	cmp	$9, %r8
761	je	L(StrncatExit9)
762	cmp	$10, %r8
763	je	L(StrncatExit10)
764	cmp	$11, %r8
765	je	L(StrncatExit11)
766	cmp	$12, %r8
767	je	L(StrncatExit12)
768	cmp	$13, %r8
769	je	L(StrncatExit13)
770	cmp	$14, %r8
771	je	L(StrncatExit14)
772	cmp	$15, %r8
773	je	L(StrncatExit15)
	/* Anything else is treated as 16: copy 16 bytes and store the
	   terminator at offset 16.  */
774	movlpd	(%rcx), %xmm0
775	movlpd	%xmm0, (%rdx)
776	movlpd	8(%rcx), %xmm1
777	movlpd	%xmm1, 8(%rdx)
778	xor	%ah, %ah
779	movb	%ah, 16(%rdx)
780	mov	%rdi, %rax
781	ret
782
/* Length limit of zero (reached from L(StartStrcpyPart) when %r8 is 0):
   nothing is written, the destination pointer is returned as is.  */
783	.p2align 4
784L(StrncatExit0):
785	mov	%rdi, %rax
786	ret
787
/* Bounded tail for a limit of 9..15.  Reached from L(StartStrcpyPart)
   after source bytes 0..8 were found to be non-NUL and %r8 was found
   to be greater than 8 and below 16.  %rcx = source, %rdx =
   destination.  The limit is always tested before the next source byte
   is read, so no byte at or beyond index %r8 is examined.  */
788	.p2align 4
789L(StrncatExit15Bytes):
	/* For k = 9..14: a limit of exactly k ends the copy via
	   L(StrncatExitk); otherwise a NUL at byte k ends it via
	   L(Exit<k+1>).  */
790	cmp	$9, %r8
791	je	L(StrncatExit9)
792	cmpb	$0, 9(%rcx)
793	jz	L(Exit10)
794	cmp	$10, %r8
795	je	L(StrncatExit10)
796	cmpb	$0, 10(%rcx)
797	jz	L(Exit11)
798	cmp	$11, %r8
799	je	L(StrncatExit11)
800	cmpb	$0, 11(%rcx)
801	jz	L(Exit12)
802	cmp	$12, %r8
803	je	L(StrncatExit12)
804	cmpb	$0, 12(%rcx)
805	jz	L(Exit13)
806	cmp	$13, %r8
807	je	L(StrncatExit13)
808	cmpb	$0, 13(%rcx)
809	jz	L(Exit14)
810	cmp	$14, %r8
811	je	L(StrncatExit14)
	/* Limit is 15 and bytes 0..13 are non-NUL: copy 15 bytes with
	   two overlapping 8-byte moves.  Then store a zero at offset 14
	   if the byte copied there is already NUL, else at offset 15
	   (cmpb $1 sets the carry flag only for a zero byte; sbb $-1
	   adds 1 minus that carry).  */
812	movlpd	(%rcx), %xmm0
813	movlpd	%xmm0, (%rdx)
814	movlpd	7(%rcx), %xmm1
815	movlpd	%xmm1, 7(%rdx)
816	lea	14(%rdx), %rax
817	cmpb	$1, (%rax)
818	sbb	$-1, %rax
819	xor	%cl, %cl
820	movb	%cl, (%rax)
821	mov	%rdi, %rax
822	ret
823
/* Bounded tail for a limit of 1..8.  Reached from L(StartStrcpyPart)
   when %r8 is non-zero and not above 8, before any source byte has been
   examined.  %rcx = source, %rdx = destination.  Source byte k is only
   read after the limit is known to exceed k.  */
824	.p2align 4
825L(StrncatExit8Bytes):
	/* For k = 1..7: a NUL at byte k - 1 ends the copy via L(Exitk);
	   otherwise a limit of exactly k ends it via L(StrncatExitk).  */
826	cmpb	$0, (%rcx)
827	jz	L(Exit1)
828	cmp	$1, %r8
829	je	L(StrncatExit1)
830	cmpb	$0, 1(%rcx)
831	jz	L(Exit2)
832	cmp	$2, %r8
833	je	L(StrncatExit2)
834	cmpb	$0, 2(%rcx)
835	jz	L(Exit3)
836	cmp	$3, %r8
837	je	L(StrncatExit3)
838	cmpb	$0, 3(%rcx)
839	jz	L(Exit4)
840	cmp	$4, %r8
841	je	L(StrncatExit4)
842	cmpb	$0, 4(%rcx)
843	jz	L(Exit5)
844	cmp	$5, %r8
845	je	L(StrncatExit5)
846	cmpb	$0, 5(%rcx)
847	jz	L(Exit6)
848	cmp	$6, %r8
849	je	L(StrncatExit6)
850	cmpb	$0, 6(%rcx)
851	jz	L(Exit7)
852	cmp	$7, %r8
853	je	L(StrncatExit7)
	/* Limit is 8 and bytes 0..6 are non-NUL: copy 8 bytes, then
	   store a zero at offset 7 if the byte copied there is already
	   NUL, else at offset 8 (cmpb $1 sets the carry flag only for a
	   zero byte; sbb $-1 adds 1 minus that carry).  */
854	movlpd	(%rcx), %xmm0
855	movlpd	%xmm0, (%rdx)
856	lea	7(%rdx), %rax
857	cmpb	$1, (%rax)
858	sbb	$-1, %rax
859	xor	%cl, %cl
860	movb	%cl, (%rax)
861	mov	%rdi, %rax
862	ret
863
864# endif
865END (STRCAT)
866#endif
867