/* x86-64 __mpn_addmul_1 -- Multiply a limb vector with a limb and add
   the result to a second limb vector.
   Copyright (C) 2003-2021 Free Software Foundation, Inc.
   This file is part of the GNU MP Library.

   The GNU MP Library is free software; you can redistribute it and/or modify
   it under the terms of the GNU Lesser General Public License as published by
   the Free Software Foundation; either version 2.1 of the License, or (at your
   option) any later version.

   The GNU MP Library is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public
   License for more details.

   You should have received a copy of the GNU Lesser General Public License
   along with the GNU MP Library; see the file COPYING.LIB.  If not,
   see <https://www.gnu.org/licenses/>.  */
19
#include "sysdep.h"
#include "asm-syntax.h"

/* Aliases for the System V AMD64 argument registers.  */
#define rp	%rdi		/* mp_ptr    rp: destination limb vector */
#define up	%rsi		/* mp_srcptr up: source limb vector */
#define n	%rdx		/* mp_size_t n:  number of limbs */
#define v0	%rcx		/* mp_limb_t v0: multiplier limb */

/* By default this file builds __mpn_addmul_1 with ADDSUB = add.
   Presumably a sibling file predefines func/ADDSUB before including
   this one to build the subtracting variant — the #ifndef leaves
   those overrides in effect.  */
#ifndef func
# define func __mpn_addmul_1
# define ADDSUB add
#endif
32
	.text
/* mp_limb_t func (mp_ptr rp, mp_srcptr up, mp_size_t n, mp_limb_t v0)

   rp[i] = rp[i] ADDSUB (up[i] * v0) for i in [0, n), propagating the
   carry (borrow, in the subtracting variant) between limbs.  Returns
   the leftover high limb: the high half of the last product plus the
   final carry.

   In:    rdi = rp, rsi = up, rdx = n, rcx = v0.
          n is assumed to be >= 1; no n == 0 check is made.
   Out:   rax = carry limb.
   Saves: rbx, rbp (pushed/popped); clobbers r8-r11, rdx, flags.

   The core loop is unrolled two ways and software-pipelined: each
   half starts the mul for a limb ahead while folding an already
   computed product into the current rp limb.  rbx holds a negative
   limb index counting up to zero; up and rp are biased upward so
   (up,%rbx,8)-style operands walk forward through the vectors (rp
   gets an extra -16 because rbx is pre-advanced by two limbs relative
   to the limb being stored).  Product halves alternate between the
   register pairs r8/r9 and r11/rbp; r10 is the destination limb
   currently being combined.  mul writes rdx:rax, so n (in rdx) is
   consumed before the first mul.  */
ENTRY (func)
	push	%rbx
	push	%rbp
	lea	(%rdx), %rbx		/* rbx = n */
	neg	%rbx			/* rbx = -n */

	mov	(up), %rax		/* rax = up[0] */
	mov	(rp), %r10		/* r10 = rp[0] */

	lea	-16(rp,%rdx,8), rp	/* rp += 8*n - 16 (bias, see above) */
	lea	(up,%rdx,8), up		/* up += 8*n */
	mul	%rcx			/* rdx:rax = up[0] * v0; n now dead */

	bt	$0, %ebx		/* CF = low bit of -n = parity of n */
	jc	L(odd)

	/* n even: set up a second product, then enter the loop at
	   L(mid).  */
	lea	(%rax), %r11		/* r11 = low (up[0]*v0) */
	mov	8(up,%rbx,8), %rax	/* rax = up[1] */
	lea	(%rdx), %rbp		/* rbp = high (up[0]*v0) */
	mul	%rcx			/* rdx:rax = up[1] * v0 */
	add	$2, %rbx
	jns	L(n2)			/* n == 2: skip the loop entirely */

	lea	(%rax), %r8		/* r8 = low (up[1]*v0) */
	mov	(up,%rbx,8), %rax	/* rax = up[2] */
	lea	(%rdx), %r9		/* r9 = high (up[1]*v0) */
	jmp	L(mid)

	/* n odd: prime both product pairs, then enter at L(e).  */
L(odd):	add	$1, %rbx
	jns	L(n1)			/* n == 1: single limb, no loop */

	lea	(%rax), %r8		/* r8 = low (up[0]*v0) */
	mov	(up,%rbx,8), %rax	/* rax = up[1] */
	lea	(%rdx), %r9		/* r9 = high (up[0]*v0) */
	mul	%rcx			/* rdx:rax = up[1] * v0 */
	lea	(%rax), %r11		/* r11 = low (up[1]*v0) */
	mov	8(up,%rbx,8), %rax	/* rax = up[2] */
	lea	(%rdx), %rbp		/* rbp = high (up[1]*v0) */
	jmp	L(e)

	.p2align 4
/* Two-limb unrolled core.  Each half: start the mul for a limb two
   ahead, fold the previously computed product pair into the current
   rp limb via an adc carry chain, store it, and fetch the next rp
   limb.  Moves use lea/mov because both preserve the carry flag,
   which stays live across most of each half.  */
L(top):	mul	%rcx
	ADDSUB	%r8, %r10
	lea	(%rax), %r8
	mov	(up,%rbx,8), %rax
	adc	%r9, %r11
	mov	%r10, -8(rp,%rbx,8)
	mov	(rp,%rbx,8), %r10
	lea	(%rdx), %r9
	adc	$0, %rbp
L(mid):	mul	%rcx
	ADDSUB	%r11, %r10
	lea	(%rax), %r11
	mov	8(up,%rbx,8), %rax
	adc	%rbp, %r8
	mov	%r10, (rp,%rbx,8)
	mov	8(rp,%rbx,8), %r10
	lea	(%rdx), %rbp
	adc	$0, %r9
L(e):	add	$2, %rbx
	js	L(top)

	/* Wind-down: rbx == 0 here.  Finish the last one or two limbs
	   from the still-pending product pairs.  */
	mul	%rcx
	ADDSUB	%r8, %r10
	adc	%r9, %r11
	mov	%r10, -8(rp)		/* store rp[n-2] */
	adc	$0, %rbp
L(n2):	mov	(rp), %r10
	ADDSUB	%r11, %r10
	adc	%rbp, %rax
	mov	%r10, (rp)		/* store rp[n-1] */
	adc	$0, %rdx
L(n1):	mov	8(rp), %r10
	ADDSUB	%rax, %r10
	mov	%r10, 8(rp)		/* store rp[n-1] (n odd entry: rp[0]) */
	mov	%ebx, %eax	/* zero rax (rbx == 0); mov preserves CF */
	adc	%rdx, %rax	/* rax = high limb + final carry */
	pop	%rbp
	pop	%rbx
	ret
END (func)
115