# Alpha 21064 __mpn_submul_1 -- Multiply a limb vector with a limb and
# subtract the result from a second limb vector.

# Copyright (C) 1992-2021 Free Software Foundation, Inc.

# This file is part of the GNU MP Library.

# The GNU MP Library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Lesser General Public License as published by
# the Free Software Foundation; either version 2.1 of the License, or (at your
# option) any later version.

# The GNU MP Library is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
# or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
# License for more details.

# You should have received a copy of the GNU Lesser General Public License
# along with the GNU MP Library. If not, see <https://www.gnu.org/licenses/>.


# INPUT PARAMETERS
# res_ptr	r16
# s1_ptr	r17
# size		r18
# s2_limb	r19

# This code runs at 42 cycles/limb on EV4 and 18 cycles/limb on EV5.
# __mpn_submul_1
#
# For each of the `size` 64-bit limbs, compute s1_ptr[i] * s2_limb, add the
# carry limb coming from the previous position, subtract that from
# res_ptr[i] and store the difference back to res_ptr[i].  The final carry
# limb (last high product word plus outstanding carry/borrow bits) is left
# in $0 when the routine returns.
#
# Register use on entry:
#   $16  res_ptr   vector that is read, reduced and written back
#   $17  s1_ptr    vector that is multiplied
#   $18  size      number of limbs; it is decremented before it is first
#                  tested, so a size of 0 is not handled by this routine
#   $19  s2_limb   single-limb multiplier
#
# Scratch registers: $2 (current s1 limb), $3 (low product / difference),
# $4 (high product, or the borrow of the very first limb), $5 (res limb /
# borrow bit), $0 (running carry and return value).  No stack is touched:
# the frame size declared below is 0 and the return goes through $26.
#
# The instruction order is the original hand schedule: the load of the
# next s1 limb is issued one position ahead of its multiply.

	.set	noreorder
	.set	noat
.text
	.align	3
	.globl	__mpn_submul_1
	.ent	__mpn_submul_1 2
__mpn_submul_1:
	.frame	$30,0,$26

	# ---- limb 0: start the product, then peel the first subtract ----
	ldq	$2,0($17)		# $2 = s1_ptr[0]
	addq	$17,8,$17		# advance s1_ptr
	subq	$18,1,$18		# one limb accounted for
	mulq	$2,$19,$3		# $3 = low 64 bits of the product
	ldq	$5,0($16)		# $5 = res_ptr[0]
	umulh	$2,$19,$0		# $0 = high 64 bits of the product
	beq	$18,.Lone		# only a single limb requested
	ldq	$2,0($17)		# prefetch s1 limb for position 1
	addq	$17,8,$17		# advance s1_ptr
	subq	$18,1,$18		# second limb accounted for
	subq	$5,$3,$3		# $3 = res limb - low product
	cmpult	$5,$3,$4		# $4 = 1 when that subtract borrowed
	stq	$3,0($16)		# write the difference back
	addq	$16,8,$16		# advance res_ptr
	beq	$18,.Llast		# exactly two limbs: skip the loop

	# ---- middle limbs: $2 already holds the limb to multiply ----
	.align	3
.Lmid:	mulq	$2,$19,$3		# $3 = low 64 bits of the product
	ldq	$5,0($16)		# $5 = current res limb
	addq	$4,$0,$0		# $0 = carry limb entering this position
	subq	$18,1,$18		# count this limb
	umulh	$2,$19,$4		# $4 = high 64 bits of the product
	ldq	$2,0($17)		# prefetch the following s1 limb
	addq	$17,8,$17		# advance s1_ptr
	addq	$3,$0,$3		# $3 = low product + incoming carry limb
	cmpult	$3,$0,$0		# $0 = 1 when that addition wrapped
	subq	$5,$3,$3		# $3 = res limb - (low product + carry)
	cmpult	$5,$3,$5		# $5 = 1 when the subtract borrowed
	stq	$3,0($16)		# write the difference back
	addq	$16,8,$16		# advance res_ptr
	addq	$5,$0,$0		# $0 = wrap bit + borrow bit
	bne	$18,.Lmid		# more limbs remain

	# ---- final limb: same step as above without a further prefetch ----
.Llast:	mulq	$2,$19,$3		# $3 = low 64 bits of the product
	ldq	$5,0($16)		# $5 = last res limb
	addq	$4,$0,$0		# $0 = carry limb entering this position
	umulh	$2,$19,$4		# $4 = high 64 bits of the product
	addq	$3,$0,$3		# $3 = low product + incoming carry limb
	cmpult	$3,$0,$0		# $0 = 1 when that addition wrapped
	subq	$5,$3,$3		# $3 = res limb - (low product + carry)
	cmpult	$5,$3,$5		# $5 = 1 when the subtract borrowed
	stq	$3,0($16)		# write the difference back
	addq	$5,$0,$0		# $0 = wrap bit + borrow bit
	addq	$4,$0,$0		# result = high product + those bits
	ret	$31,($26),1

	# ---- single-limb case: product is already in $3 / $0 ----
.Lone:	subq	$5,$3,$3		# $3 = res limb - low product
	cmpult	$5,$3,$5		# $5 = 1 when the subtract borrowed
	stq	$3,0($16)		# write the difference back
	addq	$0,$5,$0		# result = high product + borrow
	ret	$31,($26),1

	.end	__mpn_submul_1