 # Alpha EV5 __mpn_lshift --

 # Copyright (C) 1994-2021 Free Software Foundation, Inc.

 # This file is part of the GNU MP Library.

 # The GNU MP Library is free software; you can redistribute it and/or modify
 # it under the terms of the GNU Lesser General Public License as published by
 # the Free Software Foundation; either version 2.1 of the License, or (at your
 # option) any later version.

 # The GNU MP Library is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
 # or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public
 # License for more details.

 # You should have received a copy of the GNU Lesser General Public License
 # along with the GNU MP Library. If not, see <https://www.gnu.org/licenses/>.


 # INPUT PARAMETERS
 # res_ptr	r16
 # s1_ptr	r17
 # size		r18
 # cnt		r19

 # This code runs at 3.25 cycles/limb on the EV5.

 # OVERVIEW
 # Shifts the size-limb number at s1_ptr (one limb = one 64-bit quadword)
 # left by cnt bits, stores size result limbs at res_ptr and leaves in r0
 # the bits shifted out of the top limb.
 #
 # Notation used in the comments below, for a source limb x:
 #   L(x) = x shifted left  by cnt
 #   R(x) = x shifted right by (64 - cnt)
 # Every stored limb is L(s1[i]) | R(s1[i-1]); the lowest stored limb is
 # just L(s1[0]).  r0 receives R(s1[size-1]).
 #
 # Limbs are walked from the highest address downwards:
 #   1. .Loop0 produces (size-1) mod 4 result limbs, one per iteration.
 #   2. The remaining (size-1) - ((size-1) mod 4) limbs are handled four
 #      at a time by a software-pipelined loop (.Loop) with warm-up and
 #      cool-down code.  .Lend, .Lend1 and .Lend2 are the exits taken
 #      when 0, 4 or 8 limbs are left after step 1.
 #
 # REGISTER USE
 #   r0        function result
 #   r1-r4     source limbs in flight (r4/r3 also serve .Loop0)
 #   r5-r8     R() parts, then the completed result limbs
 #   r20       0 - cnt, the count operand for every srl
 #   r21-r24   L() parts; r24 is carried over into the next group and
 #             finally stored as the lowest limb
 #   r28       trip count of .Loop0
 #
 # NOTE(review): nothing here checks the arguments.  The code looks like
 # it expects size >= 1 (the first load reads s1_ptr[size-1]) and
 # 0 < cnt < 64 (with cnt == 0 the srl count would be 0, so R(x) would be
 # x itself) -- confirm against the callers' contract.

	.set	noreorder
	.set	noat
.text
	.align	3
	.globl	__mpn_lshift
	.ent	__mpn_lshift
__mpn_lshift:
	.frame	$30,0,$26,0

	s8addq	$18,$17,$17	# make r17 point at end of s1
	ldq	$4,-8($17)	# load first limb (the one at the highest address)
	subq	$31,$19,$20	# r20 = 0 - cnt; srl uses the low 6 bits, i.e. 64 - cnt
	s8addq	$18,$16,$16	# make r16 point at end of RES
	subq	$18,1,$18	# r18 = size - 1: limbs still to be loaded
	and	$18,4-1,$28	# number of limbs in first loop
	srl	$4,$20,$0	# compute function result

	beq	$28,.L0		# (size-1) is a multiple of 4: skip the first loop
	subq	$18,$28,$18	# r18 = limbs left for the 4-way code, a multiple of 4

	.align	3
 # First loop: one result limb per iteration.
 # On entry to each iteration r4 = current limb, already loaded.
.Loop0:	ldq	$3,-16($17)	# r3 = limb just below the current one
	subq	$16,8,$16	# move result pointer down one limb
	sll	$4,$19,$5	# r5 = L(current)
	subq	$17,8,$17	# move source pointer down one limb
	subq	$28,1,$28	# one fewer iteration to go
	srl	$3,$20,$6	# r6 = R(lower)
	or	$3,$3,$4	# r4 = r3: the lower limb becomes the current one
	or	$5,$6,$8	# r8 = L(current) | R(lower)
	stq	$8,0($16)	# store the finished result limb
	bne	$28,.Loop0

.L0:	sll	$4,$19,$24	# r24 = L(current), still waiting for its R() partner
	beq	$18,.Lend	# no limbs left to load: r24 is the lowest result limb
 # warm up phase 1
 # Load the next four source limbs into r1..r4.
	ldq	$1,-16($17)
	subq	$18,4,$18	# account for the four limbs being loaded
	ldq	$2,-24($17)
	ldq	$3,-32($17)
	ldq	$4,-40($17)
	beq	$18,.Lend1	# those were the last four limbs
 # warm up phase 2
 # Shift the four limbs just loaded while loading the following four.
	srl	$1,$20,$7
	sll	$1,$19,$21
	srl	$2,$20,$8
	ldq	$1,-48($17)
	sll	$2,$19,$22
	ldq	$2,-56($17)
	srl	$3,$20,$5
	or	$7,$24,$7	# first result limb of the group: carried L() | R(r1)
	sll	$3,$19,$23
	or	$8,$21,$8	# second result limb of the group
	srl	$4,$20,$6
	ldq	$3,-64($17)
	sll	$4,$19,$24	# new carry for the next group
	ldq	$4,-72($17)
	subq	$18,4,$18	# account for the second batch of four loads
	beq	$18,.Lend2	# nothing left to load: drain the pipeline
	.align	4
 # main loop
 # Per iteration: store the four result limbs prepared by the previous
 # step, shift the four limbs held in r1..r4, and load four more.
 # r16 is lowered after the four stores and r17 after the four loads, so
 # all offsets are relative to the values the pointers had on entry.
 # NOTE(review): the instructions are laid out in groups of four,
 # presumably to match EV5 issue groups -- keep order and padding as is.
.Loop:	stq	$7,-8($16)
	or	$5,$22,$5	# third result limb of the previous group
	stq	$8,-16($16)
	or	$6,$23,$6	# fourth result limb of the previous group

	srl	$1,$20,$7
	subq	$18,4,$18	# four more limbs are loaded in this iteration
	sll	$1,$19,$21
	unop	# ldq	$31,-96($17)

	srl	$2,$20,$8
	ldq	$1,-80($17)
	sll	$2,$19,$22
	ldq	$2,-88($17)

	stq	$5,-24($16)
	or	$7,$24,$7	# first result limb of this group, uses the carry
	stq	$6,-32($16)
	or	$8,$21,$8	# second result limb of this group

	srl	$3,$20,$5
	unop	# ldq	$31,-96($17)
	sll	$3,$19,$23
	subq	$16,32,$16	# four limbs stored: lower the result pointer

	srl	$4,$20,$6
	ldq	$3,-96($17)
	sll	$4,$19,$24	# new carry for the next group
	ldq	$4,-104($17)

	subq	$17,32,$17	# four limbs loaded: lower the source pointer
	bne	$18,.Loop
 # cool down phase 2/1
 # No more loads: store the prepared group and shift the last four limbs
 # (still in r1..r4).
.Lend2:	stq	$7,-8($16)
	or	$5,$22,$5
	stq	$8,-16($16)
	or	$6,$23,$6
	srl	$1,$20,$7
	sll	$1,$19,$21
	srl	$2,$20,$8
	sll	$2,$19,$22
	stq	$5,-24($16)
	or	$7,$24,$7
	stq	$6,-32($16)
	or	$8,$21,$8
	srl	$3,$20,$5
	sll	$3,$19,$23
	srl	$4,$20,$6
	sll	$4,$19,$24
 # cool down phase 2/2
 # Store the final group of four result limbs.
	stq	$7,-40($16)
	or	$5,$22,$5
	stq	$8,-48($16)
	or	$6,$23,$6
	stq	$5,-56($16)
	stq	$6,-64($16)
 # cool down phase 2/3
	stq	$24,-72($16)	# lowest result limb: L() of the last source limb
	ret	$31,($26),1

 # cool down phase 1/1
 # Reached when only the four limbs loaded in warm up phase 1 remain:
 # shift them and combine each with the L() part of the limb above it.
.Lend1:	srl	$1,$20,$7
	sll	$1,$19,$21
	srl	$2,$20,$8
	sll	$2,$19,$22
	srl	$3,$20,$5
	or	$7,$24,$7
	sll	$3,$19,$23
	or	$8,$21,$8
	srl	$4,$20,$6
	sll	$4,$19,$24
 # cool down phase 1/2
 # Store the four combined limbs, then the lowest limb.
	stq	$7,-8($16)
	or	$5,$22,$5
	stq	$8,-16($16)
	or	$6,$23,$6
	stq	$5,-24($16)
	stq	$6,-32($16)
	stq	$24,-40($16)	# lowest result limb: L() of the last source limb
	ret	$31,($26),1

 # Reached when the first loop consumed every limb except the one in r4.
.Lend:	stq	$24,-8($16)	# lowest result limb: L() of the last source limb
	ret	$31,($26),1
	.end	__mpn_lshift