/* Pentium optimized __mpn_lshift --
   Copyright (C) 1992-2021 Free Software Foundation, Inc.
   This file is part of the GNU C Library.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
   License as published by the Free Software Foundation; either
   version 2.1 of the License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Lesser General Public License for more details.

   You should have received a copy of the GNU Lesser General Public
   License along with the GNU C Library; if not, see
   <https://www.gnu.org/licenses/>.  */

#include "sysdep.h"
#include "asm-syntax.h"

#define PARMS	4+16		/* space for 4 saved regs */
#define RES	PARMS
#define S	RES+4
#define SIZE	S+4
#define CNT	SIZE+4

	.text
ENTRY (__mpn_lshift)

	pushl	%edi
	cfi_adjust_cfa_offset (4)
	pushl	%esi
	cfi_adjust_cfa_offset (4)
	pushl	%ebp
	cfi_adjust_cfa_offset (4)
	cfi_rel_offset (ebp, 0)
	pushl	%ebx
	cfi_adjust_cfa_offset (4)

	movl	RES(%esp),%edi
	cfi_rel_offset (edi, 12)
	movl	S(%esp),%esi
	cfi_rel_offset (esi, 8)
	movl	SIZE(%esp),%ebx
	cfi_rel_offset (ebx, 0)
	movl	CNT(%esp),%ecx

/* We can use faster code for shift-by-1 under certain conditions.  */
	cmp	$1,%ecx
	jne	L(normal)
	leal	4(%esi),%eax
	cmpl	%edi,%eax
	jnc	L(special)		/* jump if s_ptr + 1 >= res_ptr */
	leal	(%esi,%ebx,4),%eax
	cmpl	%eax,%edi
	jnc	L(special)		/* jump if res_ptr >= s_ptr + size */

L(normal):
	leal	-4(%edi,%ebx,4),%edi
	leal	-4(%esi,%ebx,4),%esi

	movl	(%esi),%edx
	subl	$4,%esi
	xorl	%eax,%eax
	shldl	%cl,%edx,%eax		/* compute carry limb */
	pushl	%eax			/* push carry limb onto stack */
	cfi_adjust_cfa_offset (4)

	decl	%ebx
	pushl	%ebx
	cfi_adjust_cfa_offset (4)
	shrl	$3,%ebx
	jz	L(end)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN (2)
L(oop):	movl	-28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	-4(%esi),%edx
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,(%edi)
	movl	%eax,-4(%edi)

	movl	-8(%esi),%ebp
	movl	-12(%esi),%eax
	shldl	%cl,%ebp,%edx
	shldl	%cl,%eax,%ebp
	movl	%edx,-8(%edi)
	movl	%ebp,-12(%edi)

	movl	-16(%esi),%edx
	movl	-20(%esi),%ebp
	shldl	%cl,%edx,%eax
	shldl	%cl,%ebp,%edx
	movl	%eax,-16(%edi)
	movl	%edx,-20(%edi)

	movl	-24(%esi),%eax
	movl	-28(%esi),%edx
	shldl	%cl,%eax,%ebp
	shldl	%cl,%edx,%eax
	movl	%ebp,-24(%edi)
	movl	%eax,-28(%edi)

	subl	$32,%esi
	subl	$32,%edi
	decl	%ebx
	jnz	L(oop)

L(end):	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	andl	$7,%ebx
	jz	L(end2)
L(oop2):
	movl	(%esi),%eax
	shldl	%cl,%eax,%edx
	movl	%edx,(%edi)
	movl	%eax,%edx
	subl	$4,%esi
	subl	$4,%edi
	decl	%ebx
	jnz	L(oop2)

L(end2):
	shll	%cl,%edx		/* compute least significant limb */
	movl	%edx,(%edi)		/* store it */

	popl	%eax			/* pop carry limb */
	cfi_adjust_cfa_offset (-4)

	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebx)
	popl	%ebp
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebp)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
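
/* __mpn_lshift shifts the size-limb number at s_ptr left by cnt bits,
   stores the result at res_ptr, and returns the bits shifted out of the
   top limb.  The L(normal) code above walks from the most significant
   limb down, combining adjacent source limbs with shldl and unrolling
   eight limbs per iteration.  As a reading aid, here is a rough C
   equivalent of that word-level operation -- an illustrative sketch
   only, not the glibc source: the helper name ref_lshift is made up,
   and unsigned long stands in for the 32-bit limb type.

   unsigned long
   ref_lshift (unsigned long *res_ptr, const unsigned long *s_ptr,
	       long size, unsigned cnt)		// assumes 0 < cnt < 32
   {
     unsigned long high = s_ptr[size - 1];
     unsigned long retval = high >> (32 - cnt);	// bits shifted out the top
     for (long i = size - 1; i > 0; i--)	// most significant first, so
       {					// res_ptr >= s_ptr overlap is safe
	 unsigned long low = s_ptr[i - 1];
	 res_ptr[i] = (high << cnt) | (low >> (32 - cnt));	// one shldl
	 high = low;
       }
     res_ptr[0] = high << cnt;			// least significant limb
     return retval;				// carry limb, returned in %eax
   }
*/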

/* Shift-by-1 special case.  This code loops from the least significant
   end of the arrays, which is only permissible when source and
   destination do not overlap harmfully; since the function is
   documented to work for overlapping operands, the checks at the top
   send the unsafe cases through L(normal) instead.  */

	cfi_adjust_cfa_offset (16)
	cfi_rel_offset (edi, 12)
	cfi_rel_offset (esi, 8)
	cfi_rel_offset (ebp, 4)
	cfi_rel_offset (ebx, 0)
L(special):
	movl	(%esi),%edx
	addl	$4,%esi

	decl	%ebx
	pushl	%ebx
	cfi_adjust_cfa_offset (4)
	shrl	$3,%ebx

	addl	%edx,%edx
	incl	%ebx
	decl	%ebx
	jz	L(Lend)

	movl	(%edi),%eax		/* fetch destination cache line */

	ALIGN (2)
L(Loop):
	movl	28(%edi),%eax		/* fetch destination cache line */
	movl	%edx,%ebp

	movl	(%esi),%eax
	movl	4(%esi),%edx
	adcl	%eax,%eax
	movl	%ebp,(%edi)
	adcl	%edx,%edx
	movl	%eax,4(%edi)

	movl	8(%esi),%ebp
	movl	12(%esi),%eax
	adcl	%ebp,%ebp
	movl	%edx,8(%edi)
	adcl	%eax,%eax
	movl	%ebp,12(%edi)

	movl	16(%esi),%edx
	movl	20(%esi),%ebp
	adcl	%edx,%edx
	movl	%eax,16(%edi)
	adcl	%ebp,%ebp
	movl	%edx,20(%edi)

	movl	24(%esi),%eax
	movl	28(%esi),%edx
	adcl	%eax,%eax
	movl	%ebp,24(%edi)
	adcl	%edx,%edx
	movl	%eax,28(%edi)

	leal	32(%esi),%esi		/* use leal to avoid clobbering carry */
	leal	32(%edi),%edi
	decl	%ebx
	jnz	L(Loop)

L(Lend):
	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	sbbl	%eax,%eax		/* save carry in %eax */
	andl	$7,%ebx
	jz	L(Lend2)
	addl	%eax,%eax		/* restore carry from eax */
L(Loop2):
	movl	%edx,%ebp
	movl	(%esi),%edx
	adcl	%edx,%edx
	movl	%ebp,(%edi)

	leal	4(%esi),%esi		/* use leal to avoid clobbering carry */
	leal	4(%edi),%edi
	decl	%ebx
	jnz	L(Loop2)

	jmp	L(L1)
L(Lend2):
	addl	%eax,%eax		/* restore carry from eax */
L(L1):	movl	%edx,(%edi)		/* store last limb */

	sbbl	%eax,%eax
	negl	%eax

	popl	%ebx
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebx)
	popl	%ebp
	cfi_adjust_cfa_offset (-4)
	cfi_restore (ebp)
	popl	%esi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (esi)
	popl	%edi
	cfi_adjust_cfa_offset (-4)
	cfi_restore (edi)

	ret
END (__mpn_lshift)
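
/* The L(special) path exploits the identity x << 1 == x + x: each
   adcl %reg,%reg doubles a limb and adds in the carry out of the
   previous limb, so the whole shift becomes one long add-with-carry
   chain (which is why the loop bookkeeping uses leal and keeps the
   carry flag live across iterations).  A rough C equivalent of that
   chain -- an illustrative sketch only, with the made-up name
   ref_lshift1 and unsigned long as the 32-bit limb type:

   unsigned long
   ref_lshift1 (unsigned long *res_ptr, const unsigned long *s_ptr, long size)
   {
     unsigned long cy = 0;			// models the carry flag
     for (long i = 0; i < size; i++)		// least significant first
       {
	 unsigned long u = s_ptr[i];
	 res_ptr[i] = (u << 1) | cy;		// one adcl %reg,%reg
	 cy = u >> 31;				// carry out = old top bit
       }
     return cy;					// the limb shifted out
   }
*/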