/*
 * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
 *
 * SPDX-License-Identifier: BSD-3-Clause
 */

#include "pico/asm_helper.S"
#include "pico/bootrom/sf_table.h"

__pre_init __aeabi_float_init, 00020

.syntax unified
.cpu cortex-m0plus
.thumb

// Open the section that holds a float routine: a RAM section when
// PICO_FLOAT_IN_RAM is set, a regular named section otherwise.
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm

// As float_section, but the section is named after the wrapper symbol of func.
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm

.macro _float_wrapper_func x
    wrapper_func \x
.endm

// Entry point for a one-operand function (operand in r0). When
// PICO_FLOAT_PROPAGATE_NANS is set, a NaN operand is returned to the caller
// unchanged by __check_nan_f1 and the function body never runs.
// The prologue clobbers ip (and r2, r3, flags inside the check).
.macro wrapper_func_f1 x
    _float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
    mov ip, lr              // ip = address of our own caller
    bl __check_nan_f1
    mov lr, ip              // no NaN: put the return address back
#endif
.endm

// Entry point for a two-operand function (operands in r0, r1); same NaN
// short-circuit as wrapper_func_f1 but checking both operands.
.macro wrapper_func_f2 x
    _float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
    mov ip, lr              // ip = address of our own caller
    bl __check_nan_f2
    mov lr, ip              // no NaN: put the return address back
#endif
.endm

.section .text

#if PICO_FLOAT_PROPAGATE_NANS
// NaN guards used by wrapper_func_f1 / wrapper_func_f2.
//   in:  r0 (and r1 for _f2) = operands, lr = return address inside the wrapper,
//        ip = return address of the wrapper's caller
//   out: if an operand is a NaN, that NaN is left in r0 and control goes
//        straight back to the wrapper's caller (bx ip); otherwise control
//        returns to the wrapper (bx lr) with r0/r1 untouched
//   clobbers r2, r3, flags
// NaN test: (x << 1) + 0x01000000 produces a carry and a non-zero result only
// when x << 1 is above 0xff000000, i.e. exponent all ones and mantissa non-zero.
.thumb_func
__check_nan_f1:
    movs r3, #1
    lsls r3, #24            // r3 = 0x01000000
    lsls r2, r0, #1         // drop the sign bit
    adds r2, r3
    bhi 1f                  // r0 is a NaN
    bx lr
1:
    bx ip

.thumb_func
__check_nan_f2:
    movs r3, #1
    lsls r3, #24            // r3 = 0x01000000
    lsls r2, r0, #1
    adds r2, r3
    bhi 1f                  // first operand is a NaN: return it
    lsls r2, r1, #1
    adds r2, r3
    bhi 2f                  // second operand is a NaN
    bx lr
2:
    mov r0, r1              // return the second operand
1:
    bx ip
#endif

// Tail-call the function whose pointer is stored at sf_table + SF_TABLE_OFFSET.
// Clobbers r3. In debug builds with PICO_FLOAT_SUPPORT_ROM_V1 it also zeroes ip
// first (presumably so that a shim reached by mistake sees no valid descriptor
// address -- TODO confirm).
.macro table_tail_call SF_TABLE_OFFSET
#if PICO_FLOAT_SUPPORT_ROM_V1
#ifndef NDEBUG
    movs r3, #0
    mov ip, r3
#endif
#endif
    ldr r3, =sf_table
    ldr r3, [r3, #\SF_TABLE_OFFSET]
    bx r3
.endm

// Like table_tail_call, but with PICO_FLOAT_SUPPORT_ROM_V1 a small inline record
// follows the branch: the table offset byte, the byte 0xdf and the address of
// a shim routine. ip is set to pc (which reads as the address just past the
// bx, i.e. the start of that record) before branching.
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
    ldr r3, =sf_table
    ldr r3, [r3, #\SF_TABLE_OFFSET]
#if PICO_FLOAT_SUPPORT_ROM_V1
    mov ip, pc
#endif
    bx r3
#if PICO_FLOAT_SUPPORT_ROM_V1
.byte \SF_TABLE_OFFSET, 0xdf
.word \shim
#endif
.endm


// note generally each function is in a separate section unless there is fall thru or branching between them
// note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool

// note functions are word aligned except where they are an odd number of linear instructions

// Section shared by frsub, fsub, fadd, fdiv and fmul
float_wrapper_section __aeabi_farithmetic

// float FUNC_NAME(__aeabi_frsub)(float x, float y) single-precision reverse subtraction, y - x
// frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_frsub
    // swap r0 and r1 without a scratch register
    eors r0, r1
    eors r1, r0
    eors r0, r1
    // fall thru

// float FUNC_NAME(__aeabi_fsub)(float x, float y) single-precision subtraction, x - y
wrapper_func_f2 __aeabi_fsub
#if PICO_FLOAT_PROPAGATE_NANS
    // we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
    mov r2, r0
    eors r2, r1
    bmi 1f // different signs
    push {r0, r1, lr}       // keep the operands for the post-check
    bl 1f                   // run the subtraction as a subroutine
    b fdiv_fsub_nan_helper
1:
#endif
    table_tail_call SF_TABLE_FSUB

// float FUNC_NAME(__aeabi_fadd)(float, float) single-precision addition
wrapper_func_f2 __aeabi_fadd
    table_tail_call SF_TABLE_FADD

// float FUNC_NAME(__aeabi_fdiv)(float n, float d) single-precision division, n / d
wrapper_func_f2 __aeabi_fdiv
#if PICO_FLOAT_PROPAGATE_NANS
    push {r0, r1, lr}       // keep the operands for the post-check
    bl 1f                   // run the division as a subroutine
    b fdiv_fsub_nan_helper
1:
#endif
    table_tail_call SF_TABLE_FDIV

// Post-check shared by fsub and fdiv.
//   in:  r0 = result, stack = {first operand, second operand, return address}
//   out: r0 = result, with bit 22 set (turning an infinity into a NaN) when the
//        result and both operands all have an all-ones exponent
fdiv_fsub_nan_helper:
#if PICO_FLOAT_PROPAGATE_NANS
    pop {r1, r2}

    // check for infinite op infinite (or rather check for infinite result with both
    // operands being infinite)
    lsls r3, r0, #1
    asrs r3, r3, #24        // r3 = -1 iff the result exponent is 0xff
    adds r3, #1
    beq 2f
    pop {pc}
2:
    lsls r1, #1
    asrs r1, r1, #24        // -1 iff exponent of first operand is 0xff
    lsls r2, #1
    asrs r2, r2, #24        // -1 iff exponent of second operand is 0xff
    ands r1, r2
    adds r1, #1
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
3:
    pop {pc}
#endif

// float FUNC_NAME(__aeabi_fmul)(float, float) single-precision multiplication
wrapper_func_f2 __aeabi_fmul
#if PICO_FLOAT_PROPAGATE_NANS
    push {r0, r1, lr}       // keep the operands for the post-check
    bl 1f                   // run the multiplication as a subroutine
    pop {r1, r2}

    // check for multiplication of infinite by zero (or rather check for infinite result with either
    // operand 0)
    lsls r3, r0, #1
    asrs r3, r3, #24        // r3 = -1 iff the result exponent is 0xff
    adds r3, #1
    beq 2f
    pop {pc}
2:
    // The result is infinite: treat it as inf * 0 when the operands have no
    // set bit in common. The sign bits are shifted out first, otherwise
    // -inf * -0 (both sign bits set) would be left as an infinity while
    // +inf * 0 becomes a NaN.
    lsls r1, #1
    lsls r2, #1
    ands r1, r2
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
3:
    pop {pc}
1:
#endif
    table_tail_call SF_TABLE_FMUL

// void FUNC_NAME(__aeabi_cfrcmple)(float, float) reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
float_wrapper_section __aeabi_cfcmple
.align 2
wrapper_func __aeabi_cfrcmple
    push {r0-r2, lr}
    // swap the operands, then share the forward compare
    eors r0, r1
    eors r1, r0
    eors r0, r1
    b __aeabi_cfcmple_guts

// NOTE these share an implementation as we have no excepting NaNs.
// void FUNC_NAME(__aeabi_cfcmple)(float, float) 3-way (<, =, ?>) compare [1], result in PSR ZC flags
// void FUNC_NAME(__aeabi_cfcmpeq)(float, float) non-excepting equality comparison [1], result in PSR ZC flags
// r0-r2 are preserved (saved on entry, restored by the final pop); only the
// flags carry the result: Z set for equal, C clear for less than, C set with
// Z clear for greater than or unordered.
.align 2
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_cfcmpeq
    push {r0-r2, lr}

__aeabi_cfcmple_guts:
    // normalise x (r0)
    lsls r2,r0,#1
    lsrs r2,#24             // r2 = exponent of x
    beq 1f                  // zero or denormal
    cmp r2,#0xff
    bne 2f                  // ordinary number: leave as is
    lsls r2, r0, #9         // C = 1 (exponent lsb), Z = mantissa is zero
    bhi 3f                  // NaN: leave with C set and Z clear
1:
    lsrs r0,#23     // clear mantissa if denormal or infinite
    lsls r0,#23
2:
    // normalise y (r1) the same way
    lsls r2,r1,#1
    lsrs r2,#24             // r2 = exponent of y
    beq 1f
    cmp r2,#0xff
    bne 2f
    lsls r2, r1, #9
    bhi 3f                  // NaN: leave with C set and Z clear
1:
    lsrs r1,#23     // clear mantissa if denormal or infinite
    lsls r1,#23
2:
    movs r2,#1      // initialise result (r2 is restored by the pop, so this value is not returned)
    eors r1,r0
    bmi 2f          // opposite signs? then can proceed on basis of sign of x
    eors r1,r0      // restore y
    bpl 1f
    cmp r1,r0       // both negative: larger bit pattern is the smaller value
    pop {r0-r2, pc}
1:
    cmp r0,r1       // both non-negative: bit patterns order like the values
    pop {r0-r2, pc}
2:
    orrs r1, r0     // handle 0/-0
    adds r1, r1     // note this always sets C
    beq 3f          // both are zero: Z set, i.e. equal
    mvns r0, r0     // carry inverse of r0 sign
    adds r0, r0
3:
    pop {r0-r2, pc}


// int FUNC_NAME(__aeabi_fcmpeq)(float, float) result (1, 0) denotes (=, ?<>) [2], use for C == and !=
float_wrapper_section __aeabi_fcmpeq
.align 2
wrapper_func __aeabi_fcmpeq
    push {lr}
    bl __aeabi_cfcmpeq
    beq 1f
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}

// int FUNC_NAME(__aeabi_fcmplt)(float, float) result (1, 0) denotes (<, ?>=) [2], use for C <
// NOTE(review): the value produced for "true" is 0xffffffff, not 1 (see sbcs below)
float_wrapper_section __aeabi_fcmplt
.align 2
wrapper_func __aeabi_fcmplt
    push {lr}
    bl __aeabi_cfcmple
    sbcs r0, r0             // r0 = r0 - r0 - !C: 0xffffffff when C is clear (less than), else 0
    pop {pc}

// int FUNC_NAME(__aeabi_fcmple)(float, float) result (1, 0) denotes (<=, ?>) [2], use for C <=
float_wrapper_section __aeabi_fcmple
.align 2
wrapper_func __aeabi_fcmple
    push {lr}
    bl __aeabi_cfcmple
    bls 1f                  // C clear or Z set: less than or equal
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}

// int FUNC_NAME(__aeabi_fcmpge)(float, float) result (1, 0) denotes (>=, ?<) [2], use for C >=
float_wrapper_section __aeabi_fcmpge
.align 2
wrapper_func __aeabi_fcmpge
    push {lr}
    // because of NaNs it is better to reverse the args than the result
    bl __aeabi_cfrcmple
    bls 1f                  // y <= x
    movs r0, #0
    pop {pc}
1:
    movs r0, #1
    pop {pc}

// int FUNC_NAME(__aeabi_fcmpgt)(float, float) result (1, 0) denotes (>, ?<=) [2], use for C >
// NOTE(review): the value produced for "true" is 0xffffffff, not 1 (see sbcs below)
float_wrapper_section __aeabi_fcmpgt
.align 2
wrapper_func __aeabi_fcmpgt
    push {lr}
    // because of NaNs it is better to reverse the args than the result
    bl __aeabi_cfrcmple
    sbcs r0, r0             // r0 = r0 - r0 - !C: 0xffffffff when C is clear (y < x), else 0
    pop {pc}

// int FUNC_NAME(__aeabi_fcmpun)(float, float) result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
float_wrapper_section __aeabi_fcmpun
wrapper_func __aeabi_fcmpun
    // Returns 1 in r0 when either operand (r0, r1) is a NaN, else 0.
    // NaN test: (x << 1) + 0x01000000 carries out with a non-zero result only
    // when exponent is all ones and the mantissa is non-zero. Clobbers r2, r3.
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1
    adds r2, r3
    bhi 1f
    lsls r2, r1, #1
    adds r2, r3
    bhi 1f
    movs r0, #0
    bx lr
1:
    movs r0, #1
    bx lr


// float FUNC_NAME(__aeabi_ui2f)(unsigned) unsigned to float (single precision) conversion
float_wrapper_section __aeabi_ui2f
wrapper_func __aeabi_ui2f
    subs r1, r1             // r1 = 0: sign word handed to the shared conversion
    cmp r0, #0
    bne __aeabi_i2f_main    // non-zero: share the signed conversion body
    mov r0, r1              // zero converts to +0.0
    bx lr

float_wrapper_section __aeabi_i2f
// float FUNC_NAME(__aeabi_i2f)(int) integer to float (single precision) conversion
wrapper_func __aeabi_i2f
    lsrs r1, r0, #31
    lsls r1, #31            // r1 = sign bit only
    bpl 1f
    rsbs r0, #0             // negative: r0 = magnitude
1:
    cmp r0, #0
    beq 7f                  // zero is already the bit pattern of 0.0
__aeabi_i2f_main:
    // in: r0 = non-zero magnitude, r1 = sign bit (0 or 0x80000000)

    mov ip, lr              // return address kept in ip across the call below
    push {r0, r1}
    ldr r3, =sf_clz_func
    ldr r3, [r3]
    blx r3                  // presumably r0 = count of leading zeros of r0; assumed to preserve ip -- TODO confirm
    pop {r1, r2}            // r1 = magnitude, r2 = sign
    lsls r1, r0             // normalise: leading 1 moves to bit 31
    subs r0, #158
    rsbs r0, #0             // r0 = 158 - shift = biased exponent

    adds r1,#0x80           // rounding
    bcs 5f                  // tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)

    lsls r3,r1,#24          // check bottom 8 bits of r1
    beq 6f                  // in rounding-tie case?
    lsls r1,#1              // remove leading 1
3:
    lsrs r1,#9              // align mantissa
    lsls r0,#23             // align exponent
    orrs r0,r2              // merge in the sign bit (r2)
4:
    orrs r0,r1              // merge in the mantissa (r1)
1:
    bx ip
5:
    adds r0,#1              // correct exponent offset
    b 3b
6:
    lsrs r1,#9              // ensure even result
    lsls r1,#10
    b 3b
7:
    bx lr


// int FUNC_NAME(__aeabi_f2iz)(float) float (single precision) to integer C-style conversion [3]
// Truncates toward zero. Inputs with exponent >= 158 (magnitude >= 2^31, which
// includes infinities and NaNs) give 0x7fffffff when the sign bit is clear and
// 0x80000000 when it is set. Clobbers r1-r3 and flags.
float_wrapper_section __aeabi_f2iz
wrapper_func __aeabi_f2iz
regular_func float2int_z
    lsls r1, r0, #1
    lsrs r2, r1, #24        // r2 = biased exponent
    movs r3, #0x80
    lsls r3, #24            // r3 = 0x80000000
    cmp r2, #126
    ble 1f                  // magnitude below 1.0: result is 0
    subs r2, #158
    bge 2f                  // magnitude too large: saturate
    asrs r1, r0, #31        // r1 = 0 or -1 according to sign
    lsls r0, #9
    lsrs r0, #1
    orrs r0, r3             // r0 = mantissa with explicit leading 1 at bit 31
    negs r2, r2             // r2 = 158 - exponent = right shift needed
    lsrs r0, r2             // r0 = integer magnitude
    lsls r1, #1
    adds r1, #1             // r1 = +1 or -1
    muls r0, r1             // apply the sign
    bx lr
1:
    movs r0, #0
    bx lr
2:
    lsrs r0, #31            // r0 = sign (0 or 1)
    adds r0, r3
    subs r0, #1             // 0x7fffffff for positive, 0x80000000 for negative
    bx lr

float_section float2int
regular_func float2int
    shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim

float_section float2fix
regular_func float2fix
    shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim

float_section float2ufix
regular_func float2ufix
    table_tail_call SF_TABLE_FLOAT2UFIX

// unsigned FUNC_NAME(__aeabi_f2uiz)(float) float (single precision) to unsigned C-style conversion [3]
float_wrapper_section __aeabi_f2uiz
wrapper_func __aeabi_f2uiz
    table_tail_call SF_TABLE_FLOAT2UINT

float_section fix2float
regular_func fix2float
    table_tail_call SF_TABLE_FIX2FLOAT

float_section ufix2float
regular_func ufix2float
    table_tail_call SF_TABLE_UFIX2FLOAT

float_section fix642float
regular_func fix642float
    shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim

float_section ufix642float
regular_func ufix642float
    shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim

// float FUNC_NAME(__aeabi_l2f)(long long) long long to float (single precision) conversion
float_wrapper_section __aeabi_l2f
1:
    // value fits in 32 bits: use the int conversion
    ldr r2, =__aeabi_i2f
    bx r2
wrapper_func __aeabi_l2f
    asrs r2, r0, #31        // r2 = sign extension of the low word
    cmp r1, r2
    beq 1b                  // high word is just the sign extension
    shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim

// float FUNC_NAME(__aeabi_ul2f)(unsigned long long) unsigned long long to float (single precision) conversion
float_wrapper_section __aeabi_ul2f
1:
    // value fits in 32 bits: use the unsigned int conversion
    ldr r2, =__aeabi_ui2f
    bx r2
wrapper_func __aeabi_ul2f
    cmp r1, #0
    beq 1b                  // high word is zero
    shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim

// long long FUNC_NAME(__aeabi_f2lz)(float) float (single precision) to long long C-style conversion [3]
float_wrapper_section __aeabi_f2lz
wrapper_func __aeabi_f2lz
regular_func float2int64_z
    cmn r0, r0              // C = sign bit of the input
    bcc float2int64         // sign clear: plain conversion
    push {lr}
    lsls r0, #1
    lsrs r0, #1             // r0 = input with the sign bit cleared
    movs r1, #0             // second argument of float2ufix64 (presumably the fractional bit count -- TODO confirm)
    bl float2ufix64         // r1:r0 = converted magnitude
    cmp r1, #0
    bmi 1f                  // bit 63 set: magnitude does not fit, saturate
    // r1:r0 = 0 - r1:r0
    movs r2, #0
    rsbs r0, #0
    sbcs r2, r1
    mov r1, r2
    pop {pc}
1:
    // return 0x8000000000000000
    movs r1, #128
    lsls r1, #24
    movs r0, #0
    pop {pc}

float_section float2int64
regular_func float2int64
    shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim

float_section float2fix64
regular_func float2fix64
    shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim

// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float) float to unsigned long long C-style conversion [3]
float_wrapper_section __aeabi_f2ulz
wrapper_func __aeabi_f2ulz
    shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim

float_section float2ufix64
regular_func float2ufix64
    shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim

// float to double conversion; result in r1:r0 (r1 = high word)
float_wrapper_section __aeabi_f2d
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // NaN input: build the double NaN by hand
    // copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
    asrs r1, r0, #3         // sign stays in bit 31, exponent and mantissa move down 3 places
    movs r2, #0xf
    lsls r2, #27
    orrs r1, r2             // set bits 30..27 so the 11-bit exponent field is all ones
    lsls r0, #25            // low 7 bits of the float move to the top of the low word
    bx lr
#endif
wrapper_func __aeabi_f2d
#if PICO_FLOAT_PROPAGATE_NANS
    movs r3, #1
    lsls r3, #24
    lsls r2, r0, #1
    adds r2, r3
    bhi 1b                  // input is a NaN
#endif
    shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim

float_wrapper_section sqrtf
wrapper_func_f1 sqrtf
#if PICO_FLOAT_SUPPORT_ROM_V1
    // check for negative
    asrs r1, r0, #23        // r1 = sign-extended (sign, exponent)
    bmi 1f
#endif
    table_tail_call SF_TABLE_FSQRT
#if PICO_FLOAT_SUPPORT_ROM_V1
1:
    mvns r0, r1             // r0 = 255 iff the exponent is zero
    cmp r0, #255
    bne 2f
    // -0 or -Denormal return -0 (0x80000000)
    lsls r0, #31
    bx lr
2:
    // return -Inf (0xff800000)
    asrs r0, r1, #31
    lsls r0, #23
    bx lr
#endif

float_wrapper_section cosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func cosf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24            // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FCOS
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    // reduce the argument with remainderf(x, 2 * pi), then retry
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {r3, lr}           // r3 is padding only: keeps sp 8-byte aligned across the call
    bl remainderf
    pop {r1, r2}            // r1 = padding (discarded), r2 = saved lr
    mov lr, r2
    b 2b

float_wrapper_section sinf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sinf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24            // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FSIN
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    // reduce the argument with remainderf(x, 2 * pi), then retry
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {r3, lr}           // r3 is padding only: keeps sp 8-byte aligned across the call
    bl remainderf
    pop {r1, r2}            // r1 = padding (discarded), r2 = saved lr
    mov lr, r2
    b 2b

float_wrapper_section sincosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
// in: r0 = angle, r1 = where to store the sine, r2 = where to store the cosine
wrapper_func sincosf
    push {r1, r2, lr}
    // rom version only works for -128 < angle < 128
    lsls r3, r0, #1
    lsrs r3, #24            // r3 = biased exponent
    cmp r3, #127 + 7
    bge 3f
2:
    ldr r3, =sf_table
    ldr r3, [r3, #SF_TABLE_FSIN]
    blx r3                  // r0 is stored as the sine, r1 as the cosine below
    pop {r2, r3}            // r2 = sine pointer, r3 = cosine pointer
    str r0, [r2]
    str r1, [r3]
    pop {pc}
#if PICO_FLOAT_PROPAGATE_NANS
.align 2
    // not reached by fall-through (the instruction before the alignment
    // returns); presumably layout padding -- TODO confirm
    pop {pc}
#endif
3:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r3, #255
    bne 4f
    // infinite to nan
    movs r3, #1
    lsls r3, #22
    orrs r0, r3
    str r0, [r1]            // r1, r2 still hold the caller's pointers here
    str r0, [r2]
    add sp, #12             // drop the three words pushed on entry
    bx lr
4:
#endif
    // reduce the argument with remainderf(x, 2 * pi), then retry
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b 2b

float_wrapper_section tanf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func tanf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24            // r1 = biased exponent
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FTAN
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    // reduce the argument with remainderf(x, 2 * pi), then retry
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {r3, lr}           // r3 is padding only: keeps sp 8-byte aligned across the call
    bl remainderf
    pop {r1, r2}            // r1 = padding (discarded), r2 = saved lr
    mov lr, r2
    b 2b

float_wrapper_section atan2f
wrapper_func_f2 atan2f
    shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim

float_wrapper_section expf
wrapper_func_f1 expf
    table_tail_call SF_TABLE_FEXP

float_wrapper_section logf
wrapper_func_f1 logf
    table_tail_call SF_TABLE_FLN