1/*
2 * Copyright (c) 2020 Raspberry Pi (Trading) Ltd.
3 *
4 * SPDX-License-Identifier: BSD-3-Clause
5 */
6
7#include "pico/asm_helper.S"
8#include "pico/bootrom/sf_table.h"
9
// Hook __aeabi_float_init in through the __pre_init macro. The macro is not defined in
// this file (NOTE(review): presumably it comes from pico/asm_helper.S and 00020 is an
// ordering key - confirm there).
__pre_init __aeabi_float_init, 00020

// Everything below is assembled as unified-syntax Thumb code for the Cortex-M0+.
.syntax unified
.cpu cortex-m0plus
.thumb
15
// float_section name
//   Switches to the code section (flags "ax") for the routine that follows.
//   With PICO_FLOAT_IN_RAM non-zero the section name is built with RAM_SECTION_NAME,
//   otherwise with SECTION_NAME (both macros are defined outside this file).
.macro float_section name
#if PICO_FLOAT_IN_RAM
.section RAM_SECTION_NAME(\name), "ax"
#else
.section SECTION_NAME(\name), "ax"
#endif
.endm
23
// float_wrapper_section func
//   Same as float_section, except that the section is named after
//   WRAPPER_FUNC_NAME(func) instead of the plain function name.
.macro float_wrapper_section func
float_section WRAPPER_FUNC_NAME(\func)
.endm
27
// _float_wrapper_func x
//   Thin alias for wrapper_func (defined outside this file). The wrapper_func_f1 and
//   wrapper_func_f2 entry macros declare their function through it.
.macro _float_wrapper_func x
    wrapper_func \x
.endm
31
// wrapper_func_f1 x
//   Declares wrapper function x taking one float argument in r0.
//   With PICO_FLOAT_PROPAGATE_NANS the entry sequence parks lr in ip and calls
//   __check_nan_f1. For an ordinary argument the helper comes back here and lr is
//   restored; for a NaN it leaves through ip, i.e. returns the NaN in r0 directly to
//   the caller of x. The helper overwrites r2 and r3.
.macro wrapper_func_f1 x
   _float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
    mov ip, lr              // keep the real return address where the helper can use it
    bl __check_nan_f1
    mov lr, ip              // not a NaN: put the return address back
#endif
.endm
40
// wrapper_func_f2 x
//   Declares wrapper function x taking two float arguments in r0 and r1.
//   With PICO_FLOAT_PROPAGATE_NANS the entry sequence parks lr in ip and calls
//   __check_nan_f2. If neither argument is a NaN the helper comes back here and lr is
//   restored; otherwise it leaves through ip with the NaN in r0, i.e. returns directly
//   to the caller of x. The helper overwrites r2 and r3.
.macro wrapper_func_f2 x
   _float_wrapper_func \x
#if PICO_FLOAT_PROPAGATE_NANS
    mov ip, lr              // keep the real return address where the helper can use it
    bl __check_nan_f2
    mov lr, ip              // no NaN: put the return address back
#endif
.endm
49
.section .text

#if PICO_FLOAT_PROPAGATE_NANS
// __check_nan_f1: NaN filter for one-argument wrappers.
//   in:  r0 = argument, ip = return address of the wrapper's caller, lr = wrapper
//   out: returns to lr when r0 is not a NaN, otherwise returns through ip with r0 unchanged
//   clobbers: r2, r3, flags
.thumb_func
__check_nan_f1:
   movs r3, #1
   lsls r3, #24             // r3 = 0x01000000, one unit of the exponent field after the shift below
   lsls r2, r0, #1          // drop the sign, exponent now sits in bits 31..24
   adds r2, r3              // carries out only when the exponent is all ones; the sum is then mantissa << 1
   bhi 1f                   // C set and result non-zero: exponent all ones with a non-zero mantissa, a NaN
   bx lr
1:
   bx ip

// __check_nan_f2: NaN filter for two-argument wrappers.
//   in:  r0, r1 = arguments, ip = return address of the wrapper's caller, lr = wrapper
//   out: returns to lr when neither argument is a NaN; otherwise returns through ip
//        with r0 holding the NaN (r0 is tested first, then r1)
//   clobbers: r2, r3, flags
.thumb_func
__check_nan_f2:
   movs r3, #1
   lsls r3, #24             // r3 = 0x01000000
   lsls r2, r0, #1
   adds r2, r3
   bhi 1f                   // r0 is a NaN and is already the result
   lsls r2, r1, #1
   adds r2, r3
   bhi 2f                   // r1 is a NaN
   bx lr
2:
   mov r0, r1               // make the second argument the result
1:
   bx ip
#endif
80
// table_tail_call SF_TABLE_OFFSET
//   Tail-calls the routine whose address is stored at byte offset SF_TABLE_OFFSET in
//   sf_table; the callee returns straight to whatever lr holds. Clobbers r3.
//   When PICO_FLOAT_SUPPORT_ROM_V1 is set and NDEBUG is not defined, ip is cleared to
//   zero first (NOTE(review): presumably so that a fallback handler looking for shim
//   data through ip finds none - confirm against the code that fills sf_table).
.macro table_tail_call SF_TABLE_OFFSET
#if PICO_FLOAT_SUPPORT_ROM_V1
#ifndef NDEBUG
    movs r3, #0
    mov ip, r3
#endif
#endif
    ldr r3, =sf_table
    ldr r3, [r3, #\SF_TABLE_OFFSET]
    bx r3
.endm
92
// shimmable_table_tail_call SF_TABLE_OFFSET shim
//   Tail-calls the sf_table entry at byte offset SF_TABLE_OFFSET, like table_tail_call.
//   Clobbers r3. With PICO_FLOAT_SUPPORT_ROM_V1 it additionally:
//     - copies pc into ip just before the jump; a Thumb read of pc yields the address
//       of the current instruction plus 4, which is the first byte after the bx
//     - places three data items after the bx: the table offset byte, the byte 0xdf,
//       and the address of shim
//   NOTE(review): presumably whatever sf_table points at for entries missing from the
//   V1 ROM uses ip to find this data and jump to shim - confirm against the table
//   setup code. The data is never executed by the code in this macro.
.macro shimmable_table_tail_call SF_TABLE_OFFSET shim
    ldr r3, =sf_table
    ldr r3, [r3, #\SF_TABLE_OFFSET]
#if PICO_FLOAT_SUPPORT_ROM_V1
    mov ip, pc
#endif
    bx r3
#if PICO_FLOAT_SUPPORT_ROM_V1
.byte \SF_TABLE_OFFSET, 0xdf
.word \shim
#endif
.endm
105
106
107# note generally each function is in a separate section unless there is fall thru or branching between them
108# note fadd, fsub, fmul, fdiv are so tiny and just defer to rom so are lumped together so they can share constant pool
109
110# note functions are word aligned except where they are an odd number of linear instructions
111
112// float FUNC_NAME(__aeabi_fadd)(float, float)         single-precision addition
float_wrapper_section __aeabi_farithmetic
// float FUNC_NAME(__aeabi_frsub)(float x, float y)    single-precision reverse subtraction, y - x

# frsub first because it is the only one that needs alignment
.align 2
wrapper_func __aeabi_frsub
    // exchange r0 and r1 with three XORs (no scratch register needed), then continue as x - y
    eors r0, r1
    eors r1, r0
    eors r0, r1
    // fall thru

// float FUNC_NAME(__aeabi_fsub)(float x, float y)     single-precision subtraction, x - y
// in: r0 = x, r1 = y   out: r0 = result from the SF_TABLE_FSUB routine
// With PICO_FLOAT_PROPAGATE_NANS, NaN arguments are returned by the entry macro, and
// same-sign operands take a slower path that lets fdiv_fsub_nan_helper inspect the result.
wrapper_func_f2 __aeabi_fsub
#if PICO_FLOAT_PROPAGATE_NANS
    // we want to return nan for inf-inf or -inf - -inf, but without too much upfront cost
    mov r2, r0
    eors r2, r1
    bmi 1f // different signs
    push {r0, r1, lr}           // operands and return address, consumed by the helper
    bl 1f                       // run the subtraction as a subroutine so control comes back here
    b fdiv_fsub_nan_helper
1:
#endif
    table_tail_call SF_TABLE_FSUB
137
// float FUNC_NAME(__aeabi_fadd)(float x, float y)     single-precision addition
// in: r0 = x, r1 = y   out: r0 = result from the SF_TABLE_FADD routine
// Apart from the NaN-argument check in the entry macro, no result fix-up is done here.
wrapper_func_f2 __aeabi_fadd
    table_tail_call SF_TABLE_FADD
140
// float FUNC_NAME(__aeabi_fdiv)(float n, float d)     single-precision division, n / d
// in: r0 = n, r1 = d   out: r0 = result from the SF_TABLE_FDIV routine
// With PICO_FLOAT_PROPAGATE_NANS the division is run as a subroutine and its result is
// passed through fdiv_fsub_nan_helper, which needs the operands saved on the stack.
wrapper_func_f2 __aeabi_fdiv
#if PICO_FLOAT_PROPAGATE_NANS
    push {r0, r1, lr}           // operands and return address, consumed by the helper
    bl 1f                       // call the tail-call sequence below so control comes back here
    b fdiv_fsub_nan_helper
1:
#endif
    table_tail_call SF_TABLE_FDIV
150
// fdiv_fsub_nan_helper: result fix-up shared by __aeabi_fsub and __aeabi_fdiv.
//   in:  r0 = result of the operation, stack = {operand 1, operand 2, return address}
//   out: r0 = result, with bit 22 set (turning an infinity into a NaN) when the result
//        and both operands all have an all-ones exponent; returns by popping pc
//   clobbers: r1, r2, r3, flags
// The body only exists when PICO_FLOAT_PROPAGATE_NANS is set; otherwise the label is
// unused and simply marks the start of the code that follows it.
fdiv_fsub_nan_helper:
#if PICO_FLOAT_PROPAGATE_NANS
    pop {r1, r2}

    // check for infinite op infinite (or rather check for infinite result with both
    // operands being infinite)
    lsls r3, r0, #1             // drop the sign
    asrs r3, r3, #24            // exponent, sign-extended: -1 when it is all ones
    adds r3, #1
    beq 2f
    pop {pc}                    // finite result: nothing to fix
2:
    lsls r1, #1
    asrs r1, r1, #24            // -1 when operand 1 has an all-ones exponent
    lsls r2, #1
    asrs r2, r2, #24            // -1 when operand 2 has an all-ones exponent
    ands r1, r2
    adds r1, #1                 // zero only when both were -1
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
3:
    pop {pc}
#endif
177
// float FUNC_NAME(__aeabi_fmul)(float, float)         single-precision multiplication
// in: r0, r1 = operands   out: r0 = result from the SF_TABLE_FMUL routine
// With PICO_FLOAT_PROPAGATE_NANS, NaN arguments are returned by the entry macro and an
// infinite result that came from multiplying by zero is converted to a NaN.
// Clobbers r1, r2, r3 and flags on that path.
wrapper_func_f2 __aeabi_fmul
#if PICO_FLOAT_PROPAGATE_NANS
    push {r0, r1, lr}           // keep the operands for the check after the multiply
    bl 1f                       // run the multiply as a subroutine so control comes back here
    pop {r1, r2}

    // check for multiplication of infinite by zero (or rather check for infinite result with either
    // operand 0)
    lsls r3, r0, #1             // drop the sign
    asrs r3, r3, #24            // exponent, sign-extended: -1 when it is all ones
    adds r3, #1
    beq 2f
    pop {pc}                    // finite result: nothing to fix
2:
    // An infinite result reached from two non-zero operands (overflow, or infinity times
    // a normal number) always has at least one exponent bit set in both operands, so
    // their AND is non-zero there. Only a zero exponent-and-mantissa overlap indicates
    // a multiplication by zero.
    ands r1, r2
    // The sign bits must not take part in the test: with two negative operands
    // (for example -0 * -inf) the AND would be 0x80000000 and the conversion to NaN
    // would be skipped. Shift the sign out and test the remaining 31 bits.
    lsls r1, #1
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
3:
    pop {pc}
1:
#endif
    table_tail_call SF_TABLE_FMUL
204
// void FUNC_NAME(__aeabi_cfrcmple)(float, float)         reversed 3-way (<, =, ?>) compare [1], result in PSR ZC flags
// Saves r0-r2 and lr exactly as __aeabi_cfcmple does, exchanges the two arguments and
// joins the shared comparison code, which restores the saved registers on exit.
float_wrapper_section __aeabi_cfcmple
.align 2
wrapper_func __aeabi_cfrcmple
    push {r0-r2, lr}
    // three-XOR exchange of r0 and r1
    eors r0, r1
    eors r1, r0
    eors r0, r1
    b __aeabi_cfcmple_guts
214
// NOTE these share an implementation as we have no excepting NaNs.
// void FUNC_NAME(__aeabi_cfcmple)(float, float)         3-way (<, =, ?>) compare [1], result in PSR ZC flags
// void FUNC_NAME(__aeabi_cfcmpeq)(float, float)         non-excepting equality comparison [1], result in PSR ZC flags
//
// in:  r0 = x, r1 = y
// out: flags only; r0-r2 are restored from the stack on every exit
//        x <  y              C clear, Z clear
//        x == y              C set,   Z set   (denormals count as zero, +0 equals -0)
//        x >  y or unordered C set,   Z clear
.align 2
wrapper_func __aeabi_cfcmple
wrapper_func __aeabi_cfcmpeq
    push {r0-r2, lr}

__aeabi_cfcmple_guts:
    // classify x by its exponent field
    lsls r2,r0,#1
    lsrs r2,#24
    beq 1f          // exponent 0: zero or denormal
    cmp r2,#0xff
    bne 2f          // ordinary number: leave it alone
    lsls r2, r0, #9 // C = lowest exponent bit (1 here), Z = mantissa is zero
    bhi 3f          // NaN: leave with C set and Z clear
1:
    lsrs r0,#23     @ clear mantissa if denormal or infinite
    lsls r0,#23
2:
    // same classification for y
    lsls r2,r1,#1
    lsrs r2,#24
    beq 1f
    cmp r2,#0xff
    bne 2f
    lsls r2, r1, #9
    bhi 3f          // NaN: leave with C set and Z clear
1:
    lsrs r1,#23     @ clear mantissa if denormal or infinite
    lsls r1,#23
2:
    movs r2,#1      @ initialise result
    eors r1,r0
    bmi 2f          @ opposite signs? then can proceed on basis of sign of x
    eors r1,r0      @ restore y
    bpl 1f
    cmp r1,r0       // both negative: a larger bit pattern is a smaller value, so compare reversed
    pop {r0-r2, pc}
1:
    cmp r0,r1       // both non-negative: bit patterns order like the values
    pop {r0-r2, pc}
2:
    orrs r1, r0     @ handle 0/-0
    adds r1, r1     @ note this always sets C
    beq 3f          // both operands are zeros: equal (Z set, C set)
    mvns r0, r0     @ carry inverse of r0 sign
    adds r0, r0
3:
    pop {r0-r2, pc}
264
265
// int FUNC_NAME(__aeabi_fcmpeq)(float, float)         result (1, 0) denotes (=, ?<>) [2], use for C == and !=
// in: r0, r1 = operands   out: r0 = 1 when they compare equal, 0 otherwise (including unordered)
float_wrapper_section __aeabi_fcmpeq
.align 2
wrapper_func __aeabi_fcmpeq
    push {lr}
    bl __aeabi_cfcmpeq          // Z comes back set only for an ordered, equal pair
    bne 1f
    movs r0, #1                 // equal
    pop {pc}
1:
    movs r0, #0                 // different or unordered
    pop {pc}
278
// int FUNC_NAME(__aeabi_fcmplt)(float, float)         result (1, 0) denotes (<, ?>=) [2], use for C <
// in: r0, r1 = operands   out: r0 = 1 when r0 < r1, 0 otherwise (including unordered)
float_wrapper_section __aeabi_fcmplt
.align 2
wrapper_func __aeabi_fcmplt
    push {lr}
    bl __aeabi_cfcmple          // C comes back clear only for "less than"
    sbcs r0, r0                 // r0 = -(1 - C): -1 for "less than", 0 otherwise
    rsbs r0, #0                 // negate so that "true" is the documented value 1 rather than -1
    pop {pc}
287
// int FUNC_NAME(__aeabi_fcmple)(float, float)         result (1, 0) denotes (<=, ?>) [2], use for C <=
// in: r0, r1 = operands   out: r0 = 1 when r0 <= r1, 0 otherwise (including unordered)
float_wrapper_section __aeabi_fcmple
.align 2
wrapper_func __aeabi_fcmple
    push {lr}
    bl __aeabi_cfcmple          // "less" clears C, "equal" sets Z
    bhi 1f                      // C set and Z clear: greater or unordered
    movs r0, #1                 // less or equal
    pop {pc}
1:
    movs r0, #0
    pop {pc}
300
// int FUNC_NAME(__aeabi_fcmpge)(float, float)         result (1, 0) denotes (>=, ?<) [2], use for C >=
// in: r0, r1 = operands   out: r0 = 1 when r0 >= r1, 0 otherwise (including unordered)
float_wrapper_section __aeabi_fcmpge
.align 2
wrapper_func __aeabi_fcmpge
    push {lr}
    // x >= y is evaluated as y <= x: swapping the arguments keeps an unordered pair
    // on the "false" side, which inverting the result would not
    bl __aeabi_cfrcmple
    bhi 1f                      // C set and Z clear: y > x or unordered
    movs r0, #1                 // y <= x
    pop {pc}
1:
    movs r0, #0
    pop {pc}
314
// int FUNC_NAME(__aeabi_fcmpgt)(float, float)         result (1, 0) denotes (>, ?<=) [2], use for C >
// in: r0, r1 = operands   out: r0 = 1 when r0 > r1, 0 otherwise (including unordered)
float_wrapper_section __aeabi_fcmpgt
wrapper_func __aeabi_fcmpgt
    push {lr}
    // because of NaNs it is better to reverse the args than the result
    bl __aeabi_cfrcmple         // compares y with x: C comes back clear only for y < x
    sbcs r0, r0                 // r0 = -(1 - C): -1 for "greater than", 0 otherwise
    rsbs r0, #0                 // negate so that "true" is the documented value 1 rather than -1
    pop {pc}
323
// int FUNC_NAME(__aeabi_fcmpun)(float, float)         result (1, 0) denotes (?, <=>) [2], use for C99 isunordered()
// in: r0, r1 = operands   out: r0 = 1 when either operand is a NaN, 0 otherwise
// clobbers: r2, r3, flags
float_wrapper_section __aeabi_fcmpun
wrapper_func __aeabi_fcmpun
   movs r3, #1
   lsls r3, #24             // r3 = 0x01000000, one unit of the exponent field after the shift below
   lsls r2, r0, #1          // drop the sign, exponent now sits in bits 31..24
   adds r2, r3              // carries out only when the exponent is all ones; the sum is then mantissa << 1
   bhi 1f                   // carry with a non-zero mantissa: first operand is a NaN
   lsls r2, r1, #1
   adds r2, r3
   bhi 1f                   // second operand is a NaN
   movs r0, #0
   bx lr
1:
   movs r0, #1
   bx lr
340
341
// float FUNC_NAME(__aeabi_ui2f)(unsigned)             unsigned to float (single precision) conversion
// in: r0 = value   out: r0 = float bit pattern
// Zero is answered here; any other value joins the signed conversion at
// __aeabi_i2f_main with r0 = magnitude and r1 = 0 as the sign word.
float_wrapper_section __aeabi_ui2f
wrapper_func __aeabi_ui2f
        subs r1, r1             // r1 = 0: the result is never negative
        cmp r0, #0
        bne __aeabi_i2f_main
        mov r0, r1              // input was zero: return +0.0
        bx lr
350
float_wrapper_section __aeabi_i2f
// float FUNC_NAME(__aeabi_i2f)(int)                     integer to float (single precision) conversion
// in: r0 = value   out: r0 = float bit pattern, rounded to nearest with ties to even
// clobbers: r1, r2, r3, ip, flags, plus whatever the routine behind sf_clz_func clobbers
wrapper_func __aeabi_i2f
        lsrs r1, r0, #31
        lsls r1, #31            // r1 = sign bit only; N reflects it
        bpl 1f
        rsbs r0, #0             // negative input: work on the magnitude
1:
        cmp r0, #0
        beq 7f                  // zero converts to +0.0, which is already in r0
__aeabi_i2f_main:
        // entry shared with __aeabi_ui2f: r0 = non-zero magnitude, r1 = sign word

        mov ip, lr              // lr is about to be overwritten by the blx below
        push {r0, r1}
        ldr r3, =sf_clz_func
        ldr r3, [r3]
        blx r3                  // NOTE(review): presumably returns the count of leading zeros of r0 - confirm
        pop {r1, r2}            // r1 = magnitude, r2 = sign word
        lsls r1, r0             // normalise: leading 1 moves to bit 31
        subs r0, #158
        rsbs r0, #0             // r0 = 158 - shift count, the biased exponent

        adds r1,#0x80  @ rounding
        bcs 5f         @ tripped carry? then have leading 1 in C as required (and result is even so can ignore sticky bits)

        lsls r3,r1,#24 @ check bottom 8 bits of r1
        beq 6f         @ in rounding-tie case?
        lsls r1,#1     @ remove leading 1
3:
        lsrs r1,#9     @ align mantissa
        lsls r0,#23    @ align exponent
        orrs r0,r2     @ apply sign
4:
        orrs r0,r1     @ assemble exponent and mantissa
1:
        bx ip
5:
        adds r0,#1     @ correct exponent offset
        b 3b
6:
        lsrs r1,#9     @ ensure even result
        lsls r1,#10
        b 3b
7:
        bx lr
396
397
// int FUNC_NAME(__aeabi_f2iz)(float)                     float (single precision) to integer C-style conversion [3]
// in:  r0 = float bit pattern
// out: r0 = value truncated toward zero
//        exponent field <= 126 (magnitude below 1, zeros, denormals)  -> 0
//        exponent field >= 158 (magnitude 2^31 or more, infinities, NaNs)
//                                          -> 0x7fffffff if the sign bit is clear, 0x80000000 if set
// clobbers: r1, r2, r3, flags
// The unreachable instructions that used to follow the final return (an older
// implementation built on float2int and __aeabi_f2uiz) have been removed; they could
// never execute and only cost space and an unnecessary link dependency.
float_wrapper_section __aeabi_f2iz
wrapper_func __aeabi_f2iz
regular_func float2int_z
    lsls r1, r0, #1
    lsrs r2, r1, #24            // r2 = exponent field
    movs r3, #0x80
    lsls r3, #24                // r3 = 0x80000000
    cmp r2, #126
    ble 1f                      // magnitude below 1
    subs r2, #158
    bge 2f                      // magnitude does not fit in 31 bits: saturate
    asrs r1, r0, #31            // r1 = 0 for positive, -1 for negative
    lsls r0, #9
    lsrs r0, #1
    orrs r0, r3                 // r0 = mantissa with its implicit leading 1 at bit 31
    negs r2, r2
    lsrs r0, r2                 // shift right by 158 - exponent, discarding the fraction
    lsls r1, #1
    adds r1, #1                 // r1 = +1 or -1
    muls r0, r1                 // apply the sign
    bx lr
1:
    movs r0, #0
    bx lr
2:
    lsrs r0, #31                // 0 for positive, 1 for negative
    adds r0, r3
    subs r0, #1                 // 0x7fffffff for positive, 0x80000000 for negative
    bx lr
444
// float2int: tail-calls the sf_table entry SF_TABLE_FLOAT2INT; float2int_shim is
// recorded as the shim when PICO_FLOAT_SUPPORT_ROM_V1 is set. Clobbers r3 (and ip with ROM V1 support).
float_section float2int
regular_func float2int
    shimmable_table_tail_call SF_TABLE_FLOAT2INT float2int_shim
448
// float2fix: tail-calls the sf_table entry SF_TABLE_FLOAT2FIX; float2fix_shim is
// recorded as the shim when PICO_FLOAT_SUPPORT_ROM_V1 is set. Clobbers r3 (and ip with ROM V1 support).
float_section float2fix
regular_func float2fix
    shimmable_table_tail_call SF_TABLE_FLOAT2FIX float2fix_shim
452
// float2ufix: tail-calls the sf_table entry SF_TABLE_FLOAT2UFIX (no shim). Clobbers r3.
float_section float2ufix
regular_func float2ufix
    table_tail_call SF_TABLE_FLOAT2UFIX
456
// unsigned FUNC_NAME(__aeabi_f2uiz)(float)             float (single precision) to unsigned C-style conversion [3]
// in: r0 = float   out: r0 = result of the SF_TABLE_FLOAT2UINT routine, reached by tail call.
// Clobbers r3.
float_wrapper_section __aeabi_f2uiz
wrapper_func __aeabi_f2uiz
    table_tail_call SF_TABLE_FLOAT2UINT
461
// fix2float: tail-calls the sf_table entry SF_TABLE_FIX2FLOAT (no shim). Clobbers r3.
float_section fix2float
regular_func fix2float
    table_tail_call SF_TABLE_FIX2FLOAT
465
// ufix2float: tail-calls the sf_table entry SF_TABLE_UFIX2FLOAT (no shim). Clobbers r3.
float_section ufix2float
regular_func ufix2float
    table_tail_call SF_TABLE_UFIX2FLOAT
469
// fix642float: tail-calls the sf_table entry SF_TABLE_FIX642FLOAT; fix642float_shim is
// recorded as the shim when PICO_FLOAT_SUPPORT_ROM_V1 is set. Clobbers r3 (and ip with ROM V1 support).
float_section fix642float
regular_func fix642float
    shimmable_table_tail_call SF_TABLE_FIX642FLOAT fix642float_shim
473
// ufix642float: tail-calls the sf_table entry SF_TABLE_UFIX642FLOAT; ufix642float_shim is
// recorded as the shim when PICO_FLOAT_SUPPORT_ROM_V1 is set. Clobbers r3 (and ip with ROM V1 support).
float_section ufix642float
regular_func ufix642float
    shimmable_table_tail_call SF_TABLE_UFIX642FLOAT ufix642float_shim
477
// float FUNC_NAME(__aeabi_l2f)(long long)             long long to float (single precision) conversion
// in: r1:r0 = value (r0 = low word)   out: r0 = float bit pattern
// Values whose high word is just the sign extension of the low word are handed to
// __aeabi_i2f; everything else goes to the SF_TABLE_INT642FLOAT routine.
float_wrapper_section __aeabi_l2f
1:
    // 32-bit shortcut, placed ahead of the entry point and reached by the beq below
    ldr r2, =__aeabi_i2f
    bx r2
wrapper_func __aeabi_l2f
    asrs r2, r0, #31            // what the high word would be if the value fits in 32 bits
    cmp r1, r2
    beq 1b
    shimmable_table_tail_call SF_TABLE_INT642FLOAT int642float_shim
488
// float FUNC_NAME(__aeabi_ul2f)(unsigned long long)   unsigned long long to float (single precision) conversion
// in: r1:r0 = value (r0 = low word)   out: r0 = float bit pattern
// Values with a zero high word are handed to __aeabi_ui2f; everything else goes to
// the SF_TABLE_UINT642FLOAT routine.
float_wrapper_section __aeabi_ul2f
1:
    // 32-bit shortcut, placed ahead of the entry point and reached by the beq below
    ldr r2, =__aeabi_ui2f
    bx r2
wrapper_func __aeabi_ul2f
    cmp r1, #0
    beq 1b
    shimmable_table_tail_call SF_TABLE_UINT642FLOAT uint642float_shim
498
// long long FUNC_NAME(__aeabi_f2lz)(float)             float (single precision) to long long C-style conversion [3]
// in:  r0 = float bit pattern
// out: r1:r0 = converted value (r0 = low word)
// Inputs with a clear sign bit are tail-called into float2int64. Inputs with the sign
// bit set are converted as a magnitude through float2ufix64 and then negated, with
// 0x8000000000000000 returned when that magnitude has bit 63 set.
float_wrapper_section __aeabi_f2lz
wrapper_func __aeabi_f2lz
regular_func float2int64_z
    cmn r0, r0                  // r0 + r0: the carry out is the sign bit
    bcc float2int64             // sign bit clear
    push {lr}
    lsls r0, #1
    lsrs r0, #1                 // clear the sign bit
    movs r1, #0                 // NOTE(review): presumably the fraction-bit count expected by float2ufix64 - confirm
    bl float2ufix64
    cmp r1, #0
    bmi 1f                      // magnitude too large to negate: saturate
    // 64-bit negate: r1:r0 = 0 - r1:r0
    movs r2, #0
    rsbs r0, #0
    sbcs r2, r1
    mov r1, r2
    pop {pc}
1:
    movs r1, #128
    lsls r1, #24                // high word = 0x80000000
    movs r0, #0
    pop {pc}
522
// float2int64: tail-calls the sf_table entry SF_TABLE_FLOAT2INT64; float2int64_shim is
// recorded as the shim when PICO_FLOAT_SUPPORT_ROM_V1 is set. Clobbers r3 (and ip with ROM V1 support).
float_section float2int64
regular_func float2int64
    shimmable_table_tail_call SF_TABLE_FLOAT2INT64 float2int64_shim
526
// float2fix64: tail-calls the sf_table entry SF_TABLE_FLOAT2FIX64; float2fix64_shim is
// recorded as the shim when PICO_FLOAT_SUPPORT_ROM_V1 is set. Clobbers r3 (and ip with ROM V1 support).
float_section float2fix64
regular_func float2fix64
    shimmable_table_tail_call SF_TABLE_FLOAT2FIX64 float2fix64_shim
530
// unsigned long long FUNC_NAME(__aeabi_f2ulz)(float)     float to unsigned long long C-style conversion [3]
// in: r0 = float   out: result of the SF_TABLE_FLOAT2UINT64 routine, reached by tail call;
// float2uint64_shim is recorded as the shim when PICO_FLOAT_SUPPORT_ROM_V1 is set.
float_wrapper_section __aeabi_f2ulz
wrapper_func __aeabi_f2ulz
    shimmable_table_tail_call SF_TABLE_FLOAT2UINT64 float2uint64_shim
535
// float2ufix64: tail-calls the sf_table entry SF_TABLE_FLOAT2UFIX64; float2ufix64_shim is
// recorded as the shim when PICO_FLOAT_SUPPORT_ROM_V1 is set. Clobbers r3 (and ip with ROM V1 support).
float_section float2ufix64
regular_func float2ufix64
    shimmable_table_tail_call SF_TABLE_FLOAT2UFIX64 float2ufix64_shim
539
// double FUNC_NAME(__aeabi_f2d)(float): widen a float to a double.
// in: r0 = float bit pattern   out: r1:r0 = double bit pattern (r1 = high word)
// Ordinary inputs are tail-called into the SF_TABLE_FLOAT2DOUBLE routine. With
// PICO_FLOAT_PROPAGATE_NANS a NaN input is converted by the stub at local label 1,
// which sits ahead of the entry point.
float_wrapper_section __aeabi_f2d
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // copy sign bit and 25 NAN id bits into sign bit and significant ID bits, also setting the high id bit
    asrs r1, r0, #3             // high word: sign kept, exponent and upper mantissa moved down 3 bits
    movs r2, #0xf
    lsls r2, #27
    orrs r1, r2                 // force bits 30..27 so the 11-bit exponent field is all ones
    // NOTE(review): the low three float mantissa bits would line up with the double
    // mantissa after a shift by 29; the shift by 25 passes on a different slice of the
    // input. The result is a NaN either way - confirm whether the payload matters.
    lsls r0, #25
    bx lr
#endif
wrapper_func __aeabi_f2d
#if PICO_FLOAT_PROPAGATE_NANS
    movs r3, #1
    lsls r3, #24                // r3 = 0x01000000
    lsls r2, r0, #1             // drop the sign
    adds r2, r3                 // carries out only when the exponent is all ones
    bhi 1b                      // carry with a non-zero mantissa: NaN
#endif
    shimmable_table_tail_call SF_TABLE_FLOAT2DOUBLE float2double_shim
560
// float sqrtf(float)
// in: r0 = argument   out: r0 = result from the SF_TABLE_FSQRT routine
// With PICO_FLOAT_PROPAGATE_NANS a NaN argument is returned by the entry macro.
// With PICO_FLOAT_SUPPORT_ROM_V1 negative arguments are answered here without calling
// the table routine: -0 and negative denormals give -0, every other negative input
// gives -Inf.
// The section is named after the function; it was previously misspelled "srqtf".
float_wrapper_section sqrtf
wrapper_func_f1 sqrtf
#if PICO_FLOAT_SUPPORT_ROM_V1
    // check for negative
    asrs r1, r0, #23            // r1 = sign and exponent, sign-extended
    bmi 1f
#endif
    table_tail_call SF_TABLE_FSQRT
#if PICO_FLOAT_SUPPORT_ROM_V1
1:
    mvns r0, r1                 // for a negative input this is the inverted exponent field
    cmp r0, #255                // 255 means the exponent field was zero
    bne 2f
    // -0 or -Denormal return -0 (0x80000000)
    lsls r0, #31
    bx lr
2:
    // return -Inf (0xff800000)
    asrs r0, r1, #31
    lsls r0, #23
    bx lr
#endif
583
// float cosf(float)
// in: r0 = angle   out: r0 = result from the SF_TABLE_FCOS routine
// Arguments with an exponent field of 134 or more (magnitude 128 and up) are first
// reduced with remainderf(x, 2*pi). With PICO_FLOAT_PROPAGATE_NANS an argument with an
// all-ones exponent (infinity or NaN) is returned with bit 22 set, i.e. as a NaN.
float_wrapper_section cosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func cosf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24                // r1 = exponent field
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FCOS
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}                   // the call below overwrites lr
    bl remainderf
    pop {r1}
    mov lr, r1                  // restore the return address, then take the normal path
    b 2b
612
// float sinf(float)
// in: r0 = angle   out: r0 = result from the SF_TABLE_FSIN routine
// Arguments with an exponent field of 134 or more (magnitude 128 and up) are first
// reduced with remainderf(x, 2*pi). With PICO_FLOAT_PROPAGATE_NANS an argument with an
// all-ones exponent (infinity or NaN) is returned with bit 22 set, i.e. as a NaN.
float_wrapper_section sinf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sinf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24                // r1 = exponent field
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FSIN
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}                   // the call below overwrites lr
    bl remainderf
    pop {r1}
    mov lr, r1                  // restore the return address, then take the normal path
    b 2b
641
// void sincosf(float x, float *sinx, float *cosx)
// in:  r0 = angle, r1 and r2 = the two output pointers
// out: the SF_TABLE_FSIN routine is called once; its r0 is stored through the pointer
//      passed in r1 and its r1 through the pointer passed in r2
// Arguments with an exponent field of 134 or more are first reduced with
// remainderf(x, 2*pi). With PICO_FLOAT_PROPAGATE_NANS an argument with an all-ones
// exponent (infinity or NaN) is stored to both outputs with bit 22 set, i.e. as a NaN.
float_wrapper_section sincosf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func sincosf
    push {r1, r2, lr}           // both pointers must survive the calls below
    // rom version only works for -128 < angle < 128
    lsls r3, r0, #1
    lsrs r3, #24                // r3 = exponent field
    cmp r3, #127 + 7
    bge 3f
2:
    ldr r3, =sf_table
    ldr r3, [r3, #SF_TABLE_FSIN]
    blx r3
    pop {r2, r3}                // r2 = first pointer, r3 = second pointer
    str r0, [r2]
    str r1, [r3]
    pop {pc}
#if PICO_FLOAT_PROPAGATE_NANS
// NOTE(review): nothing branches to the instruction after this .align, so it looks
// unreachable - presumably padding kept for layout reasons; confirm before removing.
.align 2
    pop {pc}
#endif
3:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r3, #255
    bne 4f
    // infinite to nan
    movs r3, #1
    lsls r3, #22
    orrs r0, r3
    str r0, [r1]                // r1 and r2 still hold the caller's pointers here
    str r0, [r2]
    add sp, #12                 // discard the three words pushed on entry; lr is untouched
    bx lr
4:
#endif
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}
    bl remainderf
    pop {r1}
    mov lr, r1
    b 2b
684
// float tanf(float)
// in: r0 = angle   out: r0 = result from the SF_TABLE_FTAN routine
// Arguments with an exponent field of 134 or more (magnitude 128 and up) are first
// reduced with remainderf(x, 2*pi). With PICO_FLOAT_PROPAGATE_NANS an argument with an
// all-ones exponent (infinity or NaN) is returned with bit 22 set, i.e. as a NaN.
float_wrapper_section tanf
// note we don't use _f1 since we do an infinity/nan check for outside of range
wrapper_func tanf
    // rom version only works for -128 < angle < 128
    lsls r1, r0, #1
    lsrs r1, #24                // r1 = exponent field
    cmp r1, #127 + 7
    bge 1f
2:
    table_tail_call SF_TABLE_FTAN
1:
#if PICO_FLOAT_PROPAGATE_NANS
    // also check for infinites
    cmp r1, #255
    bne 3f
    // infinite to nan
    movs r1, #1
    lsls r1, #22
    orrs r0, r1
    bx lr
3:
#endif
    ldr r1, =0x40c90fdb // 2 * M_PI
    push {lr}                   // the call below overwrites lr
    bl remainderf
    pop {r1}
    mov lr, r1                  // restore the return address, then take the normal path
    b 2b
713
// float atan2f(float, float)
// in: r0, r1 = arguments   out: r0 = result from the SF_TABLE_FATAN2 routine, reached by tail call.
// With PICO_FLOAT_PROPAGATE_NANS a NaN argument is returned by the entry macro.
// fatan2_shim is recorded as the shim when PICO_FLOAT_SUPPORT_ROM_V1 is set.
float_wrapper_section atan2f
wrapper_func_f2 atan2f
    shimmable_table_tail_call SF_TABLE_FATAN2 fatan2_shim
717
// float expf(float)
// in: r0 = argument   out: r0 = result from the SF_TABLE_FEXP routine, reached by tail call.
// With PICO_FLOAT_PROPAGATE_NANS a NaN argument is returned by the entry macro.
float_wrapper_section expf
wrapper_func_f1 expf
    table_tail_call SF_TABLE_FEXP
721
// float logf(float)
// in: r0 = argument   out: r0 = result from the SF_TABLE_FLN routine, reached by tail call.
// With PICO_FLOAT_PROPAGATE_NANS a NaN argument is returned by the entry macro.
float_wrapper_section logf
wrapper_func_f1 logf
    table_tail_call SF_TABLE_FLN
725