1.file "cbrt.s" 2 3 4// Copyright (c) 2000 - 2003, Intel Corporation 5// All rights reserved. 6// 7// 8// Redistribution and use in source and binary forms, with or without 9// modification, are permitted provided that the following conditions are 10// met: 11// 12// * Redistributions of source code must retain the above copyright 13// notice, this list of conditions and the following disclaimer. 14// 15// * Redistributions in binary form must reproduce the above copyright 16// notice, this list of conditions and the following disclaimer in the 17// documentation and/or other materials provided with the distribution. 18// 19// * The name of Intel Corporation may not be used to endorse or promote 20// products derived from this software without specific prior written 21// permission. 22 23// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 24// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 25// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 26// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL INTEL OR ITS 27// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 28// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, 29// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR 30// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY 31// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY OR TORT (INCLUDING 32// NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 33// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 34// 35// Intel Corporation is the author of this code, and requests that all 36// problem reports or change requests be submitted to it directly at 37// http://www.intel.com/software/products/opensource/libraries/num.htm. 
//
// History
//==============================================================
// 02/02/00 Initial version
// 05/19/00 New version (modified algorithm)
// 05/20/02 Cleaned up namespace and sf0 syntax
// 01/28/03 Updated polynomial coefficients (changed to Remez coefficients),
//          to slightly improve accuracy
//
// API
//==============================================================
// double cbrt(double)
//
// Overview of operation
//==============================================================
// Background
//
// Implementation
//
// Let y= frcpa(a), where a is the argument
//
// cbrt(a)= cbrt(a*y)/cbrt(y) = cbrt(1 - (1-a*y)) * (1/cbrt(y))
//
// For all values of y, the 3 possible significands of 1/cbrt(y)
// are stored in a table (T0) to 64 bits of accuracy. (There are
// 3 possible significands because the exponent of y modulo 3
// can be 0, 1, or 2.)
//
//
// * cbrt(1 - (1-a*y)) is approximated by a degree-5 polynomial ~
//
// ~ 1 - (1/3)*r - (1/9)*r^2 - (5/81)*r^3 - (10/243)*r^4 - (22/729)*r^5
//
// in r = 1-a*y.
//
//
// The table values are stored for the three possible values of the
// exponent modulo 3 and are then multiplied by 2^(e/3) (e/3 rounded
// down), where e is the exponent of the input number.
// This computation is carried out in parallel with the polynomial
// evaluation:
//
// T= 2^(e/3) * T0
//
//===============
// input= x
// C= frcpa(x)
// r= 1 - C * x
//
// Special values
//==============================================================
// For x = +/-0, +/-Infinity, NaN or NaTVal the routine returns early
// with x*1+0 rounded to double (no table lookup, no polynomial).
//
// Registers used
//==============================================================
// f6-f15 (f8 = argument and result), f32-f37
// r2, r3, r8-r11, r15-r31
// p7, p12, p13, p14

// Floating-point register aliases
       FR_R         = f6     // r = 1 - a*y
       FR_COEFF1    = f7     // C_1
       FR_COEFF2    = f9     // C_2
       FR_COEFF3    = f10    // C_3
       FR_COEFF4    = f11    // C_4
       FR_COEFF5    = f12    // C_5
       FR_R2        = f13    // r^2
       FR_ARG       = f14    // normalized copy of the argument
       FR_P23       = f15    // c2+c3*r
       FR_P25       = f32    // (c2+c3*r)+r^2*(c4+c5*r)
       FR_P15       = f33    // full polynomial P
       FR_P1        = f34    // c1*r
       FR_P45       = f35    // c4+c5*r
       FR_2EXP      = f36    // sign*2^(exponent/3), pre-scaled by 2^(-63)
       FR_TMP63     = f37    // 2^63

// General register aliases.  Several aliases deliberately share one
// physical register; their live ranges do not overlap.
       GR_GP        = r2
       GR_ADDR      = r2
       GR_CONST1    = r3
       GR_I1        = r8
       GR_EXP       = r9
       GR_ADDR2     = r10
       GR_IT1       = r11
       GR_TMP2      = r11
       GR_EXPON     = r15
       GR_TMP1      = r16
       GR_ITB1      = r17
       GR_TMP3      = r18
       GR_TMP4      = r19
       GR_TMP63     = r19
       GR_TMP5      = r20
       GR_EXP_BY_3  = r20
       GR_CONST4    = r21
       // GR_TMP6 used to be assigned twice (r16, then r22).  Only this
       // later assignment was in effect where the alias is used, so the
       // shadowed r16 assignment has been removed.
       GR_TMP6      = r22
       GR_INDEX     = r23
       GR_EBIAS     = r24
       GR_SIGNIF    = r25
       GR_SIGNIF2   = r25
       GR_TEST      = r25
       GR_ARGEXP    = r26
       GR_CONST2    = r27
       GR_SIGN      = r28
       GR_REM       = r29
       GR_CONST3    = r30
       GR_SEXP      = r31


// Data tables
//==============================================================

RODATA

.align 16

// Polynomial coefficients C_1..C_5 of P(r) = C_1*r + ... + C_5*r^5.
// Layout (48 bytes, immediately followed by T_table):
//   +0   C_1 as a 16-byte extended-precision value (loaded with ldfe)
//   +16  C_2, C_3 as doubles                       (loaded with ldfpd)
//   +32  C_4, C_5 as doubles                       (loaded with ldfpd)
// All five coefficients are negative (sign bit set in each encoding).
LOCAL_OBJECT_START(poly_coeffs)

       data8 0xaaaaaaaaaaaaaab4, 0x0000bffd // C_1 ~ -1/3
       data8 0xbfbc71c71c718e45, 0xbfaf9add3c0bbb43 // C_2 ~ -1/9, C_3 ~ -5/81
       data8 0xbfa511edb93dc98d, 0xbf9ee71c45f0dfbc // C_4 ~ -10/243, C_5 ~ -22/729
LOCAL_OBJECT_END(poly_coeffs)


// For every entry B in the frcpa table, this table contains
// the significands of cbrt(1/B), cbrt(2/B), cbrt(4/B).
// The index to this table is the same as the frcpa index.
170 171LOCAL_OBJECT_START(T_table) 172 173 174 data8 0x80155c748c374836, 0xa160019ed37fb4ae 175 data8 0xcb51ddcb9e93095e, 0x8040404b0879f7f9 176 data8 0xa1960b5966da4608, 0xcb95f333968ad59b 177 data8 0x806b5dce4b405c10, 0xa1cc5dbe6dc2aab4 178 data8 0xcbda64292d3ffd97, 0x8096b586974669b1 179 data8 0xa202f97995b69c0d, 0xcc1f3184af961596 180 data8 0x80bcd273d952a028, 0xa232fe6eb0c0577d 181 data8 0xcc5bb1ac954d33e2, 0x80e898c52813f2f3 182 data8 0xa26a2582012f6e17, 0xcca12e9831fc6402 183 data8 0x81149add67c2d208, 0xa2a197e5d10465cb 184 data8 0xcce70a67b64f24ad, 0x813b4e2c856b6e9a 185 data8 0xa2d25a532efefbc8, 0xcd24794726477ea5 186 data8 0x8167c1dde03de7aa, 0xa30a5bd6e49e4ab8 187 data8 0xcd6b096a0b70ee87, 0x818ed973b811135e 188 data8 0xa33b9c9b59879e24, 0xcda9177738b15a90 189 data8 0x81bbc0c33e13ec98, 0xa3742fca6a3c1f21 190 data8 0xcdf05f2247dffab9, 0x81e33e69fbe7504a 191 data8 0xa3a5f1273887bf22, 0xce2f0f347f96f906 192 data8 0x820aec524e3c23e9, 0xa3d7ef508ff11574 193 data8 0xce6e0be0cd551a61, 0x823880f78e70b805 194 data8 0xa4115ce30548bc15, 0xceb666b2c347d1de 195 data8 0x826097a62a8e5200, 0xa443df0e53df577a 196 data8 0xcef609b0cb874f00, 0x8288dfe00e9b5eaf 197 data8 0xa4769fa5913c0ec3, 0xcf35fb5447e5c765 198 data8 0x82b15a10c5371624, 0xa4a99f303bc7def5 199 data8 0xcf763c47ee869f00, 0x82da06a527b18937 200 data8 0xa4dcde37779adf4b, 0xcfb6cd3888d71785 201 data8 0x8302e60b635ab394, 0xa5105d46152c938a 202 data8 0xcff7aed4fbfbb447, 0x832bf8b2feec2f0e 203 data8 0xa5441ce89825cb8d, 0xd038e1ce5167e3c6 204 data8 0x83553f0ce00e276b, 0xa5781dad3e54d899 205 data8 0xd07a66d7bfa0ebba, 0x837eb98b50f8322a 206 data8 0xa5ac602406c4e68c, 0xd0bc3ea6b32d1b21 207 data8 0x83a270f44c84f699, 0xa5d9601d95c2c0bc 208 data8 0xd0f4f0e8f36c1bf8, 0x83cc4d7cfcfac5ca 209 data8 0xa60e1e1a2de14745, 0xd1376458e34b037e 210 data8 0x83f65f78a8872b4c, 0xa6431f6e3fbd9658 211 data8 0xd17a2ca133f78572, 0x8420a75f2f7b53c8 212 data8 0xa67864b0d432fda4, 0xd1bd4a80301c5715 213 data8 0x844510461ff14209, 
0xa6a6444aa0243c0b 214 data8 0xd1f71682b2fa4575, 0x846fbd91b930bed2 215 data8 0xa6dc094d10f25792, 0xd23ad555f773f059 216 data8 0x84947e18234f3294, 0xa70a574cc02bba69 217 data8 0xd2752c7039a5bf73, 0x84bf92755825045a 218 data8 0xa7409e2af9549084, 0xd2b98ee008c06b59 219 data8 0x84e4ac0ee112ba51, 0xa76f5c64ca2cf13b 220 data8 0xd2f4735ffd700280, 0x8509ef44b86f20be 221 data8 0xa79e4f0babab5dc0, 0xd32f99ed6d9ac0e1 222 data8 0x85359d5d91768427, 0xa7d5579ae5164b85 223 data8 0xd374f0666c75d51c, 0x855b3bd5b7384357 224 data8 0xa804bd3c6fe61cc8, 0xd3b0a7d13618e4a1 225 data8 0x858104f0c415f79a, 0xa8345895e5250a5a 226 data8 0xd3eca2ea53bcec0c, 0x85a6f90390d29864 227 data8 0xa8642a122b44ef0b, 0xd428e23874f13a17 228 data8 0x85d3772fcd56a1dd, 0xa89c38ca18f6108b 229 data8 0xd46f82fe293bc6d3, 0x85f9c982fcc002f3 230 data8 0xa8cc81063b6e87ca, 0xd4ac57e9b7186420 231 data8 0x862047e0e7ea554b, 0xa8fd00bfa409285e 232 data8 0xd4e972becb04e8b8, 0x8646f2a26f7f5852 233 data8 0xa92db8664d5516da, 0xd526d40a7a9b43a3 234 data8 0x866dca21754096b5, 0xa95ea86b75cc2c20 235 data8 0xd5647c5b73917370, 0x8694ceb8dfd17a37 236 data8 0xa98fd141a4992deb, 0xd5a26c4201bd6d13 237 data8 0x86bc00c49e9307e8, 0xa9c1335cae7446ba 238 data8 0xd5e0a45015350a7e, 0x86dccd74fce79610 239 data8 0xa9ea8686f556f645, 0xd614b539c6194104 240 data8 0x870453c845acf90f, 0xaa1c52d17906bb19 241 data8 0xd6537310e224283f, 0x872c089a1e90342c 242 data8 0xaa4e59b046dab887, 0xd6927ab62244c917 243 data8 0x8753ec4a92d16c5e, 0xaa809b9c60d1890b 244 data8 0xd6d1ccc1fc4ef4b7, 0x877bff3aca19f6b4 245 data8 0xaab319102f3f9b33, 0xd71169cea98fdded 246 data8 0x879d88b6fe1c324c, 0xaadd5a18c1e21274 247 data8 0xd746a66a5bc9f6d9, 0x87c5f346dbf98c3a 248 data8 0xab1045f2ac31bdf5, 0xd786ce8f0fae5317 249 data8 0x87e7c653efacef2c, 0xab3ae3ab2df7231e 250 data8 0xd7bc7ff214c4e75a, 0x881089d4e73ffefc 251 data8 0xab6e3f945d1e96fc, 0xd7fd35467a517ed1 252 data8 0x88397e6a366f2a8a, 0xaba1d953a08fa94e 253 data8 0xd83e38838648d815, 0x885bc559e5e1c081 254 data8 
0xabcd090db7ef4c3f, 0xd874a1db598b8951 255 data8 0x887e2ee392bb7a93, 0xabf864602d7c323d 256 data8 0xd8ab42205b80edaf, 0x88a7a8587e404257 257 data8 0xac2ca5886ccf9b57, 0xd8ed1849d202f965 258 data8 0x88ca5eda67594784, 0xac5861d4aa441f0f 259 data8 0xd92432bd5a173685, 0x88f4356166bd590e 260 data8 0xac8d183fe3a2fbed, 0xd9669ca45b03c23e 261 data8 0x89173a0acf5ce026, 0xacb93703ff51571e 262 data8 0xd99e3327cf89574e, 0x893a62a098b6a57b 263 data8 0xace5830ad0c3f14b, 0xd9d602b19b100466 264 data8 0x895daf637236ae2c, 0xad11fca5d78b3ff2 265 data8 0xda0e0ba86c096841, 0x89883b9d1c2fa9c5 266 data8 0xad4797fddf91a798, 0xda5195fcdb1c3dce 267 data8 0x89abd8dd374a5d7b, 0xad747701e559ebcb 268 data8 0xda8a1eb87a491f6c, 0x89cf9b1dcd197fa0 269 data8 0xada184a47e9c7613, 0xdac2e230b91c3f84 270 data8 0x89f382a258ea79de, 0xadcec13ab0dda8ff 271 data8 0xdafbe0d0b66aea30, 0x8a178faf06648f29 272 data8 0xadfc2d1a5fd21ba8, 0xdb351b04a8fafced 273 data8 0x8a3bc288b3e1d18a, 0xae29c89a5053c33a 274 data8 0xdb6e9139e33cdd8e, 0x8a601b74f4d1f835 275 data8 0xae5794122b638df9, 0xdba843ded7151ea1 276 data8 0x8a849aba14274764, 0xae858fda8137ae0a 277 data8 0xdbe2336319b61fc8, 0x8aa9409f16cdbc9b 278 data8 0xaeb3bc4ccc56d3d1, 0xdc1c60376789fa68 279 data8 0x8ace0d6bbe2cb316, 0xaee219c374c09920 280 data8 0xdc56cacda82d0cd5, 0x8af301688ab33558 281 data8 0xaf10a899d3235fe7, 0xdc917398f2797814 282 data8 0x8b181cdebe6f3206, 0xaf3f692c341fe8b4 283 data8 0xdccc5b0d90a3e628, 0x8b3d60185fafcb7c 284 data8 0xaf6e5bd7db9ae6c2, 0xdd0781a10469f0f2 285 data8 0x8b62cb603bb2fad0, 0xaf9d80fb081cd91b 286 data8 0xdd42e7ca0b52838f, 0x8b80d7d6bc4104de 287 data8 0xafc35ce063eb3787, 0xdd729ad01c69114d 288 data8 0x8ba68bf73ac74f39, 0xaff2ddcb5f28f03d 289 data8 0xddae749c001fbf5e, 0x8bcc68fb9f9f7335 290 data8 0xb022923b148e05c5, 0xddea8f50a51c69b1 291 data8 0x8bf26f31c534fca2, 0xb0527a919adbf58b 292 data8 0xde26eb69a0f0f111, 0x8c10f86e13a1a1f9 293 data8 0xb078f3ab1d701c65, 0xde576480262399bc 294 data8 0x8c3749916cc6abb5, 0xb0a93a6870649f31 
295 data8 0xde943789645933c8, 0x8c5dc4c4f7706032 296 data8 0xb0d9b624d62ec856, 0xded14d58139a28af 297 data8 0x8c7cac3a8c42e3e0, 0xb100a5f53fb3c8e1 298 data8 0xdf025c00bbf2b5c7, 0x8ca373f1b7bf2716 299 data8 0xb131821882f5540a, 0xdf3feb44d723a713 300 data8 0x8cc29907fb951294, 0xb158bf8e4cb04055 301 data8 0xdf715bc16c159be0, 0x8ce9ae4e9492aac8 302 data8 0xb189fd69d56b238f, 0xdfaf66240e29cda8 303 data8 0x8d0911dddbfdad0e, 0xb1b189958e8108e4 304 data8 0xdfe139cbf6e19bdc, 0x8d3075c4f20f04ee 305 data8 0xb1e32a8165b09832, 0xe01fc0fe94d9fc52 306 data8 0x8d5018a9d4de77d5, 0xb20b0678fc271eec 307 data8 0xe051f92ffcc0bd60, 0x8d77cc47dd143515 308 data8 0xb23d0bd3f7592b6e, 0xe090feec9c9a06ac 309 data8 0x8d97af6352739cb7, 0xb26538b2db8420dc 310 data8 0xe0c39d0c9ff862d6, 0x8db7af523167800f 311 data8 0xb28d89e339ceca14, 0xe0f668eeb99f188d 312 data8 0x8ddfd80bc68c32ff, 0xb2c022ca12e55a16 313 data8 0xe1362890eb663139, 0x8e00197e1e7c88fe 314 data8 0xb2e8c6852c6b03f1, 0xe1695c7212aecbaa 315 data8 0x8e207859f77e20e7, 0xb3118f4eda9fe40f 316 data8 0xe19cbf0391bbbbe9, 0x8e40f4ce60c9f8e2 317 data8 0xb33a7d6268109ebe, 0xe1d050901c531e85 318 data8 0x8e69ba46cf2fde4d, 0xb36ddbc5ea70ec55 319 data8 0xe2110903b4f4047a, 0x8e8a7a00bd7ae63e 320 data8 0xb3971e9b39264023, 0xe2450559b4d80b6d 321 data8 0x8eab57ef1cf2f529, 0xb3c0877ecc18e24a 322 data8 0xe27931a231554ef3, 0x8ecc5442cffb1dad 323 data8 0xb3ea16ae3a6c905f, 0xe2ad8e2ac3c5b04b 324 data8 0x8eed6f2d2a4acbfe, 0xb413cc67aa0e4d2d 325 data8 0xe2e21b41b9694cce, 0x8f0ea8dff24441ff 326 data8 0xb43da8e9d163e1af, 0xe316d93615862714 327 data8 0x8f385c95d696b817, 0xb47233773b84d425 328 data8 0xe3590bd86a0d30f9, 0x8f59dc43edd930f3 329 data8 0xb49c6825430fe730, 0xe38e38e38e38e38e 330 data8 0x8f7b7b5f5ffad1c4, 0xb4c6c46bcdb27dcf 331 data8 0xe3c397d1e6db7839, 0x8f9d3a1bea165f38 332 data8 0xb4f1488c0b35d26f, 0xe3f928f5953feb9e 333 data8 0x8fbf18adc34b66da, 0xb51bf4c7c51f0168 334 data8 0xe42eeca17c62886c, 0x8fe117499e356095 335 data8 0xb546c9616087ab9c, 
0xe464e32943446305 336 data8 0x90033624aa685f8d, 0xb571c69bdffd9a70 337 data8 0xe49b0ce15747a8a2, 0x9025757495f36b86 338 data8 0xb59cecbae56984c3, 0xe4d16a1eee94e9d4 339 data8 0x903f3a5dcc091203, 0xb5bd64512bb14bb7 340 data8 0xe4fa52107353f67d, 0x9061b2fceb2bdbab 341 data8 0xb5e8d2a4bf5ba416, 0xe5310a471f4d2dc3 342 data8 0x90844ca7211032a7, 0xb6146a9a1bc47819 343 data8 0xe567f6f1c2b9c224, 0x90a7079403e6a15d 344 data8 0xb6402c7749d621c0, 0xe59f18689a9e4c9a 345 data8 0x90c9e3fbafd63799, 0xb66c1882fb435ea2 346 data8 0xe5d66f04b8a68ecf, 0x90ece216c8a16ee4 347 data8 0xb6982f048c999a56, 0xe60dfb2005c192e9 348 data8 0x9110021e7b516f0a, 0xb6c47044075b4142 349 data8 0xe645bd1544c7ea51, 0x912a708a39be9075 350 data8 0xb6e5bd6bfd02bafd, 0xe66fb21b505b20a0 351 data8 0x914dcc7b31146370, 0xb7124a2736ff8ef2 352 data8 0xe6a7d32af4a7c59a, 0x91714af8cfe984d5 353 data8 0xb73f026a01e94177, 0xe6e02b129c6a5ae4 354 data8 0x918c00a6f3795e97, 0xb760a959f1d0a7a7 355 data8 0xe70a9136a7403039, 0x91afbc299ed0295d 356 data8 0xb78dae7e06868ab0, 0xe74349fb2d92a589 357 data8 0x91d39add3e958db0, 0xb7badff8ad9e4e02 358 data8 0xe77c3a9c86ed7d42, 0x91ee9920a8974d92 359 data8 0xb7dce25b8e17ae9f, 0xe7a713f88151518a 360 data8 0x9212b5fcac537c19, 0xb80a6226904045e2 361 data8 0xe7e067453317ed2b, 0x9236f6b256923fcf 362 data8 0xb8380f1cafd73c1c, 0xe819f37a81871bb5 363 data8 0x92523ee6f90dcfc3, 0xb85a6ea8e321b4d8 364 data8 0xe8454236bfaeca14, 0x9276bef031e6eb79 365 data8 0xb8886b684ae7d2fa, 0xe87f32f24c3fc90e 366 data8 0x929236ec237a24ad, 0xb8ab0726fa00cf5d 367 data8 0xe8aacd8688892ba6, 0x92b6f70b7efe9dc3 368 data8 0xb8d954a4d13b7cb1, 0xe8e523fd32f606f7 369 data8 0x92d29f61eec7dc2b, 0xb8fc2d4f6cd9f04a 370 data8 0xe9110b5311407927, 0x92f7a05d5b8ba92f 371 data8 0xb92acc851476b1ab, 0xe94bc8bf0c108fa3 372 data8 0x931379a403be5c16, 0xb94de2d841a184c2 373 data8 0xe977fdc439c2ca3c, 0x9338bc44de2e3f34 374 data8 0xb97cd4c36c92693c, 0xe9b3236528fc349e 375 data8 0x9354c71412c69486, 0xb9a0297f172665e3 376 data8 
0xe9dfa70b745ac1b4, 0x937a4c273907e262 377 data8 0xb9cf6f21e36c3924, 0xea1b36268d0eaa38 378 data8 0x93968919f6e7975d, 0xb9f3030951267208 379 data8 0xea480963fd394197, 0x93bc516fdd4680c9 380 data8 0xba229d6a618e7c59, 0xea84034425f27484 381 data8 0x93d8c123d9be59b2, 0xba467144459f9855 382 data8 0xeab12713138dd1cc, 0x93f546c955e60076 383 data8 0xba6a60c3c48f1a4b, 0xeade6db73a5e503b 384 data8 0x941b70a65879079f, 0xba9a76056b67ee7a 385 data8 0xeb1b0268343b121b, 0x943829f337410591 386 data8 0xbabea699563ada6e, 0xeb489b0b2bdb5f14 387 data8 0x9454f995765bc4d2, 0xbae2f350b262cc4b 388 data8 0xeb765721e85f03d0, 0x947b86b57f5842ed 389 data8 0xbb1385a23be24e57, 0xebb389645f222f62 390 data8 0x94988aeb23470f86, 0xbb3814975e17c680 391 data8 0xebe198f090607e0c, 0x94b5a5dc9695f42a 392 data8 0xbb5cc031009bf467, 0xec0fcc9321024509 393 data8 0x94d2d7a9170d8b42, 0xbb81889680024764 394 data8 0xec3e247da8b82f61, 0x94f9e87dd78bf019 395 data8 0xbbb2c0d8703ae95d, 0xec7c27d21321c9f7 396 data8 0x95175019a503d89e, 0xbbd7cd09ba3c5463 397 data8 0xecaad5278824e453, 0x9534cefa625fcb3a 398 data8 0xbbfcf68c4977718f, 0xecd9a76d097d4e77 399 data8 0x955265405c491a25, 0xbc223d88cfc88eee 400 data8 0xed089ed5dcd99446, 0x9570130c1f9bb857 401 data8 0xbc47a2284fee4ff8, 0xed37bb95add09a1c 402 data8 0x9597ca4119525184, 0xbc79ac0916ed7b8a 403 data8 0xed76c70508f904b6, 0x95b5af6fb5aa4d3c 404 data8 0xbc9f5670d1a13030, 0xeda63bb05e7f93c6 405 data8 0x95d3ac9273aafd7a, 0xbcc51f068cb95c1d 406 data8 0xedd5d661daed2dc4, 0x95f1c1cafdfd3684 407 data8 0xbceb05f4b30a9bc0, 0xee05974eef86b903 408 data8 0x960fef3b430b8d5f, 0xbd110b6604c7d306 409 data8 0xee357ead791fc670, 0x962e350575b409c5 410 data8 0xbd372f8598620f19, 0xee658cb3c134a463 411 data8 0x964c934c0dfc1708, 0xbd5d727edb6b3c7e 412 data8 0xee95c1987f080211, 0x966b0a31c9c6bc7d 413 data8 0xbd83d47d937bbc6d, 0xeec61d92d8c4314f 414 data8 0x968999d9ad8d264e, 0xbdaa55addf1ae47d 415 data8 0xeef6a0da64a014ac, 0x96a8426705198795 416 data8 0xbdd0f63c36aa73f0, 0xef274ba72a07c811 
417 data8 0x96c703fd64445ee5, 0xbdf7b6556d550a15 418 data8 0xef581e31a2c91260, 0x96e5dec0a7b4268d 419 data8 0xbe1e9626b1ffa96b, 0xef8918b2bc43aec6 420 data8 0x9704d2d4f59f79f3, 0xbe4595dd903e5371 421 data8 0xefba3b63d89d7cbf, 0x9723e05ebe91b9b0 422 data8 0xbe6cb5a7f14bc935, 0xefeb867ecffaa607 423 data8 0x97430782be323831, 0xbe93f5b41d047cf7 424 data8 0xf01cfa3df1b9c9fa, 0x97624865fc0df8bf 425 data8 0xbebb5630bae4c15f, 0xf04e96dc05b43e2d 426 data8 0x9781a32dcc640b2a, 0xbee2d74cd30a430c 427 data8 0xf0805c944d827454, 0x97a117ffd0f48e46 428 data8 0xbf0a7937cf38d981, 0xf0b24ba285c495cb 429 data8 0x97c0a701f9d263c9, 0xbf323c217be2bc8c 430 data8 0xf0e46442e76f6569, 0x97e0505a8637a036 431 data8 0xbf5a203a09342bbb, 0xf116a6b2291d7896 432 data8 0x97f57a9fb0b08c6e, 0xbf74cad1c14ebfc4 433 data8 0xf1383fa9e9b5b381, 0x9815503365914a9d 434 data8 0xbf9ce6a497a89f78, 0xf16ac84f90083b9b 435 data8 0x98354085054fd204, 0xbfc52428bec6e72f 436 data8 0xf19d7b686dcb03d7, 0x98554bbbf8a77902 437 data8 0xbfed838fddab024b, 0xf1d0593311db1757 438 data8 0x987571fffb7f94f6, 0xc016050c0420981a 439 data8 0xf20361ee8f1c711e, 0x9895b3791dd03c23 440 data8 0xc03ea8cfabddc330, 0xf23695da7de51d3f 441 data8 0x98ab43a5fc65d0c8, 0xc059d3cbd65ddbce 442 data8 0xf258d095e465cc35, 0x98cbb2d196bd713d 443 data8 0xc082b122a3c78c9d, 0xf28c4d0bfc982b34 444 data8 0x98ec3d9ec7b6f21a, 0xc0abb1499ae736c4 445 data8 0xf2bff55eb3f0ea71, 0x990ce436db5e8344 446 data8 0xc0d4d474c3aedaaf, 0xf2f3c9cf9884636e 447 data8 0x9922b8218160967a, 0xc0f054ca33eb3437 448 data8 0xf31670135ab9cc0f, 0x99438d686f75779d 449 data8 0xc119b2c67e600ed0, 0xf34a8e9f0b54cdfb 450 data8 0x99647eea131fa20b, 0xc1433453de2033ff 451 data8 0xf37ed9fa6b8add3f, 0x997a85045a47c6d0 452 data8 0xc15ef3e44e10032d, 0xf3a1cfe884ef6bb6 453 data8 0x999ba5f14f8add02, 0xc188b130431d80e6 454 data8 0xf3d66689dcc8e8d3, 0x99bce38b5465ecae 455 data8 0xc1b2929d6067730e, 0xf40b2ab069d5c96a 456 data8 0x99d31ca0887f30f9, 0xc1ce9268f31cc734 457 data8 0xf42e718b90c8bc16, 
0x99f48a669c74c09e 458 data8 0xc1f8b0877c1b0c08, 0xf463822a0a3b4b00 459 data8 0x9a16154eb445c873, 0xc222f35a87b415ba 460 data8 0xf498c1076015faf8, 0x9a2c822ec198d667 461 data8 0xc23f3467349e5c88, 0xf4bc5a19a33990b5 462 data8 0x9a4e3e080cd91b78, 0xc269b4e40e088c01 463 data8 0xf4f1e6a7d6f5425f, 0x9a70177afe52322e 464 data8 0xc2945aac24daaf6e, 0xf527a232cf6be334 465 data8 0x9a86b8fa94eebe10, 0xc2b0de05e43c1d66 466 data8 0xf54b8ecdcda90851, 0x9aa8c42866ae2958 467 data8 0xc2dbc275e1229d09, 0xf5819949c7ad87b4 468 data8 0x9abf86f9e12fc45e, 0xc2f86fca9d80eeff 469 data8 0xf5a5bac9213b48a9, 0x9ae1c462fc05f49d 470 data8 0xc323938449a2587e, 0xf5dc1501f324a812 471 data8 0x9af8a8dc936b84d0, 0xc3406b40a538ed20 472 data8 0xf6006bee86b5589e, 0x9b1b19033be35730 473 data8 0xc36bcee8211d15e0, 0xf63716b2fa067fa4 474 data8 0x9b3da7daf04c2892, 0xc397593adf2ba366 475 data8 0xf66df22fb6132b9c, 0x9b54c2e4c8a9012b 476 data8 0xc3b475b6206155d5, 0xf6929fb98225deb1 477 data8 0x9b77854e6c661200, 0xc3e0410243b97383 478 data8 0xf6c9cd13021e3fea, 0x9b8ec2e678d56d2f 479 data8 0xc3fd890709833d37, 0xf6eeb177472cedae 480 data8 0x9ba60e6a5ca133b6, 0xc41ae295f7e7fa06 481 data8 0xf713abf4cb0b3afb, 0x9bc919ea66a151a4 482 data8 0xc44709f7bb8a4dd2, 0xf74b4d5333684ef1 483 data8 0x9be0887c09ef82bb, 0xc4648fb0e0bec4c1 484 data8 0xf7707f75a72f8e94, 0x9c03c8d5fffc3503 485 data8 0xc490f9a94695ba14, 0xf7a874b97927af44 486 data8 0x9c1b5ad21a81cbb9, 0xc4aeac0173b7d390 487 data8 0xf7cddf140aedf1d8, 0x9c3ed09216e9ca02 488 data8 0xc4db5941007aa853, 0xf806291bacb7f7a9 489 data8 0x9c568656c0423def, 0xc4f938aec206291a 490 data8 0xf82bcc43b92eafef, 0x9c7a320af242ce60 491 data8 0xc52629e899dfd622, 0xf8646bf0defb759e 492 data8 0x9c920bf7a8c01dc2, 0xc54436e44043b965 493 data8 0xf88a487dfc3ff5f7, 0x9ca9f475d98b159c 494 data8 0xc562563abf9ea07f, 0xf8b03c2b46cdc17f 495 data8 0x9ccdeca60e80b5f8, 0xc58fa7d1dc42921c 496 data8 0xf8e95541c152ae7a, 0x9ce5f9d4653d4902 497 data8 0xc5adf561b91e110a, 0xf90f832c2700c160 498 data8 
0x9cfe15cb38bfdd8e, 0xc5cc5591bdbd82fa 499 data8 0xf935c88e0c7f419b, 0x9d225b983f6c1f96 500 data8 0xc5fa08f1ff20593c, 0xf96f5cd84fd86873 501 data8 0x9d3a9cca32261ed7, 0xc618980a79ce6862 502 data8 0xf995dd53ebdd9d6d, 0x9d52ecfccebe1768 503 data8 0xc6373a09e34b50fa, 0xf9bc75a034436a41 504 data8 0x9d77818d95b82f86, 0xc66550a6e0baaf35 505 data8 0xf9f686f26d5518de, 0x9d8ff7893fa4706c 506 data8 0xc6842241926342c9, 0xfa1d5b39b910a8c5 507 data8 0x9da87cbef36f2a5e, 0xc6a3070b7c93bb9e 508 data8 0xfa4447acc4ecbfd2, 0x9dcd6140b4a35aeb 509 data8 0xc6d18260bb84081b, 0xfa7ed7e51e6fdfb4 510 data8 0x9de60cd06dc6e2d4, 0xc6f0977c9416828b 511 data8 0xfaa601394d49a1a0, 0x9dfec7d4cc43b76f 512 data8 0xc70fc0117c641630, 0xfacd431644ce0e40 513 data8 0x9e17925ec9fccc4a, 0xc72efc34d7e615be 514 data8 0xfaf49d96f7a75909, 0x9e3cdf6db57dc075 515 data8 0xc75dfb441594141e, 0xfb2fd3c65e562fd5 516 data8 0x9e55d110b63637a8, 0xc77d68aa019bda4c 517 data8 0xfb576c5762024805, 0x9e6ed27594550d2e 518 data8 0xc79ce9ea478dbc4f, 0xfb7f1debc22c4040 519 data8 0x9e87e3adc385d393, 0xc7bc7f1ae453219d 520 data8 0xfba6e89f32d0190a, 0x9ead9b54b37a1055 521 data8 0xc7ec0476e15e141a, 0xfbe2c803a0894893 522 data8 0x9ec6d46a3d7de215, 0xc80bcbe16f1d540f 523 data8 0xfc0ad1ff0ed9ecf0, 0x9ee01d9108be3154 524 data8 0xc82ba78a5d349735, 0xfc32f57bdfbcbe7f 525 data8 0x9ef976db07288d04, 0xc84b978847a06b87 526 data8 0xfc5b32968f99b21c, 0x9f12e05a4759ec25 527 data8 0xc86b9bf1ee817bc6, 0xfc83896bc861ab08 528 data8 0x9f2c5a20f4da6668, 0xc88bb4de3667cdf4 529 data8 0xfcabfa1861ed4815, 0x9f52af78ed1733ca 530 data8 0xc8bc00e7fe9e23a3, 0xfce8d3cea7d3163e 531 data8 0x9f6c52426a39d003, 0xc8dc4d7ff2d25232 532 data8 0xfd118595143ee273, 0x9f860593d42fd7f3 533 data8 0xc8fcaeebcb40eb47, 0xfd3a519943d4865a 534 data8 0x9f9fc97fdb96bd51, 0xc91d25431426a663 535 data8 0xfd6337f8e1ae5a4b, 0x9fb99e194f4a7037 536 data8 0xc93db09d7fdb2949, 0xfd8c38d1c8e927eb 537 data8 0x9fd383731ca51db9, 0xc95e5112e721582a 538 data8 0xfdb5544205095a53, 0x9fed79a04fbf9423 
// Final T_table entries (three 8-byte significands per frcpa index).
       data8 0xc97f06bb49787677, 0xfdde8a67d2613531
       data8 0xa00780b413b24ee8, 0xc99fd1aecd6e1b06
       data8 0xfe07db619e781611, 0xa02eab2c4474b0cd
       data8 0xc9d12a3e27bb1625, 0xfe460768d80bf758
       data8 0xa048dcd51ccfd142, 0xc9f22ad82ba3d5f0
       data8 0xfe6f9bfb06cd32f6, 0xa0631fa894b11b8d
       data8 0xca134113105e67b2, 0xfe994bcd3d14fcc2
       data8 0xa07d73ba65e680af, 0xca346d07b045a876
       data8 0xfec316fecaf3f2ab, 0xa097d91e6aaf71b0
       data8 0xca55aecf0e94bb88, 0xfeecfdaf33fadb80
       data8 0xa0b24fe89e02602f, 0xca77068257be9bab
       data8 0xff16fffe2fa8fad6, 0xa0ccd82d1bd2f68b
       data8 0xca98743ae1c693a8, 0xff411e0ba9db886d
       data8 0xa0e77200215909e6, 0xcab9f8122c99a101
       data8 0xff6b57f7c33e4e9a, 0xa1021d760d584855
       data8 0xcadb9221e268c3b5, 0xff95ade2d1bd7358
       data8 0xa11cdaa36068a57d, 0xcafd4283d8043dfd
       data8 0xffc01fed60f86fb5, 0xa137a99cbd3f880b
       data8 0xcb1f09520d37c6fb, 0xffeaae3832b63956
LOCAL_OBJECT_END(T_table)


//--------------------------------------------------------------
// double cbrt(double)
//
// In:   f8 = argument a
// Out:  f8 = cbrt(a), rounded to double
//
// Leaf routine: no stack frame, no calls.
//
// Flow:
//   1. a = 0, +/-Inf, NaN, NaTVal  -> return a*1+0            (p12)
//   2. y = frcpa(a), r = 1 - a*y, expon = exponent(a)+(2^12-1)
//   3. q = floor(expon/3), rem = expon - 3*q
//   4. T = T_table[3*y_index + rem]
//   5. a is an exact power 2^(3k)  -> return sign*2^(e/3)     (p14)
//   6. otherwise return T' + T'*P(r), with T' = T*sign*2^(e/3)
//--------------------------------------------------------------
.section .text
GLOBAL_LIBM_ENTRY(cbrt)


{.mfi
      // get significand
      getf.sig GR_SIGNIF = f8
      // normalize a
      fma.s1 FR_ARG = f8, f1, f0
      // GR_GP = pointer to C_1,..., C_5 followed by T_table
      addl GR_GP = @ltoff(poly_coeffs), gp ;;
}

{.mfi
      // get exponent (sign in bit 17, biased exponent below it)
      getf.exp GR_ARGEXP = f8
      // will continue only for normal/denormal numbers
      // (p12 = special value, p13 = finite non-zero)
      fclass.m.unc p12, p13 = f8, 0x1e7
      // GR_CONST4 = bias-((2^{12}-1)/3)-63 = 0xffff-0x555-0x3f = 0xfa6b
      mov GR_CONST4 = 0xfa6b ;;
}

{.mlx
      // GR_CONST2 = sign bit of the getf.exp result
      mov GR_CONST2 = 0x20000
      // GR_CONST3 = 2^63 (integer bit of a normalized significand)
      movl GR_CONST3 = 0x8000000000000000 ;;
}

.pred.rel "mutex", p12, p13
{.mfi
      // load start address for C_1,..., C_5 followed by T_table
      ld8 GR_ADDR = [ GR_GP ]
      // y = frcpa(a)
      (p13) frcpa.s0 f8, p0 = f1, f8
      // p7 = 1 if denormal input (significand below 2^63)
      cmp.gtu p7, p0 = GR_CONST3, GR_SIGNIF
}
{.mfb
      nop.m 0
      // if argument is 0, +/-Infinity, NaN, or NaTVal, then return
      (p12) fma.d.s0 f8 = f8, f1, f0
      (p12) br.ret.spnt b0 ;;
}

{.mmi
      // get exponent (for denormal input)
      (p7) getf.exp GR_ARGEXP = FR_ARG
      // get normalized significand (for denormal input)
      (p7) getf.sig GR_SIGNIF = FR_ARG
      // GR_CONST1 = bias-(2^{12}-1)
      mov GR_CONST1 = 0xf000 ;;
}

{.mii
      // get GR_SIGN = sign
      and GR_SIGN = GR_ARGEXP, GR_CONST2
      // eliminate leading 1 from GR_I1 = 1st table index
      shl GR_I1 = GR_SIGNIF, 1
      // eliminate sign from exponent
      andcm GR_EXP = GR_ARGEXP, GR_CONST2 ;;
}

{.mib
      // GR_ADDR2 -> C_4, C_5
      add GR_ADDR2 = 32, GR_ADDR
      // GR_IT1 = 1st table index (y_index, 8 bits)
      shr.u GR_IT1 = GR_I1, 56
      nop.b 0
}
{.mib
      // load C_1 (GR_ADDR then points at C_2, C_3)
      ldfe FR_COEFF1 = [ GR_ADDR ], 16
      // subtract bias from GR_EXPON = exponent
      // (expon = true exponent + (2^{12}-1), always positive here)
      sub GR_EXPON = GR_EXP, GR_CONST1
      nop.b 0 ;;
}

// Division of expon by 3 is done as (0x5556*expon) >> 16,
// 0x5556 = (2^{16}+2)/3, built up from shift-and-add steps.
{.mib
      // load C_2, C_3
      ldfpd FR_COEFF2, FR_COEFF3 = [ GR_ADDR ]
      // GR_TMP1 = 5*expon
      shladd GR_TMP1 = GR_EXPON, 2, GR_EXPON
      nop.b 0
}
{.mib
      // load C_4, C_5 (GR_ADDR2 then points at T_table)
      ldfpd FR_COEFF4, FR_COEFF5 = [ GR_ADDR2 ], 16
      // GR_TMP2 = 3*y_index
      shladd GR_TMP2 = GR_IT1, 1, GR_IT1
      nop.b 0 ;;
}

{.mfi
      // GR_TMP6 = (5*expon)*16+5*expon = (0x55)*expon
      shladd GR_TMP6 = GR_TMP1, 4, GR_TMP1
      // r = 1-a*y
      fnma.s1 FR_R = f8, FR_ARG, f1
      // adjust T_table pointer by 1st index (3 entries of 8 bytes each)
      shladd GR_ITB1 = GR_TMP2, 3, GR_ADDR2 ;;
}

{.mii
      // eliminate leading 1 from significand
      add GR_SIGNIF2 = GR_SIGNIF, GR_SIGNIF
      // GR_TMP3 = (0x5500)*expon
      shl GR_TMP3 = GR_TMP6, 8 ;;
      // GR_TMP4 = (0x5555)*expon
      add GR_TMP4 = GR_TMP3, GR_TMP6 ;;
}

{.mii
      // GR_TMP5 = (0x5556)*expon // 0x5556 = (2^{16}+2)/3
      add GR_TMP5 = GR_TMP4, GR_EXPON
      nop.i 0 ;;
      // GR_EXP_BY_3 = floor(expon/3)
      shr GR_EXP_BY_3 = GR_TMP5, 16 ;;
}

{.mfi
      // GR_TMP6 = 3*floor(expon/3)
      shladd GR_TMP6 = GR_EXP_BY_3, 1, GR_EXP_BY_3
      // r*r
      fma.s1 FR_R2 = FR_R, FR_R, f0
      // bias exponent
      add GR_EBIAS = GR_CONST4, GR_EXP_BY_3 ;;
}

{.mfi
      // get remainder of exponent/3
      sub GR_REM = GR_EXPON, GR_TMP6
      // c2+c3*r
      fma.s1 FR_P23 = FR_COEFF3, FR_R, FR_COEFF2
      nop.i 0
}
{.mfi
      // add sign to exponent
      or GR_SEXP = GR_EBIAS, GR_SIGN
      // c4+c5*r
      fma.s1 FR_P45 = FR_COEFF5, FR_R, FR_COEFF4
      // biased exponent of 2^63
      mov GR_TMP63 = 63+0xffff ;;
}

{.mmi
      // FR_2EXP = sign*2^{exponent/3}
      // (carries the extra 2^(-63) folded into GR_CONST4)
      setf.exp FR_2EXP = GR_SEXP
      // adjust T_table pointer by 2nd index
      shladd GR_INDEX = GR_REM, 3, GR_ITB1
      // is the argument of the form 2^(3*k) ?
      // get (significand - leading 1) | (exponent mod 3)
      or GR_TEST = GR_REM, GR_SIGNIF2 ;;
}

{.mmi
      // 2^63
      setf.exp FR_TMP63 = GR_TMP63
      // load T (8-byte significand from T_table)
      ldf8 f8 = [ GR_INDEX ]
      // is the argument of the form 2^(3*k) ?
      cmp.eq p14, p0 = GR_TEST, r0 ;;
}

{.mfi
      nop.m 0
      // (c2+c3*r)+r^2*(c4+c5*r)
      fma.s1 FR_P25 = FR_P45, FR_R2, FR_P23
      nop.i 0
}
{.mfi
      nop.m 0
      // c1*r
      fma.s1 FR_P1 = FR_COEFF1, FR_R, f0
      nop.i 0 ;;
}

{.mfb
      nop.m 0
      // argument is 2^(3*k): result = sign*2^k, obtained by multiplying
      // FR_2EXP by 2^63 to cancel the -63 folded into GR_CONST4
      (p14) fma.d.s0 f8 = FR_2EXP, FR_TMP63, f0
      (p14) br.ret.spnt b0 ;;
}

{.mfi
      nop.m 0
      // P = c1*r+r^2* [ (c2+c3*r)+r^2*(c4+c5*r) ]
      fma.s1 FR_P15 = FR_P25, FR_R2, FR_P1
      nop.i 0
}
{.mfi
      nop.m 0
      // T' = T*(2^exp)
      fma.s1 f8 = f8, FR_2EXP, f0
      nop.i 0 ;;
}

{.mfb
      nop.m 0
      // result = T'+T'*P
      fma.d.s0 f8 = f8, FR_P15, f8
      br.ret.sptk b0 ;;
}


GLOBAL_LIBM_END(cbrt)
libm_alias_double_other (cbrt, cbrt)