Changeset 26e7d6d in mainline for uspace/lib/softfloat/generic/common.c
- Timestamp:
- 2011-09-19T16:31:00Z (13 years ago)
- Branches:
- lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
- Children:
- a347a11
- Parents:
- 3842a955 (diff), 086290d (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the(diff)
links above to see all the changes relative to each parent. - File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/softfloat/generic/common.c
r3842a955 r26e7d6d 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Common helper operations. 33 34 */ 34 35 … … 36 37 #include <common.h> 37 38 38 /* Table for fast leading zeroes counting */39 /* Table for fast leading zeroes counting. */ 39 40 char zeroTable[256] = { 40 41 8, 7, 7, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, \ … … 56 57 }; 57 58 58 59 60 /** Take fraction shifted by 10 bits to left, round it, normalize it and detect exceptions 61 * @param cexp exponent with bias 62 * @param cfrac fraction shifted 10 places left with added hidden bit 63 * @param sign 64 * @return valied float64 59 /** 60 * Take fraction shifted by 10 bits to the left, round it, normalize it 61 * and detect exceptions 62 * 63 * @param cexp Exponent with bias. 64 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit. 65 * @param sign Resulting sign. 66 * @return Finished double-precision float. 65 67 */ 66 68 float64 finishFloat64(int32_t cexp, uint64_t cfrac, char sign) … … 71 73 72 74 /* find first nonzero digit and shift result and detect possibly underflow */ 73 while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ) )))) { 75 while ((cexp > 0) && (cfrac) && 76 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) { 74 77 cexp--; 75 78 cfrac <<= 1; 76 /* TODO: fix underflow */ 77 }; 78 79 if ((cexp < 0) || ( cexp == 0 && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) { 79 /* TODO: fix underflow */ 80 } 81 82 if ((cexp < 0) || (cexp == 0 && 83 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) { 80 84 /* FIXME: underflow */ 81 85 result.parts.exp = 0; … … 93 97 94 98 if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) { 95 96 result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2) ) & (~FLOAT64_HIDDEN_BIT_MASK));99 result.parts.fraction = 100 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK)); 97 101 return result; 98 102 } … … 103 107 ++cexp; 104 108 105 if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 109 if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) { 106 110 ++cexp; 107 111 cfrac >>= 1; … … 109 113 110 114 /* check overflow */ 111 if (cexp >= FLOAT64_MAX_EXPONENT 115 if (cexp >= FLOAT64_MAX_EXPONENT) { 112 116 /* FIXME: overflow, return infinity */ 113 117 result.parts.exp = FLOAT64_MAX_EXPONENT; … … 116 120 } 117 121 118 result.parts.exp = (uint32_t)cexp; 119 120 result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2 ) ) & (~FLOAT64_HIDDEN_BIT_MASK)); 122 result.parts.exp = (uint32_t) cexp; 123 124 result.parts.fraction = 125 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK)); 121 126 122 127 return result; 123 128 } 124 129 125 /** Counts leading zeroes in 64bit unsigned integer 126 * @param i 130 /** 131 * Take fraction, round it, normalize it and detect exceptions 132 * 133 * @param cexp Exponent with bias. 134 * @param cfrac_hi High part of the fraction shifted 14 bits to the left 135 * with added hidden bit. 136 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left 137 * with added hidden bit. 138 * @param sign Resulting sign. 139 * @param shift_out Bits right-shifted out from fraction by the caller. 140 * @return Finished quadruple-precision float. 141 */ 142 float128 finishFloat128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo, 143 char sign, uint64_t shift_out) 144 { 145 float128 result; 146 uint64_t tmp_hi, tmp_lo; 147 148 result.parts.sign = sign; 149 150 /* find first nonzero digit and shift result and detect possibly underflow */ 151 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 152 1, &tmp_hi, &tmp_lo); 153 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 154 while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) && 155 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) { 156 cexp--; 157 lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo); 158 /* TODO: fix underflow */ 159 160 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 161 1, &tmp_hi, &tmp_lo); 162 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 163 } 164 165 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 166 1, &tmp_hi, &tmp_lo); 167 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 168 if ((cexp < 0) || (cexp == 0 && 169 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) { 170 /* FIXME: underflow */ 171 result.parts.exp = 0; 172 if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */ 173 result.parts.frac_hi = 0x0ll; 174 result.parts.frac_lo = 0x0ll; 175 return result; 176 } 177 178 while (cexp < 0) { 179 cexp++; 180 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo); 181 } 182 183 if (shift_out & (0x1ull < 64)) { 184 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo); 185 } 186 187 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 188 1, &tmp_hi, &tmp_lo); 189 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 190 if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 191 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 192 &tmp_hi, &tmp_lo); 193 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 194 result.parts.frac_hi = tmp_hi; 195 result.parts.frac_lo = tmp_lo; 196 return result; 197 } 198 } else { 199 if (shift_out & (0x1ull < 64)) { 200 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo); 201 } 202 } 203 204 ++cexp; 205 206 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 207 1, &tmp_hi, &tmp_lo); 208 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 209 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 210 ++cexp; 211 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo); 212 } 213 214 /* check overflow */ 215 if (cexp >= FLOAT128_MAX_EXPONENT) { 216 /* FIXME: overflow, return infinity */ 217 result.parts.exp = FLOAT128_MAX_EXPONENT; 218 result.parts.frac_hi = 0x0ll; 219 result.parts.frac_lo = 0x0ll; 220 return result; 221 } 222 223 result.parts.exp = (uint32_t) cexp; 224 225 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 226 &tmp_hi, &tmp_lo); 227 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 228 result.parts.frac_hi = tmp_hi; 229 result.parts.frac_lo = tmp_lo; 230 231 return result; 232 } 233 234 /** 235 * Counts leading zeroes in byte. 236 * 237 * @param i Byte for which to count leading zeroes. 238 * @return Number of detected leading zeroes. 239 */ 240 int countZeroes8(uint8_t i) 241 { 242 return zeroTable[i]; 243 } 244 245 /** 246 * Counts leading zeroes in 32bit unsigned integer. 247 * 248 * @param i Integer for which to count leading zeroes. 249 * @return Number of detected leading zeroes. 250 */ 251 int countZeroes32(uint32_t i) 252 { 253 int j; 254 for (j = 0; j < 32; j += 8) { 255 if (i & (0xFF << (24 - j))) { 256 return (j + countZeroes8(i >> (24 - j))); 257 } 258 } 259 260 return 32; 261 } 262 263 /** 264 * Counts leading zeroes in 64bit unsigned integer. 265 * 266 * @param i Integer for which to count leading zeroes. 267 * @return Number of detected leading zeroes. 127 268 */ 128 269 int countZeroes64(uint64_t i) 129 270 { 130 271 int j; 131 for (j = 0; j < 64; j += 8) {132 if ( 272 for (j = 0; j < 64; j += 8) { 273 if (i & (0xFFll << (56 - j))) { 133 274 return (j + countZeroes8(i >> (56 - j))); 134 275 } … … 138 279 } 139 280 140 /** Counts leading zeroes in 32bit unsigned integer 141 * @param i 142 */ 143 int countZeroes32(uint32_t i) 144 { 145 int j; 146 for (j =0; j < 32; j += 8) { 147 if ( i & (0xFF << (24 - j))) { 148 return (j + countZeroes8(i >> (24 - j))); 149 } 150 } 151 152 return 32; 153 } 154 155 /** Counts leading zeroes in byte 156 * @param i 157 */ 158 int countZeroes8(uint8_t i) 159 { 160 return zeroTable[i]; 161 } 162 163 /** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 30. bit 164 * @param exp exponent 165 * @param fraction part with hidden bit shifted to 30. bit 281 /** 282 * Round and normalize number expressed by exponent and fraction with 283 * first bit (equal to hidden bit) at 30th bit. 284 * 285 * @param exp Exponent part. 286 * @param fraction Fraction with hidden bit shifted to 30th bit. 166 287 */ 167 288 void roundFloat32(int32_t *exp, uint32_t *fraction) 168 289 { 169 290 /* rounding - if first bit after fraction is set then round up */ 170 (*fraction) += (0x1 << 6); 171 172 if ((*fraction) & (FLOAT32_HIDDEN_BIT_MASK << 8)) { 291 (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3)); 292 293 if ((*fraction) & 294 (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) { 173 295 /* rounding overflow */ 174 296 ++(*exp); 175 297 (*fraction) >>= 1; 176 } ;177 178 if (((*exp) >= FLOAT32_MAX_EXPONENT 298 } 299 300 if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) { 179 301 /* overflow - set infinity as result */ 180 302 (*exp) = FLOAT32_MAX_EXPONENT; 181 303 (*fraction) = 0; 182 return;183 184 185 return; 186 } 187 188 /** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 62. bit 189 * @param exp exponent190 * @param fraction part with hidden bit shifted to 62. bit304 } 305 } 306 307 /** 308 * Round and normalize number expressed by exponent and fraction with 309 * first bit (equal to hidden bit) at 62nd bit. 310 * 311 * @param exp Exponent part. 312 * @param fraction Fraction with hidden bit shifted to 62nd bit. 191 313 */ 192 314 void roundFloat64(int32_t *exp, uint64_t *fraction) 193 315 { 194 316 /* rounding - if first bit after fraction is set then round up */ 195 (*fraction) += (0x1 << 9); 196 197 if ((*fraction) & (FLOAT64_HIDDEN_BIT_MASK << 11)) { 317 (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3)); 318 319 if ((*fraction) & 320 (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 3))) { 198 321 /* rounding overflow */ 199 322 ++(*exp); 200 323 (*fraction) >>= 1; 201 } ;202 203 if (((*exp) >= FLOAT64_MAX_EXPONENT 324 } 325 326 if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) { 204 327 /* overflow - set infinity as result */ 205 328 (*exp) = FLOAT64_MAX_EXPONENT; 206 329 (*fraction) = 0; 207 return; 208 } 209 210 return; 330 } 331 } 332 333 /** 334 * Round and normalize number expressed by exponent and fraction with 335 * first bit (equal to hidden bit) at 126th bit. 336 * 337 * @param exp Exponent part. 338 * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit. 339 * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit. 340 */ 341 void roundFloat128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo) 342 { 343 uint64_t tmp_hi, tmp_lo; 344 345 /* rounding - if first bit after fraction is set then round up */ 346 lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo); 347 add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo); 348 349 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 350 (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo); 351 and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 352 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 353 /* rounding overflow */ 354 ++(*exp); 355 rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo); 356 } 357 358 if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) { 359 /* overflow - set infinity as result */ 360 (*exp) = FLOAT128_MAX_EXPONENT; 361 (*frac_hi) = 0; 362 (*frac_lo) = 0; 363 } 364 } 365 366 /** 367 * Logical shift left on the 128-bit operand. 368 * 369 * @param a_hi High part of the input operand. 370 * @param a_lo Low part of the input operand. 371 * @param shift Number of bits by witch to shift. 372 * @param r_hi Address to store high part of the result. 373 * @param r_lo Address to store low part of the result. 374 */ 375 void lshift128( 376 uint64_t a_hi, uint64_t a_lo, int shift, 377 uint64_t *r_hi, uint64_t *r_lo) 378 { 379 if (shift <= 0) { 380 /* do nothing */ 381 } else if (shift >= 128) { 382 a_hi = 0; 383 a_lo = 0; 384 } else if (shift >= 64) { 385 a_hi = a_lo << (shift - 64); 386 a_lo = 0; 387 } else { 388 a_hi <<= shift; 389 a_hi |= a_lo >> (64 - shift); 390 a_lo <<= shift; 391 } 392 393 *r_hi = a_hi; 394 *r_lo = a_lo; 395 } 396 397 /** 398 * Logical shift right on the 128-bit operand. 399 * 400 * @param a_hi High part of the input operand. 401 * @param a_lo Low part of the input operand. 402 * @param shift Number of bits by witch to shift. 403 * @param r_hi Address to store high part of the result. 404 * @param r_lo Address to store low part of the result. 405 */ 406 void rshift128( 407 uint64_t a_hi, uint64_t a_lo, int shift, 408 uint64_t *r_hi, uint64_t *r_lo) 409 { 410 if (shift <= 0) { 411 /* do nothing */ 412 } else if (shift >= 128) { 413 a_hi = 0; 414 a_lo = 0; 415 } else if (shift >= 64) { 416 a_lo = a_hi >> (shift - 64); 417 a_hi = 0; 418 } else { 419 a_lo >>= shift; 420 a_lo |= a_hi << (64 - shift); 421 a_hi >>= shift; 422 } 423 424 *r_hi = a_hi; 425 *r_lo = a_lo; 426 } 427 428 /** 429 * Bitwise AND on 128-bit operands. 430 * 431 * @param a_hi High part of the first input operand. 432 * @param a_lo Low part of the first input operand. 433 * @param b_hi High part of the second input operand. 434 * @param b_lo Low part of the second input operand. 435 * @param r_hi Address to store high part of the result. 436 * @param r_lo Address to store low part of the result. 437 */ 438 void and128( 439 uint64_t a_hi, uint64_t a_lo, 440 uint64_t b_hi, uint64_t b_lo, 441 uint64_t *r_hi, uint64_t *r_lo) 442 { 443 *r_hi = a_hi & b_hi; 444 *r_lo = a_lo & b_lo; 445 } 446 447 /** 448 * Bitwise inclusive OR on 128-bit operands. 449 * 450 * @param a_hi High part of the first input operand. 451 * @param a_lo Low part of the first input operand. 452 * @param b_hi High part of the second input operand. 453 * @param b_lo Low part of the second input operand. 454 * @param r_hi Address to store high part of the result. 455 * @param r_lo Address to store low part of the result. 456 */ 457 void or128( 458 uint64_t a_hi, uint64_t a_lo, 459 uint64_t b_hi, uint64_t b_lo, 460 uint64_t *r_hi, uint64_t *r_lo) 461 { 462 *r_hi = a_hi | b_hi; 463 *r_lo = a_lo | b_lo; 464 } 465 466 /** 467 * Bitwise exclusive OR on 128-bit operands. 468 * 469 * @param a_hi High part of the first input operand. 470 * @param a_lo Low part of the first input operand. 471 * @param b_hi High part of the second input operand. 472 * @param b_lo Low part of the second input operand. 473 * @param r_hi Address to store high part of the result. 474 * @param r_lo Address to store low part of the result. 475 */ 476 void xor128( 477 uint64_t a_hi, uint64_t a_lo, 478 uint64_t b_hi, uint64_t b_lo, 479 uint64_t *r_hi, uint64_t *r_lo) 480 { 481 *r_hi = a_hi ^ b_hi; 482 *r_lo = a_lo ^ b_lo; 483 } 484 485 /** 486 * Bitwise NOT on the 128-bit operand. 487 * 488 * @param a_hi High part of the input operand. 489 * @param a_lo Low part of the input operand. 490 * @param r_hi Address to store high part of the result. 491 * @param r_lo Address to store low part of the result. 492 */ 493 void not128( 494 uint64_t a_hi, uint64_t a_lo, 495 uint64_t *r_hi, uint64_t *r_lo) 496 { 497 *r_hi = ~a_hi; 498 *r_lo = ~a_lo; 499 } 500 501 /** 502 * Equality comparison of 128-bit operands. 503 * 504 * @param a_hi High part of the first input operand. 505 * @param a_lo Low part of the first input operand. 506 * @param b_hi High part of the second input operand. 507 * @param b_lo Low part of the second input operand. 508 * @return 1 if operands are equal, 0 otherwise. 509 */ 510 int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo) 511 { 512 return (a_hi == b_hi) && (a_lo == b_lo); 513 } 514 515 /** 516 * Lower-or-equal comparison of 128-bit operands. 517 * 518 * @param a_hi High part of the first input operand. 519 * @param a_lo Low part of the first input operand. 520 * @param b_hi High part of the second input operand. 521 * @param b_lo Low part of the second input operand. 522 * @return 1 if a is lower or equal to b, 0 otherwise. 523 */ 524 int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo) 525 { 526 return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo)); 527 } 528 529 /** 530 * Lower-than comparison of 128-bit operands. 531 * 532 * @param a_hi High part of the first input operand. 533 * @param a_lo Low part of the first input operand. 534 * @param b_hi High part of the second input operand. 535 * @param b_lo Low part of the second input operand. 536 * @return 1 if a is lower than b, 0 otherwise. 537 */ 538 int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo) 539 { 540 return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo)); 541 } 542 543 /** 544 * Addition of two 128-bit unsigned integers. 545 * 546 * @param a_hi High part of the first input operand. 547 * @param a_lo Low part of the first input operand. 548 * @param b_hi High part of the second input operand. 549 * @param b_lo Low part of the second input operand. 550 * @param r_hi Address to store high part of the result. 551 * @param r_lo Address to store low part of the result. 552 */ 553 void add128(uint64_t a_hi, uint64_t a_lo, 554 uint64_t b_hi, uint64_t b_lo, 555 uint64_t *r_hi, uint64_t *r_lo) 556 { 557 uint64_t low = a_lo + b_lo; 558 *r_lo = low; 559 /* detect overflow to add a carry */ 560 *r_hi = a_hi + b_hi + (low < a_lo); 561 } 562 563 /** 564 * Substraction of two 128-bit unsigned integers. 565 * 566 * @param a_hi High part of the first input operand. 567 * @param a_lo Low part of the first input operand. 568 * @param b_hi High part of the second input operand. 569 * @param b_lo Low part of the second input operand. 570 * @param r_hi Address to store high part of the result. 571 * @param r_lo Address to store low part of the result. 572 */ 573 void sub128(uint64_t a_hi, uint64_t a_lo, 574 uint64_t b_hi, uint64_t b_lo, 575 uint64_t *r_hi, uint64_t *r_lo) 576 { 577 *r_lo = a_lo - b_lo; 578 /* detect underflow to substract a carry */ 579 *r_hi = a_hi - b_hi - (a_lo < b_lo); 580 } 581 582 /** 583 * Multiplication of two 64-bit unsigned integers. 584 * 585 * @param a First input operand. 586 * @param b Second input operand. 587 * @param r_hi Address to store high part of the result. 588 * @param r_lo Address to store low part of the result. 589 */ 590 void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo) 591 { 592 uint64_t low, high, middle1, middle2; 593 uint32_t alow, blow; 594 595 alow = a & 0xFFFFFFFF; 596 blow = b & 0xFFFFFFFF; 597 598 a >>= 32; 599 b >>= 32; 600 601 low = ((uint64_t) alow) * blow; 602 middle1 = a * blow; 603 middle2 = alow * b; 604 high = a * b; 605 606 middle1 += middle2; 607 high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32); 608 middle1 <<= 32; 609 low += middle1; 610 high += (low < middle1); 611 *r_lo = low; 612 *r_hi = high; 613 } 614 615 /** 616 * Multiplication of two 128-bit unsigned integers. 617 * 618 * @param a_hi High part of the first input operand. 619 * @param a_lo Low part of the first input operand. 620 * @param b_hi High part of the second input operand. 621 * @param b_lo Low part of the second input operand. 622 * @param r_hihi Address to store first (highest) quarter of the result. 623 * @param r_hilo Address to store second quarter of the result. 624 * @param r_lohi Address to store third quarter of the result. 625 * @param r_lolo Address to store fourth (lowest) quarter of the result. 626 */ 627 void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo, 628 uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo) 629 { 630 uint64_t hihi, hilo, lohi, lolo; 631 uint64_t tmp1, tmp2; 632 633 mul64(a_lo, b_lo, &lohi, &lolo); 634 mul64(a_lo, b_hi, &hilo, &tmp2); 635 add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi); 636 mul64(a_hi, b_hi, &hihi, &tmp1); 637 add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo); 638 mul64(a_hi, b_lo, &tmp1, &tmp2); 639 add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi); 640 add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo); 641 642 *r_hihi = hihi; 643 *r_hilo = hilo; 644 *r_lohi = lohi; 645 *r_lolo = lolo; 646 } 647 648 /** 649 * Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned 650 * divisor. 651 * 652 * @param a_hi High part of the divident. 653 * @param a_lo Low part of the divident. 654 * @param b Divisor. 655 * @return Quotient approximation. 656 */ 657 uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b) 658 { 659 uint64_t b_hi, b_lo; 660 uint64_t rem_hi, rem_lo; 661 uint64_t tmp_hi, tmp_lo; 662 uint64_t result; 663 664 if (b <= a_hi) { 665 return 0xFFFFFFFFFFFFFFFFull; 666 } 667 668 b_hi = b >> 32; 669 result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32; 670 mul64(b, result, &tmp_hi, &tmp_lo); 671 sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo); 672 673 while ((int64_t) rem_hi < 0) { 674 result -= 0x1ll << 32; 675 b_lo = b << 32; 676 add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo); 677 } 678 679 rem_hi = (rem_hi << 32) | (rem_lo >> 32); 680 if ((b_hi << 32) <= rem_hi) { 681 result |= 0xFFFFFFFF; 682 } else { 683 result |= rem_hi / b_hi; 684 } 685 686 return result; 211 687 } 212 688
Note:
See TracChangeset
for help on using the changeset viewer.