Ignore:
Timestamp:
2011-09-19T16:31:00Z (13 years ago)
Author:
Vojtech Horky <vojtechhorky@…>
Branches:
lfn, master, serial, ticket/834-toolchain-update, topic/msim-upgrade, topic/simplify-dev-export
Children:
a347a11
Parents:
3842a955 (diff), 086290d (diff)
Note: this is a merge changeset, the changes displayed below correspond to the merge itself.
Use the (diff) links above to see all the changes relative to each parent.
Message:

Merge mainline changes

File:
1 edited

Legend:

Unmodified
Added
Removed
  • uspace/lib/softfloat/generic/common.c

    r3842a955 r26e7d6d  
    11/*
    22 * Copyright (c) 2005 Josef Cejka
     3 * Copyright (c) 2011 Petr Koupy
    34 * All rights reserved.
    45 *
     
    3031 * @{
    3132 */
    32 /** @file
     33/** @file Common helper operations.
    3334 */
    3435
     
    3637#include <common.h>
    3738
    38 /* Table for fast leading zeroes counting */
     39/* Table for fast leading zeroes counting. */
    3940char zeroTable[256] = {
    4041        8, 7, 7, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, \
     
    5657};
    5758
    58 
    59 
    60 /** Take fraction shifted by 10 bits to left, round it, normalize it and detect exceptions
    61  * @param cexp exponent with bias
    62  * @param cfrac fraction shifted 10 places left with added hidden bit
    63  * @param sign
    64  * @return valied float64
     59/**
     60 * Take fraction shifted by 10 bits to the left, round it, normalize it
     61 * and detect exceptions
     62 *
     63 * @param cexp Exponent with bias.
     64 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit.
     65 * @param sign Resulting sign.
     66 * @return Finished double-precision float.
    6567 */
    6668float64 finishFloat64(int32_t cexp, uint64_t cfrac, char sign)
     
    7173
    7274        /* find first nonzero digit and shift result and detect possibly underflow */
    73         while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ) )))) {
     75        while ((cexp > 0) && (cfrac) &&
     76            (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) {
    7477                cexp--;
    7578                cfrac <<= 1;
    76                         /* TODO: fix underflow */
    77         };
    78        
    79         if ((cexp < 0) || ( cexp == 0 && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
     79                /* TODO: fix underflow */
     80        }
     81       
     82        if ((cexp < 0) || (cexp == 0 &&
     83            (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) {
    8084                /* FIXME: underflow */
    8185                result.parts.exp = 0;
     
    9397               
    9498                if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) {
    95                        
    96                         result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2) ) & (~FLOAT64_HIDDEN_BIT_MASK));
     99                        result.parts.fraction =
     100                            ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
    97101                        return result;
    98102                }       
     
    103107        ++cexp;
    104108
    105         if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ))) {
     109        if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) {
    106110                ++cexp;
    107111                cfrac >>= 1;
     
    109113
    110114        /* check overflow */
    111         if (cexp >= FLOAT64_MAX_EXPONENT ) {
     115        if (cexp >= FLOAT64_MAX_EXPONENT) {
    112116                /* FIXME: overflow, return infinity */
    113117                result.parts.exp = FLOAT64_MAX_EXPONENT;
     
    116120        }
    117121
    118         result.parts.exp = (uint32_t)cexp;
    119        
    120         result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2 ) ) & (~FLOAT64_HIDDEN_BIT_MASK));
     122        result.parts.exp = (uint32_t) cexp;
     123       
     124        result.parts.fraction =
     125            ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));
    121126       
    122127        return result; 
    123128}
    124129
    125 /** Counts leading zeroes in 64bit unsigned integer
    126  * @param i
     130/**
     131 * Take fraction, round it, normalize it and detect exceptions
     132 *
     133 * @param cexp Exponent with bias.
     134 * @param cfrac_hi High part of the fraction shifted 14 bits to the left
     135 *     with added hidden bit.
     136 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left
     137 *     with added hidden bit.
     138 * @param sign Resulting sign.
     139 * @param shift_out Bits right-shifted out from fraction by the caller.
     140 * @return Finished quadruple-precision float.
     141 */
     142float128 finishFloat128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo,
     143    char sign, uint64_t shift_out)
     144{
     145        float128 result;
     146        uint64_t tmp_hi, tmp_lo;
     147
     148        result.parts.sign = sign;
     149
     150        /* find first nonzero digit and shift result and detect possibly underflow */
     151        lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
     152            1, &tmp_hi, &tmp_lo);
     153        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
     154        while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) &&
     155            (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {
     156                cexp--;
     157                lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
     158                /* TODO: fix underflow */
     159
     160                lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
     161                    1, &tmp_hi, &tmp_lo);
     162                and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
     163        }
     164
     165        lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
     166            1, &tmp_hi, &tmp_lo);
     167        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
     168        if ((cexp < 0) || (cexp == 0 &&
     169            (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) {
     170                /* FIXME: underflow */
     171                result.parts.exp = 0;
     172                if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */
     173                        result.parts.frac_hi = 0x0ll;
     174                        result.parts.frac_lo = 0x0ll;
     175                        return result;
     176                }
     177
     178                while (cexp < 0) {
     179                        cexp++;
     180                        rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
     181                }
     182
     183                if (shift_out & (0x1ull < 64)) {
     184                        add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
     185                }
     186
     187                lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
     188                    1, &tmp_hi, &tmp_lo);
     189                and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
     190                if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
     191                        not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
     192                            &tmp_hi, &tmp_lo);
     193                        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
     194                        result.parts.frac_hi = tmp_hi;
     195                        result.parts.frac_lo = tmp_lo;
     196                        return result;
     197                }
     198        } else {
     199                if (shift_out & (0x1ull < 64)) {
     200                        add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo);
     201                }
     202        }
     203
     204        ++cexp;
     205
     206        lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
     207            1, &tmp_hi, &tmp_lo);
     208        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
     209        if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
     210                ++cexp;
     211                rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo);
     212        }
     213
     214        /* check overflow */
     215        if (cexp >= FLOAT128_MAX_EXPONENT) {
     216                /* FIXME: overflow, return infinity */
     217                result.parts.exp = FLOAT128_MAX_EXPONENT;
     218                result.parts.frac_hi = 0x0ll;
     219                result.parts.frac_lo = 0x0ll;
     220                return result;
     221        }
     222
     223        result.parts.exp = (uint32_t) cexp;
     224
     225        not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
     226            &tmp_hi, &tmp_lo);
     227        and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
     228        result.parts.frac_hi = tmp_hi;
     229        result.parts.frac_lo = tmp_lo;
     230
     231        return result; 
     232}
     233
     234/**
     235 * Counts leading zeroes in byte.
     236 *
     237 * @param i Byte for which to count leading zeroes.
     238 * @return Number of detected leading zeroes.
     239 */
     240int countZeroes8(uint8_t i)
     241{
     242        return zeroTable[i];
     243}
     244
     245/**
     246 * Counts leading zeroes in 32bit unsigned integer.
     247 *
     248 * @param i Integer for which to count leading zeroes.
     249 * @return Number of detected leading zeroes.
     250 */
     251int countZeroes32(uint32_t i)
     252{
     253        int j;
     254        for (j = 0; j < 32; j += 8) {
     255                if (i & (0xFF << (24 - j))) {
     256                        return (j + countZeroes8(i >> (24 - j)));
     257                }
     258        }
     259
     260        return 32;
     261}
     262
     263/**
     264 * Counts leading zeroes in 64bit unsigned integer.
     265 *
     266 * @param i Integer for which to count leading zeroes.
     267 * @return Number of detected leading zeroes.
    127268 */
    128269int countZeroes64(uint64_t i)
    129270{
    130271        int j;
    131         for (j =0; j < 64; j += 8) {
    132                 if ( i & (0xFFll << (56 - j))) {
     272        for (j = 0; j < 64; j += 8) {
     273                if (i & (0xFFll << (56 - j))) {
    133274                        return (j + countZeroes8(i >> (56 - j)));
    134275                }
     
    138279}
    139280
    140 /** Counts leading zeroes in 32bit unsigned integer
    141  * @param i
    142  */
    143 int countZeroes32(uint32_t i)
    144 {
    145         int j;
    146         for (j =0; j < 32; j += 8) {
    147                 if ( i & (0xFF << (24 - j))) {
    148                         return (j + countZeroes8(i >> (24 - j)));
    149                 }
    150         }
    151 
    152         return 32;
    153 }
    154 
    155 /** Counts leading zeroes in byte
    156  * @param i
    157  */
    158 int countZeroes8(uint8_t i)
    159 {
    160         return zeroTable[i];
    161 }
    162 
    163 /** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 30. bit
    164  * @param exp exponent
    165  * @param fraction part with hidden bit shifted to 30. bit
     281/**
     282 * Round and normalize number expressed by exponent and fraction with
     283 * first bit (equal to hidden bit) at 30th bit.
     284 *
     285 * @param exp Exponent part.
     286 * @param fraction Fraction with hidden bit shifted to 30th bit.
    166287 */
    167288void roundFloat32(int32_t *exp, uint32_t *fraction)
    168289{
    169290        /* rounding - if first bit after fraction is set then round up */
    170         (*fraction) += (0x1 << 6);
    171        
    172         if ((*fraction) & (FLOAT32_HIDDEN_BIT_MASK << 8)) {
     291        (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3));
     292       
     293        if ((*fraction) &
     294            (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) {
    173295                /* rounding overflow */
    174296                ++(*exp);
    175297                (*fraction) >>= 1;
    176         };
    177        
    178         if (((*exp) >= FLOAT32_MAX_EXPONENT ) || ((*exp) < 0)) {
     298        }
     299       
     300        if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) {
    179301                /* overflow - set infinity as result */
    180302                (*exp) = FLOAT32_MAX_EXPONENT;
    181303                (*fraction) = 0;
    182                 return;
    183         }
    184 
    185         return;
    186 }
    187 
    188 /** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 62. bit
    189  * @param exp exponent
    190  * @param fraction part with hidden bit shifted to 62. bit
     304        }
     305}
     306
     307/**
     308 * Round and normalize number expressed by exponent and fraction with
     309 * first bit (equal to hidden bit) at 62nd bit.
     310 *
     311 * @param exp Exponent part.
     312 * @param fraction Fraction with hidden bit shifted to 62nd bit.
    191313 */
    192314void roundFloat64(int32_t *exp, uint64_t *fraction)
    193315{
    194316        /* rounding - if first bit after fraction is set then round up */
    195         (*fraction) += (0x1 << 9);
    196        
    197         if ((*fraction) & (FLOAT64_HIDDEN_BIT_MASK << 11)) {
     317        (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3));
     318       
     319        if ((*fraction) &
     320            (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 3))) {
    198321                /* rounding overflow */
    199322                ++(*exp);
    200323                (*fraction) >>= 1;
    201         };
    202        
    203         if (((*exp) >= FLOAT64_MAX_EXPONENT ) || ((*exp) < 0)) {
     324        }
     325       
     326        if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) {
    204327                /* overflow - set infinity as result */
    205328                (*exp) = FLOAT64_MAX_EXPONENT;
    206329                (*fraction) = 0;
    207                 return;
    208         }
    209 
    210         return;
     330        }
     331}
     332
     333/**
     334 * Round and normalize number expressed by exponent and fraction with
     335 * first bit (equal to hidden bit) at 126th bit.
     336 *
     337 * @param exp Exponent part.
     338 * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit.
     339 * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit.
     340 */
     341void roundFloat128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo)
     342{
     343        uint64_t tmp_hi, tmp_lo;
     344
     345        /* rounding - if first bit after fraction is set then round up */
     346        lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
     347        add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo);
     348
     349        lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,
     350            (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo);
     351        and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);
     352        if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {
     353                /* rounding overflow */
     354                ++(*exp);
     355                rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo);
     356        }
     357
     358        if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) {
     359                /* overflow - set infinity as result */
     360                (*exp) = FLOAT128_MAX_EXPONENT;
     361                (*frac_hi) = 0;
     362                (*frac_lo) = 0;
     363        }
     364}
     365
     366/**
     367 * Logical shift left on the 128-bit operand.
     368 *
     369 * @param a_hi High part of the input operand.
     370 * @param a_lo Low part of the input operand.
     371 * @param shift Number of bits by witch to shift.
     372 * @param r_hi Address to store high part of the result.
     373 * @param r_lo Address to store low part of the result.
     374 */
     375void lshift128(
     376    uint64_t a_hi, uint64_t a_lo, int shift,
     377    uint64_t *r_hi, uint64_t *r_lo)
     378{
     379        if (shift <= 0) {
     380                /* do nothing */
     381        } else if (shift >= 128) {
     382                a_hi = 0;
     383                a_lo = 0;
     384        } else if (shift >= 64) {
     385                a_hi = a_lo << (shift - 64);
     386                a_lo = 0;
     387        } else {
     388                a_hi <<= shift;
     389                a_hi |= a_lo >> (64 - shift);
     390                a_lo <<= shift;
     391        }
     392
     393        *r_hi = a_hi;
     394        *r_lo = a_lo;
     395}
     396
     397/**
     398 * Logical shift right on the 128-bit operand.
     399 *
     400 * @param a_hi High part of the input operand.
     401 * @param a_lo Low part of the input operand.
     402 * @param shift Number of bits by witch to shift.
     403 * @param r_hi Address to store high part of the result.
     404 * @param r_lo Address to store low part of the result.
     405 */
     406void rshift128(
     407    uint64_t a_hi, uint64_t a_lo, int shift,
     408    uint64_t *r_hi, uint64_t *r_lo)
     409{
     410        if (shift <= 0) {
     411                /* do nothing */
     412        } else  if (shift >= 128) {
     413                a_hi = 0;
     414                a_lo = 0;
     415        } else if (shift >= 64) {
     416                a_lo = a_hi >> (shift - 64);
     417                a_hi = 0;
     418        } else {
     419                a_lo >>= shift;
     420                a_lo |= a_hi << (64 - shift);
     421                a_hi >>= shift;
     422        }
     423
     424        *r_hi = a_hi;
     425        *r_lo = a_lo;
     426}
     427
     428/**
     429 * Bitwise AND on 128-bit operands.
     430 *
     431 * @param a_hi High part of the first input operand.
     432 * @param a_lo Low part of the first input operand.
     433 * @param b_hi High part of the second input operand.
     434 * @param b_lo Low part of the second input operand.
     435 * @param r_hi Address to store high part of the result.
     436 * @param r_lo Address to store low part of the result.
     437 */
     438void and128(
     439    uint64_t a_hi, uint64_t a_lo,
     440    uint64_t b_hi, uint64_t b_lo,
     441    uint64_t *r_hi, uint64_t *r_lo)
     442{
     443        *r_hi = a_hi & b_hi;
     444        *r_lo = a_lo & b_lo;
     445}
     446
     447/**
     448 * Bitwise inclusive OR on 128-bit operands.
     449 *
     450 * @param a_hi High part of the first input operand.
     451 * @param a_lo Low part of the first input operand.
     452 * @param b_hi High part of the second input operand.
     453 * @param b_lo Low part of the second input operand.
     454 * @param r_hi Address to store high part of the result.
     455 * @param r_lo Address to store low part of the result.
     456 */
     457void or128(
     458    uint64_t a_hi, uint64_t a_lo,
     459    uint64_t b_hi, uint64_t b_lo,
     460    uint64_t *r_hi, uint64_t *r_lo)
     461{
     462        *r_hi = a_hi | b_hi;
     463        *r_lo = a_lo | b_lo;
     464}
     465
     466/**
     467 * Bitwise exclusive OR on 128-bit operands.
     468 *
     469 * @param a_hi High part of the first input operand.
     470 * @param a_lo Low part of the first input operand.
     471 * @param b_hi High part of the second input operand.
     472 * @param b_lo Low part of the second input operand.
     473 * @param r_hi Address to store high part of the result.
     474 * @param r_lo Address to store low part of the result.
     475 */
     476void xor128(
     477    uint64_t a_hi, uint64_t a_lo,
     478    uint64_t b_hi, uint64_t b_lo,
     479    uint64_t *r_hi, uint64_t *r_lo)
     480{
     481        *r_hi = a_hi ^ b_hi;
     482        *r_lo = a_lo ^ b_lo;
     483}
     484
     485/**
     486 * Bitwise NOT on the 128-bit operand.
     487 *
     488 * @param a_hi High part of the input operand.
     489 * @param a_lo Low part of the input operand.
     490 * @param r_hi Address to store high part of the result.
     491 * @param r_lo Address to store low part of the result.
     492 */
     493void not128(
     494    uint64_t a_hi, uint64_t a_lo,
     495        uint64_t *r_hi, uint64_t *r_lo)
     496{
     497        *r_hi = ~a_hi;
     498        *r_lo = ~a_lo;
     499}
     500
     501/**
     502 * Equality comparison of 128-bit operands.
     503 *
     504 * @param a_hi High part of the first input operand.
     505 * @param a_lo Low part of the first input operand.
     506 * @param b_hi High part of the second input operand.
     507 * @param b_lo Low part of the second input operand.
     508 * @return 1 if operands are equal, 0 otherwise.
     509 */
     510int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
     511{
     512        return (a_hi == b_hi) && (a_lo == b_lo);
     513}
     514
     515/**
     516 * Lower-or-equal comparison of 128-bit operands.
     517 *
     518 * @param a_hi High part of the first input operand.
     519 * @param a_lo Low part of the first input operand.
     520 * @param b_hi High part of the second input operand.
     521 * @param b_lo Low part of the second input operand.
     522 * @return 1 if a is lower or equal to b, 0 otherwise.
     523 */
     524int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
     525{
     526        return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo));
     527}
     528
     529/**
     530 * Lower-than comparison of 128-bit operands.
     531 *
     532 * @param a_hi High part of the first input operand.
     533 * @param a_lo Low part of the first input operand.
     534 * @param b_hi High part of the second input operand.
     535 * @param b_lo Low part of the second input operand.
     536 * @return 1 if a is lower than b, 0 otherwise.
     537 */
     538int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
     539{
     540        return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo));
     541}
     542
     543/**
     544 * Addition of two 128-bit unsigned integers.
     545 *
     546 * @param a_hi High part of the first input operand.
     547 * @param a_lo Low part of the first input operand.
     548 * @param b_hi High part of the second input operand.
     549 * @param b_lo Low part of the second input operand.
     550 * @param r_hi Address to store high part of the result.
     551 * @param r_lo Address to store low part of the result.
     552 */
     553void add128(uint64_t a_hi, uint64_t a_lo,
     554    uint64_t b_hi, uint64_t b_lo,
     555    uint64_t *r_hi, uint64_t *r_lo)
     556{
     557        uint64_t low = a_lo + b_lo;
     558        *r_lo = low;
     559        /* detect overflow to add a carry */
     560        *r_hi = a_hi + b_hi + (low < a_lo);
     561}
     562
     563/**
     564 * Substraction of two 128-bit unsigned integers.
     565 *
     566 * @param a_hi High part of the first input operand.
     567 * @param a_lo Low part of the first input operand.
     568 * @param b_hi High part of the second input operand.
     569 * @param b_lo Low part of the second input operand.
     570 * @param r_hi Address to store high part of the result.
     571 * @param r_lo Address to store low part of the result.
     572 */
     573void sub128(uint64_t a_hi, uint64_t a_lo,
     574    uint64_t b_hi, uint64_t b_lo,
     575    uint64_t *r_hi, uint64_t *r_lo)
     576{
     577        *r_lo = a_lo - b_lo;
     578        /* detect underflow to substract a carry */
     579        *r_hi = a_hi - b_hi - (a_lo < b_lo);
     580}
     581
     582/**
     583 * Multiplication of two 64-bit unsigned integers.
     584 *
     585 * @param a First input operand.
     586 * @param b Second input operand.
     587 * @param r_hi Address to store high part of the result.
     588 * @param r_lo Address to store low part of the result.
     589 */
     590void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
     591{
     592        uint64_t low, high, middle1, middle2;
     593        uint32_t alow, blow;
     594
     595        alow = a & 0xFFFFFFFF;
     596        blow = b & 0xFFFFFFFF;
     597
     598        a >>= 32;
     599        b >>= 32;
     600
     601        low = ((uint64_t) alow) * blow;
     602        middle1 = a * blow;
     603        middle2 = alow * b;
     604        high = a * b;
     605
     606        middle1 += middle2;
     607        high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
     608        middle1 <<= 32;
     609        low += middle1;
     610        high += (low < middle1);
     611        *r_lo = low;
     612        *r_hi = high;
     613}
     614
     615/**
     616 * Multiplication of two 128-bit unsigned integers.
     617 *
     618 * @param a_hi High part of the first input operand.
     619 * @param a_lo Low part of the first input operand.
     620 * @param b_hi High part of the second input operand.
     621 * @param b_lo Low part of the second input operand.
     622 * @param r_hihi Address to store first (highest) quarter of the result.
     623 * @param r_hilo Address to store second quarter of the result.
     624 * @param r_lohi Address to store third quarter of the result.
     625 * @param r_lolo Address to store fourth (lowest) quarter of the result.
     626 */
     627void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
     628    uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
     629{
     630        uint64_t hihi, hilo, lohi, lolo;
     631        uint64_t tmp1, tmp2;
     632
     633        mul64(a_lo, b_lo, &lohi, &lolo);
     634        mul64(a_lo, b_hi, &hilo, &tmp2);
     635        add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
     636        mul64(a_hi, b_hi, &hihi, &tmp1);
     637        add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
     638        mul64(a_hi, b_lo, &tmp1, &tmp2);
     639        add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
     640        add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);
     641
     642        *r_hihi = hihi;
     643        *r_hilo = hilo;
     644        *r_lohi = lohi;
     645        *r_lolo = lolo;
     646}
     647
     648/**
     649 * Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned
     650 * divisor.
     651 *
     652 * @param a_hi High part of the divident.
     653 * @param a_lo Low part of the divident.
     654 * @param b Divisor.
     655 * @return Quotient approximation.
     656 */
     657uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
     658{
     659        uint64_t b_hi, b_lo;
     660        uint64_t rem_hi, rem_lo;
     661        uint64_t tmp_hi, tmp_lo;
     662        uint64_t result;
     663
     664        if (b <= a_hi) {
     665                return 0xFFFFFFFFFFFFFFFFull;
     666        }
     667
     668        b_hi = b >> 32;
     669        result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
     670        mul64(b, result, &tmp_hi, &tmp_lo);
     671        sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);
     672       
     673        while ((int64_t) rem_hi < 0) {
     674                result -= 0x1ll << 32;
     675                b_lo = b << 32;
     676                add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
     677        }
     678
     679        rem_hi = (rem_hi << 32) | (rem_lo >> 32);
     680        if ((b_hi << 32) <= rem_hi) {
     681                result |= 0xFFFFFFFF;
     682        } else {
     683                result |= rem_hi / b_hi;
     684        }
     685
     686        return result;
    211687}
    212688
Note: See TracChangeset for help on using the changeset viewer.