Changes in / [d3e241a:b7ee0369] in mainline
- Location:
- uspace/lib
- Files:
-
- 30 edited
Legend:
- Unmodified
- Added
- Removed
-
TabularUnified uspace/lib/posix/stdio.h ¶
rd3e241a rb7ee0369 127 127 128 128 #ifndef LIBPOSIX_INTERNAL 129 /* DEBUG macro does not belong to POSIX stdio.h. Its unconditional130 * definition in the native stdio.h causes unexpected behaviour of131 * applications which uses their own DEBUG macro (e.g. debugging132 * output is printed even if not desirable). */133 #undef DEBUG134 135 129 #define ctermid posix_ctermid 136 130 -
TabularUnified uspace/lib/softfloat/arch/abs32le/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2010 Martin Decky 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 47 46 #define float64_to_longlong(X) float64_to_int64(X); 48 47 49 #define float128_to_int(X) float128_to_int32(X);50 #define float128_to_long(X) float128_to_int32(X);51 #define float128_to_longlong(X) float128_to_int64(X);52 53 48 #define float32_to_uint(X) float32_to_uint32(X); 54 49 #define float32_to_ulong(X) float32_to_uint32(X); … … 58 53 #define float64_to_ulong(X) float64_to_uint32(X); 59 54 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X);62 #define float128_to_ulong(X) float128_to_uint32(X);63 #define float128_to_ulonglong(X) float128_to_uint64(X);64 55 65 56 #define int_to_float32(X) int32_to_float32(X); … … 71 62 #define longlong_to_float64(X) int64_to_float64(X); 72 63 73 #define int_to_float128(X) int32_to_float128(X);74 #define long_to_float128(X) int32_to_float128(X);75 #define longlong_to_float128(X) int64_to_float128(X);76 77 64 #define uint_to_float32(X) uint32_to_float32(X); 78 65 #define ulong_to_float32(X) uint32_to_float32(X); … … 83 70 #define ulonglong_to_float64(X) uint64_to_float64(X); 84 71 85 #define uint_to_float128(X) uint32_to_float128(X);86 #define ulong_to_float128(X) uint32_to_float128(X);87 #define ulonglong_to_float128(X) uint64_to_float128(X);88 89 72 #endif 90 73 -
TabularUnified uspace/lib/softfloat/arch/amd64/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 47 46 #define float64_to_longlong(X) float64_to_int64(X); 48 47 49 #define float128_to_int(X) float128_to_int32(X);50 #define float128_to_long(X) float128_to_int64(X);51 #define float128_to_longlong(X) float128_to_int64(X);52 53 48 #define float32_to_uint(X) float32_to_uint32(X); 54 49 #define float32_to_ulong(X) float32_to_uint64(X); … … 58 53 #define float64_to_ulong(X) float64_to_uint64(X); 59 54 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X);62 #define float128_to_ulong(X) float128_to_uint64(X);63 #define float128_to_ulonglong(X) float128_to_uint64(X);64 55 65 56 #define int_to_float32(X) int32_to_float32(X); … … 71 62 #define longlong_to_float64(X) int64_to_float64(X); 72 63 73 #define int_to_float128(X) int32_to_float128(X);74 #define long_to_float128(X) int64_to_float128(X);75 #define longlong_to_float128(X) int64_to_float128(X);76 77 64 #define uint_to_float32(X) uint32_to_float32(X); 78 65 #define ulong_to_float32(X) uint64_to_float32(X); … … 83 70 #define ulonglong_to_float64(X) uint64_to_float64(X); 84 71 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint64_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 72 #endif 88 73 89 #endif90 74 91 75 /** @} 92 76 */ 77 -
TabularUnified uspace/lib/softfloat/arch/arm32/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 34 33 */ 35 34 /** @file 35 * @brief Softfloat architecture dependent definitions. 36 36 */ 37 37 … … 47 47 #define float64_to_longlong(X) float64_to_int64(X); 48 48 49 #define float128_to_int(X) float128_to_int32(X);50 #define float128_to_long(X) float128_to_int32(X);51 #define float128_to_longlong(X) float128_to_int64(X);52 53 49 #define float32_to_uint(X) float32_to_uint32(X); 54 50 #define float32_to_ulong(X) float32_to_uint32(X); … … 58 54 #define float64_to_ulong(X) float64_to_uint32(X); 59 55 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X);62 #define float128_to_ulong(X) float128_to_uint32(X);63 #define float128_to_ulonglong(X) float128_to_uint64(X);64 56 65 57 #define int_to_float32(X) int32_to_float32(X); … … 71 63 #define longlong_to_float64(X) int64_to_float64(X); 72 64 73 #define int_to_float128(X) int32_to_float128(X);74 #define long_to_float128(X) int32_to_float128(X);75 #define longlong_to_float128(X) int64_to_float128(X);76 77 65 #define uint_to_float32(X) uint32_to_float32(X); 78 66 #define ulong_to_float32(X) uint32_to_float32(X); … … 83 71 #define ulonglong_to_float64(X) uint64_to_float64(X); 84 72 85 #define uint_to_float128(X) uint32_to_float128(X);86 #define ulong_to_float128(X) uint32_to_float128(X);87 #define ulonglong_to_float128(X) uint64_to_float128(X);88 89 73 #endif 90 74 -
TabularUnified uspace/lib/softfloat/arch/ia32/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 47 46 #define float64_to_longlong(X) float64_to_int64(X); 48 47 49 #define float128_to_int(X) float128_to_int32(X);50 #define float128_to_long(X) float128_to_int32(X);51 #define float128_to_longlong(X) float128_to_int64(X);52 53 48 #define float32_to_uint(X) float32_to_uint32(X); 54 49 #define float32_to_ulong(X) float32_to_uint32(X); … … 58 53 #define float64_to_ulong(X) float64_to_uint32(X); 59 54 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X);62 #define float128_to_ulong(X) float128_to_uint32(X);63 #define float128_to_ulonglong(X) float128_to_uint64(X);64 55 65 56 #define int_to_float32(X) int32_to_float32(X); … … 71 62 #define longlong_to_float64(X) int64_to_float64(X); 72 63 73 #define int_to_float128(X) int32_to_float128(X);74 #define long_to_float128(X) int32_to_float128(X);75 #define longlong_to_float128(X) int64_to_float128(X);76 77 64 #define uint_to_float32(X) uint32_to_float32(X); 78 65 #define ulong_to_float32(X) uint32_to_float32(X); … … 83 70 #define ulonglong_to_float64(X) uint64_to_float64(X); 84 71 85 #define uint_to_float128(X) uint32_to_float128(X);86 #define ulong_to_float128(X) uint32_to_float128(X);87 #define ulonglong_to_float128(X) uint64_to_float128(X);88 89 72 #endif 90 73 -
TabularUnified uspace/lib/softfloat/arch/ia64/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 47 46 #define float64_to_longlong(X) float64_to_int64(X); 48 47 49 #define float128_to_int(X) float128_to_int32(X);50 #define float128_to_long(X) float128_to_int64(X);51 #define float128_to_longlong(X) float128_to_int64(X);52 53 48 #define float32_to_uint(X) float32_to_uint32(X); 54 49 #define float32_to_ulong(X) float32_to_uint64(X); … … 58 53 #define float64_to_ulong(X) float64_to_uint64(X); 59 54 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X);62 #define float128_to_ulong(X) float128_to_uint64(X);63 #define float128_to_ulonglong(X) float128_to_uint64(X);64 55 65 56 #define int_to_float32(X) int32_to_float32(X); … … 71 62 #define longlong_to_float64(X) int64_to_float64(X); 72 63 73 #define int_to_float128(X) int32_to_float128(X);74 #define long_to_float128(X) int64_to_float128(X);75 #define longlong_to_float128(X) int64_to_float128(X);76 77 64 #define uint_to_float32(X) uint32_to_float32(X); 78 65 #define ulong_to_float32(X) uint64_to_float32(X); … … 83 70 #define ulonglong_to_float64(X) uint64_to_float64(X); 84 71 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint64_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 72 #endif 88 73 89 #endif90 74 91 75 /** @} 92 76 */ 77 -
TabularUnified uspace/lib/softfloat/arch/mips32/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 47 46 #define float64_to_longlong(X) float64_to_int64(X); 48 47 49 #define float128_to_int(X) float128_to_int32(X);50 #define float128_to_long(X) float128_to_int32(X);51 #define float128_to_longlong(X) float128_to_int64(X);52 53 48 #define float32_to_uint(X) float32_to_uint32(X); 54 49 #define float32_to_ulong(X) float32_to_uint32(X); … … 58 53 #define float64_to_ulong(X) float64_to_uint32(X); 59 54 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X);62 #define float128_to_ulong(X) float128_to_uint32(X);63 #define float128_to_ulonglong(X) float128_to_uint64(X);64 55 65 56 #define int_to_float32(X) int32_to_float32(X); … … 71 62 #define longlong_to_float64(X) int64_to_float64(X); 72 63 73 #define int_to_float128(X) int32_to_float128(X);74 #define long_to_float128(X) int32_to_float128(X);75 #define longlong_to_float128(X) int64_to_float128(X);76 77 64 #define uint_to_float32(X) uint32_to_float32(X); 78 65 #define ulong_to_float32(X) uint32_to_float32(X); … … 83 70 #define ulonglong_to_float64(X) uint64_to_float64(X); 84 71 85 #define uint_to_float128(X) uint32_to_float128(X);86 #define ulong_to_float128(X) uint32_to_float128(X);87 #define ulonglong_to_float128(X) uint64_to_float128(X);88 89 72 #endif 90 73 -
TabularUnified uspace/lib/softfloat/arch/mips32eb/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 47 46 #define float64_to_longlong(X) float64_to_int64(X); 48 47 49 #define float128_to_int(X) float128_to_int32(X);50 #define float128_to_long(X) float128_to_int32(X);51 #define float128_to_longlong(X) float128_to_int64(X);52 53 48 #define float32_to_uint(X) float32_to_uint32(X); 54 49 #define float32_to_ulong(X) float32_to_uint32(X); … … 58 53 #define float64_to_ulong(X) float64_to_uint32(X); 59 54 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X);62 #define float128_to_ulong(X) float128_to_uint32(X);63 #define float128_to_ulonglong(X) float128_to_uint64(X);64 55 65 56 #define int_to_float32(X) int32_to_float32(X); … … 71 62 #define longlong_to_float64(X) int64_to_float64(X); 72 63 73 #define int_to_float128(X) int32_to_float128(X);74 #define long_to_float128(X) int32_to_float128(X);75 #define longlong_to_float128(X) int64_to_float128(X);76 77 64 #define uint_to_float32(X) uint32_to_float32(X); 78 65 #define ulong_to_float32(X) uint32_to_float32(X); … … 83 70 #define ulonglong_to_float64(X) uint64_to_float64(X); 84 71 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint32_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 72 #endif 88 73 89 #endif90 74 91 75 /** @} 92 76 */ 77 -
TabularUnified uspace/lib/softfloat/arch/mips64/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 47 46 #define float64_to_longlong(X) float64_to_int64(X); 48 47 49 #define float128_to_int(X) float128_to_int32(X);50 #define float128_to_long(X) float128_to_int64(X);51 #define float128_to_longlong(X) float128_to_int64(X);52 53 48 #define float32_to_uint(X) float32_to_uint32(X); 54 49 #define float32_to_ulong(X) float32_to_uint64(X); … … 58 53 #define float64_to_ulong(X) float64_to_uint64(X); 59 54 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X);62 #define float128_to_ulong(X) float128_to_uint64(X);63 #define float128_to_ulonglong(X) float128_to_uint64(X);64 55 65 56 #define int_to_float32(X) int32_to_float32(X); … … 71 62 #define longlong_to_float64(X) int64_to_float64(X); 72 63 73 #define int_to_float128(X) int32_to_float128(X);74 #define long_to_float128(X) int64_to_float128(X);75 #define longlong_to_float128(X) int64_to_float128(X);76 77 64 #define uint_to_float32(X) uint32_to_float32(X); 78 65 #define ulong_to_float32(X) uint64_to_float32(X); … … 83 70 #define ulonglong_to_float64(X) uint64_to_float64(X); 84 71 85 #define uint_to_float128(X) uint32_to_float128(X);86 #define ulong_to_float128(X) uint64_to_float128(X);87 #define ulonglong_to_float128(X) uint64_to_float128(X);88 89 72 #endif 90 73 -
TabularUnified uspace/lib/softfloat/arch/ppc32/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 47 46 #define float64_to_longlong(X) float64_to_int64(X); 48 47 49 #define float128_to_int(X) float128_to_int32(X);50 #define float128_to_long(X) float128_to_int32(X);51 #define float128_to_longlong(X) float128_to_int64(X);52 53 48 #define float32_to_uint(X) float32_to_uint32(X); 54 49 #define float32_to_ulong(X) float32_to_uint32(X); … … 58 53 #define float64_to_ulong(X) float64_to_uint32(X); 59 54 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X);62 #define float128_to_ulong(X) float128_to_uint32(X);63 #define float128_to_ulonglong(X) float128_to_uint64(X);64 55 65 56 #define int_to_float32(X) int32_to_float32(X); … … 71 62 #define longlong_to_float64(X) int64_to_float64(X); 72 63 73 #define int_to_float128(X) int32_to_float128(X);74 #define long_to_float128(X) int32_to_float128(X);75 #define longlong_to_float128(X) int64_to_float128(X);76 77 64 #define uint_to_float32(X) uint32_to_float32(X); 78 65 #define ulong_to_float32(X) uint32_to_float32(X); … … 83 70 #define ulonglong_to_float64(X) uint64_to_float64(X); 84 71 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint32_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 72 #endif 88 73 89 #endif90 74 91 75 /** @} 92 76 */ 77 -
TabularUnified uspace/lib/softfloat/arch/sparc64/include/functions.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 39 38 #define __SOFTFLOAT_FUNCTIONS_H__ 40 39 41 #define SPARC_SOFTFLOAT42 43 40 #define float32_to_int(X) float32_to_int32(X); 44 41 #define float32_to_long(X) float32_to_int64(X); … … 48 45 #define float64_to_long(X) float64_to_int64(X); 49 46 #define float64_to_longlong(X) float64_to_int64(X); 50 51 #define float128_to_int(X) float128_to_int32(X);52 #define float128_to_long(X) float128_to_int64(X);53 #define float128_to_longlong(X) float128_to_int64(X);54 47 55 48 #define float32_to_uint(X) float32_to_uint32(X); … … 61 54 #define float64_to_ulonglong(X) float64_to_uint64(X); 62 55 63 #define float128_to_uint(X) float128_to_uint32(X);64 #define float128_to_ulong(X) float128_to_uint64(X);65 #define float128_to_ulonglong(X) float128_to_uint64(X);66 67 56 #define int_to_float32(X) int32_to_float32(X); 68 57 #define long_to_float32(X) int64_to_float32(X); … … 72 61 #define long_to_float64(X) int64_to_float64(X); 73 62 #define longlong_to_float64(X) int64_to_float64(X); 74 75 #define int_to_float128(X) int32_to_float128(X);76 #define long_to_float128(X) int64_to_float128(X);77 #define longlong_to_float128(X) int64_to_float128(X);78 63 79 64 #define uint_to_float32(X) uint32_to_float32(X); … … 85 70 #define ulonglong_to_float64(X) uint64_to_float64(X); 86 71 87 #define uint_to_float128(X) uint32_to_float128(X); 88 #define ulong_to_float128(X) uint64_to_float128(X); 89 #define ulonglong_to_float128(X) uint64_to_float128(X); 72 #endif 90 73 91 #endif92 74 93 75 /** @} 94 76 */ 77 -
TabularUnified uspace/lib/softfloat/generic/add.c ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Addition functions.32 /** @file 34 33 */ 35 34 … … 37 36 #include <add.h> 38 37 #include <comparison.h> 39 #include <common.h> 40 41 /** 42 * Add two single-precision floats with the same signs. 43 * 44 * @param a First input operand. 45 * @param b Second input operand. 46 * @return Result of addition. 38 39 /** Add two Float32 numbers with same signs 47 40 */ 48 41 float32 addFloat32(float32 a, float32 b) 49 42 { 50 43 int expdiff; 51 uint32_t exp1, exp2, 44 uint32_t exp1, exp2,frac1, frac2; 52 45 53 46 expdiff = a.parts.exp - b.parts.exp; … … 56 49 /* TODO: fix SigNaN */ 57 50 if (isFloat32SigNaN(b)) { 58 } 59 60 return b; 61 } 51 }; 52 53 return b; 54 }; 62 55 63 56 if (b.parts.exp == FLOAT32_MAX_EXPONENT) { … … 74 67 /* TODO: fix SigNaN */ 75 68 if (isFloat32SigNaN(a) || isFloat32SigNaN(b)) { 76 } 77 return (isFloat32NaN(a) ? a :b);78 } 69 }; 70 return (isFloat32NaN(a)?a:b); 71 }; 79 72 80 73 if (a.parts.exp == FLOAT32_MAX_EXPONENT) { … … 86 79 frac2 = b.parts.fraction; 87 80 exp2 = b.parts.exp; 88 } 81 }; 89 82 90 83 if (exp1 == 0) { … … 94 87 /* result is not denormalized */ 95 88 a.parts.exp = 1; 96 } 89 }; 97 90 a.parts.fraction = frac1; 98 91 return a; 99 } 92 }; 100 93 101 94 frac1 |= FLOAT32_HIDDEN_BIT_MASK; /* add hidden bit */ … … 107 100 /* add hidden bit to second operand */ 108 101 frac2 |= FLOAT32_HIDDEN_BIT_MASK; 109 } 102 }; 110 103 111 104 /* create some space for rounding */ … … 125 118 ++exp1; 126 119 frac1 >>= 1; 127 } 120 }; 128 121 129 122 /* rounding - if first bit after fraction is set then round up */ … … 134 127 ++exp1; 135 128 frac1 >>= 1; 136 } 129 }; 130 137 131 138 132 if ((exp1 == FLOAT32_MAX_EXPONENT ) || (exp2 > exp1)) { 139 /* overflow - set infinity as result */140 a.parts.exp = FLOAT32_MAX_EXPONENT;141 a.parts.fraction = 0;142 return a;143 }133 /* overflow - set infinity as result */ 134 a.parts.exp = FLOAT32_MAX_EXPONENT; 135 a.parts.fraction = 0; 136 return a; 137 } 144 138 145 139 a.parts.exp = exp1; 146 140 147 141 /* Clear hidden bit and shift */ 148 a.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)) ;142 a.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)) ; 149 143 return a; 150 144 } 151 145 152 /** 153 * Add two double-precision floats with the same signs. 154 * 155 * @param a First input operand. 156 * @param b Second input operand. 157 * @return Result of addition. 146 /** Add two Float64 numbers with same signs 158 147 */ 159 148 float64 addFloat64(float64 a, float64 b) … … 163 152 uint64_t frac1, frac2; 164 153 165 expdiff = ((int )a.parts.exp) - b.parts.exp;154 expdiff = ((int )a.parts.exp) - b.parts.exp; 166 155 if (expdiff < 0) { 167 156 if (isFloat64NaN(b)) { 168 157 /* TODO: fix SigNaN */ 169 158 if (isFloat64SigNaN(b)) { 170 } 171 172 return b; 173 } 159 }; 160 161 return b; 162 }; 174 163 175 164 /* b is infinity and a not */ 176 if (b.parts.exp == FLOAT64_MAX_EXPONENT ) {165 if (b.parts.exp == FLOAT64_MAX_EXPONENT ) { 177 166 return b; 178 167 } … … 187 176 /* TODO: fix SigNaN */ 188 177 if (isFloat64SigNaN(a) || isFloat64SigNaN(b)) { 189 } 190 return a; 191 } 178 }; 179 return a; 180 }; 192 181 193 182 /* a is infinity and b not */ 194 if (a.parts.exp == FLOAT64_MAX_EXPONENT ) {183 if (a.parts.exp == FLOAT64_MAX_EXPONENT ) { 195 184 return a; 196 185 } … … 200 189 frac2 = b.parts.fraction; 201 190 exp2 = b.parts.exp; 202 } 191 }; 203 192 204 193 if (exp1 == 0) { … … 208 197 /* result is not denormalized */ 209 198 a.parts.exp = 1; 210 } 199 }; 211 200 a.parts.fraction = frac1; 212 201 return a; 213 } 202 }; 214 203 215 204 /* add hidden bit - frac1 is sure not denormalized */ … … 223 212 /* is not denormalized */ 224 213 frac2 |= FLOAT64_HIDDEN_BIT_MASK; 225 } 214 }; 226 215 227 216 /* create some space for rounding */ … … 229 218 frac2 <<= 6; 230 219 231 if (expdiff < (FLOAT64_FRACTION_SIZE + 2) ) {220 if (expdiff < (FLOAT64_FRACTION_SIZE + 2) ) { 232 221 frac2 >>= expdiff; 233 222 frac1 += frac2; … … 238 227 } 239 228 240 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7) ) {241 ++exp1; 242 frac1 >>= 1; 243 } 229 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7) ) { 230 ++exp1; 231 frac1 >>= 1; 232 }; 244 233 245 234 /* rounding - if first bit after fraction is set then round up */ … … 250 239 ++exp1; 251 240 frac1 >>= 1; 252 } 241 }; 253 242 254 243 if ((exp1 == FLOAT64_MAX_EXPONENT ) || (exp2 > exp1)) { 255 /* overflow - set infinity as result */256 a.parts.exp = FLOAT64_MAX_EXPONENT;257 a.parts.fraction = 0;258 return a;259 }244 /* overflow - set infinity as result */ 245 a.parts.exp = FLOAT64_MAX_EXPONENT; 246 a.parts.fraction = 0; 247 return a; 248 } 260 249 261 250 a.parts.exp = exp1; 262 251 /* Clear hidden bit and shift */ 263 a.parts.fraction = ((frac1 >> 6 ) & (~FLOAT64_HIDDEN_BIT_MASK)); 252 a.parts.fraction = ( (frac1 >> 6 ) & (~FLOAT64_HIDDEN_BIT_MASK)); 253 264 254 return a; 265 255 } 266 256 267 /**268 * Add two quadruple-precision floats with the same signs.269 *270 * @param a First input operand.271 * @param b Second input operand.272 * @return Result of addition.273 */274 float128 addFloat128(float128 a, float128 b)275 {276 int expdiff;277 uint32_t exp1, exp2;278 uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo;279 280 expdiff = ((int) a.parts.exp) - b.parts.exp;281 if (expdiff < 0) {282 if (isFloat128NaN(b)) {283 /* TODO: fix SigNaN */284 if (isFloat128SigNaN(b)) {285 }286 287 return b;288 }289 290 /* b is infinity and a not */291 if (b.parts.exp == FLOAT128_MAX_EXPONENT) {292 return b;293 }294 295 frac1_hi = b.parts.frac_hi;296 frac1_lo = b.parts.frac_lo;297 exp1 = b.parts.exp;298 frac2_hi = a.parts.frac_hi;299 frac2_lo = a.parts.frac_lo;300 exp2 = a.parts.exp;301 expdiff *= -1;302 } else {303 if (isFloat128NaN(a)) {304 /* TODO: fix SigNaN */305 if (isFloat128SigNaN(a) || isFloat128SigNaN(b)) {306 }307 return a;308 }309 310 /* a is infinity and b not */311 if (a.parts.exp == FLOAT128_MAX_EXPONENT) {312 return a;313 }314 315 frac1_hi = a.parts.frac_hi;316 frac1_lo = a.parts.frac_lo;317 exp1 = a.parts.exp;318 frac2_hi = b.parts.frac_hi;319 frac2_lo = b.parts.frac_lo;320 exp2 = b.parts.exp;321 }322 323 if (exp1 == 0) {324 /* both are denormalized */325 add128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &frac1_hi, &frac1_lo);326 327 and128(frac1_hi, frac1_lo,328 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,329 &tmp_hi, &tmp_lo);330 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {331 /* result is not denormalized */332 a.parts.exp = 1;333 }334 335 a.parts.frac_hi = frac1_hi;336 a.parts.frac_lo = frac1_lo;337 return a;338 }339 340 /* add hidden bit - frac1 is sure not denormalized */341 or128(frac1_hi, frac1_lo,342 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,343 &frac1_hi, &frac1_lo);344 345 /* second operand ... */346 if (exp2 == 0) {347 /* ... is denormalized */348 --expdiff;349 } else {350 /* is not denormalized */351 or128(frac2_hi, frac2_lo,352 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,353 &frac2_hi, &frac2_lo);354 }355 356 /* create some space for rounding */357 lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);358 lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo);359 360 if (expdiff < (FLOAT128_FRACTION_SIZE + 2)) {361 rshift128(frac2_hi, frac2_lo, expdiff, &frac2_hi, &frac2_lo);362 add128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &frac1_hi, &frac1_lo);363 } else {364 a.parts.exp = exp1;365 366 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);367 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,368 &tmp_hi, &tmp_lo);369 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);370 371 a.parts.frac_hi = tmp_hi;372 a.parts.frac_lo = tmp_lo;373 return a;374 }375 376 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,377 &tmp_hi, &tmp_lo);378 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);379 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {380 ++exp1;381 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);382 }383 384 /* rounding - if first bit after fraction is set then round up */385 add128(frac1_hi, frac1_lo, 0x0ll, 0x1ll << 5, &frac1_hi, &frac1_lo);386 387 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,388 &tmp_hi, &tmp_lo);389 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);390 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {391 /* rounding overflow */392 ++exp1;393 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);394 }395 396 if ((exp1 == FLOAT128_MAX_EXPONENT ) || (exp2 > exp1)) {397 /* overflow - set infinity as result */398 a.parts.exp = FLOAT64_MAX_EXPONENT;399 a.parts.frac_hi = 0;400 a.parts.frac_lo = 0;401 return a;402 }403 404 a.parts.exp = exp1;405 406 /* Clear hidden bit and shift */407 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);408 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,409 &tmp_hi, &tmp_lo);410 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);411 412 a.parts.frac_hi = tmp_hi;413 a.parts.frac_lo = tmp_lo;414 415 return a;416 }417 418 257 /** @} 419 258 */ -
TabularUnified uspace/lib/softfloat/generic/common.c ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Common helper operations.32 /** @file 34 33 */ 35 34 … … 37 36 #include <common.h> 38 37 39 /* Table for fast leading zeroes counting .*/38 /* Table for fast leading zeroes counting */ 40 39 char zeroTable[256] = { 41 40 8, 7, 7, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, \ … … 57 56 }; 58 57 59 /** 60 * Take fraction shifted by 10 bits to the left, round it, normalize it 61 * and detect exceptions 62 * 63 * @param cexp Exponent with bias. 64 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit. 65 * @param sign Resulting sign. 66 * @return Finished double-precision float. 58 59 60 /** Take fraction shifted by 10 bits to left, round it, normalize it and detect exceptions 61 * @param cexp exponent with bias 62 * @param cfrac fraction shifted 10 places left with added hidden bit 63 * @param sign 64 * @return valied float64 67 65 */ 68 66 float64 finishFloat64(int32_t cexp, uint64_t cfrac, char sign) … … 73 71 74 72 /* find first nonzero digit and shift result and detect possibly underflow */ 75 while ((cexp > 0) && (cfrac) && 76 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) { 73 while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ) )))) { 77 74 cexp--; 78 75 cfrac <<= 1; 79 /* TODO: fix underflow */ 80 } 81 82 if ((cexp < 0) || (cexp == 0 && 83 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) { 76 /* TODO: fix underflow */ 77 }; 78 79 if ((cexp < 0) || ( cexp == 0 && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) { 84 80 /* FIXME: underflow */ 85 81 result.parts.exp = 0; … … 97 93 98 94 if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) { 99 result.parts.fraction =100 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK));95 96 result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2) ) & (~FLOAT64_HIDDEN_BIT_MASK)); 101 97 return result; 102 98 } … … 107 103 ++cexp; 108 104 109 if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ))) {105 if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ))) { 110 106 ++cexp; 111 107 cfrac >>= 1; … … 113 109 114 110 /* check overflow */ 115 if (cexp >= FLOAT64_MAX_EXPONENT ) {111 if (cexp >= FLOAT64_MAX_EXPONENT ) { 116 112 /* FIXME: overflow, return infinity */ 117 113 result.parts.exp = FLOAT64_MAX_EXPONENT; … … 120 116 } 121 117 122 result.parts.exp = (uint32_t) cexp; 123 124 result.parts.fraction = 125 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK)); 118 result.parts.exp = (uint32_t)cexp; 119 120 result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2 ) ) & (~FLOAT64_HIDDEN_BIT_MASK)); 126 121 127 122 return result; 128 123 } 129 124 130 /** 131 * Take fraction, round it, normalize it and detect exceptions 132 * 133 * @param cexp Exponent with bias. 134 * @param cfrac_hi High part of the fraction shifted 14 bits to the left 135 * with added hidden bit. 136 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left 137 * with added hidden bit. 138 * @param sign Resulting sign. 139 * @param shift_out Bits right-shifted out from fraction by the caller. 140 * @return Finished quadruple-precision float. 141 */ 142 float128 finishFloat128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo, 143 char sign, uint64_t shift_out) 144 { 145 float128 result; 146 uint64_t tmp_hi, tmp_lo; 147 148 result.parts.sign = sign; 149 150 /* find first nonzero digit and shift result and detect possibly underflow */ 151 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 152 1, &tmp_hi, &tmp_lo); 153 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 154 while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) && 155 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) { 156 cexp--; 157 lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo); 158 /* TODO: fix underflow */ 159 160 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 161 1, &tmp_hi, &tmp_lo); 162 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 163 } 164 165 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 166 1, &tmp_hi, &tmp_lo); 167 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 168 if ((cexp < 0) || (cexp == 0 && 169 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) { 170 /* FIXME: underflow */ 171 result.parts.exp = 0; 172 if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */ 173 result.parts.frac_hi = 0x0ll; 174 result.parts.frac_lo = 0x0ll; 175 return result; 176 } 177 178 while (cexp < 0) { 179 cexp++; 180 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo); 181 } 182 183 if (shift_out & (0x1ull < 64)) { 184 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo); 185 } 186 187 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 188 1, &tmp_hi, &tmp_lo); 189 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 190 if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 191 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 192 &tmp_hi, &tmp_lo); 193 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 194 result.parts.frac_hi = tmp_hi; 195 result.parts.frac_lo = tmp_lo; 196 return result; 197 } 198 } else { 199 if (shift_out & (0x1ull < 64)) { 200 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo); 201 } 202 } 203 204 ++cexp; 205 206 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 207 1, &tmp_hi, &tmp_lo); 208 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 209 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 210 ++cexp; 211 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo); 212 } 213 214 /* check overflow */ 215 if (cexp >= FLOAT128_MAX_EXPONENT) { 216 /* FIXME: overflow, return infinity */ 217 result.parts.exp = FLOAT128_MAX_EXPONENT; 218 result.parts.frac_hi = 0x0ll; 219 result.parts.frac_lo = 0x0ll; 220 return result; 221 } 222 223 result.parts.exp = (uint32_t) cexp; 224 225 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 226 &tmp_hi, &tmp_lo); 227 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 228 result.parts.frac_hi = tmp_hi; 229 result.parts.frac_lo = tmp_lo; 230 231 return result; 232 } 233 234 /** 235 * Counts leading zeroes in byte. 236 * 237 * @param i Byte for which to count leading zeroes. 238 * @return Number of detected leading zeroes. 125 /** Counts leading zeroes in 64bit unsigned integer 126 * @param i 127 */ 128 int countZeroes64(uint64_t i) 129 { 130 int j; 131 for (j =0; j < 64; j += 8) { 132 if ( i & (0xFFll << (56 - j))) { 133 return (j + countZeroes8(i >> (56 - j))); 134 } 135 } 136 137 return 64; 138 } 139 140 /** Counts leading zeroes in 32bit unsigned integer 141 * @param i 142 */ 143 int countZeroes32(uint32_t i) 144 { 145 int j; 146 for (j =0; j < 32; j += 8) { 147 if ( i & (0xFF << (24 - j))) { 148 return (j + countZeroes8(i >> (24 - j))); 149 } 150 } 151 152 return 32; 153 } 154 155 /** Counts leading zeroes in byte 156 * @param i 239 157 */ 240 158 int countZeroes8(uint8_t i) … … 243 161 } 244 162 245 /** 246 * Counts leading zeroes in 32bit unsigned integer. 247 * 248 * @param i Integer for which to count leading zeroes. 249 * @return Number of detected leading zeroes. 250 */ 251 int countZeroes32(uint32_t i) 252 { 253 int j; 254 for (j = 0; j < 32; j += 8) { 255 if (i & (0xFF << (24 - j))) { 256 return (j + countZeroes8(i >> (24 - j))); 257 } 258 } 259 260 return 32; 261 } 262 263 /** 264 * Counts leading zeroes in 64bit unsigned integer. 265 * 266 * @param i Integer for which to count leading zeroes. 267 * @return Number of detected leading zeroes. 268 */ 269 int countZeroes64(uint64_t i) 270 { 271 int j; 272 for (j = 0; j < 64; j += 8) { 273 if (i & (0xFFll << (56 - j))) { 274 return (j + countZeroes8(i >> (56 - j))); 275 } 276 } 277 278 return 64; 279 } 280 281 /** 282 * Round and normalize number expressed by exponent and fraction with 283 * first bit (equal to hidden bit) at 30th bit. 284 * 285 * @param exp Exponent part. 286 * @param fraction Fraction with hidden bit shifted to 30th bit. 163 /** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 30. bit 164 * @param exp exponent 165 * @param fraction part with hidden bit shifted to 30. bit 287 166 */ 288 167 void roundFloat32(int32_t *exp, uint32_t *fraction) 289 168 { 290 169 /* rounding - if first bit after fraction is set then round up */ 291 (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3)); 292 293 if ((*fraction) & 294 (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) { 170 (*fraction) += (0x1 << 6); 171 172 if ((*fraction) & (FLOAT32_HIDDEN_BIT_MASK << 8)) { 295 173 /* rounding overflow */ 296 174 ++(*exp); 297 175 (*fraction) >>= 1; 298 } 299 300 if (((*exp) >= FLOAT32_MAX_EXPONENT ) || ((*exp) < 0)) {176 }; 177 178 if (((*exp) >= FLOAT32_MAX_EXPONENT ) || ((*exp) < 0)) { 301 179 /* overflow - set infinity as result */ 302 180 (*exp) = FLOAT32_MAX_EXPONENT; 303 181 (*fraction) = 0; 304 }305 }306 307 /** 308 * Round and normalize number expressed by exponent and fraction with 309 * first bit (equal to hidden bit) at 62nd bit. 310 * 311 * @param exp Exponent part.312 * @param fraction Fraction with hidden bit shifted to 62nd bit.182 return; 183 } 184 185 return; 186 } 187 188 /** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 62. bit 189 * @param exp exponent 190 * @param fraction part with hidden bit shifted to 62. bit 313 191 */ 314 192 void roundFloat64(int32_t *exp, uint64_t *fraction) 315 193 { 316 194 /* rounding - if first bit after fraction is set then round up */ 317 (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3)); 318 319 if ((*fraction) & 320 (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 3))) { 195 (*fraction) += (0x1 << 9); 196 197 if ((*fraction) & (FLOAT64_HIDDEN_BIT_MASK << 11)) { 321 198 /* rounding overflow */ 322 199 ++(*exp); 323 200 (*fraction) >>= 1; 324 } 325 326 if (((*exp) >= FLOAT64_MAX_EXPONENT ) || ((*exp) < 0)) {201 }; 202 203 if (((*exp) >= FLOAT64_MAX_EXPONENT ) || ((*exp) < 0)) { 327 204 /* overflow - set infinity as result */ 328 205 (*exp) = FLOAT64_MAX_EXPONENT; 329 206 (*fraction) = 0; 330 } 331 } 332 333 /** 334 * Round and normalize number expressed by exponent and fraction with 335 * first bit (equal to hidden bit) at 126th bit. 336 * 337 * @param exp Exponent part. 338 * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit. 339 * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit. 340 */ 341 void roundFloat128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo) 342 { 343 uint64_t tmp_hi, tmp_lo; 344 345 /* rounding - if first bit after fraction is set then round up */ 346 lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo); 347 add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo); 348 349 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 350 (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo); 351 and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 352 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 353 /* rounding overflow */ 354 ++(*exp); 355 rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo); 356 } 357 358 if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) { 359 /* overflow - set infinity as result */ 360 (*exp) = FLOAT128_MAX_EXPONENT; 361 (*frac_hi) = 0; 362 (*frac_lo) = 0; 363 } 364 } 365 366 /** 367 * Logical shift left on the 128-bit operand. 368 * 369 * @param a_hi High part of the input operand. 370 * @param a_lo Low part of the input operand. 371 * @param shift Number of bits by witch to shift. 372 * @param r_hi Address to store high part of the result. 373 * @param r_lo Address to store low part of the result. 374 */ 375 void lshift128( 376 uint64_t a_hi, uint64_t a_lo, int shift, 377 uint64_t *r_hi, uint64_t *r_lo) 378 { 379 if (shift <= 0) { 380 /* do nothing */ 381 } else if (shift >= 128) { 382 a_hi = 0; 383 a_lo = 0; 384 } else if (shift >= 64) { 385 a_hi = a_lo << (shift - 64); 386 a_lo = 0; 387 } else { 388 a_hi <<= shift; 389 a_hi |= a_lo >> (64 - shift); 390 a_lo <<= shift; 391 } 392 393 *r_hi = a_hi; 394 *r_lo = a_lo; 395 } 396 397 /** 398 * Logical shift right on the 128-bit operand. 399 * 400 * @param a_hi High part of the input operand. 401 * @param a_lo Low part of the input operand. 402 * @param shift Number of bits by witch to shift. 403 * @param r_hi Address to store high part of the result. 404 * @param r_lo Address to store low part of the result. 405 */ 406 void rshift128( 407 uint64_t a_hi, uint64_t a_lo, int shift, 408 uint64_t *r_hi, uint64_t *r_lo) 409 { 410 if (shift <= 0) { 411 /* do nothing */ 412 } else if (shift >= 128) { 413 a_hi = 0; 414 a_lo = 0; 415 } else if (shift >= 64) { 416 a_lo = a_hi >> (shift - 64); 417 a_hi = 0; 418 } else { 419 a_lo >>= shift; 420 a_lo |= a_hi << (64 - shift); 421 a_hi >>= shift; 422 } 423 424 *r_hi = a_hi; 425 *r_lo = a_lo; 426 } 427 428 /** 429 * Bitwise AND on 128-bit operands. 430 * 431 * @param a_hi High part of the first input operand. 432 * @param a_lo Low part of the first input operand. 433 * @param b_hi High part of the second input operand. 434 * @param b_lo Low part of the second input operand. 435 * @param r_hi Address to store high part of the result. 436 * @param r_lo Address to store low part of the result. 437 */ 438 void and128( 439 uint64_t a_hi, uint64_t a_lo, 440 uint64_t b_hi, uint64_t b_lo, 441 uint64_t *r_hi, uint64_t *r_lo) 442 { 443 *r_hi = a_hi & b_hi; 444 *r_lo = a_lo & b_lo; 445 } 446 447 /** 448 * Bitwise inclusive OR on 128-bit operands. 449 * 450 * @param a_hi High part of the first input operand. 451 * @param a_lo Low part of the first input operand. 452 * @param b_hi High part of the second input operand. 453 * @param b_lo Low part of the second input operand. 454 * @param r_hi Address to store high part of the result. 455 * @param r_lo Address to store low part of the result. 456 */ 457 void or128( 458 uint64_t a_hi, uint64_t a_lo, 459 uint64_t b_hi, uint64_t b_lo, 460 uint64_t *r_hi, uint64_t *r_lo) 461 { 462 *r_hi = a_hi | b_hi; 463 *r_lo = a_lo | b_lo; 464 } 465 466 /** 467 * Bitwise exclusive OR on 128-bit operands. 468 * 469 * @param a_hi High part of the first input operand. 470 * @param a_lo Low part of the first input operand. 471 * @param b_hi High part of the second input operand. 472 * @param b_lo Low part of the second input operand. 473 * @param r_hi Address to store high part of the result. 474 * @param r_lo Address to store low part of the result. 475 */ 476 void xor128( 477 uint64_t a_hi, uint64_t a_lo, 478 uint64_t b_hi, uint64_t b_lo, 479 uint64_t *r_hi, uint64_t *r_lo) 480 { 481 *r_hi = a_hi ^ b_hi; 482 *r_lo = a_lo ^ b_lo; 483 } 484 485 /** 486 * Bitwise NOT on the 128-bit operand. 487 * 488 * @param a_hi High part of the input operand. 489 * @param a_lo Low part of the input operand. 490 * @param r_hi Address to store high part of the result. 491 * @param r_lo Address to store low part of the result. 492 */ 493 void not128( 494 uint64_t a_hi, uint64_t a_lo, 495 uint64_t *r_hi, uint64_t *r_lo) 496 { 497 *r_hi = ~a_hi; 498 *r_lo = ~a_lo; 499 } 500 501 /** 502 * Equality comparison of 128-bit operands. 503 * 504 * @param a_hi High part of the first input operand. 505 * @param a_lo Low part of the first input operand. 506 * @param b_hi High part of the second input operand. 507 * @param b_lo Low part of the second input operand. 508 * @return 1 if operands are equal, 0 otherwise. 509 */ 510 int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo) 511 { 512 return (a_hi == b_hi) && (a_lo == b_lo); 513 } 514 515 /** 516 * Lower-or-equal comparison of 128-bit operands. 517 * 518 * @param a_hi High part of the first input operand. 519 * @param a_lo Low part of the first input operand. 520 * @param b_hi High part of the second input operand. 521 * @param b_lo Low part of the second input operand. 522 * @return 1 if a is lower or equal to b, 0 otherwise. 523 */ 524 int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo) 525 { 526 return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo)); 527 } 528 529 /** 530 * Lower-than comparison of 128-bit operands. 531 * 532 * @param a_hi High part of the first input operand. 533 * @param a_lo Low part of the first input operand. 534 * @param b_hi High part of the second input operand. 535 * @param b_lo Low part of the second input operand. 536 * @return 1 if a is lower than b, 0 otherwise. 537 */ 538 int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo) 539 { 540 return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo)); 541 } 542 543 /** 544 * Addition of two 128-bit unsigned integers. 545 * 546 * @param a_hi High part of the first input operand. 547 * @param a_lo Low part of the first input operand. 548 * @param b_hi High part of the second input operand. 549 * @param b_lo Low part of the second input operand. 550 * @param r_hi Address to store high part of the result. 551 * @param r_lo Address to store low part of the result. 552 */ 553 void add128(uint64_t a_hi, uint64_t a_lo, 554 uint64_t b_hi, uint64_t b_lo, 555 uint64_t *r_hi, uint64_t *r_lo) 556 { 557 uint64_t low = a_lo + b_lo; 558 *r_lo = low; 559 /* detect overflow to add a carry */ 560 *r_hi = a_hi + b_hi + (low < a_lo); 561 } 562 563 /** 564 * Substraction of two 128-bit unsigned integers. 565 * 566 * @param a_hi High part of the first input operand. 567 * @param a_lo Low part of the first input operand. 568 * @param b_hi High part of the second input operand. 569 * @param b_lo Low part of the second input operand. 570 * @param r_hi Address to store high part of the result. 571 * @param r_lo Address to store low part of the result. 572 */ 573 void sub128(uint64_t a_hi, uint64_t a_lo, 574 uint64_t b_hi, uint64_t b_lo, 575 uint64_t *r_hi, uint64_t *r_lo) 576 { 577 *r_lo = a_lo - b_lo; 578 /* detect underflow to substract a carry */ 579 *r_hi = a_hi - b_hi - (a_lo < b_lo); 580 } 581 582 /** 583 * Multiplication of two 64-bit unsigned integers. 584 * 585 * @param a First input operand. 586 * @param b Second input operand. 587 * @param r_hi Address to store high part of the result. 588 * @param r_lo Address to store low part of the result. 589 */ 590 void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo) 591 { 592 uint64_t low, high, middle1, middle2; 593 uint32_t alow, blow; 594 595 alow = a & 0xFFFFFFFF; 596 blow = b & 0xFFFFFFFF; 597 598 a >>= 32; 599 b >>= 32; 600 601 low = ((uint64_t) alow) * blow; 602 middle1 = a * blow; 603 middle2 = alow * b; 604 high = a * b; 605 606 middle1 += middle2; 607 high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32); 608 middle1 <<= 32; 609 low += middle1; 610 high += (low < middle1); 611 *r_lo = low; 612 *r_hi = high; 613 } 614 615 /** 616 * Multiplication of two 128-bit unsigned integers. 617 * 618 * @param a_hi High part of the first input operand. 619 * @param a_lo Low part of the first input operand. 620 * @param b_hi High part of the second input operand. 621 * @param b_lo Low part of the second input operand. 622 * @param r_hihi Address to store first (highest) quarter of the result. 623 * @param r_hilo Address to store second quarter of the result. 624 * @param r_lohi Address to store third quarter of the result. 625 * @param r_lolo Address to store fourth (lowest) quarter of the result. 626 */ 627 void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo, 628 uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo) 629 { 630 uint64_t hihi, hilo, lohi, lolo; 631 uint64_t tmp1, tmp2; 632 633 mul64(a_lo, b_lo, &lohi, &lolo); 634 mul64(a_lo, b_hi, &hilo, &tmp2); 635 add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi); 636 mul64(a_hi, b_hi, &hihi, &tmp1); 637 add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo); 638 mul64(a_hi, b_lo, &tmp1, &tmp2); 639 add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi); 640 add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo); 641 642 *r_hihi = hihi; 643 *r_hilo = hilo; 644 *r_lohi = lohi; 645 *r_lolo = lolo; 646 } 647 648 /** 649 * Estimate the quotient of 128-bit unsigned divident and 64-bit unsigned 650 * divisor. 651 * 652 * @param a_hi High part of the divident. 653 * @param a_lo Low part of the divident. 654 * @param b Divisor. 655 * @return Quotient approximation. 656 */ 657 uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b) 658 { 659 uint64_t b_hi, b_lo; 660 uint64_t rem_hi, rem_lo; 661 uint64_t tmp_hi, tmp_lo; 662 uint64_t result; 663 664 if (b <= a_hi) { 665 return 0xFFFFFFFFFFFFFFFFull; 666 } 667 668 b_hi = b >> 32; 669 result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32; 670 mul64(b, result, &tmp_hi, &tmp_lo); 671 sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo); 672 673 while ((int64_t) rem_hi < 0) { 674 result -= 0x1ll << 32; 675 b_lo = b << 32; 676 add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo); 677 } 678 679 rem_hi = (rem_hi << 32) | (rem_lo >> 32); 680 if ((b_hi << 32) <= rem_hi) { 681 result |= 0xFFFFFFFF; 682 } else { 683 result |= rem_hi / b_hi; 684 } 685 686 return result; 207 return; 208 } 209 210 return; 687 211 } 688 212 -
TabularUnified uspace/lib/softfloat/generic/comparison.c ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Comparison functions.32 /** @file 34 33 */ 35 34 36 35 #include <sftypes.h> 37 36 #include <comparison.h> 38 #include <common.h>39 37 40 /** 41 * Determines whether the given float represents NaN (either signalling NaN or 42 * quiet NaN). 43 * 44 * @param f Single-precision float. 45 * @return 1 if float is NaN, 0 otherwise. 46 */ 38 /* NaN : exp = 0xff and nonzero fraction */ 47 39 int isFloat32NaN(float32 f) 48 40 { 49 /* NaN : exp = 0xff and nonzero fraction */50 41 return ((f.parts.exp == 0xFF) && (f.parts.fraction)); 51 42 } 52 43 53 /** 54 * Determines whether the given float represents NaN (either signalling NaN or 55 * quiet NaN). 56 * 57 * @param d Double-precision float. 58 * @return 1 if float is NaN, 0 otherwise. 59 */ 44 /* NaN : exp = 0x7ff and nonzero fraction */ 60 45 int isFloat64NaN(float64 d) 61 46 { 62 /* NaN : exp = 0x7ff and nonzero fraction */63 47 return ((d.parts.exp == 0x7FF) && (d.parts.fraction)); 64 48 } 65 49 66 /** 67 * Determines whether the given float represents NaN (either signalling NaN or 68 * quiet NaN). 69 * 70 * @param ld Quadruple-precision float. 71 * @return 1 if float is NaN, 0 otherwise. 72 */ 73 int isFloat128NaN(float128 ld) 50 /* SigNaN : exp = 0xff fraction = 0xxxxx..x (binary), where at least one x is nonzero */ 51 int isFloat32SigNaN(float32 f) 74 52 { 75 /* NaN : exp = 0x7fff and nonzero fraction */ 76 return ((ld.parts.exp == 0x7FF) && 77 !eq128(ld.parts.frac_hi, ld.parts.frac_lo, 0x0ll, 0x0ll)); 53 return ((f.parts.exp == 0xFF) && (f.parts.fraction < 0x400000) && (f.parts.fraction)); 78 54 } 79 55 80 /** 81 * Determines whether the given float represents signalling NaN. 82 * 83 * @param f Single-precision float. 84 * @return 1 if float is signalling NaN, 0 otherwise. 85 */ 86 int isFloat32SigNaN(float32 f) 56 /* SigNaN : exp = 0x7ff fraction = 0xxxxx..x (binary), where at least one x is nonzero */ 57 int isFloat64SigNaN(float64 d) 87 58 { 88 /* SigNaN : exp = 0xff and fraction = 0xxxxx..x (binary), 89 * where at least one x is nonzero */ 90 return ((f.parts.exp == 0xFF) && 91 (f.parts.fraction < 0x400000) && (f.parts.fraction)); 59 return ((d.parts.exp == 0x7FF) && (d.parts.fraction) && (d.parts.fraction < 0x8000000000000ll)); 92 60 } 93 61 94 /**95 * Determines whether the given float represents signalling NaN.96 *97 * @param d Double-precision float.98 * @return 1 if float is signalling NaN, 0 otherwise.99 */100 int isFloat64SigNaN(float64 d)101 {102 /* SigNaN : exp = 0x7ff and fraction = 0xxxxx..x (binary),103 * where at least one x is nonzero */104 return ((d.parts.exp == 0x7FF) &&105 (d.parts.fraction) && (d.parts.fraction < 0x8000000000000ll));106 }107 108 /**109 * Determines whether the given float represents signalling NaN.110 *111 * @param ld Quadruple-precision float.112 * @return 1 if float is signalling NaN, 0 otherwise.113 */114 int isFloat128SigNaN(float128 ld)115 {116 /* SigNaN : exp = 0x7fff and fraction = 0xxxxx..x (binary),117 * where at least one x is nonzero */118 return ((ld.parts.exp == 0x7FFF) &&119 (ld.parts.frac_hi || ld.parts.frac_lo) &&120 lt128(ld.parts.frac_hi, ld.parts.frac_lo, 0x800000000000ll, 0x0ll));121 122 }123 124 /**125 * Determines whether the given float represents positive or negative infinity.126 *127 * @param f Single-precision float.128 * @return 1 if float is infinite, 0 otherwise.129 */130 62 int isFloat32Infinity(float32 f) 131 63 { 132 /* NaN : exp = 0x7ff and zero fraction */133 64 return ((f.parts.exp == 0xFF) && (f.parts.fraction == 0x0)); 134 65 } 135 66 136 /**137 * Determines whether the given float represents positive or negative infinity.138 *139 * @param d Double-precision float.140 * @return 1 if float is infinite, 0 otherwise.141 */142 67 int isFloat64Infinity(float64 d) 143 68 { 144 /* NaN : exp = 0x7ff and zero fraction */145 69 return ((d.parts.exp == 0x7FF) && (d.parts.fraction == 0x0)); 146 70 } 147 71 148 /**149 * Determines whether the given float represents positive or negative infinity.150 *151 * @param ld Quadruple-precision float.152 * @return 1 if float is infinite, 0 otherwise.153 */154 int isFloat128Infinity(float128 ld)155 {156 /* NaN : exp = 0x7fff and zero fraction */157 return ((ld.parts.exp == 0x7FFF) &&158 eq128(ld.parts.frac_hi, ld.parts.frac_lo, 0x0ll, 0x0ll));159 }160 161 /**162 * Determines whether the given float represents positive or negative zero.163 *164 * @param f Single-precision float.165 * @return 1 if float is zero, 0 otherwise.166 */167 72 int isFloat32Zero(float32 f) 168 73 { … … 170 75 } 171 76 172 /**173 * Determines whether the given float represents positive or negative zero.174 *175 * @param d Double-precision float.176 * @return 1 if float is zero, 0 otherwise.177 */178 77 int isFloat64Zero(float64 d) 179 78 { … … 182 81 183 82 /** 184 * Determines whether the given float represents positive or negative zero. 185 * 186 * @param ld Quadruple-precision float. 187 * @return 1 if float is zero, 0 otherwise. 188 */ 189 int isFloat128Zero(float128 ld) 190 { 191 uint64_t tmp_hi; 192 uint64_t tmp_lo; 193 194 and128(ld.binary.hi, ld.binary.lo, 195 0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, &tmp_hi, &tmp_lo); 196 197 return eq128(tmp_hi, tmp_lo, 0x0ll, 0x0ll); 198 } 199 200 /** 201 * Determine whether two floats are equal. NaNs are not recognized. 202 * 203 * @a First single-precision operand. 204 * @b Second single-precision operand. 205 * @return 1 if both floats are equal, 0 otherwise. 83 * @return 1 if both floats are equal - but NaNs are not recognized 206 84 */ 207 85 int isFloat32eq(float32 a, float32 b) 208 86 { 209 87 /* a equals to b or both are zeros (with any sign) */ 210 return ((a.binary == b.binary) || 211 (((a.binary | b.binary) & 0x7FFFFFFF) == 0)); 88 return ((a.binary==b.binary) || (((a.binary | b.binary) & 0x7FFFFFFF) == 0)); 212 89 } 213 90 214 91 /** 215 * Determine whether two floats are equal. NaNs are not recognized. 216 * 217 * @a First double-precision operand. 218 * @b Second double-precision operand. 219 * @return 1 if both floats are equal, 0 otherwise. 220 */ 221 int isFloat64eq(float64 a, float64 b) 222 { 223 /* a equals to b or both are zeros (with any sign) */ 224 return ((a.binary == b.binary) || 225 (((a.binary | b.binary) & 0x7FFFFFFFFFFFFFFFll) == 0)); 226 } 227 228 /** 229 * Determine whether two floats are equal. NaNs are not recognized. 230 * 231 * @a First quadruple-precision operand. 232 * @b Second quadruple-precision operand. 233 * @return 1 if both floats are equal, 0 otherwise. 234 */ 235 int isFloat128eq(float128 a, float128 b) 236 { 237 uint64_t tmp_hi; 238 uint64_t tmp_lo; 239 240 /* both are zeros (with any sign) */ 241 or128(a.binary.hi, a.binary.lo, 242 b.binary.hi, b.binary.lo, &tmp_hi, &tmp_lo); 243 and128(tmp_hi, tmp_lo, 244 0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, &tmp_hi, &tmp_lo); 245 int both_zero = eq128(tmp_hi, tmp_lo, 0x0ll, 0x0ll); 246 247 /* a equals to b */ 248 int are_equal = eq128(a.binary.hi, a.binary.lo, b.binary.hi, b.binary.lo); 249 250 return are_equal || both_zero; 251 } 252 253 /** 254 * Lower-than comparison between two floats. NaNs are not recognized. 255 * 256 * @a First single-precision operand. 257 * @b Second single-precision operand. 258 * @return 1 if a is lower than b, 0 otherwise. 92 * @return 1 if a < b - but NaNs are not recognized 259 93 */ 260 94 int isFloat32lt(float32 a, float32 b) 261 95 { 262 if (((a.binary | b.binary) & 0x7FFFFFFF) == 0) {96 if (((a.binary | b.binary) & 0x7FFFFFFF) == 0) 263 97 return 0; /* +- zeroes */ 264 }265 98 266 if ((a.parts.sign) && (b.parts.sign)) {99 if ((a.parts.sign) && (b.parts.sign)) 267 100 /* if both are negative, smaller is that with greater binary value */ 268 101 return (a.binary > b.binary); 269 }270 102 271 /* lets negate signs - now will be positive numbers allways bigger than 272 * negative (first bit will be set for unsigned integer comparison) */ 103 /* lets negate signs - now will be positive numbers allways bigger than negative (first bit will be set for unsigned integer comparison) */ 273 104 a.parts.sign = !a.parts.sign; 274 105 b.parts.sign = !b.parts.sign; … … 277 108 278 109 /** 279 * Lower-than comparison between two floats. NaNs are not recognized. 280 * 281 * @a First double-precision operand. 282 * @b Second double-precision operand. 283 * @return 1 if a is lower than b, 0 otherwise. 284 */ 285 int isFloat64lt(float64 a, float64 b) 286 { 287 if (((a.binary | b.binary) & 0x7FFFFFFFFFFFFFFFll) == 0) { 288 return 0; /* +- zeroes */ 289 } 290 291 if ((a.parts.sign) && (b.parts.sign)) { 292 /* if both are negative, smaller is that with greater binary value */ 293 return (a.binary > b.binary); 294 } 295 296 /* lets negate signs - now will be positive numbers allways bigger than 297 * negative (first bit will be set for unsigned integer comparison) */ 298 a.parts.sign = !a.parts.sign; 299 b.parts.sign = !b.parts.sign; 300 return (a.binary < b.binary); 301 } 302 303 /** 304 * Lower-than comparison between two floats. NaNs are not recognized. 305 * 306 * @a First quadruple-precision operand. 307 * @b Second quadruple-precision operand. 308 * @return 1 if a is lower than b, 0 otherwise. 309 */ 310 int isFloat128lt(float128 a, float128 b) 311 { 312 uint64_t tmp_hi; 313 uint64_t tmp_lo; 314 315 or128(a.binary.hi, a.binary.lo, 316 b.binary.hi, b.binary.lo, &tmp_hi, &tmp_lo); 317 and128(tmp_hi, tmp_lo, 318 0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, &tmp_hi, &tmp_lo); 319 if (eq128(tmp_hi, tmp_lo, 0x0ll, 0x0ll)) { 320 return 0; /* +- zeroes */ 321 } 322 323 if ((a.parts.sign) && (b.parts.sign)) { 324 /* if both are negative, smaller is that with greater binary value */ 325 return lt128(b.binary.hi, b.binary.lo, a.binary.hi, a.binary.lo); 326 } 327 328 /* lets negate signs - now will be positive numbers allways bigger than 329 * negative (first bit will be set for unsigned integer comparison) */ 330 a.parts.sign = !a.parts.sign; 331 b.parts.sign = !b.parts.sign; 332 return lt128(a.binary.hi, a.binary.lo, b.binary.hi, b.binary.lo); 333 } 334 335 /** 336 * Greater-than comparison between two floats. NaNs are not recognized. 337 * 338 * @a First single-precision operand. 339 * @b Second single-precision operand. 340 * @return 1 if a is greater than b, 0 otherwise. 110 * @return 1 if a > b - but NaNs are not recognized 341 111 */ 342 112 int isFloat32gt(float32 a, float32 b) 343 113 { 344 if (((a.binary | b.binary) & 0x7FFFFFFF) == 0) {114 if (((a.binary | b.binary) & 0x7FFFFFFF) == 0) 345 115 return 0; /* zeroes are equal with any sign */ 346 }347 116 348 if ((a.parts.sign) && (b.parts.sign)) {117 if ((a.parts.sign) && (b.parts.sign)) 349 118 /* if both are negative, greater is that with smaller binary value */ 350 119 return (a.binary < b.binary); 351 }352 120 353 /* lets negate signs - now will be positive numbers allways bigger than 354 * negative (first bit will be set for unsigned integer comparison) */ 121 /* lets negate signs - now will be positive numbers allways bigger than negative (first bit will be set for unsigned integer comparison) */ 355 122 a.parts.sign = !a.parts.sign; 356 123 b.parts.sign = !b.parts.sign; … … 358 125 } 359 126 360 /**361 * Greater-than comparison between two floats. NaNs are not recognized.362 *363 * @a First double-precision operand.364 * @b Second double-precision operand.365 * @return 1 if a is greater than b, 0 otherwise.366 */367 int isFloat64gt(float64 a, float64 b)368 {369 if (((a.binary | b.binary) & 0x7FFFFFFFFFFFFFFFll) == 0) {370 return 0; /* zeroes are equal with any sign */371 }372 373 if ((a.parts.sign) && (b.parts.sign)) {374 /* if both are negative, greater is that with smaller binary value */375 return (a.binary < b.binary);376 }377 378 /* lets negate signs - now will be positive numbers allways bigger than379 * negative (first bit will be set for unsigned integer comparison) */380 a.parts.sign = !a.parts.sign;381 b.parts.sign = !b.parts.sign;382 return (a.binary > b.binary);383 }384 385 /**386 * Greater-than comparison between two floats. NaNs are not recognized.387 *388 * @a First quadruple-precision operand.389 * @b Second quadruple-precision operand.390 * @return 1 if a is greater than b, 0 otherwise.391 */392 int isFloat128gt(float128 a, float128 b)393 {394 uint64_t tmp_hi;395 uint64_t tmp_lo;396 397 or128(a.binary.hi, a.binary.lo,398 b.binary.hi, b.binary.lo, &tmp_hi, &tmp_lo);399 and128(tmp_hi, tmp_lo,400 0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, &tmp_hi, &tmp_lo);401 if (eq128(tmp_hi, tmp_lo, 0x0ll, 0x0ll)) {402 return 0; /* zeroes are equal with any sign */403 }404 405 if ((a.parts.sign) && (b.parts.sign)) {406 /* if both are negative, greater is that with smaller binary value */407 return lt128(a.binary.hi, a.binary.lo, b.binary.hi, b.binary.lo);408 }409 410 /* lets negate signs - now will be positive numbers allways bigger than411 * negative (first bit will be set for unsigned integer comparison) */412 a.parts.sign = !a.parts.sign;413 b.parts.sign = !b.parts.sign;414 return lt128(b.binary.hi, b.binary.lo, a.binary.hi, a.binary.lo);415 }416 417 127 /** @} 418 128 */ -
TabularUnified uspace/lib/softfloat/generic/conversion.c ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Conversion of precision and conversion between integers and floats.34 */ 35 36 #include <sftypes.h>37 #include <conversion.h>38 #include <comparison.h>39 #include <common.h>32 /** @file 33 */ 34 35 #include "sftypes.h" 36 #include "conversion.h" 37 #include "comparison.h" 38 #include "common.h" 40 39 41 40 float64 convertFloat32ToFloat64(float32 a) … … 49 48 50 49 if ((isFloat32Infinity(a)) || (isFloat32NaN(a))) { 51 result.parts.exp = FLOAT64_MAX_EXPONENT;50 result.parts.exp = 0x7FF; 52 51 /* TODO; check if its correct for SigNaNs*/ 53 52 return result; 54 } 53 }; 55 54 56 55 result.parts.exp = a.parts.exp + ((int) FLOAT64_BIAS - FLOAT32_BIAS); … … 58 57 /* normalize denormalized numbers */ 59 58 60 if (result.parts.fraction == 0 ) { /* fix zero */61 result.parts.exp = 0 ;59 if (result.parts.fraction == 0ll) { /* fix zero */ 60 result.parts.exp = 0ll; 62 61 return result; 63 62 } … … 65 64 frac = result.parts.fraction; 66 65 67 while (!(frac & FLOAT64_HIDDEN_BIT_MASK)) {66 while (!(frac & (0x10000000000000ll))) { 68 67 frac <<= 1; 69 68 --result.parts.exp; 70 } 69 }; 71 70 72 71 ++result.parts.exp; 73 72 result.parts.fraction = frac; 74 } 73 }; 75 74 76 75 return result; 77 } 78 79 float128 convertFloat32ToFloat128(float32 a) 80 { 81 float128 result; 82 uint64_t frac_hi, frac_lo; 83 uint64_t tmp_hi, tmp_lo; 84 85 result.parts.sign = a.parts.sign; 86 result.parts.frac_hi = 0; 87 result.parts.frac_lo = a.parts.fraction; 88 lshift128(result.parts.frac_hi, result.parts.frac_lo, 89 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE), 90 &frac_hi, &frac_lo); 91 result.parts.frac_hi = frac_hi; 92 result.parts.frac_lo = frac_lo; 93 94 if ((isFloat32Infinity(a)) || (isFloat32NaN(a))) { 95 result.parts.exp = FLOAT128_MAX_EXPONENT; 96 /* TODO; check if its correct for SigNaNs*/ 97 return result; 98 } 99 100 result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT32_BIAS); 101 if (a.parts.exp == 0) { 102 /* normalize denormalized numbers */ 103 104 if (eq128(result.parts.frac_hi, 105 result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */ 106 result.parts.exp = 0; 107 return result; 108 } 109 110 frac_hi = result.parts.frac_hi; 111 frac_lo = result.parts.frac_lo; 112 113 and128(frac_hi, frac_lo, 114 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 115 &tmp_hi, &tmp_lo); 116 while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 117 lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo); 118 --result.parts.exp; 119 } 120 121 ++result.parts.exp; 122 result.parts.frac_hi = frac_hi; 123 result.parts.frac_lo = frac_lo; 124 } 125 126 return result; 127 } 128 129 float128 convertFloat64ToFloat128(float64 a) 130 { 131 float128 result; 132 uint64_t frac_hi, frac_lo; 133 uint64_t tmp_hi, tmp_lo; 134 135 result.parts.sign = a.parts.sign; 136 result.parts.frac_hi = 0; 137 result.parts.frac_lo = a.parts.fraction; 138 lshift128(result.parts.frac_hi, result.parts.frac_lo, 139 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE), 140 &frac_hi, &frac_lo); 141 result.parts.frac_hi = frac_hi; 142 result.parts.frac_lo = frac_lo; 143 144 if ((isFloat64Infinity(a)) || (isFloat64NaN(a))) { 145 result.parts.exp = FLOAT128_MAX_EXPONENT; 146 /* TODO; check if its correct for SigNaNs*/ 147 return result; 148 } 149 150 result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT64_BIAS); 151 if (a.parts.exp == 0) { 152 /* normalize denormalized numbers */ 153 154 if (eq128(result.parts.frac_hi, 155 result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */ 156 result.parts.exp = 0; 157 return result; 158 } 159 160 frac_hi = result.parts.frac_hi; 161 frac_lo = result.parts.frac_lo; 162 163 and128(frac_hi, frac_lo, 164 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 165 &tmp_hi, &tmp_lo); 166 while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 167 lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo); 168 --result.parts.exp; 169 } 170 171 ++result.parts.exp; 172 result.parts.frac_hi = frac_hi; 173 result.parts.frac_lo = frac_lo; 174 } 175 176 return result; 76 177 77 } 178 78 … … 186 86 187 87 if (isFloat64NaN(a)) { 188 result.parts.exp = FLOAT32_MAX_EXPONENT; 88 89 result.parts.exp = 0xFF; 189 90 190 91 if (isFloat64SigNaN(a)) { 191 /* set first bit of fraction nonzero */ 192 result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1; 92 result.parts.fraction = 0x400000; /* set first bit of fraction nonzero */ 193 93 return result; 194 94 } 195 196 /* fraction nonzero but its first bit is zero */ 197 result.parts.fraction = 0x1; 198 return result; 199 } 95 96 result.parts.fraction = 0x1; /* fraction nonzero but its first bit is zero */ 97 return result; 98 }; 200 99 201 100 if (isFloat64Infinity(a)) { 202 101 result.parts.fraction = 0; 203 result.parts.exp = FLOAT32_MAX_EXPONENT;204 return result; 205 } 206 207 exp = (int) 208 209 if (exp >= FLOAT32_MAX_EXPONENT) {210 /* FIXME: overflow*/102 result.parts.exp = 0xFF; 103 return result; 104 }; 105 106 exp = (int)a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS; 107 108 if (exp >= 0xFF) { 109 /*FIXME: overflow*/ 211 110 result.parts.fraction = 0; 212 result.parts.exp = FLOAT32_MAX_EXPONENT; 213 return result; 214 } else if (exp <= 0) { 111 result.parts.exp = 0xFF; 112 return result; 113 114 } else if (exp <= 0 ) { 115 215 116 /* underflow or denormalized */ 216 117 … … 218 119 219 120 exp *= -1; 220 if (exp > FLOAT32_FRACTION_SIZE ) {121 if (exp > FLOAT32_FRACTION_SIZE ) { 221 122 /* FIXME: underflow */ 222 123 result.parts.fraction = 0; 223 124 return result; 224 } 125 }; 225 126 226 127 /* denormalized */ 227 128 228 129 frac = a.parts.fraction; 229 frac |= FLOAT64_HIDDEN_BIT_MASK; /* denormalize and set hidden bit */130 frac |= 0x10000000000000ll; /* denormalize and set hidden bit */ 230 131 231 132 frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1); … … 234 135 --exp; 235 136 frac >>= 1; 236 } 137 }; 237 138 result.parts.fraction = frac; 238 139 239 140 return result; 240 } 141 }; 241 142 242 143 result.parts.exp = exp; 243 result.parts.fraction = 244 a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE); 144 result.parts.fraction = a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE); 245 145 return result; 246 146 } 247 147 248 float32 convertFloat128ToFloat32(float128 a) 249 { 250 float32 result; 251 int32_t exp; 252 uint64_t frac_hi, frac_lo; 253 254 result.parts.sign = a.parts.sign; 255 256 if (isFloat128NaN(a)) { 257 result.parts.exp = FLOAT32_MAX_EXPONENT; 258 259 if (isFloat128SigNaN(a)) { 260 /* set first bit of fraction nonzero */ 261 result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1; 262 return result; 263 } 264 265 /* fraction nonzero but its first bit is zero */ 266 result.parts.fraction = 0x1; 267 return result; 268 } 269 270 if (isFloat128Infinity(a)) { 271 result.parts.fraction = 0; 272 result.parts.exp = FLOAT32_MAX_EXPONENT; 273 return result; 274 } 275 276 exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT32_BIAS; 277 278 if (exp >= FLOAT32_MAX_EXPONENT) { 279 /* FIXME: overflow */ 280 result.parts.fraction = 0; 281 result.parts.exp = FLOAT32_MAX_EXPONENT; 282 return result; 283 } else if (exp <= 0) { 284 /* underflow or denormalized */ 285 286 result.parts.exp = 0; 287 288 exp *= -1; 289 if (exp > FLOAT32_FRACTION_SIZE) { 290 /* FIXME: underflow */ 291 result.parts.fraction = 0; 292 return result; 293 } 294 295 /* denormalized */ 296 297 frac_hi = a.parts.frac_hi; 298 frac_lo = a.parts.frac_lo; 299 300 /* denormalize and set hidden bit */ 301 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI; 302 303 rshift128(frac_hi, frac_lo, 304 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1), 305 &frac_hi, &frac_lo); 306 307 while (exp > 0) { 308 --exp; 309 rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo); 310 } 311 result.parts.fraction = frac_lo; 312 313 return result; 314 } 315 316 result.parts.exp = exp; 317 frac_hi = a.parts.frac_hi; 318 frac_lo = a.parts.frac_lo; 319 rshift128(frac_hi, frac_lo, 320 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1), 321 &frac_hi, &frac_lo); 322 result.parts.fraction = frac_lo; 323 return result; 324 } 325 326 float64 convertFloat128ToFloat64(float128 a) 327 { 328 float64 result; 329 int32_t exp; 330 uint64_t frac_hi, frac_lo; 331 332 result.parts.sign = a.parts.sign; 333 334 if (isFloat128NaN(a)) { 335 result.parts.exp = FLOAT64_MAX_EXPONENT; 336 337 if (isFloat128SigNaN(a)) { 338 /* set first bit of fraction nonzero */ 339 result.parts.fraction = FLOAT64_HIDDEN_BIT_MASK >> 1; 340 return result; 341 } 342 343 /* fraction nonzero but its first bit is zero */ 344 result.parts.fraction = 0x1; 345 return result; 346 } 347 348 if (isFloat128Infinity(a)) { 349 result.parts.fraction = 0; 350 result.parts.exp = FLOAT64_MAX_EXPONENT; 351 return result; 352 } 353 354 exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT64_BIAS; 355 356 if (exp >= FLOAT64_MAX_EXPONENT) { 357 /* FIXME: overflow */ 358 result.parts.fraction = 0; 359 result.parts.exp = FLOAT64_MAX_EXPONENT; 360 return result; 361 } else if (exp <= 0) { 362 /* underflow or denormalized */ 363 364 result.parts.exp = 0; 365 366 exp *= -1; 367 if (exp > FLOAT64_FRACTION_SIZE) { 368 /* FIXME: underflow */ 369 result.parts.fraction = 0; 370 return result; 371 } 372 373 /* denormalized */ 374 375 frac_hi = a.parts.frac_hi; 376 frac_lo = a.parts.frac_lo; 377 378 /* denormalize and set hidden bit */ 379 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI; 380 381 rshift128(frac_hi, frac_lo, 382 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1), 383 &frac_hi, &frac_lo); 384 385 while (exp > 0) { 386 --exp; 387 rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo); 388 } 389 result.parts.fraction = frac_lo; 390 391 return result; 392 } 393 394 result.parts.exp = exp; 395 frac_hi = a.parts.frac_hi; 396 frac_lo = a.parts.frac_lo; 397 rshift128(frac_hi, frac_lo, 398 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1), 399 &frac_hi, &frac_lo); 400 result.parts.fraction = frac_lo; 401 return result; 402 } 403 404 405 /** 406 * Helping procedure for converting float32 to uint32. 407 * 408 * @param a Floating point number in normalized form 409 * (NaNs or Inf are not checked). 410 * @return Converted unsigned integer. 148 149 /** Helping procedure for converting float32 to uint32 150 * @param a floating point number in normalized form (no NaNs or Inf are checked ) 151 * @return unsigned integer 411 152 */ 412 153 static uint32_t _float32_to_uint32_helper(float32 a) … … 415 156 416 157 if (a.parts.exp < FLOAT32_BIAS) { 417 /* TODO: rounding*/158 /*TODO: rounding*/ 418 159 return 0; 419 160 } … … 434 175 } 435 176 436 /* 177 /* Convert float to unsigned int32 437 178 * FIXME: Im not sure what to return if overflow/underflow happens 438 179 * - now its the biggest or the smallest int … … 453 194 } 454 195 455 /* 196 /* Convert float to signed int32 456 197 * FIXME: Im not sure what to return if overflow/underflow happens 457 198 * - now its the biggest or the smallest int … … 473 214 474 215 475 /** 476 * Helping procedure for converting float32 to uint64. 477 * 478 * @param a Floating point number in normalized form 479 * (NaNs or Inf are not checked). 480 * @return Converted unsigned integer. 216 /** Helping procedure for converting float64 to uint64 217 * @param a floating point number in normalized form (no NaNs or Inf are checked ) 218 * @return unsigned integer 219 */ 220 static uint64_t _float64_to_uint64_helper(float64 a) 221 { 222 uint64_t frac; 223 224 if (a.parts.exp < FLOAT64_BIAS) { 225 /*TODO: rounding*/ 226 return 0; 227 } 228 229 frac = a.parts.fraction; 230 231 frac |= FLOAT64_HIDDEN_BIT_MASK; 232 /* shift fraction to left so hidden bit will be the most significant bit */ 233 frac <<= 64 - FLOAT64_FRACTION_SIZE - 1; 234 235 frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1; 236 if ((a.parts.sign == 1) && (frac != 0)) { 237 frac = ~frac; 238 ++frac; 239 } 240 241 return frac; 242 } 243 244 /* Convert float to unsigned int64 245 * FIXME: Im not sure what to return if overflow/underflow happens 246 * - now its the biggest or the smallest int 247 */ 248 uint64_t float64_to_uint64(float64 a) 249 { 250 if (isFloat64NaN(a)) 251 return UINT64_MAX; 252 253 254 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 255 if (a.parts.sign) 256 return UINT64_MIN; 257 258 return UINT64_MAX; 259 } 260 261 return _float64_to_uint64_helper(a); 262 } 263 264 /* Convert float to signed int64 265 * FIXME: Im not sure what to return if overflow/underflow happens 266 * - now its the biggest or the smallest int 267 */ 268 int64_t float64_to_int64(float64 a) 269 { 270 if (isFloat64NaN(a)) 271 return INT64_MAX; 272 273 274 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 275 if (a.parts.sign) 276 return INT64_MIN; 277 278 return INT64_MAX; 279 } 280 281 return _float64_to_uint64_helper(a); 282 } 283 284 285 286 287 288 /** Helping procedure for converting float32 to uint64 289 * @param a floating point number in normalized form (no NaNs or Inf are checked ) 290 * @return unsigned integer 481 291 */ 482 292 static uint64_t _float32_to_uint64_helper(float32 a) 483 293 { 484 294 uint64_t frac; 485 295 486 296 if (a.parts.exp < FLOAT32_BIAS) { 487 297 /*TODO: rounding*/ 488 298 return 0; 489 299 } 490 300 491 301 frac = a.parts.fraction; 492 302 493 303 frac |= FLOAT32_HIDDEN_BIT_MASK; 494 304 /* shift fraction to left so hidden bit will be the most significant bit */ 495 frac <<= 64 - FLOAT32_FRACTION_SIZE - 1; 305 frac <<= 64 - FLOAT32_FRACTION_SIZE - 1; 496 306 497 307 frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1; … … 500 310 ++frac; 501 311 } 502 312 503 313 return frac; 504 314 } 505 315 506 /* 507 * FIXME: Im not sure what to return if overflow/underflow happens 508 * - now its the biggest or the smallest int 509 */ 316 /* Convert float to unsigned int64 317 * FIXME: Im not sure what to return if overflow/underflow happens 318 * - now its the biggest or the smallest int 319 */ 510 320 uint64_t float32_to_uint64(float32 a) 511 321 { 512 322 if (isFloat32NaN(a)) 513 323 return UINT64_MAX; 514 515 324 325 516 326 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) { 517 327 if (a.parts.sign) 518 328 return UINT64_MIN; 519 329 520 330 return UINT64_MAX; 521 331 } 522 332 523 333 return _float32_to_uint64_helper(a); 524 334 } 525 335 526 /* 527 * FIXME: Im not sure what to return if overflow/underflow happens 528 * - now its the biggest or the smallest int 529 */ 336 /* Convert float to signed int64 337 * FIXME: Im not sure what to return if overflow/underflow happens 338 * - now its the biggest or the smallest int 339 */ 530 340 int64_t float32_to_int64(float32 a) 531 341 { 532 342 if (isFloat32NaN(a)) 533 343 return INT64_MAX; 534 344 535 345 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) { 536 346 if (a.parts.sign) 537 347 return INT64_MIN; 538 348 539 349 return INT64_MAX; 540 350 } 541 351 542 352 return _float32_to_uint64_helper(a); 543 353 } 544 354 545 355 546 /** 547 * Helping procedure for converting float64 to uint64. 548 * 549 * @param a Floating point number in normalized form 550 * (NaNs or Inf are not checked). 551 * @return Converted unsigned integer. 552 */ 553 static uint64_t _float64_to_uint64_helper(float64 a) 554 { 555 uint64_t frac; 556 557 if (a.parts.exp < FLOAT64_BIAS) { 558 /*TODO: rounding*/ 559 return 0; 560 } 561 562 frac = a.parts.fraction; 563 564 frac |= FLOAT64_HIDDEN_BIT_MASK; 565 /* shift fraction to left so hidden bit will be the most significant bit */ 566 frac <<= 64 - FLOAT64_FRACTION_SIZE - 1; 567 568 frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1; 569 if ((a.parts.sign == 1) && (frac != 0)) { 570 frac = ~frac; 571 ++frac; 572 } 573 574 return frac; 575 } 576 577 /* 578 * FIXME: Im not sure what to return if overflow/underflow happens 579 * - now its the biggest or the smallest int 580 */ 356 /* Convert float64 to unsigned int32 357 * FIXME: Im not sure what to return if overflow/underflow happens 358 * - now its the biggest or the smallest int 359 */ 581 360 uint32_t float64_to_uint32(float64 a) 582 361 { 583 362 if (isFloat64NaN(a)) 584 363 return UINT32_MAX; 585 364 365 586 366 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 587 367 if (a.parts.sign) 588 368 return UINT32_MIN; 589 369 590 370 return UINT32_MAX; 591 371 } 592 372 593 373 return (uint32_t) _float64_to_uint64_helper(a); 594 374 } 595 375 596 /* 597 * FIXME: Im not sure what to return if overflow/underflow happens 598 * - now its the biggest or the smallest int 599 */ 376 /* Convert float64 to signed int32 377 * FIXME: Im not sure what to return if overflow/underflow happens 378 * - now its the biggest or the smallest int 379 */ 600 380 int32_t float64_to_int32(float64 a) 601 381 { 602 382 if (isFloat64NaN(a)) 603 383 return INT32_MAX; 604 384 385 605 386 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 606 387 if (a.parts.sign) 607 388 return INT32_MIN; 608 389 609 390 return INT32_MAX; 610 391 } 611 392 612 393 return (int32_t) _float64_to_uint64_helper(a); 613 394 } 614 395 615 616 /* 617 * FIXME: Im not sure what to return if overflow/underflow happens 618 * - now its the biggest or the smallest int 619 */ 620 uint64_t float64_to_uint64(float64 a) 621 { 622 if (isFloat64NaN(a)) 623 return UINT64_MAX; 624 625 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 626 if (a.parts.sign) 627 return UINT64_MIN; 628 629 return UINT64_MAX; 630 } 631 632 return _float64_to_uint64_helper(a); 633 } 634 635 /* 636 * FIXME: Im not sure what to return if overflow/underflow happens 637 * - now its the biggest or the smallest int 638 */ 639 int64_t float64_to_int64(float64 a) 640 { 641 if (isFloat64NaN(a)) 642 return INT64_MAX; 643 644 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 645 if (a.parts.sign) 646 return INT64_MIN; 647 648 return INT64_MAX; 649 } 650 651 return _float64_to_uint64_helper(a); 652 } 653 654 655 /** 656 * Helping procedure for converting float128 to uint64. 657 * 658 * @param a Floating point number in normalized form 659 * (NaNs or Inf are not checked). 660 * @return Converted unsigned integer. 661 */ 662 static uint64_t _float128_to_uint64_helper(float128 a) 663 { 664 uint64_t frac_hi, frac_lo; 665 666 if (a.parts.exp < FLOAT128_BIAS) { 667 /*TODO: rounding*/ 668 return 0; 669 } 670 671 frac_hi = a.parts.frac_hi; 672 frac_lo = a.parts.frac_lo; 673 674 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI; 675 /* shift fraction to left so hidden bit will be the most significant bit */ 676 lshift128(frac_hi, frac_lo, 677 (128 - FLOAT128_FRACTION_SIZE - 1), &frac_hi, &frac_lo); 678 679 rshift128(frac_hi, frac_lo, 680 (128 - (a.parts.exp - FLOAT128_BIAS) - 1), &frac_hi, &frac_lo); 681 if ((a.parts.sign == 1) && !eq128(frac_hi, frac_lo, 0x0ll, 0x0ll)) { 682 not128(frac_hi, frac_lo, &frac_hi, &frac_lo); 683 add128(frac_hi, frac_lo, 0x0ll, 0x1ll, &frac_hi, &frac_lo); 684 } 685 686 return frac_lo; 687 } 688 689 /* 690 * FIXME: Im not sure what to return if overflow/underflow happens 691 * - now its the biggest or the smallest int 692 */ 693 uint32_t float128_to_uint32(float128 a) 694 { 695 if (isFloat128NaN(a)) 696 return UINT32_MAX; 697 698 if (isFloat128Infinity(a) || (a.parts.exp >= (32 + FLOAT128_BIAS))) { 699 if (a.parts.sign) 700 return UINT32_MIN; 701 702 return UINT32_MAX; 703 } 704 705 return (uint32_t) _float128_to_uint64_helper(a); 706 } 707 708 /* 709 * FIXME: Im not sure what to return if overflow/underflow happens 710 * - now its the biggest or the smallest int 711 */ 712 int32_t float128_to_int32(float128 a) 713 { 714 if (isFloat128NaN(a)) 715 return INT32_MAX; 716 717 if (isFloat128Infinity(a) || (a.parts.exp >= (32 + FLOAT128_BIAS))) { 718 if (a.parts.sign) 719 return INT32_MIN; 720 721 return INT32_MAX; 722 } 723 724 return (int32_t) _float128_to_uint64_helper(a); 725 } 726 727 728 /* 729 * FIXME: Im not sure what to return if overflow/underflow happens 730 * - now its the biggest or the smallest int 731 */ 732 uint64_t float128_to_uint64(float128 a) 733 { 734 if (isFloat128NaN(a)) 735 return UINT64_MAX; 736 737 if (isFloat128Infinity(a) || (a.parts.exp >= (64 + FLOAT128_BIAS))) { 738 if (a.parts.sign) 739 return UINT64_MIN; 740 741 return UINT64_MAX; 742 } 743 744 return _float128_to_uint64_helper(a); 745 } 746 747 /* 748 * FIXME: Im not sure what to return if overflow/underflow happens 749 * - now its the biggest or the smallest int 750 */ 751 int64_t float128_to_int64(float128 a) 752 { 753 if (isFloat128NaN(a)) 754 return INT64_MAX; 755 756 if (isFloat128Infinity(a) || (a.parts.exp >= (64 + FLOAT128_BIAS))) { 757 if (a.parts.sign) 758 return INT64_MIN; 759 760 return INT64_MAX; 761 } 762 763 return _float128_to_uint64_helper(a); 764 } 765 766 396 /** Convert unsigned integer to float32 397 * 398 * 399 */ 767 400 float32 uint32_to_float32(uint32_t i) 768 401 { … … 791 424 roundFloat32(&exp, &i); 792 425 793 result.parts.fraction = i >> (32 - FLOAT32_FRACTION_SIZE - 2);426 result.parts.fraction = i >> 7; 794 427 result.parts.exp = exp; 795 428 … … 802 435 803 436 if (i < 0) { 804 result = uint32_to_float32((uint32_t) 805 } else { 806 result = uint32_to_float32((uint32_t) 437 result = uint32_to_float32((uint32_t)(-i)); 438 } else { 439 result = uint32_to_float32((uint32_t)i); 807 440 } 808 441 … … 832 465 } 833 466 834 /* Shift all to the first 31 bits (31 st will be hidden 1)*/467 /* Shift all to the first 31 bits (31. will be hidden 1)*/ 835 468 if (counter > 33) { 836 469 i <<= counter - 1 - 32; … … 839 472 } 840 473 841 j = (uint32_t) 474 j = (uint32_t)i; 842 475 roundFloat32(&exp, &j); 843 476 844 result.parts.fraction = j >> (32 - FLOAT32_FRACTION_SIZE - 2);477 result.parts.fraction = j >> 7; 845 478 result.parts.exp = exp; 846 479 return result; … … 852 485 853 486 if (i < 0) { 854 result = uint64_to_float32((uint64_t) 855 } else { 856 result = uint64_to_float32((uint64_t) 487 result = uint64_to_float32((uint64_t)(-i)); 488 } else { 489 result = uint64_to_float32((uint64_t)i); 857 490 } 858 491 … … 862 495 } 863 496 497 /** Convert unsigned integer to float64 498 * 499 * 500 */ 864 501 float64 uint32_to_float64(uint32_t i) 865 502 { … … 886 523 roundFloat64(&exp, &frac); 887 524 888 result.parts.fraction = frac >> (64 - FLOAT64_FRACTION_SIZE - 2);525 result.parts.fraction = frac >> 10; 889 526 result.parts.exp = exp; 890 527 … … 897 534 898 535 if (i < 0) { 899 result = uint32_to_float64((uint32_t) 900 } else { 901 result = uint32_to_float64((uint32_t) 536 result = uint32_to_float64((uint32_t)(-i)); 537 } else { 538 result = uint32_to_float64((uint32_t)i); 902 539 } 903 540 … … 934 571 roundFloat64(&exp, &i); 935 572 936 result.parts.fraction = i >> (64 - FLOAT64_FRACTION_SIZE - 2);573 result.parts.fraction = i >> 10; 937 574 result.parts.exp = exp; 938 575 return result; … … 944 581 945 582 if (i < 0) { 946 result = uint64_to_float64((uint64_t) 947 } else { 948 result = uint64_to_float64((uint64_t) 583 result = uint64_to_float64((uint64_t)(-i)); 584 } else { 585 result = uint64_to_float64((uint64_t)i); 949 586 } 950 587 … … 954 591 } 955 592 956 957 float128 uint32_to_float128(uint32_t i)958 {959 int counter;960 int32_t exp;961 float128 result;962 uint64_t frac_hi, frac_lo;963 964 result.parts.sign = 0;965 result.parts.frac_hi = 0;966 result.parts.frac_lo = 0;967 968 counter = countZeroes32(i);969 970 exp = FLOAT128_BIAS + 32 - counter - 1;971 972 if (counter == 32) {973 result.binary.hi = 0;974 result.binary.lo = 0;975 return result;976 }977 978 frac_hi = 0;979 frac_lo = i;980 lshift128(frac_hi, frac_lo, (counter + 96 - 1), &frac_hi, &frac_lo);981 982 roundFloat128(&exp, &frac_hi, &frac_lo);983 984 rshift128(frac_hi, frac_lo,985 (128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);986 result.parts.frac_hi = frac_hi;987 result.parts.frac_lo = frac_lo;988 result.parts.exp = exp;989 990 return result;991 }992 993 float128 int32_to_float128(int32_t i)994 {995 float128 result;996 997 if (i < 0) {998 result = uint32_to_float128((uint32_t) (-i));999 } else {1000 result = uint32_to_float128((uint32_t) i);1001 }1002 1003 result.parts.sign = i < 0;1004 1005 return result;1006 }1007 1008 1009 float128 uint64_to_float128(uint64_t i)1010 {1011 int counter;1012 int32_t exp;1013 float128 result;1014 uint64_t frac_hi, frac_lo;1015 1016 result.parts.sign = 0;1017 result.parts.frac_hi = 0;1018 result.parts.frac_lo = 0;1019 1020 counter = countZeroes64(i);1021 1022 exp = FLOAT128_BIAS + 64 - counter - 1;1023 1024 if (counter == 64) {1025 result.binary.hi = 0;1026 result.binary.lo = 0;1027 return result;1028 }1029 1030 frac_hi = 0;1031 frac_lo = i;1032 lshift128(frac_hi, frac_lo, (counter + 64 - 1), &frac_hi, &frac_lo);1033 1034 roundFloat128(&exp, &frac_hi, &frac_lo);1035 1036 rshift128(frac_hi, frac_lo,1037 (128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo);1038 result.parts.frac_hi = frac_hi;1039 result.parts.frac_lo = frac_lo;1040 result.parts.exp = exp;1041 1042 return result;1043 }1044 1045 float128 int64_to_float128(int64_t i)1046 {1047 float128 result;1048 1049 if (i < 0) {1050 result = uint64_to_float128((uint64_t) (-i));1051 } else {1052 result = uint64_to_float128((uint64_t) i);1053 }1054 1055 result.parts.sign = i < 0;1056 1057 return result;1058 }1059 1060 593 /** @} 1061 594 */ -
TabularUnified uspace/lib/softfloat/generic/div.c ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Division functions.32 /** @file 34 33 */ 35 34 … … 41 40 #include <common.h> 42 41 43 /**44 * Divide two single-precision floats.45 *46 * @param a Nominator.47 * @param b Denominator.48 * @return Result of division.49 */50 42 float32 divFloat32(float32 a, float32 b) 51 43 { … … 108 100 return result; 109 101 } 102 110 103 111 104 afrac = a.parts.fraction; … … 117 110 if (aexp == 0) { 118 111 if (afrac == 0) { 119 result.parts.exp = 0; 120 result.parts.fraction = 0; 121 return result; 122 } 123 112 result.parts.exp = 0; 113 result.parts.fraction = 0; 114 return result; 115 } 124 116 /* normalize it*/ 117 125 118 afrac <<= 1; 126 /* afrac is nonzero => it must stop */127 while (! (afrac & FLOAT32_HIDDEN_BIT_MASK)) {119 /* afrac is nonzero => it must stop */ 120 while (! (afrac & FLOAT32_HIDDEN_BIT_MASK) ) { 128 121 afrac <<= 1; 129 122 aexp--; … … 133 126 if (bexp == 0) { 134 127 bfrac <<= 1; 135 /* bfrac is nonzero => it must stop */136 while (! (bfrac & FLOAT32_HIDDEN_BIT_MASK)) {128 /* bfrac is nonzero => it must stop */ 129 while (! (bfrac & FLOAT32_HIDDEN_BIT_MASK) ) { 137 130 bfrac <<= 1; 138 131 bexp--; … … 140 133 } 141 134 142 afrac = (afrac | FLOAT32_HIDDEN_BIT_MASK ) << (32 - FLOAT32_FRACTION_SIZE - 1);143 bfrac = (bfrac | FLOAT32_HIDDEN_BIT_MASK ) << (32 - FLOAT32_FRACTION_SIZE);144 145 if ( bfrac <= (afrac << 1)) {135 afrac = (afrac | FLOAT32_HIDDEN_BIT_MASK ) << (32 - FLOAT32_FRACTION_SIZE - 1 ); 136 bfrac = (bfrac | FLOAT32_HIDDEN_BIT_MASK ) << (32 - FLOAT32_FRACTION_SIZE ); 137 138 if ( bfrac <= (afrac << 1) ) { 146 139 afrac >>= 1; 147 140 aexp++; … … 151 144 152 145 cfrac = (afrac << 32) / bfrac; 153 if (( cfrac & 0x3F) == 0) {154 cfrac |= ( bfrac * cfrac != afrac << 32);146 if (( cfrac & 0x3F ) == 0) { 147 cfrac |= ( bfrac * cfrac != afrac << 32 ); 155 148 } 156 149 … … 158 151 159 152 /* find first nonzero digit and shift result and detect possibly underflow */ 160 while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7 )))) {153 while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7 )))) { 161 154 cexp--; 162 155 cfrac <<= 1; 163 /* TODO: fix underflow */164 } 156 /* TODO: fix underflow */ 157 }; 165 158 166 159 cfrac += (0x1 << 6); /* FIXME: 7 is not sure*/ … … 169 162 ++cexp; 170 163 cfrac >>= 1; 171 }164 } 172 165 173 166 /* check overflow */ 174 if (cexp >= FLOAT32_MAX_EXPONENT ) {167 if (cexp >= FLOAT32_MAX_EXPONENT ) { 175 168 /* FIXME: overflow, return infinity */ 176 169 result.parts.exp = FLOAT32_MAX_EXPONENT; … … 188 181 cfrac >>= 1; 189 182 while (cexp < 0) { 190 cexp ++;183 cexp ++; 191 184 cfrac >>= 1; 192 } 185 } 186 193 187 } else { 194 result.parts.exp = (uint32_t) 188 result.parts.exp = (uint32_t)cexp; 195 189 } 196 190 … … 200 194 } 201 195 202 /**203 * Divide two double-precision floats.204 *205 * @param a Nominator.206 * @param b Denominator.207 * @return Result of division.208 */209 196 float64 divFloat64(float64 a, float64 b) 210 197 { … … 213 200 uint64_t afrac, bfrac, cfrac; 214 201 uint64_t remlo, remhi; 215 uint64_t tmplo, tmphi;216 202 217 203 result.parts.sign = a.parts.sign ^ b.parts.sign; 218 204 219 205 if (isFloat64NaN(a)) { 206 220 207 if (isFloat64SigNaN(b)) { 221 208 /*FIXME: SigNaN*/ … … 275 262 } 276 263 264 277 265 afrac = a.parts.fraction; 278 266 aexp = a.parts.exp; … … 287 275 return result; 288 276 } 289 290 277 /* normalize it*/ 278 291 279 aexp++; 292 /* afrac is nonzero => it must stop */293 while (! (afrac & FLOAT64_HIDDEN_BIT_MASK)) {280 /* afrac is nonzero => it must stop */ 281 while (! (afrac & FLOAT64_HIDDEN_BIT_MASK) ) { 294 282 afrac <<= 1; 295 283 aexp--; … … 299 287 if (bexp == 0) { 300 288 bexp++; 301 /* bfrac is nonzero => it must stop */302 while (! (bfrac & FLOAT64_HIDDEN_BIT_MASK)) {289 /* bfrac is nonzero => it must stop */ 290 while (! (bfrac & FLOAT64_HIDDEN_BIT_MASK) ) { 303 291 bfrac <<= 1; 304 292 bexp--; … … 306 294 } 307 295 308 afrac = (afrac | FLOAT64_HIDDEN_BIT_MASK ) << (64 - FLOAT64_FRACTION_SIZE - 2);309 bfrac = (bfrac | FLOAT64_HIDDEN_BIT_MASK ) << (64 - FLOAT64_FRACTION_SIZE - 1);310 311 if ( bfrac <= (afrac << 1)) {296 afrac = (afrac | FLOAT64_HIDDEN_BIT_MASK ) << (64 - FLOAT64_FRACTION_SIZE - 2 ); 297 bfrac = (bfrac | FLOAT64_HIDDEN_BIT_MASK ) << (64 - FLOAT64_FRACTION_SIZE - 1); 298 299 if ( bfrac <= (afrac << 1) ) { 312 300 afrac >>= 1; 313 301 aexp++; … … 316 304 cexp = aexp - bexp + FLOAT64_BIAS - 2; 317 305 318 cfrac = div128est(afrac, 0x0ll, bfrac); 319 320 if ((cfrac & 0x1FF) <= 2) { 321 mul64(bfrac, cfrac, &tmphi, &tmplo); 322 sub128(afrac, 0x0ll, tmphi, tmplo, &remhi, &remlo); 306 cfrac = divFloat64estim(afrac, bfrac); 307 308 if (( cfrac & 0x1FF ) <= 2) { /*FIXME:?? */ 309 mul64integers( bfrac, cfrac, &remlo, &remhi); 310 /* (__u128)afrac << 64 - ( ((__u128)remhi<<64) + (__u128)remlo )*/ 311 remhi = afrac - remhi - ( remlo > 0); 312 remlo = - remlo; 323 313 324 314 while ((int64_t) remhi < 0) { 325 315 cfrac--; 326 add128(remhi, remlo, 0x0ll, bfrac, &remhi, &remlo); 327 } 328 cfrac |= (remlo != 0); 316 remlo += bfrac; 317 remhi += ( remlo < bfrac ); 318 } 319 cfrac |= ( remlo != 0 ); 329 320 } 330 321 … … 332 323 result = finishFloat64(cexp, cfrac, result.parts.sign); 333 324 return result; 325 334 326 } 335 327 336 /** 337 * Divide two quadruple-precision floats. 338 * 339 * @param a Nominator. 340 * @param b Denominator. 341 * @return Result of division. 342 */ 343 float128 divFloat128(float128 a, float128 b) 328 uint64_t divFloat64estim(uint64_t a, uint64_t b) 344 329 { 345 float128 result; 346 int64_t aexp, bexp, cexp; 347 uint64_t afrac_hi, afrac_lo, bfrac_hi, bfrac_lo, cfrac_hi, cfrac_lo; 348 uint64_t shift_out; 349 uint64_t rem_hihi, rem_hilo, rem_lohi, rem_lolo; 350 uint64_t tmp_hihi, tmp_hilo, tmp_lohi, tmp_lolo; 351 352 result.parts.sign = a.parts.sign ^ b.parts.sign; 353 354 if (isFloat128NaN(a)) { 355 if (isFloat128SigNaN(b)) { 356 /*FIXME: SigNaN*/ 357 return b; 358 } 359 360 if (isFloat128SigNaN(a)) { 361 /*FIXME: SigNaN*/ 362 } 363 /*NaN*/ 364 return a; 365 } 366 367 if (isFloat128NaN(b)) { 368 if (isFloat128SigNaN(b)) { 369 /*FIXME: SigNaN*/ 370 } 371 /*NaN*/ 372 return b; 373 } 374 375 if (isFloat128Infinity(a)) { 376 if (isFloat128Infinity(b) || isFloat128Zero(b)) { 377 /*FIXME: inf / inf */ 378 result.binary.hi = FLOAT128_NAN_HI; 379 result.binary.lo = FLOAT128_NAN_LO; 380 return result; 381 } 382 /* inf / num */ 383 result.parts.exp = a.parts.exp; 384 result.parts.frac_hi = a.parts.frac_hi; 385 result.parts.frac_lo = a.parts.frac_lo; 386 return result; 387 } 388 389 if (isFloat128Infinity(b)) { 390 if (isFloat128Zero(a)) { 391 /* FIXME 0 / inf */ 392 result.parts.exp = 0; 393 result.parts.frac_hi = 0; 394 result.parts.frac_lo = 0; 395 return result; 396 } 397 /* FIXME: num / inf*/ 398 result.parts.exp = 0; 399 result.parts.frac_hi = 0; 400 result.parts.frac_lo = 0; 401 return result; 402 } 403 404 if (isFloat128Zero(b)) { 405 if (isFloat128Zero(a)) { 406 /*FIXME: 0 / 0*/ 407 result.binary.hi = FLOAT128_NAN_HI; 408 result.binary.lo = FLOAT128_NAN_LO; 409 return result; 410 } 411 /* FIXME: division by zero */ 412 result.parts.exp = 0; 413 result.parts.frac_hi = 0; 414 result.parts.frac_lo = 0; 415 return result; 416 } 417 418 afrac_hi = a.parts.frac_hi; 419 afrac_lo = a.parts.frac_lo; 420 aexp = a.parts.exp; 421 bfrac_hi = b.parts.frac_hi; 422 bfrac_lo = b.parts.frac_lo; 423 bexp = b.parts.exp; 424 425 /* denormalized numbers */ 426 if (aexp == 0) { 427 if (eq128(afrac_hi, afrac_lo, 0x0ll, 0x0ll)) { 428 result.parts.exp = 0; 429 result.parts.frac_hi = 0; 430 result.parts.frac_lo = 0; 431 return result; 432 } 433 434 /* normalize it*/ 435 aexp++; 436 /* afrac is nonzero => it must stop */ 437 and128(afrac_hi, afrac_lo, 438 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 439 &tmp_hihi, &tmp_lolo); 440 while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) { 441 lshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo); 442 aexp--; 443 } 444 } 445 446 if (bexp == 0) { 447 bexp++; 448 /* bfrac is nonzero => it must stop */ 449 and128(bfrac_hi, bfrac_lo, 450 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 451 &tmp_hihi, &tmp_lolo); 452 while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) { 453 lshift128(bfrac_hi, bfrac_lo, 1, &bfrac_hi, &bfrac_lo); 454 bexp--; 455 } 456 } 457 458 or128(afrac_hi, afrac_lo, 459 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 460 &afrac_hi, &afrac_lo); 461 lshift128(afrac_hi, afrac_lo, 462 (128 - FLOAT128_FRACTION_SIZE - 1), &afrac_hi, &afrac_lo); 463 or128(bfrac_hi, bfrac_lo, 464 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 465 &bfrac_hi, &bfrac_lo); 466 lshift128(bfrac_hi, bfrac_lo, 467 (128 - FLOAT128_FRACTION_SIZE - 1), &bfrac_hi, &bfrac_lo); 468 469 if (le128(bfrac_hi, bfrac_lo, afrac_hi, afrac_lo)) { 470 rshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo); 471 aexp++; 472 } 473 474 cexp = aexp - bexp + FLOAT128_BIAS - 2; 475 476 cfrac_hi = div128est(afrac_hi, afrac_lo, bfrac_hi); 477 478 mul128(bfrac_hi, bfrac_lo, 0x0ll, cfrac_hi, 479 &tmp_lolo /* dummy */, &tmp_hihi, &tmp_hilo, &tmp_lohi); 480 481 /* sub192(afrac_hi, afrac_lo, 0, 482 * tmp_hihi, tmp_hilo, tmp_lohi 483 * &rem_hihi, &rem_hilo, &rem_lohi); */ 484 sub128(afrac_hi, afrac_lo, tmp_hihi, tmp_hilo, &rem_hihi, &rem_hilo); 485 if (tmp_lohi > 0) { 486 sub128(rem_hihi, rem_hilo, 0x0ll, 0x1ll, &rem_hihi, &rem_hilo); 487 } 488 rem_lohi = -tmp_lohi; 489 490 while ((int64_t) rem_hihi < 0) { 491 --cfrac_hi; 492 /* add192(rem_hihi, rem_hilo, rem_lohi, 493 * 0, bfrac_hi, bfrac_lo, 494 * &rem_hihi, &rem_hilo, &rem_lohi); */ 495 add128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo, &rem_hilo, &rem_lohi); 496 if (lt128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo)) { 497 ++rem_hihi; 498 } 499 } 500 501 cfrac_lo = div128est(rem_hilo, rem_lohi, bfrac_lo); 502 503 if ((cfrac_lo & 0x3FFF) <= 4) { 504 mul128(bfrac_hi, bfrac_lo, 0x0ll, cfrac_lo, 505 &tmp_hihi /* dummy */, &tmp_hilo, &tmp_lohi, &tmp_lolo); 506 507 /* sub192(rem_hilo, rem_lohi, 0, 508 * tmp_hilo, tmp_lohi, tmp_lolo, 509 * &rem_hilo, &rem_lohi, &rem_lolo); */ 510 sub128(rem_hilo, rem_lohi, tmp_hilo, tmp_lohi, &rem_hilo, &rem_lohi); 511 if (tmp_lolo > 0) { 512 sub128(rem_hilo, rem_lohi, 0x0ll, 0x1ll, &rem_hilo, &rem_lohi); 513 } 514 rem_lolo = -tmp_lolo; 515 516 while ((int64_t) rem_hilo < 0) { 517 --cfrac_lo; 518 /* add192(rem_hilo, rem_lohi, rem_lolo, 519 * 0, bfrac_hi, bfrac_lo, 520 * &rem_hilo, &rem_lohi, &rem_lolo); */ 521 add128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo, &rem_lohi, &rem_lolo); 522 if (lt128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo)) { 523 ++rem_hilo; 524 } 525 } 526 527 cfrac_lo |= ((rem_hilo | rem_lohi | rem_lolo) != 0 ); 528 } 529 530 shift_out = cfrac_lo << (64 - (128 - FLOAT128_FRACTION_SIZE - 1)); 531 rshift128(cfrac_hi, cfrac_lo, (128 - FLOAT128_FRACTION_SIZE - 1), 532 &cfrac_hi, &cfrac_lo); 533 534 result = finishFloat128(cexp, cfrac_hi, cfrac_lo, result.parts.sign, shift_out); 330 uint64_t bhi; 331 uint64_t remhi, remlo; 332 uint64_t result; 333 334 if ( b <= a ) { 335 return 0xFFFFFFFFFFFFFFFFull; 336 } 337 338 bhi = b >> 32; 339 result = ((bhi << 32) <= a) ?( 0xFFFFFFFFull << 32) : ( a / bhi) << 32; 340 mul64integers(b, result, &remlo, &remhi); 341 342 remhi = a - remhi - (remlo > 0); 343 remlo = - remlo; 344 345 b <<= 32; 346 while ( (int64_t) remhi < 0 ) { 347 result -= 0x1ll << 32; 348 remlo += b; 349 remhi += bhi + ( remlo < b ); 350 } 351 remhi = (remhi << 32) | (remlo >> 32); 352 if (( bhi << 32) <= remhi) { 353 result |= 0xFFFFFFFF; 354 } else { 355 result |= remhi / bhi; 356 } 357 358 535 359 return result; 536 360 } -
TabularUnified uspace/lib/softfloat/generic/mul.c ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Multiplication functions.32 /** @file 34 33 */ 35 34 … … 39 38 #include <common.h> 40 39 41 /** 42 * Multiply two single-precision floats. 43 * 44 * @param a First input operand. 45 * @param b Second input operand. 46 * @return Result of multiplication. 40 /** Multiply two 32 bit float numbers 41 * 47 42 */ 48 43 float32 mulFloat32(float32 a, float32 b) … … 54 49 result.parts.sign = a.parts.sign ^ b.parts.sign; 55 50 56 if (isFloat32NaN(a) || isFloat32NaN(b) ) {51 if (isFloat32NaN(a) || isFloat32NaN(b) ) { 57 52 /* TODO: fix SigNaNs */ 58 53 if (isFloat32SigNaN(a)) { … … 60 55 result.parts.exp = a.parts.exp; 61 56 return result; 62 } 57 }; 63 58 if (isFloat32SigNaN(b)) { /* TODO: fix SigNaN */ 64 59 result.parts.fraction = b.parts.fraction; 65 60 result.parts.exp = b.parts.exp; 66 61 return result; 67 } 62 }; 68 63 /* set NaN as result */ 69 64 result.binary = FLOAT32_NAN; 70 65 return result; 71 } 66 }; 72 67 73 68 if (isFloat32Infinity(a)) { … … 103 98 result.parts.sign = a.parts.sign ^ b.parts.sign; 104 99 return result; 105 } 100 }; 106 101 107 102 if (exp < 0) { … … 111 106 result.parts.exp = 0x0; 112 107 return result; 113 } 108 }; 114 109 115 110 frac1 = a.parts.fraction; … … 118 113 } else { 119 114 ++exp; 120 } 115 }; 121 116 122 117 frac2 = b.parts.fraction; … … 126 121 } else { 127 122 ++exp; 128 } 123 }; 129 124 130 125 frac1 <<= 1; /* one bit space for rounding */ 131 126 132 127 frac1 = frac1 * frac2; 133 134 /* round and return */135 while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= ( 1 << (FLOAT32_FRACTION_SIZE + 2)))) {136 /* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left) 128 /* round and return */ 129 130 while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= ( 1 << (FLOAT32_FRACTION_SIZE + 2)))) { 131 /* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left)*/ 137 132 ++exp; 138 133 frac1 >>= 1; 139 } 134 }; 140 135 141 136 /* rounding */ … … 146 141 ++exp; 147 142 frac1 >>= 1; 148 } 149 150 if (exp >= FLOAT32_MAX_EXPONENT ) {143 }; 144 145 if (exp >= FLOAT32_MAX_EXPONENT ) { 151 146 /* TODO: fix overflow */ 152 147 /* return infinity*/ … … 164 159 frac1 >>= 1; 165 160 ++exp; 166 } 161 }; 167 162 if (frac1 == 0) { 168 163 /* FIXME : underflow */ 169 170 171 172 } 173 } 164 result.parts.exp = 0; 165 result.parts.fraction = 0; 166 return result; 167 }; 168 }; 174 169 result.parts.exp = exp; 175 result.parts.fraction = frac1 & ( (1 << FLOAT32_FRACTION_SIZE) - 1);170 result.parts.fraction = frac1 & ( (1 << FLOAT32_FRACTION_SIZE) - 1); 176 171 177 172 return result; 173 178 174 } 179 175 180 /** 181 * Multiply two double-precision floats. 182 * 183 * @param a First input operand. 184 * @param b Second input operand. 185 * @return Result of multiplication. 176 /** Multiply two 64 bit float numbers 177 * 186 178 */ 187 179 float64 mulFloat64(float64 a, float64 b) … … 193 185 result.parts.sign = a.parts.sign ^ b.parts.sign; 194 186 195 if (isFloat64NaN(a) || isFloat64NaN(b) ) {187 if (isFloat64NaN(a) || isFloat64NaN(b) ) { 196 188 /* TODO: fix SigNaNs */ 197 189 if (isFloat64SigNaN(a)) { … … 199 191 result.parts.exp = a.parts.exp; 200 192 return result; 201 } 193 }; 202 194 if (isFloat64SigNaN(b)) { /* TODO: fix SigNaN */ 203 195 result.parts.fraction = b.parts.fraction; 204 196 result.parts.exp = b.parts.exp; 205 197 return result; 206 } 198 }; 207 199 /* set NaN as result */ 208 200 result.binary = FLOAT64_NAN; 209 201 return result; 210 } 202 }; 211 203 212 204 if (isFloat64Infinity(a)) { … … 241 233 } else { 242 234 ++exp; 243 } 235 }; 244 236 245 237 frac2 = b.parts.fraction; … … 249 241 } else { 250 242 ++exp; 251 } 243 }; 252 244 253 245 frac1 <<= (64 - FLOAT64_FRACTION_SIZE - 1); 254 246 frac2 <<= (64 - FLOAT64_FRACTION_SIZE - 2); 255 247 256 mul64 (frac1, frac2, &frac1, &frac2);257 258 frac 1 |= (frac2!= 0);259 if (frac 1& (0x1ll << 62)) {260 frac 1<<= 1;248 mul64integers(frac1, frac2, &frac1, &frac2); 249 250 frac2 |= (frac1 != 0); 251 if (frac2 & (0x1ll << 62)) { 252 frac2 <<= 1; 261 253 exp--; 262 254 } 263 255 264 result = finishFloat64(exp, frac 1, result.parts.sign);256 result = finishFloat64(exp, frac2, result.parts.sign); 265 257 return result; 266 258 } 267 259 268 /** 269 * Multiply two quadruple-precision floats. 270 * 271 * @param a First input operand. 272 * @param b Second input operand. 273 * @return Result of multiplication. 274 */ 275 float128 mulFloat128(float128 a, float128 b) 260 /** Multiply two 64 bit numbers and return result in two parts 261 * @param a first operand 262 * @param b second operand 263 * @param lo lower part from result 264 * @param hi higher part of result 265 */ 266 void mul64integers(uint64_t a,uint64_t b, uint64_t *lo, uint64_t *hi) 276 267 { 277 float128 result; 278 uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo; 279 int32_t exp; 280 281 result.parts.sign = a.parts.sign ^ b.parts.sign; 282 283 if (isFloat128NaN(a) || isFloat128NaN(b)) { 284 /* TODO: fix SigNaNs */ 285 if (isFloat128SigNaN(a)) { 286 result.parts.frac_hi = a.parts.frac_hi; 287 result.parts.frac_lo = a.parts.frac_lo; 288 result.parts.exp = a.parts.exp; 289 return result; 290 } 291 if (isFloat128SigNaN(b)) { /* TODO: fix SigNaN */ 292 result.parts.frac_hi = b.parts.frac_hi; 293 result.parts.frac_lo = b.parts.frac_lo; 294 result.parts.exp = b.parts.exp; 295 return result; 296 } 297 /* set NaN as result */ 298 result.binary.hi = FLOAT128_NAN_HI; 299 result.binary.lo = FLOAT128_NAN_LO; 300 return result; 301 } 302 303 if (isFloat128Infinity(a)) { 304 if (isFloat128Zero(b)) { 305 /* FIXME: zero * infinity */ 306 result.binary.hi = FLOAT128_NAN_HI; 307 result.binary.lo = FLOAT128_NAN_LO; 308 return result; 309 } 310 result.parts.frac_hi = a.parts.frac_hi; 311 result.parts.frac_lo = a.parts.frac_lo; 312 result.parts.exp = a.parts.exp; 313 return result; 314 } 315 316 if (isFloat128Infinity(b)) { 317 if (isFloat128Zero(a)) { 318 /* FIXME: zero * infinity */ 319 result.binary.hi = FLOAT128_NAN_HI; 320 result.binary.lo = FLOAT128_NAN_LO; 321 return result; 322 } 323 result.parts.frac_hi = b.parts.frac_hi; 324 result.parts.frac_lo = b.parts.frac_lo; 325 result.parts.exp = b.parts.exp; 326 return result; 327 } 328 329 /* exp is signed so we can easy detect underflow */ 330 exp = a.parts.exp + b.parts.exp - FLOAT128_BIAS - 1; 331 332 frac1_hi = a.parts.frac_hi; 333 frac1_lo = a.parts.frac_lo; 334 335 if (a.parts.exp > 0) { 336 or128(frac1_hi, frac1_lo, 337 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 338 &frac1_hi, &frac1_lo); 339 } else { 340 ++exp; 341 } 342 343 frac2_hi = b.parts.frac_hi; 344 frac2_lo = b.parts.frac_lo; 345 346 if (b.parts.exp > 0) { 347 or128(frac2_hi, frac2_lo, 348 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 349 &frac2_hi, &frac2_lo); 350 } else { 351 ++exp; 352 } 353 354 lshift128(frac2_hi, frac2_lo, 355 128 - FLOAT128_FRACTION_SIZE, &frac2_hi, &frac2_lo); 356 357 tmp_hi = frac1_hi; 358 tmp_lo = frac1_lo; 359 mul128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, 360 &frac1_hi, &frac1_lo, &frac2_hi, &frac2_lo); 361 add128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo); 362 frac2_hi |= (frac2_lo != 0x0ll); 363 364 if ((FLOAT128_HIDDEN_BIT_MASK_HI << 1) <= frac1_hi) { 365 frac2_hi >>= 1; 366 if (frac1_lo & 0x1ll) { 367 frac2_hi |= (0x1ull < 64); 368 } 369 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo); 370 ++exp; 371 } 372 373 result = finishFloat128(exp, frac1_hi, frac1_lo, result.parts.sign, frac2_hi); 374 return result; 268 uint64_t low, high, middle1, middle2; 269 uint32_t alow, blow; 270 271 alow = a & 0xFFFFFFFF; 272 blow = b & 0xFFFFFFFF; 273 274 a >>= 32; 275 b >>= 32; 276 277 low = ((uint64_t)alow) * blow; 278 middle1 = a * blow; 279 middle2 = alow * b; 280 high = a * b; 281 282 middle1 += middle2; 283 high += (((uint64_t)(middle1 < middle2)) << 32) + (middle1 >> 32); 284 middle1 <<= 32; 285 low += middle1; 286 high += (low < middle1); 287 *lo = low; 288 *hi = high; 289 290 return; 375 291 } 376 292 -
TabularUnified uspace/lib/softfloat/generic/other.c ¶
rd3e241a rb7ee0369 30 30 * @{ 31 31 */ 32 /** @file Other functions (power, complex).32 /** @file 33 33 */ 34 34 -
TabularUnified uspace/lib/softfloat/generic/softfloat.c ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 33 32 * @{ 34 33 */ 35 /** @file Softfloat API.34 /** @file 36 35 */ 37 36 … … 82 81 } 83 82 return addFloat64(da, db).d; 84 }85 86 long double __addtf3(long double a, long double b)87 {88 float128 ta, tb;89 ta.ld = a;90 tb.ld = b;91 if (ta.parts.sign != tb.parts.sign) {92 if (ta.parts.sign) {93 ta.parts.sign = 0;94 return subFloat128(tb, ta).ld;95 };96 tb.parts.sign = 0;97 return subFloat128(ta, tb).ld;98 }99 return addFloat128(ta, tb).ld;100 83 } 101 84 … … 124 107 } 125 108 126 long double __subtf3(long double a, long double b)127 {128 float128 ta, tb;129 ta.ld = a;130 tb.ld = b;131 if (ta.parts.sign != tb.parts.sign) {132 tb.parts.sign = !tb.parts.sign;133 return addFloat128(ta, tb).ld;134 }135 return subFloat128(ta, tb).ld;136 }137 138 109 float __mulsf3(float a, float b) 139 110 { … … 152 123 } 153 124 154 long double __multf3(long double a, long double b)155 {156 float128 ta, tb;157 ta.ld = a;158 tb.ld = b;159 return mulFloat128(ta, tb).ld;160 }161 162 125 float __divsf3(float a, float b) 163 126 { … … 176 139 } 177 140 178 long double __divtf3(long double a, long double b)179 {180 float128 ta, tb;181 ta.ld = a;182 tb.ld = b;183 return divFloat128(ta, tb).ld;184 }185 186 141 float __negsf2(float a) 187 142 { … … 194 149 double __negdf2(double a) 195 150 { 196 float64 da; 197 da.d = a; 198 da.parts.sign = !da.parts.sign; 199 return da.d; 200 } 201 202 long double __negtf2(long double a) 203 { 204 float128 ta; 205 ta.ld = a; 206 ta.parts.sign = !ta.parts.sign; 207 return ta.ld; 151 float64 fa; 152 fa.d = a; 153 fa.parts.sign = !fa.parts.sign; 154 return fa.d; 208 155 } 209 156 … … 217 164 } 218 165 219 long double __extendsftf2(float a)220 {221 float32 fa;222 fa.f = a;223 return convertFloat32ToFloat128(fa).ld;224 }225 226 long double __extenddftf2(double a)227 {228 float64 da;229 da.d = a;230 return convertFloat64ToFloat128(da).ld;231 }232 233 166 float __truncdfsf2(double a) 234 167 { … … 238 171 } 239 172 240 float __trunctfsf2(long double a)241 {242 float128 ta;243 ta.ld = a;244 return convertFloat128ToFloat32(ta).f;245 }246 247 double __trunctfdf2(long double a)248 {249 float128 ta;250 ta.ld = a;251 return convertFloat128ToFloat64(ta).d;252 }253 254 173 int __fixsfsi(float a) 255 174 { … … 259 178 return float32_to_int(fa); 260 179 } 261 262 180 int __fixdfsi(double a) 263 181 { … … 267 185 return float64_to_int(da); 268 186 } 269 270 int __fixtfsi(long double a)271 {272 float128 ta;273 ta.ld = a;274 275 return float128_to_int(ta);276 }277 187 278 188 long __fixsfdi(float a) … … 283 193 return float32_to_long(fa); 284 194 } 285 286 195 long __fixdfdi(double a) 287 196 { … … 291 200 return float64_to_long(da); 292 201 } 293 294 long __fixtfdi(long double a)295 {296 float128 ta;297 ta.ld = a;298 299 return float128_to_long(ta);300 }301 202 302 203 long long __fixsfti(float a) … … 307 208 return float32_to_longlong(fa); 308 209 } 309 310 210 long long __fixdfti(double a) 311 211 { … … 316 216 } 317 217 318 long long __fixtfti(long double a)319 {320 float128 ta;321 ta.ld = a;322 323 return float128_to_longlong(ta);324 }325 326 218 unsigned int __fixunssfsi(float a) 327 219 { … … 331 223 return float32_to_uint(fa); 332 224 } 333 334 225 unsigned int __fixunsdfsi(double a) 335 226 { … … 339 230 return float64_to_uint(da); 340 231 } 341 342 unsigned int __fixunstfsi(long double a)343 {344 float128 ta;345 ta.ld = a;346 347 return float128_to_uint(ta);348 }349 232 350 233 unsigned long __fixunssfdi(float a) … … 355 238 return float32_to_ulong(fa); 356 239 } 357 358 240 unsigned long __fixunsdfdi(double a) 359 241 { … … 363 245 return float64_to_ulong(da); 364 246 } 365 366 unsigned long __fixunstfdi(long double a)367 {368 float128 ta;369 ta.ld = a;370 371 return float128_to_ulong(ta);372 }373 247 374 248 unsigned long long __fixunssfti(float a) … … 379 253 return float32_to_ulonglong(fa); 380 254 } 381 382 255 unsigned long long __fixunsdfti(double a) 383 256 { … … 387 260 return float64_to_ulonglong(da); 388 261 } 389 390 unsigned long long __fixunstfti(long double a)391 {392 float128 ta;393 ta.ld = a;394 395 return float128_to_ulonglong(ta);396 }397 262 398 263 float __floatsisf(int i) … … 403 268 return fa.f; 404 269 } 405 406 270 double __floatsidf(int i) 407 271 { … … 411 275 return da.d; 412 276 } 413 414 long double __floatsitf(int i)415 {416 float128 ta;417 418 ta = int_to_float128(i);419 return ta.ld;420 }421 277 422 278 float __floatdisf(long i) … … 427 283 return fa.f; 428 284 } 429 430 285 double __floatdidf(long i) 431 286 { … … 435 290 return da.d; 436 291 } 437 438 long double __floatditf(long i)439 {440 float128 ta;441 442 ta = long_to_float128(i);443 return ta.ld;444 }445 292 446 293 float __floattisf(long long i) … … 451 298 return fa.f; 452 299 } 453 454 300 double __floattidf(long long i) 455 301 { … … 460 306 } 461 307 462 long double __floattitf(long long i)463 {464 float128 ta;465 466 ta = longlong_to_float128(i);467 return ta.ld;468 }469 470 308 float __floatunsisf(unsigned int i) 471 309 { … … 475 313 return fa.f; 476 314 } 477 478 315 double __floatunsidf(unsigned int i) 479 316 { … … 483 320 return da.d; 484 321 } 485 486 long double __floatunsitf(unsigned int i)487 {488 float128 ta;489 490 ta = uint_to_float128(i);491 return ta.ld;492 }493 322 494 323 float __floatundisf(unsigned long i) … … 499 328 return fa.f; 500 329 } 501 502 330 double __floatundidf(unsigned long i) 503 331 { … … 507 335 return da.d; 508 336 } 509 510 long double __floatunditf(unsigned long i)511 {512 float128 ta;513 514 ta = ulong_to_float128(i);515 return ta.ld;516 }517 337 518 338 float __floatuntisf(unsigned long long i) … … 523 343 return fa.f; 524 344 } 525 526 345 double __floatuntidf(unsigned long long i) 527 346 { … … 532 351 } 533 352 534 long double __floatuntitf(unsigned long long i)535 {536 float128 ta;537 538 ta = ulonglong_to_float128(i);539 return ta.ld;540 }541 542 353 /* Comparison functions */ 354 /* Comparison functions */ 355 356 /* a<b .. -1 357 * a=b .. 0 358 * a>b .. 1 359 * */ 543 360 544 361 int __cmpsf2(float a, float b) … … 547 364 fa.f = a; 548 365 fb.f = b; 549 550 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 366 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 551 367 return 1; /* no special constant for unordered - maybe signaled? */ 552 } 368 }; 369 553 370 554 371 if (isFloat32eq(fa, fb)) { 555 372 return 0; 556 } 373 }; 557 374 558 375 if (isFloat32lt(fa, fb)) { 559 376 return -1; 560 } 561 377 }; 562 378 return 1; 563 379 } 564 380 565 int __cmpdf2(double a, double b) 566 { 567 float64 da, db; 568 da.d = a; 569 db.d = b; 570 571 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 572 return 1; /* no special constant for unordered - maybe signaled? */ 573 } 574 575 if (isFloat64eq(da, db)) { 576 return 0; 577 } 578 579 if (isFloat64lt(da, db)) { 381 int __unordsf2(float a, float b) 382 { 383 float32 fa, fb; 384 fa.f = a; 385 fb.f = b; 386 return ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ); 387 } 388 389 /** 390 * @return zero, if neither argument is a NaN and are equal 391 * */ 392 int __eqsf2(float a, float b) 393 { 394 float32 fa, fb; 395 fa.f = a; 396 fb.f = b; 397 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 398 /* TODO: sigNaNs*/ 399 return 1; 400 }; 401 return isFloat32eq(fa, fb) - 1; 402 } 403 404 /* strange behavior, but it was in gcc documentation */ 405 int __nesf2(float a, float b) 406 { 407 return __eqsf2(a, b); 408 } 409 410 /* return value >= 0 if a>=b and neither is NaN */ 411 int __gesf2(float a, float b) 412 { 413 float32 fa, fb; 414 fa.f = a; 415 fb.f = b; 416 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 417 /* TODO: sigNaNs*/ 580 418 return -1; 581 } 582 583 return 1; 584 } 585 586 int __cmptf2(long double a, long double b) 587 { 588 float128 ta, tb; 589 ta.ld = a; 590 tb.ld = b; 591 592 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 593 return 1; /* no special constant for unordered - maybe signaled? */ 594 } 595 596 if (isFloat128eq(ta, tb)) { 597 return 0; 598 } 599 600 if (isFloat128lt(ta, tb)) { 601 return -1; 602 } 603 604 return 1; 605 } 606 607 int __unordsf2(float a, float b) 608 { 609 float32 fa, fb; 610 fa.f = a; 611 fb.f = b; 612 return ((isFloat32NaN(fa)) || (isFloat32NaN(fb))); 613 } 614 615 int __unorddf2(double a, double b) 616 { 617 float64 da, db; 618 da.d = a; 619 db.d = b; 620 return ((isFloat64NaN(da)) || (isFloat64NaN(db))); 621 } 622 623 int __unordtf2(long double a, long double b) 624 { 625 float128 ta, tb; 626 ta.ld = a; 627 tb.ld = b; 628 return ((isFloat128NaN(ta)) || (isFloat128NaN(tb))); 629 } 630 631 int __eqsf2(float a, float b) 632 { 633 float32 fa, fb; 634 fa.f = a; 635 fb.f = b; 636 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 637 /* TODO: sigNaNs */ 638 return 1; 639 } 640 return isFloat32eq(fa, fb) - 1; 641 } 642 643 int __eqdf2(double a, double b) 644 { 645 float64 da, db; 646 da.d = a; 647 db.d = b; 648 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 649 /* TODO: sigNaNs */ 650 return 1; 651 } 652 return isFloat64eq(da, db) - 1; 653 } 654 655 int __eqtf2(long double a, long double b) 656 { 657 float128 ta, tb; 658 ta.ld = a; 659 tb.ld = b; 660 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 661 /* TODO: sigNaNs */ 662 return 1; 663 } 664 return isFloat128eq(ta, tb) - 1; 665 } 666 667 int __nesf2(float a, float b) 668 { 669 /* strange behavior, but it was in gcc documentation */ 670 return __eqsf2(a, b); 671 } 672 673 int __nedf2(double a, double b) 674 { 675 /* strange behavior, but it was in gcc documentation */ 676 return __eqdf2(a, b); 677 } 678 679 int __netf2(long double a, long double b) 680 { 681 /* strange behavior, but it was in gcc documentation */ 682 return __eqtf2(a, b); 683 } 684 685 int __gesf2(float a, float b) 686 { 687 float32 fa, fb; 688 fa.f = a; 689 fb.f = b; 690 691 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 692 /* TODO: sigNaNs */ 693 return -1; 694 } 419 }; 695 420 696 421 if (isFloat32eq(fa, fb)) { 697 422 return 0; 698 } 423 }; 699 424 700 425 if (isFloat32gt(fa, fb)) { 701 426 return 1; 702 }427 }; 703 428 704 429 return -1; 705 430 } 706 431 707 int __gedf2(double a, double b) 708 { 709 float64 da, db; 710 da.d = a; 711 db.d = b; 712 713 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 714 /* TODO: sigNaNs */ 715 return -1; 716 } 717 718 if (isFloat64eq(da, db)) { 719 return 0; 720 } 721 722 if (isFloat64gt(da, db)) { 432 /** Return negative value, if a<b and neither is NaN*/ 433 int __ltsf2(float a, float b) 434 { 435 float32 fa, fb; 436 fa.f = a; 437 fb.f = b; 438 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 439 /* TODO: sigNaNs*/ 723 440 return 1; 724 } 725 726 return -1; 727 } 728 729 int __getf2(long double a, long double b) 730 { 731 float128 ta, tb; 732 ta.ld = a; 733 tb.ld = b; 734 735 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 736 /* TODO: sigNaNs */ 737 return -1; 738 } 739 740 if (isFloat128eq(ta, tb)) { 741 return 0; 742 } 743 744 if (isFloat128gt(ta, tb)) { 745 return 1; 746 } 747 748 return -1; 749 } 750 751 int __ltsf2(float a, float b) 752 { 753 float32 fa, fb; 754 fa.f = a; 755 fb.f = b; 756 757 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 758 /* TODO: sigNaNs */ 759 return 1; 760 } 761 441 }; 762 442 if (isFloat32lt(fa, fb)) { 763 443 return -1; 764 } 765 444 }; 766 445 return 0; 767 446 } 768 447 769 int __ltdf2(double a, double b) 770 { 771 float64 da, db; 772 da.d = a;773 db.d = b;774 775 if ( (isFloat64NaN(da)) || (isFloat64NaN(db))) {776 /* TODO: sigNaNs 448 /* return value <= 0 if a<=b and neither is NaN */ 449 int __lesf2(float a, float b) 450 { 451 float32 fa, fb; 452 fa.f = a; 453 fb.f = b; 454 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 455 /* TODO: sigNaNs*/ 777 456 return 1; 778 } 779 780 if (isFloat64lt(da, db)) { 781 return -1; 782 } 783 784 return 0; 785 } 786 787 int __lttf2(long double a, long double b) 788 { 789 float128 ta, tb; 790 ta.ld = a; 791 tb.ld = b; 792 793 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 794 /* TODO: sigNaNs */ 795 return 1; 796 } 797 798 if (isFloat128lt(ta, tb)) { 799 return -1; 800 } 801 802 return 0; 803 } 804 805 int __lesf2(float a, float b) 806 { 807 float32 fa, fb; 808 fa.f = a; 809 fb.f = b; 810 811 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 812 /* TODO: sigNaNs */ 813 return 1; 814 } 457 }; 815 458 816 459 if (isFloat32eq(fa, fb)) { 817 460 return 0; 818 } 461 }; 819 462 820 463 if (isFloat32lt(fa, fb)) { 821 464 return -1; 822 }465 }; 823 466 824 467 return 1; 825 468 } 826 469 827 int __ledf2(double a, double b) 828 { 829 float64 da, db; 830 da.d = a; 831 db.d = b; 832 833 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 834 /* TODO: sigNaNs */ 835 return 1; 836 } 837 838 if (isFloat64eq(da, db)) { 839 return 0; 840 } 841 842 if (isFloat64lt(da, db)) { 470 /** Return positive value, if a>b and neither is NaN*/ 471 int __gtsf2(float a, float b) 472 { 473 float32 fa, fb; 474 fa.f = a; 475 fb.f = b; 476 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 477 /* TODO: sigNaNs*/ 843 478 return -1; 844 } 845 846 return 1; 847 } 848 849 int __letf2(long double a, long double b) 850 { 851 float128 ta, tb; 852 ta.ld = a; 853 tb.ld = b; 854 855 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 856 /* TODO: sigNaNs */ 857 return 1; 858 } 859 860 if (isFloat128eq(ta, tb)) { 861 return 0; 862 } 863 864 if (isFloat128lt(ta, tb)) { 865 return -1; 866 } 867 868 return 1; 869 } 870 871 int __gtsf2(float a, float b) 872 { 873 float32 fa, fb; 874 fa.f = a; 875 fb.f = b; 876 877 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 878 /* TODO: sigNaNs */ 879 return -1; 880 } 881 479 }; 882 480 if (isFloat32gt(fa, fb)) { 883 481 return 1; 884 } 885 482 }; 886 483 return 0; 887 484 } 888 485 889 int __gtdf2(double a, double b) 890 { 891 float64 da, db; 892 da.d = a; 893 db.d = b; 894 895 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 896 /* TODO: sigNaNs */ 897 return -1; 898 } 899 900 if (isFloat64gt(da, db)) { 901 return 1; 902 } 903 904 return 0; 905 } 906 907 int __gttf2(long double a, long double b) 908 { 909 float128 ta, tb; 910 ta.ld = a; 911 tb.ld = b; 912 913 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 914 /* TODO: sigNaNs */ 915 return -1; 916 } 917 918 if (isFloat128gt(ta, tb)) { 919 return 1; 920 } 921 922 return 0; 923 } 924 925 926 927 #ifdef SPARC_SOFTFLOAT 928 929 /* SPARC quadruple-precision wrappers */ 930 931 void _Qp_add(long double *c, long double *a, long double *b) 932 { 933 *c = __addtf3(*a, *b); 934 } 935 936 void _Qp_sub(long double *c, long double *a, long double *b) 937 { 938 *c = __subtf3(*a, *b); 939 } 940 941 void _Qp_mul(long double *c, long double *a, long double *b) 942 { 943 *c = __multf3(*a, *b); 944 } 945 946 void _Qp_div(long double *c, long double *a, long double *b) 947 { 948 *c = __divtf3(*a, *b); 949 } 950 951 void _Qp_neg(long double *c, long double *a) 952 { 953 *c = __negtf2(*a); 954 } 955 956 void _Qp_stoq(long double *c, float a) 957 { 958 *c = __extendsftf2(a); 959 } 960 961 void _Qp_dtoq(long double *c, double a) 962 { 963 *c = __extenddftf2(a); 964 } 965 966 float _Qp_qtos(long double *a) 967 { 968 return __trunctfsf2(*a); 969 } 970 971 double _Qp_qtod(long double *a) 972 { 973 return __trunctfdf2(*a); 974 } 975 976 int _Qp_qtoi(long double *a) 977 { 978 return __fixtfsi(*a); 979 } 980 981 unsigned int _Qp_qtoui(long double *a) 982 { 983 return __fixunstfsi(*a); 984 } 985 986 long _Qp_qtox(long double *a) 987 { 988 return __fixtfdi(*a); 989 } 990 991 unsigned long _Qp_qtoux(long double *a) 992 { 993 return __fixunstfdi(*a); 994 } 995 996 void _Qp_itoq(long double *c, int a) 997 { 998 *c = __floatsitf(a); 999 } 1000 1001 void _Qp_uitoq(long double *c, unsigned int a) 1002 { 1003 *c = __floatunsitf(a); 1004 } 1005 1006 void _Qp_xtoq(long double *c, long a) 1007 { 1008 *c = __floatditf(a); 1009 } 1010 1011 void _Qp_uxtoq(long double *c, unsigned long a) 1012 { 1013 *c = __floatunditf(a); 1014 } 1015 1016 int _Qp_cmp(long double *a, long double *b) 1017 { 1018 float128 ta, tb; 1019 ta.ld = *a; 1020 tb.ld = *b; 1021 1022 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1023 return 3; 1024 } 1025 1026 if (isFloat128eq(ta, tb)) { 1027 return 0; 1028 } 1029 1030 if (isFloat128lt(ta, tb)) { 1031 return 1; 1032 } 1033 1034 return 2; 1035 } 1036 1037 int _Qp_cmpe(long double *a, long double *b) 1038 { 1039 /* strange, but is defined this way in SPARC Compliance Definition */ 1040 return _Qp_cmp(a, b); 1041 } 1042 1043 int _Qp_feq(long double *a, long double *b) 1044 { 1045 float128 ta, tb; 1046 ta.ld = *a; 1047 tb.ld = *b; 1048 1049 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1050 return 0; 1051 } 1052 1053 return isFloat128eq(ta, tb); 1054 } 1055 1056 int _Qp_fge(long double *a, long double *b) 1057 { 1058 float128 ta, tb; 1059 ta.ld = *a; 1060 tb.ld = *b; 1061 1062 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1063 return 0; 1064 } 1065 1066 return isFloat128eq(ta, tb) || isFloat128gt(ta, tb); 1067 } 1068 1069 int _Qp_fgt(long double *a, long double *b) 1070 { 1071 float128 ta, tb; 1072 ta.ld = *a; 1073 tb.ld = *b; 1074 1075 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1076 return 0; 1077 } 1078 1079 return isFloat128gt(ta, tb); 1080 } 1081 1082 int _Qp_fle(long double*a, long double *b) 1083 { 1084 float128 ta, tb; 1085 ta.ld = *a; 1086 tb.ld = *b; 1087 1088 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1089 return 0; 1090 } 1091 1092 return isFloat128eq(ta, tb) || isFloat128lt(ta, tb); 1093 } 1094 1095 int _Qp_flt(long double *a, long double *b) 1096 { 1097 float128 ta, tb; 1098 ta.ld = *a; 1099 tb.ld = *b; 1100 1101 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1102 return 0; 1103 } 1104 1105 return isFloat128lt(ta, tb); 1106 } 1107 1108 int _Qp_fne(long double *a, long double *b) 1109 { 1110 float128 ta, tb; 1111 ta.ld = *a; 1112 tb.ld = *b; 1113 1114 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1115 return 1; 1116 } 1117 1118 return !isFloat128eq(ta, tb); 1119 } 1120 1121 #endif 486 /* Other functions */ 487 488 float __powisf2(float a, int b) 489 { 490 /* TODO: */ 491 float32 fa; 492 fa.binary = FLOAT32_NAN; 493 return fa.f; 494 } 1122 495 1123 496 /** @} -
TabularUnified uspace/lib/softfloat/generic/sub.c ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Substraction functions.32 /** @file 34 33 */ 35 34 … … 37 36 #include <sub.h> 38 37 #include <comparison.h> 39 #include <common.h> 40 41 /** 42 * Subtract two single-precision floats with the same signs. 43 * 44 * @param a First input operand. 45 * @param b Second input operand. 46 * @return Result of substraction. 38 39 /** Subtract two float32 numbers with same signs 47 40 */ 48 41 float32 subFloat32(float32 a, float32 b) … … 59 52 /* TODO: fix SigNaN */ 60 53 if (isFloat32SigNaN(b)) { 61 } 62 return b; 63 } 54 }; 55 return b; 56 }; 64 57 65 58 if (b.parts.exp == FLOAT32_MAX_EXPONENT) { … … 79 72 /* TODO: fix SigNaN */ 80 73 if (isFloat32SigNaN(a) || isFloat32SigNaN(b)) { 81 } 82 return a; 83 } 74 }; 75 return a; 76 }; 84 77 85 78 if (a.parts.exp == FLOAT32_MAX_EXPONENT) { … … 89 82 result.binary = FLOAT32_NAN; 90 83 return result; 91 } 84 }; 92 85 return a; 93 86 } … … 99 92 frac2 = b.parts.fraction; 100 93 exp2 = b.parts.exp; 101 } 94 }; 95 96 if (exp1 == 0) { 97 /* both are denormalized */ 98 result.parts.fraction = frac1-frac2; 99 if (result.parts.fraction > frac1) { 100 /* TODO: underflow exception */ 101 return result; 102 }; 103 result.parts.exp = 0; 104 return result; 105 }; 106 107 /* add hidden bit */ 108 frac1 |= FLOAT32_HIDDEN_BIT_MASK; 109 110 if (exp2 == 0) { 111 /* denormalized */ 112 --expdiff; 113 } else { 114 /* normalized */ 115 frac2 |= FLOAT32_HIDDEN_BIT_MASK; 116 }; 117 118 /* create some space for rounding */ 119 frac1 <<= 6; 120 frac2 <<= 6; 121 122 if (expdiff > FLOAT32_FRACTION_SIZE + 1) { 123 goto done; 124 }; 125 126 frac1 = frac1 - (frac2 >> expdiff); 127 done: 128 /* TODO: find first nonzero digit and shift result and detect possibly underflow */ 129 while ((exp1 > 0) && (!(frac1 & (FLOAT32_HIDDEN_BIT_MASK << 6 )))) { 130 --exp1; 131 frac1 <<= 1; 132 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */ 133 }; 134 135 /* rounding - if first bit after fraction is set then round up */ 136 frac1 += 0x20; 137 138 if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) { 139 ++exp1; 140 frac1 >>= 1; 141 }; 142 143 /*Clear hidden bit and shift */ 144 result.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)); 145 result.parts.exp = exp1; 146 147 return result; 148 } 149 150 /** Subtract two float64 numbers with same signs 151 */ 152 float64 subFloat64(float64 a, float64 b) 153 { 154 int expdiff; 155 uint32_t exp1, exp2; 156 uint64_t frac1, frac2; 157 float64 result; 158 159 result.d = 0; 160 161 expdiff = a.parts.exp - b.parts.exp; 162 if ((expdiff < 0 ) || ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) { 163 if (isFloat64NaN(b)) { 164 /* TODO: fix SigNaN */ 165 if (isFloat64SigNaN(b)) { 166 }; 167 return b; 168 }; 169 170 if (b.parts.exp == FLOAT64_MAX_EXPONENT) { 171 b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */ 172 return b; 173 } 174 175 result.parts.sign = !a.parts.sign; 176 177 frac1 = b.parts.fraction; 178 exp1 = b.parts.exp; 179 frac2 = a.parts.fraction; 180 exp2 = a.parts.exp; 181 expdiff *= -1; 182 } else { 183 if (isFloat64NaN(a)) { 184 /* TODO: fix SigNaN */ 185 if (isFloat64SigNaN(a) || isFloat64SigNaN(b)) { 186 }; 187 return a; 188 }; 189 190 if (a.parts.exp == FLOAT64_MAX_EXPONENT) { 191 if (b.parts.exp == FLOAT64_MAX_EXPONENT) { 192 /* inf - inf => nan */ 193 /* TODO: fix exception */ 194 result.binary = FLOAT64_NAN; 195 return result; 196 }; 197 return a; 198 } 199 200 result.parts.sign = a.parts.sign; 201 202 frac1 = a.parts.fraction; 203 exp1 = a.parts.exp; 204 frac2 = b.parts.fraction; 205 exp2 = b.parts.exp; 206 }; 102 207 103 208 if (exp1 == 0) { … … 107 212 /* TODO: underflow exception */ 108 213 return result; 109 } 214 }; 110 215 result.parts.exp = 0; 111 216 return result; 112 } 217 }; 113 218 114 219 /* add hidden bit */ 115 frac1 |= FLOAT 32_HIDDEN_BIT_MASK;220 frac1 |= FLOAT64_HIDDEN_BIT_MASK; 116 221 117 222 if (exp2 == 0) { … … 120 225 } else { 121 226 /* normalized */ 122 frac2 |= FLOAT 32_HIDDEN_BIT_MASK;123 } 227 frac2 |= FLOAT64_HIDDEN_BIT_MASK; 228 }; 124 229 125 230 /* create some space for rounding */ … … 127 232 frac2 <<= 6; 128 233 129 if (expdiff > FLOAT 32_FRACTION_SIZE + 1) {130 goto done;131 }234 if (expdiff > FLOAT64_FRACTION_SIZE + 1) { 235 goto done; 236 }; 132 237 133 238 frac1 = frac1 - (frac2 >> expdiff); 134 135 done:136 /* TODO: find first nonzero digit and shift result and detect possibly underflow */137 while ((exp1 > 0) && (!(frac1 & (FLOAT32_HIDDEN_BIT_MASK << 6 )))) {138 --exp1;139 frac1 <<= 1;140 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */141 }142 143 /* rounding - if first bit after fraction is set then round up */144 frac1 += 0x20;145 146 if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) {147 ++exp1;148 frac1 >>= 1;149 }150 151 /* Clear hidden bit and shift */152 result.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK));153 result.parts.exp = exp1;154 155 return result;156 }157 158 /**159 * Subtract two double-precision floats with the same signs.160 *161 * @param a First input operand.162 * @param b Second input operand.163 * @return Result of substraction.164 */165 float64 subFloat64(float64 a, float64 b)166 {167 int expdiff;168 uint32_t exp1, exp2;169 uint64_t frac1, frac2;170 float64 result;171 172 result.d = 0;173 174 expdiff = a.parts.exp - b.parts.exp;175 if ((expdiff < 0 ) || ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) {176 if (isFloat64NaN(b)) {177 /* TODO: fix SigNaN */178 if (isFloat64SigNaN(b)) {179 }180 return b;181 }182 183 if (b.parts.exp == FLOAT64_MAX_EXPONENT) {184 b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */185 return b;186 }187 188 result.parts.sign = !a.parts.sign;189 190 frac1 = b.parts.fraction;191 exp1 = b.parts.exp;192 frac2 = a.parts.fraction;193 exp2 = a.parts.exp;194 expdiff *= -1;195 } else {196 if (isFloat64NaN(a)) {197 /* TODO: fix SigNaN */198 if (isFloat64SigNaN(a) || isFloat64SigNaN(b)) {199 }200 return a;201 }202 203 if (a.parts.exp == FLOAT64_MAX_EXPONENT) {204 if (b.parts.exp == FLOAT64_MAX_EXPONENT) {205 /* inf - inf => nan */206 /* TODO: fix exception */207 result.binary = FLOAT64_NAN;208 return result;209 }210 return a;211 }212 213 result.parts.sign = a.parts.sign;214 215 frac1 = a.parts.fraction;216 exp1 = a.parts.exp;217 frac2 = b.parts.fraction;218 exp2 = b.parts.exp;219 }220 221 if (exp1 == 0) {222 /* both are denormalized */223 result.parts.fraction = frac1 - frac2;224 if (result.parts.fraction > frac1) {225 /* TODO: underflow exception */226 return result;227 }228 result.parts.exp = 0;229 return result;230 }231 232 /* add hidden bit */233 frac1 |= FLOAT64_HIDDEN_BIT_MASK;234 235 if (exp2 == 0) {236 /* denormalized */237 --expdiff;238 } else {239 /* normalized */240 frac2 |= FLOAT64_HIDDEN_BIT_MASK;241 }242 243 /* create some space for rounding */244 frac1 <<= 6;245 frac2 <<= 6;246 247 if (expdiff > FLOAT64_FRACTION_SIZE + 1) {248 goto done;249 }250 251 frac1 = frac1 - (frac2 >> expdiff);252 253 239 done: 254 240 /* TODO: find first nonzero digit and shift result and detect possibly underflow */ … … 256 242 --exp1; 257 243 frac1 <<= 1; 258 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */259 } 244 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */ 245 }; 260 246 261 247 /* rounding - if first bit after fraction is set then round up */ … … 265 251 ++exp1; 266 252 frac1 >>= 1; 267 } 268 269 /* 253 }; 254 255 /*Clear hidden bit and shift */ 270 256 result.parts.fraction = ((frac1 >> 6) & (~FLOAT64_HIDDEN_BIT_MASK)); 271 257 result.parts.exp = exp1; … … 274 260 } 275 261 276 /**277 * Subtract two quadruple-precision floats with the same signs.278 *279 * @param a First input operand.280 * @param b Second input operand.281 * @return Result of substraction.282 */283 float128 subFloat128(float128 a, float128 b)284 {285 int expdiff;286 uint32_t exp1, exp2;287 uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo;288 float128 result;289 290 result.binary.hi = 0;291 result.binary.lo = 0;292 293 expdiff = a.parts.exp - b.parts.exp;294 if ((expdiff < 0 ) || ((expdiff == 0) &&295 lt128(a.parts.frac_hi, a.parts.frac_lo, b.parts.frac_hi, b.parts.frac_lo))) {296 if (isFloat128NaN(b)) {297 /* TODO: fix SigNaN */298 if (isFloat128SigNaN(b)) {299 }300 return b;301 }302 303 if (b.parts.exp == FLOAT128_MAX_EXPONENT) {304 b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */305 return b;306 }307 308 result.parts.sign = !a.parts.sign;309 310 frac1_hi = b.parts.frac_hi;311 frac1_lo = b.parts.frac_lo;312 exp1 = b.parts.exp;313 frac2_hi = a.parts.frac_hi;314 frac2_lo = a.parts.frac_lo;315 exp2 = a.parts.exp;316 expdiff *= -1;317 } else {318 if (isFloat128NaN(a)) {319 /* TODO: fix SigNaN */320 if (isFloat128SigNaN(a) || isFloat128SigNaN(b)) {321 }322 return a;323 }324 325 if (a.parts.exp == FLOAT128_MAX_EXPONENT) {326 if (b.parts.exp == FLOAT128_MAX_EXPONENT) {327 /* inf - inf => nan */328 /* TODO: fix exception */329 result.binary.hi = FLOAT128_NAN_HI;330 result.binary.lo = FLOAT128_NAN_LO;331 return result;332 }333 return a;334 }335 336 result.parts.sign = a.parts.sign;337 338 frac1_hi = a.parts.frac_hi;339 frac1_lo = a.parts.frac_lo;340 exp1 = a.parts.exp;341 frac2_hi = b.parts.frac_hi;342 frac2_lo = b.parts.frac_lo;343 exp2 = b.parts.exp;344 }345 346 if (exp1 == 0) {347 /* both are denormalized */348 sub128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &tmp_hi, &tmp_lo);349 result.parts.frac_hi = tmp_hi;350 result.parts.frac_lo = tmp_lo;351 if (lt128(frac1_hi, frac1_lo, result.parts.frac_hi, result.parts.frac_lo)) {352 /* TODO: underflow exception */353 return result;354 }355 result.parts.exp = 0;356 return result;357 }358 359 /* add hidden bit */360 or128(frac1_hi, frac1_lo,361 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,362 &frac1_hi, &frac1_lo);363 364 if (exp2 == 0) {365 /* denormalized */366 --expdiff;367 } else {368 /* normalized */369 or128(frac2_hi, frac2_lo,370 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,371 &frac2_hi, &frac2_lo);372 }373 374 /* create some space for rounding */375 lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);376 lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo);377 378 if (expdiff > FLOAT128_FRACTION_SIZE + 1) {379 goto done;380 }381 382 rshift128(frac2_hi, frac2_lo, expdiff, &tmp_hi, &tmp_lo);383 sub128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo);384 385 done:386 /* TODO: find first nonzero digit and shift result and detect possibly underflow */387 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,388 &tmp_hi, &tmp_lo);389 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);390 while ((exp1 > 0) && (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) {391 --exp1;392 lshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);393 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */394 395 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6,396 &tmp_hi, &tmp_lo);397 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);398 }399 400 /* rounding - if first bit after fraction is set then round up */401 add128(frac1_hi, frac1_lo, 0x0ll, 0x20ll, &frac1_hi, &frac1_lo);402 403 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7,404 &tmp_hi, &tmp_lo);405 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);406 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) {407 ++exp1;408 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo);409 }410 411 /* Clear hidden bit and shift */412 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo);413 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO,414 &tmp_hi, &tmp_lo);415 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo);416 result.parts.frac_hi = tmp_hi;417 result.parts.frac_lo = tmp_lo;418 419 result.parts.exp = exp1;420 421 return result;422 }423 424 262 /** @} 425 263 */ -
TabularUnified uspace/lib/softfloat/include/add.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Addition functions.32 /** @file 34 33 */ 35 34 … … 39 38 extern float32 addFloat32(float32, float32); 40 39 extern float64 addFloat64(float64, float64); 41 extern float128 addFloat128(float128, float128);42 40 43 41 #endif -
TabularUnified uspace/lib/softfloat/include/common.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Common helper operations.32 /** @file 34 33 */ 35 34 … … 40 39 41 40 extern float64 finishFloat64(int32_t, uint64_t, char); 42 extern float128 finishFloat128(int32_t, uint64_t, uint64_t, char, uint64_t);43 41 42 extern int countZeroes64(uint64_t); 43 extern int countZeroes32(uint32_t); 44 44 extern int countZeroes8(uint8_t); 45 extern int countZeroes32(uint32_t);46 extern int countZeroes64(uint64_t);47 45 48 46 extern void roundFloat32(int32_t *, uint32_t *); 49 47 extern void roundFloat64(int32_t *, uint64_t *); 50 extern void roundFloat128(int32_t *, uint64_t *, uint64_t *);51 52 extern void lshift128(uint64_t, uint64_t, int, uint64_t *, uint64_t *);53 extern void rshift128(uint64_t, uint64_t, int, uint64_t *, uint64_t *);54 55 extern void and128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *);56 extern void or128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *);57 extern void xor128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *);58 extern void not128(uint64_t, uint64_t, uint64_t *, uint64_t *);59 60 extern int eq128(uint64_t, uint64_t, uint64_t, uint64_t);61 extern int le128(uint64_t, uint64_t, uint64_t, uint64_t);62 extern int lt128(uint64_t, uint64_t, uint64_t, uint64_t);63 64 extern void add128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *);65 extern void sub128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *);66 67 extern void mul64(uint64_t, uint64_t, uint64_t *, uint64_t *);68 extern void mul128(uint64_t, uint64_t, uint64_t, uint64_t,69 uint64_t *, uint64_t *, uint64_t *, uint64_t *);70 71 extern uint64_t div128est(uint64_t, uint64_t, uint64_t);72 48 73 49 #endif -
TabularUnified uspace/lib/softfloat/include/comparison.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Comparison functions.32 /** @file 34 33 */ 35 34 … … 43 42 extern int isFloat32Zero(float32); 44 43 45 extern int isFloat32eq(float32, float32);46 extern int isFloat32lt(float32, float32);47 extern int isFloat32gt(float32, float32);48 49 44 extern int isFloat64NaN(float64); 50 45 extern int isFloat64SigNaN(float64); … … 53 48 extern int isFloat64Zero(float64); 54 49 55 extern int isFloat64eq(float64, float64); 56 extern int isFloat64lt(float64, float64); 57 extern int isFloat64gt(float64, float64); 58 59 extern int isFloat128NaN(float128); 60 extern int isFloat128SigNaN(float128); 61 62 extern int isFloat128Infinity(float128); 63 extern int isFloat128Zero(float128); 64 65 extern int isFloat128eq(float128, float128); 66 extern int isFloat128lt(float128, float128); 67 extern int isFloat128gt(float128, float128); 50 extern int isFloat32eq(float32, float32); 51 extern int isFloat32lt(float32, float32); 52 extern int isFloat32gt(float32, float32); 68 53 69 54 #endif -
TabularUnified uspace/lib/softfloat/include/conversion.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Conversion of precision and conversion between integers and floats.32 /** @file 34 33 */ 35 34 … … 38 37 39 38 extern float64 convertFloat32ToFloat64(float32); 40 extern float128 convertFloat32ToFloat128(float32);41 extern float128 convertFloat64ToFloat128(float64);42 43 44 39 extern float32 convertFloat64ToFloat32(float64); 45 extern float32 convertFloat128ToFloat32(float128);46 extern float64 convertFloat128ToFloat64(float128);47 48 40 49 41 extern uint32_t float32_to_uint32(float32); … … 53 45 extern int64_t float32_to_int64(float32); 54 46 55 extern uint32_t float64_to_uint32(float64);56 extern int32_t float64_to_int32(float64);57 58 47 extern uint64_t float64_to_uint64(float64); 59 48 extern int64_t float64_to_int64(float64); 60 49 61 extern uint32_t float128_to_uint32(float128); 62 extern int32_t float128_to_int32(float128); 63 64 extern uint64_t float128_to_uint64(float128); 65 extern int64_t float128_to_int64(float128); 66 50 extern uint32_t float64_to_uint32(float64); 51 extern int32_t float64_to_int32(float64); 67 52 68 53 extern float32 uint32_to_float32(uint32_t); … … 78 63 extern float64 int64_to_float64(int64_t); 79 64 80 extern float128 uint32_to_float128(uint32_t);81 extern float128 int32_to_float128(int32_t);82 83 extern float128 uint64_to_float128(uint64_t);84 extern float128 int64_to_float128(int64_t);85 86 65 #endif 87 66 -
TabularUnified uspace/lib/softfloat/include/div.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Division functions.32 /** @file 34 33 */ 35 34 … … 39 38 extern float32 divFloat32(float32, float32); 40 39 extern float64 divFloat64(float64, float64); 41 extern float128 divFloat128(float128, float128); 40 41 extern uint64_t divFloat64estim(uint64_t, uint64_t); 42 42 43 43 #endif -
TabularUnified uspace/lib/softfloat/include/mul.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Multiplication functions.32 /** @file 34 33 */ 35 34 … … 39 38 extern float32 mulFloat32(float32, float32); 40 39 extern float64 mulFloat64(float64, float64); 41 extern float128 mulFloat128(float128, float128); 40 41 extern void mul64integers(uint64_t, uint64_t, uint64_t *, uint64_t *); 42 42 43 43 #endif -
TabularUnified uspace/lib/softfloat/include/other.h ¶
rd3e241a rb7ee0369 30 30 * @{ 31 31 */ 32 /** @file Other functions (power, complex).32 /** @file 33 33 */ 34 34 -
TabularUnified uspace/lib/softfloat/include/sftypes.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Floating point types and constants.32 /** @file 34 33 */ 35 34 … … 78 77 } float64; 79 78 80 typedef union { 81 long double ld; 82 struct { 83 #if defined(__BE__) 84 uint64_t hi; 85 uint64_t lo; 86 #elif defined(__LE__) 87 uint64_t lo; 88 uint64_t hi; 89 #else 90 #error Unknown endianess 91 #endif 92 } binary; 93 94 struct { 95 #if defined(__BE__) 96 uint64_t sign : 1; 97 uint64_t exp : 15; 98 uint64_t frac_hi : 48; 99 uint64_t frac_lo : 64; 100 #elif defined(__LE__) 101 uint64_t frac_lo : 64; 102 uint64_t frac_hi : 48; 103 uint64_t exp : 15; 104 uint64_t sign : 1; 105 #else 106 #error Unknown endianess 107 #endif 108 } parts __attribute__ ((packed)); 109 } float128; 79 #define FLOAT32_MAX 0x7f800000 80 #define FLOAT32_MIN 0xff800000 81 #define FLOAT64_MAX 82 #define FLOAT64_MIN 110 83 111 84 /* 112 * For recognizing NaNs or infinity use specialized comparison functions,85 * For recognizing NaNs or infinity use isFloat32NaN and is Float32Inf, 113 86 * comparing with these constants is not sufficient. 114 87 */ … … 122 95 #define FLOAT64_INF 0x7FF0000000000000ll 123 96 124 #define FLOAT128_NAN_HI 0x7FFF800000000000ll 125 #define FLOAT128_NAN_LO 0x0000000000000001ll 126 #define FLOAT128_SIGNAN_HI 0x7FFF000000000000ll 127 #define FLOAT128_SIGNAN_LO 0x0000000000000001ll 128 #define FLOAT128_INF_HI 0x7FFF000000000000ll 129 #define FLOAT128_INF_LO 0x0000000000000000ll 97 #define FLOAT32_FRACTION_SIZE 23 98 #define FLOAT64_FRACTION_SIZE 52 130 99 131 #define FLOAT32_FRACTION_SIZE 23 132 #define FLOAT64_FRACTION_SIZE 52 133 #define FLOAT128_FRACTION_SIZE 112 134 #define FLOAT128_FRAC_HI_SIZE 48 135 #define FLOAT128_FRAC_LO_SIZE 64 136 137 #define FLOAT32_HIDDEN_BIT_MASK 0x800000 138 #define FLOAT64_HIDDEN_BIT_MASK 0x10000000000000ll 139 #define FLOAT128_HIDDEN_BIT_MASK_HI 0x1000000000000ll 140 #define FLOAT128_HIDDEN_BIT_MASK_LO 0x0000000000000000ll 100 #define FLOAT32_HIDDEN_BIT_MASK 0x800000 101 #define FLOAT64_HIDDEN_BIT_MASK 0x10000000000000ll 141 102 142 103 #define FLOAT32_MAX_EXPONENT 0xFF 143 104 #define FLOAT64_MAX_EXPONENT 0x7FF 144 #define FLOAT128_MAX_EXPONENT 0x7FFF145 105 146 106 #define FLOAT32_BIAS 0x7F 147 107 #define FLOAT64_BIAS 0x3FF 148 108 #define FLOAT80_BIAS 0x3FFF 149 #define FLOAT128_BIAS 0x3FFF150 109 151 110 #endif -
TabularUnified uspace/lib/softfloat/include/softfloat.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Softfloat API.32 /** @file 34 33 */ 35 34 … … 157 156 extern int __ltdf2(double, double); 158 157 extern int __lttf2(long double, long double); 159 160 158 extern int __lesf2(float, float); 161 159 extern int __ledf2(double, double); … … 168 166 /* Not implemented yet */ 169 167 extern float __powisf2(float, int); 170 extern double __powidf2 (double, int);171 extern long double __powitf2 (long double, int);172 extern long double __powixf2 (long double, int);173 174 175 176 /* SPARC quadruple-precision wrappers */177 178 extern void _Qp_add(long double *, long double *, long double *);179 extern void _Qp_sub(long double *, long double *, long double *);180 extern void _Qp_mul(long double *, long double *, long double *);181 extern void _Qp_div(long double *, long double *, long double *);182 extern void _Qp_neg(long double *, long double *);183 184 extern void _Qp_stoq(long double *, float);185 extern void _Qp_dtoq(long double *, double);186 extern float _Qp_qtos(long double *);187 extern double _Qp_qtod(long double *);188 189 extern int _Qp_qtoi(long double *);190 extern unsigned int _Qp_qtoui(long double *);191 extern long _Qp_qtox(long double *);192 extern unsigned long _Qp_qtoux(long double *);193 194 extern void _Qp_itoq(long double *, int);195 extern void _Qp_uitoq(long double *, unsigned int);196 extern void _Qp_xtoq(long double *, long);197 extern void _Qp_uxtoq(long double *, unsigned long);198 199 extern int _Qp_cmp(long double *, long double *);200 extern int _Qp_cmpe(long double *, long double *);201 extern int _Qp_feq(long double *, long double *);202 extern int _Qp_fge(long double *, long double *);203 extern int _Qp_fgt(long double *, long double *);204 extern int _Qp_fle(long double*, long double *);205 extern int _Qp_flt(long double *, long double *);206 extern int _Qp_fne(long double *, long double *);207 208 /* Not implemented yet */209 extern void _Qp_sqrt(long double *, long double *);210 168 211 169 #endif -
TabularUnified uspace/lib/softfloat/include/sub.h ¶
rd3e241a rb7ee0369 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy4 3 * All rights reserved. 5 4 * … … 31 30 * @{ 32 31 */ 33 /** @file Substraction functions.32 /** @file 34 33 */ 35 34 … … 39 38 extern float32 subFloat32(float32, float32); 40 39 extern float64 subFloat64(float64, float64); 41 extern float128 subFloat128(float128, float128);42 40 43 41 #endif
Note:
See TracChangeset
for help on using the changeset viewer.