Changes in / [b7ee0369:d3e241a] in mainline
- Location:
- uspace/lib
- Files:
-
- 30 edited
Legend:
- Unmodified
- Added
- Removed
-
uspace/lib/posix/stdio.h
rb7ee0369 rd3e241a 127 127 128 128 #ifndef LIBPOSIX_INTERNAL 129 /* DEBUG macro does not belong to POSIX stdio.h. Its unconditional 130 * definition in the native stdio.h causes unexpected behaviour of 131 * applications which use their own DEBUG macro (e.g. debugging 132 * output is printed even if not desirable). */ 133 #undef DEBUG 134 129 135 #define ctermid posix_ctermid 130 136 -
uspace/lib/softfloat/arch/abs32le/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2010 Martin Decky 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 46 47 #define float64_to_longlong(X) float64_to_int64(X); 47 48 49 #define float128_to_int(X) float128_to_int32(X); 50 #define float128_to_long(X) float128_to_int32(X); 51 #define float128_to_longlong(X) float128_to_int64(X); 52 48 53 #define float32_to_uint(X) float32_to_uint32(X); 49 54 #define float32_to_ulong(X) float32_to_uint32(X); … … 53 58 #define float64_to_ulong(X) float64_to_uint32(X); 54 59 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X); 62 #define float128_to_ulong(X) float128_to_uint32(X); 63 #define float128_to_ulonglong(X) float128_to_uint64(X); 55 64 56 65 #define int_to_float32(X) int32_to_float32(X); … … 62 71 #define longlong_to_float64(X) int64_to_float64(X); 63 72 73 #define int_to_float128(X) int32_to_float128(X); 74 #define long_to_float128(X) int32_to_float128(X); 75 #define longlong_to_float128(X) int64_to_float128(X); 76 64 77 #define uint_to_float32(X) uint32_to_float32(X); 65 78 #define ulong_to_float32(X) uint32_to_float32(X); … … 70 83 #define ulonglong_to_float64(X) uint64_to_float64(X); 71 84 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint32_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 88 72 89 #endif 73 90 -
uspace/lib/softfloat/arch/amd64/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 46 47 #define float64_to_longlong(X) float64_to_int64(X); 47 48 49 #define float128_to_int(X) float128_to_int32(X); 50 #define float128_to_long(X) float128_to_int64(X); 51 #define float128_to_longlong(X) float128_to_int64(X); 52 48 53 #define float32_to_uint(X) float32_to_uint32(X); 49 54 #define float32_to_ulong(X) float32_to_uint64(X); … … 53 58 #define float64_to_ulong(X) float64_to_uint64(X); 54 59 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X); 62 #define float128_to_ulong(X) float128_to_uint64(X); 63 #define float128_to_ulonglong(X) float128_to_uint64(X); 55 64 56 65 #define int_to_float32(X) int32_to_float32(X); … … 62 71 #define longlong_to_float64(X) int64_to_float64(X); 63 72 73 #define int_to_float128(X) int32_to_float128(X); 74 #define long_to_float128(X) int64_to_float128(X); 75 #define longlong_to_float128(X) int64_to_float128(X); 76 64 77 #define uint_to_float32(X) uint32_to_float32(X); 65 78 #define ulong_to_float32(X) uint64_to_float32(X); … … 70 83 #define ulonglong_to_float64(X) uint64_to_float64(X); 71 84 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint64_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 88 72 89 #endif 73 74 90 75 91 /** @} 76 92 */ 77 -
uspace/lib/softfloat/arch/arm32/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 33 34 */ 34 35 /** @file 35 * @brief Softfloat architecture dependent definitions.36 36 */ 37 37 … … 47 47 #define float64_to_longlong(X) float64_to_int64(X); 48 48 49 #define float128_to_int(X) float128_to_int32(X); 50 #define float128_to_long(X) float128_to_int32(X); 51 #define float128_to_longlong(X) float128_to_int64(X); 52 49 53 #define float32_to_uint(X) float32_to_uint32(X); 50 54 #define float32_to_ulong(X) float32_to_uint32(X); … … 54 58 #define float64_to_ulong(X) float64_to_uint32(X); 55 59 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X); 62 #define float128_to_ulong(X) float128_to_uint32(X); 63 #define float128_to_ulonglong(X) float128_to_uint64(X); 56 64 57 65 #define int_to_float32(X) int32_to_float32(X); … … 63 71 #define longlong_to_float64(X) int64_to_float64(X); 64 72 73 #define int_to_float128(X) int32_to_float128(X); 74 #define long_to_float128(X) int32_to_float128(X); 75 #define longlong_to_float128(X) int64_to_float128(X); 76 65 77 #define uint_to_float32(X) uint32_to_float32(X); 66 78 #define ulong_to_float32(X) uint32_to_float32(X); … … 71 83 #define ulonglong_to_float64(X) uint64_to_float64(X); 72 84 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint32_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 88 73 89 #endif 74 90 -
uspace/lib/softfloat/arch/ia32/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 46 47 #define float64_to_longlong(X) float64_to_int64(X); 47 48 49 #define float128_to_int(X) float128_to_int32(X); 50 #define float128_to_long(X) float128_to_int32(X); 51 #define float128_to_longlong(X) float128_to_int64(X); 52 48 53 #define float32_to_uint(X) float32_to_uint32(X); 49 54 #define float32_to_ulong(X) float32_to_uint32(X); … … 53 58 #define float64_to_ulong(X) float64_to_uint32(X); 54 59 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X); 62 #define float128_to_ulong(X) float128_to_uint32(X); 63 #define float128_to_ulonglong(X) float128_to_uint64(X); 55 64 56 65 #define int_to_float32(X) int32_to_float32(X); … … 62 71 #define longlong_to_float64(X) int64_to_float64(X); 63 72 73 #define int_to_float128(X) int32_to_float128(X); 74 #define long_to_float128(X) int32_to_float128(X); 75 #define longlong_to_float128(X) int64_to_float128(X); 76 64 77 #define uint_to_float32(X) uint32_to_float32(X); 65 78 #define ulong_to_float32(X) uint32_to_float32(X); … … 70 83 #define ulonglong_to_float64(X) uint64_to_float64(X); 71 84 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint32_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 88 72 89 #endif 73 90 -
uspace/lib/softfloat/arch/ia64/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 46 47 #define float64_to_longlong(X) float64_to_int64(X); 47 48 49 #define float128_to_int(X) float128_to_int32(X); 50 #define float128_to_long(X) float128_to_int64(X); 51 #define float128_to_longlong(X) float128_to_int64(X); 52 48 53 #define float32_to_uint(X) float32_to_uint32(X); 49 54 #define float32_to_ulong(X) float32_to_uint64(X); … … 53 58 #define float64_to_ulong(X) float64_to_uint64(X); 54 59 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X); 62 #define float128_to_ulong(X) float128_to_uint64(X); 63 #define float128_to_ulonglong(X) float128_to_uint64(X); 55 64 56 65 #define int_to_float32(X) int32_to_float32(X); … … 62 71 #define longlong_to_float64(X) int64_to_float64(X); 63 72 73 #define int_to_float128(X) int32_to_float128(X); 74 #define long_to_float128(X) int64_to_float128(X); 75 #define longlong_to_float128(X) int64_to_float128(X); 76 64 77 #define uint_to_float32(X) uint32_to_float32(X); 65 78 #define ulong_to_float32(X) uint64_to_float32(X); … … 70 83 #define ulonglong_to_float64(X) uint64_to_float64(X); 71 84 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint64_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 88 72 89 #endif 73 74 90 75 91 /** @} 76 92 */ 77 -
uspace/lib/softfloat/arch/mips32/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 46 47 #define float64_to_longlong(X) float64_to_int64(X); 47 48 49 #define float128_to_int(X) float128_to_int32(X); 50 #define float128_to_long(X) float128_to_int32(X); 51 #define float128_to_longlong(X) float128_to_int64(X); 52 48 53 #define float32_to_uint(X) float32_to_uint32(X); 49 54 #define float32_to_ulong(X) float32_to_uint32(X); … … 53 58 #define float64_to_ulong(X) float64_to_uint32(X); 54 59 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X); 62 #define float128_to_ulong(X) float128_to_uint32(X); 63 #define float128_to_ulonglong(X) float128_to_uint64(X); 55 64 56 65 #define int_to_float32(X) int32_to_float32(X); … … 62 71 #define longlong_to_float64(X) int64_to_float64(X); 63 72 73 #define int_to_float128(X) int32_to_float128(X); 74 #define long_to_float128(X) int32_to_float128(X); 75 #define longlong_to_float128(X) int64_to_float128(X); 76 64 77 #define uint_to_float32(X) uint32_to_float32(X); 65 78 #define ulong_to_float32(X) uint32_to_float32(X); … … 70 83 #define ulonglong_to_float64(X) uint64_to_float64(X); 71 84 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint32_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 88 72 89 #endif 73 90 -
uspace/lib/softfloat/arch/mips32eb/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 46 47 #define float64_to_longlong(X) float64_to_int64(X); 47 48 49 #define float128_to_int(X) float128_to_int32(X); 50 #define float128_to_long(X) float128_to_int32(X); 51 #define float128_to_longlong(X) float128_to_int64(X); 52 48 53 #define float32_to_uint(X) float32_to_uint32(X); 49 54 #define float32_to_ulong(X) float32_to_uint32(X); … … 53 58 #define float64_to_ulong(X) float64_to_uint32(X); 54 59 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X); 62 #define float128_to_ulong(X) float128_to_uint32(X); 63 #define float128_to_ulonglong(X) float128_to_uint64(X); 55 64 56 65 #define int_to_float32(X) int32_to_float32(X); … … 62 71 #define longlong_to_float64(X) int64_to_float64(X); 63 72 73 #define int_to_float128(X) int32_to_float128(X); 74 #define long_to_float128(X) int32_to_float128(X); 75 #define longlong_to_float128(X) int64_to_float128(X); 76 64 77 #define uint_to_float32(X) uint32_to_float32(X); 65 78 #define ulong_to_float32(X) uint32_to_float32(X); … … 70 83 #define ulonglong_to_float64(X) uint64_to_float64(X); 71 84 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint32_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 88 72 89 #endif 73 74 90 75 91 /** @} 76 92 */ 77 -
uspace/lib/softfloat/arch/mips64/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 46 47 #define float64_to_longlong(X) float64_to_int64(X); 47 48 49 #define float128_to_int(X) float128_to_int32(X); 50 #define float128_to_long(X) float128_to_int64(X); 51 #define float128_to_longlong(X) float128_to_int64(X); 52 48 53 #define float32_to_uint(X) float32_to_uint32(X); 49 54 #define float32_to_ulong(X) float32_to_uint64(X); … … 53 58 #define float64_to_ulong(X) float64_to_uint64(X); 54 59 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X); 62 #define float128_to_ulong(X) float128_to_uint64(X); 63 #define float128_to_ulonglong(X) float128_to_uint64(X); 55 64 56 65 #define int_to_float32(X) int32_to_float32(X); … … 62 71 #define longlong_to_float64(X) int64_to_float64(X); 63 72 73 #define int_to_float128(X) int32_to_float128(X); 74 #define long_to_float128(X) int64_to_float128(X); 75 #define longlong_to_float128(X) int64_to_float128(X); 76 64 77 #define uint_to_float32(X) uint32_to_float32(X); 65 78 #define ulong_to_float32(X) uint64_to_float32(X); … … 70 83 #define ulonglong_to_float64(X) uint64_to_float64(X); 71 84 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint64_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 88 72 89 #endif 73 90 -
uspace/lib/softfloat/arch/ppc32/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 46 47 #define float64_to_longlong(X) float64_to_int64(X); 47 48 49 #define float128_to_int(X) float128_to_int32(X); 50 #define float128_to_long(X) float128_to_int32(X); 51 #define float128_to_longlong(X) float128_to_int64(X); 52 48 53 #define float32_to_uint(X) float32_to_uint32(X); 49 54 #define float32_to_ulong(X) float32_to_uint32(X); … … 53 58 #define float64_to_ulong(X) float64_to_uint32(X); 54 59 #define float64_to_ulonglong(X) float64_to_uint64(X); 60 61 #define float128_to_uint(X) float128_to_uint32(X); 62 #define float128_to_ulong(X) float128_to_uint32(X); 63 #define float128_to_ulonglong(X) float128_to_uint64(X); 55 64 56 65 #define int_to_float32(X) int32_to_float32(X); … … 62 71 #define longlong_to_float64(X) int64_to_float64(X); 63 72 73 #define int_to_float128(X) int32_to_float128(X); 74 #define long_to_float128(X) int32_to_float128(X); 75 #define longlong_to_float128(X) int64_to_float128(X); 76 64 77 #define uint_to_float32(X) uint32_to_float32(X); 65 78 #define ulong_to_float32(X) uint32_to_float32(X); … … 70 83 #define ulonglong_to_float64(X) uint64_to_float64(X); 71 84 85 #define uint_to_float128(X) uint32_to_float128(X); 86 #define ulong_to_float128(X) uint32_to_float128(X); 87 #define ulonglong_to_float128(X) uint64_to_float128(X); 88 72 89 #endif 73 74 90 75 91 /** @} 76 92 */ 77 -
uspace/lib/softfloat/arch/sparc64/include/functions.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2006 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 38 39 #define __SOFTFLOAT_FUNCTIONS_H__ 39 40 41 #define SPARC_SOFTFLOAT 42 40 43 #define float32_to_int(X) float32_to_int32(X); 41 44 #define float32_to_long(X) float32_to_int64(X); … … 45 48 #define float64_to_long(X) float64_to_int64(X); 46 49 #define float64_to_longlong(X) float64_to_int64(X); 50 51 #define float128_to_int(X) float128_to_int32(X); 52 #define float128_to_long(X) float128_to_int64(X); 53 #define float128_to_longlong(X) float128_to_int64(X); 47 54 48 55 #define float32_to_uint(X) float32_to_uint32(X); … … 54 61 #define float64_to_ulonglong(X) float64_to_uint64(X); 55 62 63 #define float128_to_uint(X) float128_to_uint32(X); 64 #define float128_to_ulong(X) float128_to_uint64(X); 65 #define float128_to_ulonglong(X) float128_to_uint64(X); 66 56 67 #define int_to_float32(X) int32_to_float32(X); 57 68 #define long_to_float32(X) int64_to_float32(X); … … 61 72 #define long_to_float64(X) int64_to_float64(X); 62 73 #define longlong_to_float64(X) int64_to_float64(X); 74 75 #define int_to_float128(X) int32_to_float128(X); 76 #define long_to_float128(X) int64_to_float128(X); 77 #define longlong_to_float128(X) int64_to_float128(X); 63 78 64 79 #define uint_to_float32(X) uint32_to_float32(X); … … 70 85 #define ulonglong_to_float64(X) uint64_to_float64(X); 71 86 87 #define uint_to_float128(X) uint32_to_float128(X); 88 #define ulong_to_float128(X) uint64_to_float128(X); 89 #define ulonglong_to_float128(X) uint64_to_float128(X); 90 72 91 #endif 73 74 92 75 93 /** @} 76 94 */ 77 -
uspace/lib/softfloat/generic/add.c
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Addition functions. 33 34 */ 34 35 … … 36 37 #include <add.h> 37 38 #include <comparison.h> 38 39 /** Add two Float32 numbers with same signs 39 #include <common.h> 40 41 /** 42 * Add two single-precision floats with the same signs. 43 * 44 * @param a First input operand. 45 * @param b Second input operand. 46 * @return Result of addition. 40 47 */ 41 48 float32 addFloat32(float32 a, float32 b) 42 49 { 43 50 int expdiff; 44 uint32_t exp1, exp2, frac1, frac2;51 uint32_t exp1, exp2, frac1, frac2; 45 52 46 53 expdiff = a.parts.exp - b.parts.exp; … … 49 56 /* TODO: fix SigNaN */ 50 57 if (isFloat32SigNaN(b)) { 51 } ;52 53 return b; 54 } ;58 } 59 60 return b; 61 } 55 62 56 63 if (b.parts.exp == FLOAT32_MAX_EXPONENT) { … … 67 74 /* TODO: fix SigNaN */ 68 75 if (isFloat32SigNaN(a) || isFloat32SigNaN(b)) { 69 } ;70 return (isFloat32NaN(a) ?a:b);71 } ;76 } 77 return (isFloat32NaN(a) ? 
a : b); 78 } 72 79 73 80 if (a.parts.exp == FLOAT32_MAX_EXPONENT) { … … 79 86 frac2 = b.parts.fraction; 80 87 exp2 = b.parts.exp; 81 } ;88 } 82 89 83 90 if (exp1 == 0) { … … 87 94 /* result is not denormalized */ 88 95 a.parts.exp = 1; 89 } ;96 } 90 97 a.parts.fraction = frac1; 91 98 return a; 92 } ;99 } 93 100 94 101 frac1 |= FLOAT32_HIDDEN_BIT_MASK; /* add hidden bit */ … … 100 107 /* add hidden bit to second operand */ 101 108 frac2 |= FLOAT32_HIDDEN_BIT_MASK; 102 } ;109 } 103 110 104 111 /* create some space for rounding */ … … 118 125 ++exp1; 119 126 frac1 >>= 1; 120 } ;127 } 121 128 122 129 /* rounding - if first bit after fraction is set then round up */ … … 127 134 ++exp1; 128 135 frac1 >>= 1; 129 }; 130 136 } 131 137 132 138 if ((exp1 == FLOAT32_MAX_EXPONENT ) || (exp2 > exp1)) { 133 134 135 136 137 139 /* overflow - set infinity as result */ 140 a.parts.exp = FLOAT32_MAX_EXPONENT; 141 a.parts.fraction = 0; 142 return a; 143 } 138 144 139 145 a.parts.exp = exp1; 140 146 141 147 /* Clear hidden bit and shift */ 142 a.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)) 148 a.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)); 143 149 return a; 144 150 } 145 151 146 /** Add two Float64 numbers with same signs 152 /** 153 * Add two double-precision floats with the same signs. 154 * 155 * @param a First input operand. 156 * @param b Second input operand. 157 * @return Result of addition. 
147 158 */ 148 159 float64 addFloat64(float64 a, float64 b) … … 152 163 uint64_t frac1, frac2; 153 164 154 expdiff = ((int )a.parts.exp) - b.parts.exp;165 expdiff = ((int) a.parts.exp) - b.parts.exp; 155 166 if (expdiff < 0) { 156 167 if (isFloat64NaN(b)) { 157 168 /* TODO: fix SigNaN */ 158 169 if (isFloat64SigNaN(b)) { 159 } ;160 161 return b; 162 } ;170 } 171 172 return b; 173 } 163 174 164 175 /* b is infinity and a not */ 165 if (b.parts.exp == FLOAT64_MAX_EXPONENT 176 if (b.parts.exp == FLOAT64_MAX_EXPONENT) { 166 177 return b; 167 178 } … … 176 187 /* TODO: fix SigNaN */ 177 188 if (isFloat64SigNaN(a) || isFloat64SigNaN(b)) { 178 } ;189 } 179 190 return a; 180 } ;191 } 181 192 182 193 /* a is infinity and b not */ 183 if (a.parts.exp == FLOAT64_MAX_EXPONENT 194 if (a.parts.exp == FLOAT64_MAX_EXPONENT) { 184 195 return a; 185 196 } … … 189 200 frac2 = b.parts.fraction; 190 201 exp2 = b.parts.exp; 191 } ;202 } 192 203 193 204 if (exp1 == 0) { … … 197 208 /* result is not denormalized */ 198 209 a.parts.exp = 1; 199 } ;210 } 200 211 a.parts.fraction = frac1; 201 212 return a; 202 } ;213 } 203 214 204 215 /* add hidden bit - frac1 is sure not denormalized */ … … 212 223 /* is not denormalized */ 213 224 frac2 |= FLOAT64_HIDDEN_BIT_MASK; 214 } ;225 } 215 226 216 227 /* create some space for rounding */ … … 218 229 frac2 <<= 6; 219 230 220 if (expdiff < (FLOAT64_FRACTION_SIZE + 2) 231 if (expdiff < (FLOAT64_FRACTION_SIZE + 2)) { 221 232 frac2 >>= expdiff; 222 233 frac1 += frac2; … … 227 238 } 228 239 229 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7) 240 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) { 230 241 ++exp1; 231 242 frac1 >>= 1; 232 } ;243 } 233 244 234 245 /* rounding - if first bit after fraction is set then round up */ … … 239 250 ++exp1; 240 251 frac1 >>= 1; 241 } ;252 } 242 253 243 254 if ((exp1 == FLOAT64_MAX_EXPONENT ) || (exp2 > exp1)) { 244 245 246 247 248 255 /* overflow - set infinity as result */ 256 a.parts.exp = FLOAT64_MAX_EXPONENT; 257 
a.parts.fraction = 0; 258 return a; 259 } 249 260 250 261 a.parts.exp = exp1; 251 262 /* Clear hidden bit and shift */ 252 a.parts.fraction = ( (frac1 >> 6 ) & (~FLOAT64_HIDDEN_BIT_MASK)); 253 263 a.parts.fraction = ((frac1 >> 6 ) & (~FLOAT64_HIDDEN_BIT_MASK)); 254 264 return a; 255 265 } 256 266 267 /** 268 * Add two quadruple-precision floats with the same signs. 269 * 270 * @param a First input operand. 271 * @param b Second input operand. 272 * @return Result of addition. 273 */ 274 float128 addFloat128(float128 a, float128 b) 275 { 276 int expdiff; 277 uint32_t exp1, exp2; 278 uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo; 279 280 expdiff = ((int) a.parts.exp) - b.parts.exp; 281 if (expdiff < 0) { 282 if (isFloat128NaN(b)) { 283 /* TODO: fix SigNaN */ 284 if (isFloat128SigNaN(b)) { 285 } 286 287 return b; 288 } 289 290 /* b is infinity and a not */ 291 if (b.parts.exp == FLOAT128_MAX_EXPONENT) { 292 return b; 293 } 294 295 frac1_hi = b.parts.frac_hi; 296 frac1_lo = b.parts.frac_lo; 297 exp1 = b.parts.exp; 298 frac2_hi = a.parts.frac_hi; 299 frac2_lo = a.parts.frac_lo; 300 exp2 = a.parts.exp; 301 expdiff *= -1; 302 } else { 303 if (isFloat128NaN(a)) { 304 /* TODO: fix SigNaN */ 305 if (isFloat128SigNaN(a) || isFloat128SigNaN(b)) { 306 } 307 return a; 308 } 309 310 /* a is infinity and b not */ 311 if (a.parts.exp == FLOAT128_MAX_EXPONENT) { 312 return a; 313 } 314 315 frac1_hi = a.parts.frac_hi; 316 frac1_lo = a.parts.frac_lo; 317 exp1 = a.parts.exp; 318 frac2_hi = b.parts.frac_hi; 319 frac2_lo = b.parts.frac_lo; 320 exp2 = b.parts.exp; 321 } 322 323 if (exp1 == 0) { 324 /* both are denormalized */ 325 add128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &frac1_hi, &frac1_lo); 326 327 and128(frac1_hi, frac1_lo, 328 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 329 &tmp_hi, &tmp_lo); 330 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 331 /* result is not denormalized */ 332 a.parts.exp = 1; 333 } 334 335 a.parts.frac_hi = frac1_hi; 336 
a.parts.frac_lo = frac1_lo; 337 return a; 338 } 339 340 /* add hidden bit - frac1 is sure not denormalized */ 341 or128(frac1_hi, frac1_lo, 342 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 343 &frac1_hi, &frac1_lo); 344 345 /* second operand ... */ 346 if (exp2 == 0) { 347 /* ... is denormalized */ 348 --expdiff; 349 } else { 350 /* is not denormalized */ 351 or128(frac2_hi, frac2_lo, 352 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 353 &frac2_hi, &frac2_lo); 354 } 355 356 /* create some space for rounding */ 357 lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo); 358 lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo); 359 360 if (expdiff < (FLOAT128_FRACTION_SIZE + 2)) { 361 rshift128(frac2_hi, frac2_lo, expdiff, &frac2_hi, &frac2_lo); 362 add128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &frac1_hi, &frac1_lo); 363 } else { 364 a.parts.exp = exp1; 365 366 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo); 367 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 368 &tmp_hi, &tmp_lo); 369 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 370 371 a.parts.frac_hi = tmp_hi; 372 a.parts.frac_lo = tmp_lo; 373 return a; 374 } 375 376 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7, 377 &tmp_hi, &tmp_lo); 378 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 379 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 380 ++exp1; 381 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo); 382 } 383 384 /* rounding - if first bit after fraction is set then round up */ 385 add128(frac1_hi, frac1_lo, 0x0ll, 0x1ll << 5, &frac1_hi, &frac1_lo); 386 387 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7, 388 &tmp_hi, &tmp_lo); 389 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 390 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 391 /* rounding overflow */ 392 ++exp1; 393 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo); 394 } 395 396 if ((exp1 == 
FLOAT128_MAX_EXPONENT ) || (exp2 > exp1)) { 397 /* overflow - set infinity as result */ 398 a.parts.exp = FLOAT64_MAX_EXPONENT; 399 a.parts.frac_hi = 0; 400 a.parts.frac_lo = 0; 401 return a; 402 } 403 404 a.parts.exp = exp1; 405 406 /* Clear hidden bit and shift */ 407 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo); 408 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 409 &tmp_hi, &tmp_lo); 410 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 411 412 a.parts.frac_hi = tmp_hi; 413 a.parts.frac_lo = tmp_lo; 414 415 return a; 416 } 417 257 418 /** @} 258 419 */ -
uspace/lib/softfloat/generic/common.c
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Common helper operations. 33 34 */ 34 35 … … 36 37 #include <common.h> 37 38 38 /* Table for fast leading zeroes counting */39 /* Table for fast leading zeroes counting. */ 39 40 char zeroTable[256] = { 40 41 8, 7, 7, 6, 6, 6, 6, 4, 4, 4, 4, 4, 4, 4, 4, \ … … 56 57 }; 57 58 58 59 60 /** Take fraction shifted by 10 bits to left, round it, normalize it and detect exceptions 61 * @param cexp exponent with bias 62 * @param cfrac fraction shifted 10 places left with added hidden bit 63 * @param sign 64 * @return valied float64 59 /** 60 * Take fraction shifted by 10 bits to the left, round it, normalize it 61 * and detect exceptions 62 * 63 * @param cexp Exponent with bias. 64 * @param cfrac Fraction shifted 10 bits to the left with added hidden bit. 65 * @param sign Resulting sign. 66 * @return Finished double-precision float. 
65 67 */ 66 68 float64 finishFloat64(int32_t cexp, uint64_t cfrac, char sign) … … 71 73 72 74 /* find first nonzero digit and shift result and detect possibly underflow */ 73 while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 ) )))) { 75 while ((cexp > 0) && (cfrac) && 76 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))))) { 74 77 cexp--; 75 78 cfrac <<= 1; 76 /* TODO: fix underflow */ 77 }; 78 79 if ((cexp < 0) || ( cexp == 0 && (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) { 79 /* TODO: fix underflow */ 80 } 81 82 if ((cexp < 0) || (cexp == 0 && 83 (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))))) { 80 84 /* FIXME: underflow */ 81 85 result.parts.exp = 0; … … 93 97 94 98 if (!(cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1)))) { 95 96 result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2) ) & (~FLOAT64_HIDDEN_BIT_MASK));99 result.parts.fraction = 100 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK)); 97 101 return result; 98 102 } … … 103 107 ++cexp; 104 108 105 if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1 109 if (cfrac & (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 1))) { 106 110 ++cexp; 107 111 cfrac >>= 1; … … 109 113 110 114 /* check overflow */ 111 if (cexp >= FLOAT64_MAX_EXPONENT 115 if (cexp >= FLOAT64_MAX_EXPONENT) { 112 116 /* FIXME: overflow, return infinity */ 113 117 result.parts.exp = FLOAT64_MAX_EXPONENT; … … 116 120 } 117 121 118 result.parts.exp = (uint32_t)cexp; 119 120 result.parts.fraction = ((cfrac >>(64 - FLOAT64_FRACTION_SIZE - 2 ) ) & (~FLOAT64_HIDDEN_BIT_MASK)); 122 result.parts.exp = (uint32_t) cexp; 123 124 result.parts.fraction = 125 ((cfrac >> (64 - FLOAT64_FRACTION_SIZE - 2)) & (~FLOAT64_HIDDEN_BIT_MASK)); 121 126 122 127 return result; 123 128 } 124 129 125 /** Counts leading zeroes in 64bit unsigned 
integer 126 * @param i 130 /** 131 * Take fraction, round it, normalize it and detect exceptions 132 * 133 * @param cexp Exponent with bias. 134 * @param cfrac_hi High part of the fraction shifted 14 bits to the left 135 * with added hidden bit. 136 * @param cfrac_lo Low part of the fraction shifted 14 bits to the left 137 * with added hidden bit. 138 * @param sign Resulting sign. 139 * @param shift_out Bits right-shifted out from fraction by the caller. 140 * @return Finished quadruple-precision float. 141 */ 142 float128 finishFloat128(int32_t cexp, uint64_t cfrac_hi, uint64_t cfrac_lo, 143 char sign, uint64_t shift_out) 144 { 145 float128 result; 146 uint64_t tmp_hi, tmp_lo; 147 148 result.parts.sign = sign; 149 150 /* find first nonzero digit and shift result and detect possibly underflow */ 151 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 152 1, &tmp_hi, &tmp_lo); 153 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 154 while ((cexp > 0) && (lt128(0x0ll, 0x0ll, cfrac_hi, cfrac_lo)) && 155 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) { 156 cexp--; 157 lshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo); 158 /* TODO: fix underflow */ 159 160 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 161 1, &tmp_hi, &tmp_lo); 162 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 163 } 164 165 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 166 1, &tmp_hi, &tmp_lo); 167 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 168 if ((cexp < 0) || (cexp == 0 && 169 (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)))) { 170 /* FIXME: underflow */ 171 result.parts.exp = 0; 172 if ((cexp + FLOAT128_FRACTION_SIZE + 1) < 0) { /* +1 is place for rounding */ 173 result.parts.frac_hi = 0x0ll; 174 result.parts.frac_lo = 0x0ll; 175 return result; 176 } 177 178 while (cexp < 0) { 179 cexp++; 180 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo); 181 } 182 183 if (shift_out & (0x1ull < 64)) { 184 
add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo); 185 } 186 187 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 188 1, &tmp_hi, &tmp_lo); 189 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 190 if (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 191 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 192 &tmp_hi, &tmp_lo); 193 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 194 result.parts.frac_hi = tmp_hi; 195 result.parts.frac_lo = tmp_lo; 196 return result; 197 } 198 } else { 199 if (shift_out & (0x1ull < 64)) { 200 add128(cfrac_hi, cfrac_lo, 0x0ll, 0x1ll, &cfrac_hi, &cfrac_lo); 201 } 202 } 203 204 ++cexp; 205 206 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 207 1, &tmp_hi, &tmp_lo); 208 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 209 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 210 ++cexp; 211 rshift128(cfrac_hi, cfrac_lo, 1, &cfrac_hi, &cfrac_lo); 212 } 213 214 /* check overflow */ 215 if (cexp >= FLOAT128_MAX_EXPONENT) { 216 /* FIXME: overflow, return infinity */ 217 result.parts.exp = FLOAT128_MAX_EXPONENT; 218 result.parts.frac_hi = 0x0ll; 219 result.parts.frac_lo = 0x0ll; 220 return result; 221 } 222 223 result.parts.exp = (uint32_t) cexp; 224 225 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 226 &tmp_hi, &tmp_lo); 227 and128(cfrac_hi, cfrac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 228 result.parts.frac_hi = tmp_hi; 229 result.parts.frac_lo = tmp_lo; 230 231 return result; 232 } 233 234 /** 235 * Counts leading zeroes in byte. 236 * 237 * @param i Byte for which to count leading zeroes. 238 * @return Number of detected leading zeroes. 239 */ 240 int countZeroes8(uint8_t i) 241 { 242 return zeroTable[i]; 243 } 244 245 /** 246 * Counts leading zeroes in 32bit unsigned integer. 247 * 248 * @param i Integer for which to count leading zeroes. 249 * @return Number of detected leading zeroes. 
250 */ 251 int countZeroes32(uint32_t i) 252 { 253 int j; 254 for (j = 0; j < 32; j += 8) { 255 if (i & (0xFF << (24 - j))) { 256 return (j + countZeroes8(i >> (24 - j))); 257 } 258 } 259 260 return 32; 261 } 262 263 /** 264 * Counts leading zeroes in 64bit unsigned integer. 265 * 266 * @param i Integer for which to count leading zeroes. 267 * @return Number of detected leading zeroes. 127 268 */ 128 269 int countZeroes64(uint64_t i) 129 270 { 130 271 int j; 131 for (j = 0; j < 64; j += 8) {132 if ( 272 for (j = 0; j < 64; j += 8) { 273 if (i & (0xFFll << (56 - j))) { 133 274 return (j + countZeroes8(i >> (56 - j))); 134 275 } … … 138 279 } 139 280 140 /** Counts leading zeroes in 32bit unsigned integer 141 * @param i 142 */ 143 int countZeroes32(uint32_t i) 144 { 145 int j; 146 for (j =0; j < 32; j += 8) { 147 if ( i & (0xFF << (24 - j))) { 148 return (j + countZeroes8(i >> (24 - j))); 149 } 150 } 151 152 return 32; 153 } 154 155 /** Counts leading zeroes in byte 156 * @param i 157 */ 158 int countZeroes8(uint8_t i) 159 { 160 return zeroTable[i]; 161 } 162 163 /** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 30. bit 164 * @param exp exponent 165 * @param fraction part with hidden bit shifted to 30. bit 281 /** 282 * Round and normalize number expressed by exponent and fraction with 283 * first bit (equal to hidden bit) at 30th bit. 284 * 285 * @param exp Exponent part. 286 * @param fraction Fraction with hidden bit shifted to 30th bit. 
166 287 */ 167 288 void roundFloat32(int32_t *exp, uint32_t *fraction) 168 289 { 169 290 /* rounding - if first bit after fraction is set then round up */ 170 (*fraction) += (0x1 << 6); 171 172 if ((*fraction) & (FLOAT32_HIDDEN_BIT_MASK << 8)) { 291 (*fraction) += (0x1 << (32 - FLOAT32_FRACTION_SIZE - 3)); 292 293 if ((*fraction) & 294 (FLOAT32_HIDDEN_BIT_MASK << (32 - FLOAT32_FRACTION_SIZE - 1))) { 173 295 /* rounding overflow */ 174 296 ++(*exp); 175 297 (*fraction) >>= 1; 176 } ;177 178 if (((*exp) >= FLOAT32_MAX_EXPONENT 298 } 299 300 if (((*exp) >= FLOAT32_MAX_EXPONENT) || ((*exp) < 0)) { 179 301 /* overflow - set infinity as result */ 180 302 (*exp) = FLOAT32_MAX_EXPONENT; 181 303 (*fraction) = 0; 182 return;183 184 185 return; 186 } 187 188 /** Round and normalize number expressed by exponent and fraction with first bit (equal to hidden bit) at 62. bit 189 * @param exp exponent190 * @param fraction part with hidden bit shifted to 62. bit304 } 305 } 306 307 /** 308 * Round and normalize number expressed by exponent and fraction with 309 * first bit (equal to hidden bit) at 62nd bit. 310 * 311 * @param exp Exponent part. 312 * @param fraction Fraction with hidden bit shifted to 62nd bit. 
191 313 */ 192 314 void roundFloat64(int32_t *exp, uint64_t *fraction) 193 315 { 194 316 /* rounding - if first bit after fraction is set then round up */ 195 (*fraction) += (0x1 << 9); 196 197 if ((*fraction) & (FLOAT64_HIDDEN_BIT_MASK << 11)) { 317 (*fraction) += (0x1 << (64 - FLOAT64_FRACTION_SIZE - 3)); 318 319 if ((*fraction) & 320 (FLOAT64_HIDDEN_BIT_MASK << (64 - FLOAT64_FRACTION_SIZE - 3))) { 198 321 /* rounding overflow */ 199 322 ++(*exp); 200 323 (*fraction) >>= 1; 201 } ;202 203 if (((*exp) >= FLOAT64_MAX_EXPONENT 324 } 325 326 if (((*exp) >= FLOAT64_MAX_EXPONENT) || ((*exp) < 0)) { 204 327 /* overflow - set infinity as result */ 205 328 (*exp) = FLOAT64_MAX_EXPONENT; 206 329 (*fraction) = 0; 207 return; 208 } 209 210 return; 330 } 331 } 332 333 /** 334 * Round and normalize number expressed by exponent and fraction with 335 * first bit (equal to hidden bit) at 126th bit. 336 * 337 * @param exp Exponent part. 338 * @param frac_hi High part of fraction part with hidden bit shifted to 126th bit. 339 * @param frac_lo Low part of fraction part with hidden bit shifted to 126th bit. 
340 */ 341 void roundFloat128(int32_t *exp, uint64_t *frac_hi, uint64_t *frac_lo) 342 { 343 uint64_t tmp_hi, tmp_lo; 344 345 /* rounding - if first bit after fraction is set then round up */ 346 lshift128(0x0ll, 0x1ll, (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo); 347 add128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, frac_hi, frac_lo); 348 349 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 350 (128 - FLOAT128_FRACTION_SIZE - 3), &tmp_hi, &tmp_lo); 351 and128(*frac_hi, *frac_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 352 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 353 /* rounding overflow */ 354 ++(*exp); 355 rshift128(*frac_hi, *frac_lo, 1, frac_hi, frac_lo); 356 } 357 358 if (((*exp) >= FLOAT128_MAX_EXPONENT) || ((*exp) < 0)) { 359 /* overflow - set infinity as result */ 360 (*exp) = FLOAT128_MAX_EXPONENT; 361 (*frac_hi) = 0; 362 (*frac_lo) = 0; 363 } 364 } 365 366 /** 367 * Logical shift left on the 128-bit operand. 368 * 369 * @param a_hi High part of the input operand. 370 * @param a_lo Low part of the input operand. 371 * @param shift Number of bits by witch to shift. 372 * @param r_hi Address to store high part of the result. 373 * @param r_lo Address to store low part of the result. 374 */ 375 void lshift128( 376 uint64_t a_hi, uint64_t a_lo, int shift, 377 uint64_t *r_hi, uint64_t *r_lo) 378 { 379 if (shift <= 0) { 380 /* do nothing */ 381 } else if (shift >= 128) { 382 a_hi = 0; 383 a_lo = 0; 384 } else if (shift >= 64) { 385 a_hi = a_lo << (shift - 64); 386 a_lo = 0; 387 } else { 388 a_hi <<= shift; 389 a_hi |= a_lo >> (64 - shift); 390 a_lo <<= shift; 391 } 392 393 *r_hi = a_hi; 394 *r_lo = a_lo; 395 } 396 397 /** 398 * Logical shift right on the 128-bit operand. 399 * 400 * @param a_hi High part of the input operand. 401 * @param a_lo Low part of the input operand. 402 * @param shift Number of bits by witch to shift. 403 * @param r_hi Address to store high part of the result. 404 * @param r_lo Address to store low part of the result. 
/**
 * Logical shift right on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param shift Number of bits by which to shift. Values <= 0 leave the
 *              operand unchanged, values >= 128 yield zero.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void rshift128(
    uint64_t a_hi, uint64_t a_lo, int shift,
    uint64_t *r_hi, uint64_t *r_lo)
{
	if (shift <= 0) {
		/* do nothing */
	} else if (shift >= 128) {
		a_hi = 0;
		a_lo = 0;
	} else if (shift >= 64) {
		a_lo = a_hi >> (shift - 64);
		a_hi = 0;
	} else {
		/* 0 < shift < 64, so both partial shifts are well defined */
		a_lo >>= shift;
		a_lo |= a_hi << (64 - shift);
		a_hi >>= shift;
	}

	*r_hi = a_hi;
	*r_lo = a_lo;
}

/**
 * Bitwise AND on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void and128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = a_hi & b_hi;
	*r_lo = a_lo & b_lo;
}

/**
 * Bitwise inclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void or128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = a_hi | b_hi;
	*r_lo = a_lo | b_lo;
}

/**
 * Bitwise exclusive OR on 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void xor128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = a_hi ^ b_hi;
	*r_lo = a_lo ^ b_lo;
}

/**
 * Bitwise NOT on the 128-bit operand.
 *
 * @param a_hi High part of the input operand.
 * @param a_lo Low part of the input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void not128(
    uint64_t a_hi, uint64_t a_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_hi = ~a_hi;
	*r_lo = ~a_lo;
}

/**
 * Equality comparison of 128-bit operands.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if operands are equal, 0 otherwise.
 */
int eq128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	return (a_hi == b_hi) && (a_lo == b_lo);
}

/**
 * Lower-or-equal comparison of 128-bit operands (unsigned).
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower or equal to b, 0 otherwise.
 */
int le128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo <= b_lo));
}

/**
 * Lower-than comparison of 128-bit operands (unsigned).
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @return 1 if a is lower than b, 0 otherwise.
 */
int lt128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo)
{
	return (a_hi < b_hi) || ((a_hi == b_hi) && (a_lo < b_lo));
}

/**
 * Addition of two 128-bit unsigned integers.
 *
 * The result wraps around modulo 2^128; a carry out of the high part is
 * discarded.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void add128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t low = a_lo + b_lo;
	*r_lo = low;
	/* detect overflow to add a carry */
	*r_hi = a_hi + b_hi + (low < a_lo);
}

/**
 * Subtraction of two 128-bit unsigned integers.
 *
 * The result wraps around modulo 2^128 when b is greater than a.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void sub128(uint64_t a_hi, uint64_t a_lo,
    uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hi, uint64_t *r_lo)
{
	*r_lo = a_lo - b_lo;
	/* detect underflow to subtract a borrow */
	*r_hi = a_hi - b_hi - (a_lo < b_lo);
}

/**
 * Multiplication of two 64-bit unsigned integers.
 *
 * The operands are split into 32-bit halves and the four partial products
 * are combined into the full 128-bit result.
 *
 * @param a First input operand.
 * @param b Second input operand.
 * @param r_hi Address to store high part of the result.
 * @param r_lo Address to store low part of the result.
 */
void mul64(uint64_t a, uint64_t b, uint64_t *r_hi, uint64_t *r_lo)
{
	uint64_t low, high, middle1, middle2;
	uint32_t alow, blow;

	alow = a & 0xFFFFFFFF;
	blow = b & 0xFFFFFFFF;

	a >>= 32;
	b >>= 32;

	low = ((uint64_t) alow) * blow;
	middle1 = a * blow;
	middle2 = alow * b;
	high = a * b;

	/* sum of the two middle products may carry into bit 64 */
	middle1 += middle2;
	high += (((uint64_t) (middle1 < middle2)) << 32) + (middle1 >> 32);
	middle1 <<= 32;
	low += middle1;
	high += (low < middle1);
	*r_lo = low;
	*r_hi = high;
}

/**
 * Multiplication of two 128-bit unsigned integers.
 *
 * @param a_hi High part of the first input operand.
 * @param a_lo Low part of the first input operand.
 * @param b_hi High part of the second input operand.
 * @param b_lo Low part of the second input operand.
 * @param r_hihi Address to store first (highest) quarter of the result.
 * @param r_hilo Address to store second quarter of the result.
 * @param r_lohi Address to store third quarter of the result.
 * @param r_lolo Address to store fourth (lowest) quarter of the result.
 */
void mul128(uint64_t a_hi, uint64_t a_lo, uint64_t b_hi, uint64_t b_lo,
    uint64_t *r_hihi, uint64_t *r_hilo, uint64_t *r_lohi, uint64_t *r_lolo)
{
	uint64_t hihi, hilo, lohi, lolo;
	uint64_t tmp1, tmp2;

	/* accumulate the four 64x64 partial products, propagating carries */
	mul64(a_lo, b_lo, &lohi, &lolo);
	mul64(a_lo, b_hi, &hilo, &tmp2);
	add128(hilo, tmp2, 0x0ll, lohi, &hilo, &lohi);
	mul64(a_hi, b_hi, &hihi, &tmp1);
	add128(hihi, tmp1, 0x0ll, hilo, &hihi, &hilo);
	mul64(a_hi, b_lo, &tmp1, &tmp2);
	add128(tmp1, tmp2, 0x0ll, lohi, &tmp1, &lohi);
	add128(hihi, hilo, 0x0ll, tmp1, &hihi, &hilo);

	*r_hihi = hihi;
	*r_hilo = hilo;
	*r_lohi = lohi;
	*r_lolo = lolo;
}

/**
 * Estimate the quotient of 128-bit unsigned dividend and 64-bit unsigned
 * divisor.
 *
 * When the divisor is not greater than the high part of the dividend the
 * quotient does not fit into 64 bits and the all-ones value is returned.
 *
 * NOTE(review): the algorithm appears to follow Berkeley SoftFloat's
 * estimateDiv128To64(); the accuracy guarantees of the estimate cannot be
 * confirmed from this file alone.
 *
 * @param a_hi High part of the dividend.
 * @param a_lo Low part of the dividend.
 * @param b Divisor.
 * @return Quotient approximation.
 */
uint64_t div128est(uint64_t a_hi, uint64_t a_lo, uint64_t b)
{
	uint64_t b_hi, b_lo;
	uint64_t rem_hi, rem_lo;
	uint64_t tmp_hi, tmp_lo;
	uint64_t result;

	if (b <= a_hi) {
		return 0xFFFFFFFFFFFFFFFFull;
	}

	b_hi = b >> 32;
	/* b * 2^32 split into (b_hi, b_lo); constant across the loop below */
	b_lo = b << 32;

	/* upper 32 bits of the estimate (no division when b_hi is zero) */
	result = ((b_hi << 32) <= a_hi) ? (0xFFFFFFFFull << 32) : (a_hi / b_hi) << 32;
	mul64(b, result, &tmp_hi, &tmp_lo);
	sub128(a_hi, a_lo, tmp_hi, tmp_lo, &rem_hi, &rem_lo);

	/* estimate was too high - step it down until the remainder is >= 0 */
	while ((int64_t) rem_hi < 0) {
		result -= 0x1ll << 32;
		add128(rem_hi, rem_lo, b_hi, b_lo, &rem_hi, &rem_lo);
	}

	/* lower 32 bits of the estimate */
	rem_hi = (rem_hi << 32) | (rem_lo >> 32);
	if ((b_hi << 32) <= rem_hi) {
		result |= 0xFFFFFFFF;
	} else {
		result |= rem_hi / b_hi;
	}

	return result;
}
uspace/lib/softfloat/generic/comparison.c
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Comparison functions. 33 34 */ 34 35 35 36 #include <sftypes.h> 36 37 #include <comparison.h> 37 38 /* NaN : exp = 0xff and nonzero fraction */ 38 #include <common.h> 39 40 /** 41 * Determines whether the given float represents NaN (either signalling NaN or 42 * quiet NaN). 43 * 44 * @param f Single-precision float. 45 * @return 1 if float is NaN, 0 otherwise. 46 */ 39 47 int isFloat32NaN(float32 f) 40 48 { 49 /* NaN : exp = 0xff and nonzero fraction */ 41 50 return ((f.parts.exp == 0xFF) && (f.parts.fraction)); 42 51 } 43 52 44 /* NaN : exp = 0x7ff and nonzero fraction */ 53 /** 54 * Determines whether the given float represents NaN (either signalling NaN or 55 * quiet NaN). 56 * 57 * @param d Double-precision float. 58 * @return 1 if float is NaN, 0 otherwise. 59 */ 45 60 int isFloat64NaN(float64 d) 46 61 { 62 /* NaN : exp = 0x7ff and nonzero fraction */ 47 63 return ((d.parts.exp == 0x7FF) && (d.parts.fraction)); 48 64 } 49 65 50 /* SigNaN : exp = 0xff fraction = 0xxxxx..x (binary), where at least one x is nonzero */ 66 /** 67 * Determines whether the given float represents NaN (either signalling NaN or 68 * quiet NaN). 69 * 70 * @param ld Quadruple-precision float. 71 * @return 1 if float is NaN, 0 otherwise. 72 */ 73 int isFloat128NaN(float128 ld) 74 { 75 /* NaN : exp = 0x7fff and nonzero fraction */ 76 return ((ld.parts.exp == 0x7FF) && 77 !eq128(ld.parts.frac_hi, ld.parts.frac_lo, 0x0ll, 0x0ll)); 78 } 79 80 /** 81 * Determines whether the given float represents signalling NaN. 82 * 83 * @param f Single-precision float. 84 * @return 1 if float is signalling NaN, 0 otherwise. 
85 */ 51 86 int isFloat32SigNaN(float32 f) 52 87 { 53 return ((f.parts.exp == 0xFF) && (f.parts.fraction < 0x400000) && (f.parts.fraction)); 54 } 55 56 /* SigNaN : exp = 0x7ff fraction = 0xxxxx..x (binary), where at least one x is nonzero */ 88 /* SigNaN : exp = 0xff and fraction = 0xxxxx..x (binary), 89 * where at least one x is nonzero */ 90 return ((f.parts.exp == 0xFF) && 91 (f.parts.fraction < 0x400000) && (f.parts.fraction)); 92 } 93 94 /** 95 * Determines whether the given float represents signalling NaN. 96 * 97 * @param d Double-precision float. 98 * @return 1 if float is signalling NaN, 0 otherwise. 99 */ 57 100 int isFloat64SigNaN(float64 d) 58 101 { 59 return ((d.parts.exp == 0x7FF) && (d.parts.fraction) && (d.parts.fraction < 0x8000000000000ll)); 60 } 61 102 /* SigNaN : exp = 0x7ff and fraction = 0xxxxx..x (binary), 103 * where at least one x is nonzero */ 104 return ((d.parts.exp == 0x7FF) && 105 (d.parts.fraction) && (d.parts.fraction < 0x8000000000000ll)); 106 } 107 108 /** 109 * Determines whether the given float represents signalling NaN. 110 * 111 * @param ld Quadruple-precision float. 112 * @return 1 if float is signalling NaN, 0 otherwise. 113 */ 114 int isFloat128SigNaN(float128 ld) 115 { 116 /* SigNaN : exp = 0x7fff and fraction = 0xxxxx..x (binary), 117 * where at least one x is nonzero */ 118 return ((ld.parts.exp == 0x7FFF) && 119 (ld.parts.frac_hi || ld.parts.frac_lo) && 120 lt128(ld.parts.frac_hi, ld.parts.frac_lo, 0x800000000000ll, 0x0ll)); 121 122 } 123 124 /** 125 * Determines whether the given float represents positive or negative infinity. 126 * 127 * @param f Single-precision float. 128 * @return 1 if float is infinite, 0 otherwise. 129 */ 62 130 int isFloat32Infinity(float32 f) 63 131 { 132 /* NaN : exp = 0x7ff and zero fraction */ 64 133 return ((f.parts.exp == 0xFF) && (f.parts.fraction == 0x0)); 65 134 } 66 135 136 /** 137 * Determines whether the given float represents positive or negative infinity. 
138 * 139 * @param d Double-precision float. 140 * @return 1 if float is infinite, 0 otherwise. 141 */ 67 142 int isFloat64Infinity(float64 d) 68 143 { 144 /* NaN : exp = 0x7ff and zero fraction */ 69 145 return ((d.parts.exp == 0x7FF) && (d.parts.fraction == 0x0)); 70 146 } 71 147 148 /** 149 * Determines whether the given float represents positive or negative infinity. 150 * 151 * @param ld Quadruple-precision float. 152 * @return 1 if float is infinite, 0 otherwise. 153 */ 154 int isFloat128Infinity(float128 ld) 155 { 156 /* NaN : exp = 0x7fff and zero fraction */ 157 return ((ld.parts.exp == 0x7FFF) && 158 eq128(ld.parts.frac_hi, ld.parts.frac_lo, 0x0ll, 0x0ll)); 159 } 160 161 /** 162 * Determines whether the given float represents positive or negative zero. 163 * 164 * @param f Single-precision float. 165 * @return 1 if float is zero, 0 otherwise. 166 */ 72 167 int isFloat32Zero(float32 f) 73 168 { … … 75 170 } 76 171 172 /** 173 * Determines whether the given float represents positive or negative zero. 174 * 175 * @param d Double-precision float. 176 * @return 1 if float is zero, 0 otherwise. 177 */ 77 178 int isFloat64Zero(float64 d) 78 179 { … … 81 182 82 183 /** 83 * @return 1 if both floats are equal - but NaNs are not recognized 184 * Determines whether the given float represents positive or negative zero. 185 * 186 * @param ld Quadruple-precision float. 187 * @return 1 if float is zero, 0 otherwise. 188 */ 189 int isFloat128Zero(float128 ld) 190 { 191 uint64_t tmp_hi; 192 uint64_t tmp_lo; 193 194 and128(ld.binary.hi, ld.binary.lo, 195 0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, &tmp_hi, &tmp_lo); 196 197 return eq128(tmp_hi, tmp_lo, 0x0ll, 0x0ll); 198 } 199 200 /** 201 * Determine whether two floats are equal. NaNs are not recognized. 202 * 203 * @a First single-precision operand. 204 * @b Second single-precision operand. 205 * @return 1 if both floats are equal, 0 otherwise. 
84 206 */ 85 207 int isFloat32eq(float32 a, float32 b) 86 208 { 87 209 /* a equals to b or both are zeros (with any sign) */ 88 return ((a.binary==b.binary) || (((a.binary | b.binary) & 0x7FFFFFFF) == 0)); 89 } 90 91 /** 92 * @return 1 if a < b - but NaNs are not recognized 210 return ((a.binary == b.binary) || 211 (((a.binary | b.binary) & 0x7FFFFFFF) == 0)); 212 } 213 214 /** 215 * Determine whether two floats are equal. NaNs are not recognized. 216 * 217 * @a First double-precision operand. 218 * @b Second double-precision operand. 219 * @return 1 if both floats are equal, 0 otherwise. 220 */ 221 int isFloat64eq(float64 a, float64 b) 222 { 223 /* a equals to b or both are zeros (with any sign) */ 224 return ((a.binary == b.binary) || 225 (((a.binary | b.binary) & 0x7FFFFFFFFFFFFFFFll) == 0)); 226 } 227 228 /** 229 * Determine whether two floats are equal. NaNs are not recognized. 230 * 231 * @a First quadruple-precision operand. 232 * @b Second quadruple-precision operand. 233 * @return 1 if both floats are equal, 0 otherwise. 234 */ 235 int isFloat128eq(float128 a, float128 b) 236 { 237 uint64_t tmp_hi; 238 uint64_t tmp_lo; 239 240 /* both are zeros (with any sign) */ 241 or128(a.binary.hi, a.binary.lo, 242 b.binary.hi, b.binary.lo, &tmp_hi, &tmp_lo); 243 and128(tmp_hi, tmp_lo, 244 0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, &tmp_hi, &tmp_lo); 245 int both_zero = eq128(tmp_hi, tmp_lo, 0x0ll, 0x0ll); 246 247 /* a equals to b */ 248 int are_equal = eq128(a.binary.hi, a.binary.lo, b.binary.hi, b.binary.lo); 249 250 return are_equal || both_zero; 251 } 252 253 /** 254 * Lower-than comparison between two floats. NaNs are not recognized. 255 * 256 * @a First single-precision operand. 257 * @b Second single-precision operand. 258 * @return 1 if a is lower than b, 0 otherwise. 
93 259 */ 94 260 int isFloat32lt(float32 a, float32 b) 95 261 { 96 if (((a.binary | b.binary) & 0x7FFFFFFF) == 0) 262 if (((a.binary | b.binary) & 0x7FFFFFFF) == 0) { 97 263 return 0; /* +- zeroes */ 264 } 98 265 99 if ((a.parts.sign) && (b.parts.sign)) 266 if ((a.parts.sign) && (b.parts.sign)) { 100 267 /* if both are negative, smaller is that with greater binary value */ 101 268 return (a.binary > b.binary); 269 } 102 270 103 /* lets negate signs - now will be positive numbers allways bigger than negative (first bit will be set for unsigned integer comparison) */ 271 /* lets negate signs - now will be positive numbers allways bigger than 272 * negative (first bit will be set for unsigned integer comparison) */ 104 273 a.parts.sign = !a.parts.sign; 105 274 b.parts.sign = !b.parts.sign; … … 108 277 109 278 /** 110 * @return 1 if a > b - but NaNs are not recognized 279 * Lower-than comparison between two floats. NaNs are not recognized. 280 * 281 * @a First double-precision operand. 282 * @b Second double-precision operand. 283 * @return 1 if a is lower than b, 0 otherwise. 284 */ 285 int isFloat64lt(float64 a, float64 b) 286 { 287 if (((a.binary | b.binary) & 0x7FFFFFFFFFFFFFFFll) == 0) { 288 return 0; /* +- zeroes */ 289 } 290 291 if ((a.parts.sign) && (b.parts.sign)) { 292 /* if both are negative, smaller is that with greater binary value */ 293 return (a.binary > b.binary); 294 } 295 296 /* lets negate signs - now will be positive numbers allways bigger than 297 * negative (first bit will be set for unsigned integer comparison) */ 298 a.parts.sign = !a.parts.sign; 299 b.parts.sign = !b.parts.sign; 300 return (a.binary < b.binary); 301 } 302 303 /** 304 * Lower-than comparison between two floats. NaNs are not recognized. 305 * 306 * @a First quadruple-precision operand. 307 * @b Second quadruple-precision operand. 308 * @return 1 if a is lower than b, 0 otherwise. 
309 */ 310 int isFloat128lt(float128 a, float128 b) 311 { 312 uint64_t tmp_hi; 313 uint64_t tmp_lo; 314 315 or128(a.binary.hi, a.binary.lo, 316 b.binary.hi, b.binary.lo, &tmp_hi, &tmp_lo); 317 and128(tmp_hi, tmp_lo, 318 0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, &tmp_hi, &tmp_lo); 319 if (eq128(tmp_hi, tmp_lo, 0x0ll, 0x0ll)) { 320 return 0; /* +- zeroes */ 321 } 322 323 if ((a.parts.sign) && (b.parts.sign)) { 324 /* if both are negative, smaller is that with greater binary value */ 325 return lt128(b.binary.hi, b.binary.lo, a.binary.hi, a.binary.lo); 326 } 327 328 /* lets negate signs - now will be positive numbers allways bigger than 329 * negative (first bit will be set for unsigned integer comparison) */ 330 a.parts.sign = !a.parts.sign; 331 b.parts.sign = !b.parts.sign; 332 return lt128(a.binary.hi, a.binary.lo, b.binary.hi, b.binary.lo); 333 } 334 335 /** 336 * Greater-than comparison between two floats. NaNs are not recognized. 337 * 338 * @a First single-precision operand. 339 * @b Second single-precision operand. 340 * @return 1 if a is greater than b, 0 otherwise. 111 341 */ 112 342 int isFloat32gt(float32 a, float32 b) 113 343 { 114 if (((a.binary | b.binary) & 0x7FFFFFFF) == 0) 344 if (((a.binary | b.binary) & 0x7FFFFFFF) == 0) { 115 345 return 0; /* zeroes are equal with any sign */ 346 } 116 347 117 if ((a.parts.sign) && (b.parts.sign)) 348 if ((a.parts.sign) && (b.parts.sign)) { 118 349 /* if both are negative, greater is that with smaller binary value */ 119 350 return (a.binary < b.binary); 351 } 120 352 121 /* lets negate signs - now will be positive numbers allways bigger than negative (first bit will be set for unsigned integer comparison) */ 353 /* lets negate signs - now will be positive numbers allways bigger than 354 * negative (first bit will be set for unsigned integer comparison) */ 122 355 a.parts.sign = !a.parts.sign; 123 356 b.parts.sign = !b.parts.sign; … … 125 358 } 126 359 360 /** 361 * Greater-than comparison between two floats. 
NaNs are not recognized. 362 * 363 * @a First double-precision operand. 364 * @b Second double-precision operand. 365 * @return 1 if a is greater than b, 0 otherwise. 366 */ 367 int isFloat64gt(float64 a, float64 b) 368 { 369 if (((a.binary | b.binary) & 0x7FFFFFFFFFFFFFFFll) == 0) { 370 return 0; /* zeroes are equal with any sign */ 371 } 372 373 if ((a.parts.sign) && (b.parts.sign)) { 374 /* if both are negative, greater is that with smaller binary value */ 375 return (a.binary < b.binary); 376 } 377 378 /* lets negate signs - now will be positive numbers allways bigger than 379 * negative (first bit will be set for unsigned integer comparison) */ 380 a.parts.sign = !a.parts.sign; 381 b.parts.sign = !b.parts.sign; 382 return (a.binary > b.binary); 383 } 384 385 /** 386 * Greater-than comparison between two floats. NaNs are not recognized. 387 * 388 * @a First quadruple-precision operand. 389 * @b Second quadruple-precision operand. 390 * @return 1 if a is greater than b, 0 otherwise. 391 */ 392 int isFloat128gt(float128 a, float128 b) 393 { 394 uint64_t tmp_hi; 395 uint64_t tmp_lo; 396 397 or128(a.binary.hi, a.binary.lo, 398 b.binary.hi, b.binary.lo, &tmp_hi, &tmp_lo); 399 and128(tmp_hi, tmp_lo, 400 0x7FFFFFFFFFFFFFFFll, 0xFFFFFFFFFFFFFFFFll, &tmp_hi, &tmp_lo); 401 if (eq128(tmp_hi, tmp_lo, 0x0ll, 0x0ll)) { 402 return 0; /* zeroes are equal with any sign */ 403 } 404 405 if ((a.parts.sign) && (b.parts.sign)) { 406 /* if both are negative, greater is that with smaller binary value */ 407 return lt128(a.binary.hi, a.binary.lo, b.binary.hi, b.binary.lo); 408 } 409 410 /* lets negate signs - now will be positive numbers allways bigger than 411 * negative (first bit will be set for unsigned integer comparison) */ 412 a.parts.sign = !a.parts.sign; 413 b.parts.sign = !b.parts.sign; 414 return lt128(b.binary.hi, b.binary.lo, a.binary.hi, a.binary.lo); 415 } 416 127 417 /** @} 128 418 */ -
uspace/lib/softfloat/generic/conversion.c
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 */ 34 35 #include "sftypes.h"36 #include "conversion.h"37 #include "comparison.h"38 #include "common.h"33 /** @file Conversion of precision and conversion between integers and floats. 34 */ 35 36 #include <sftypes.h> 37 #include <conversion.h> 38 #include <comparison.h> 39 #include <common.h> 39 40 40 41 float64 convertFloat32ToFloat64(float32 a) … … 48 49 49 50 if ((isFloat32Infinity(a)) || (isFloat32NaN(a))) { 50 result.parts.exp = 0x7FF;51 result.parts.exp = FLOAT64_MAX_EXPONENT; 51 52 /* TODO; check if its correct for SigNaNs*/ 52 53 return result; 53 } ;54 } 54 55 55 56 result.parts.exp = a.parts.exp + ((int) FLOAT64_BIAS - FLOAT32_BIAS); … … 57 58 /* normalize denormalized numbers */ 58 59 59 if (result.parts.fraction == 0 ll) { /* fix zero */60 result.parts.exp = 0 ll;60 if (result.parts.fraction == 0) { /* fix zero */ 61 result.parts.exp = 0; 61 62 return result; 62 63 } … … 64 65 frac = result.parts.fraction; 65 66 66 while (!(frac & (0x10000000000000ll))) {67 while (!(frac & FLOAT64_HIDDEN_BIT_MASK)) { 67 68 frac <<= 1; 68 69 --result.parts.exp; 69 } ;70 } 70 71 71 72 ++result.parts.exp; 72 73 result.parts.fraction = frac; 73 }; 74 75 return result; 76 74 } 75 76 return result; 77 } 78 79 float128 convertFloat32ToFloat128(float32 a) 80 { 81 float128 result; 82 uint64_t frac_hi, frac_lo; 83 uint64_t tmp_hi, tmp_lo; 84 85 result.parts.sign = a.parts.sign; 86 result.parts.frac_hi = 0; 87 result.parts.frac_lo = a.parts.fraction; 88 lshift128(result.parts.frac_hi, result.parts.frac_lo, 89 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE), 90 &frac_hi, &frac_lo); 91 result.parts.frac_hi = frac_hi; 92 result.parts.frac_lo = frac_lo; 93 94 if ((isFloat32Infinity(a)) || (isFloat32NaN(a))) { 95 result.parts.exp = FLOAT128_MAX_EXPONENT; 96 /* TODO; check if its correct for SigNaNs*/ 97 return result; 
98 } 99 100 result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT32_BIAS); 101 if (a.parts.exp == 0) { 102 /* normalize denormalized numbers */ 103 104 if (eq128(result.parts.frac_hi, 105 result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */ 106 result.parts.exp = 0; 107 return result; 108 } 109 110 frac_hi = result.parts.frac_hi; 111 frac_lo = result.parts.frac_lo; 112 113 and128(frac_hi, frac_lo, 114 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 115 &tmp_hi, &tmp_lo); 116 while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 117 lshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo); 118 --result.parts.exp; 119 } 120 121 ++result.parts.exp; 122 result.parts.frac_hi = frac_hi; 123 result.parts.frac_lo = frac_lo; 124 } 125 126 return result; 127 } 128 129 float128 convertFloat64ToFloat128(float64 a) 130 { 131 float128 result; 132 uint64_t frac_hi, frac_lo; 133 uint64_t tmp_hi, tmp_lo; 134 135 result.parts.sign = a.parts.sign; 136 result.parts.frac_hi = 0; 137 result.parts.frac_lo = a.parts.fraction; 138 lshift128(result.parts.frac_hi, result.parts.frac_lo, 139 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE), 140 &frac_hi, &frac_lo); 141 result.parts.frac_hi = frac_hi; 142 result.parts.frac_lo = frac_lo; 143 144 if ((isFloat64Infinity(a)) || (isFloat64NaN(a))) { 145 result.parts.exp = FLOAT128_MAX_EXPONENT; 146 /* TODO; check if its correct for SigNaNs*/ 147 return result; 148 } 149 150 result.parts.exp = a.parts.exp + ((int) FLOAT128_BIAS - FLOAT64_BIAS); 151 if (a.parts.exp == 0) { 152 /* normalize denormalized numbers */ 153 154 if (eq128(result.parts.frac_hi, 155 result.parts.frac_lo, 0x0ll, 0x0ll)) { /* fix zero */ 156 result.parts.exp = 0; 157 return result; 158 } 159 160 frac_hi = result.parts.frac_hi; 161 frac_lo = result.parts.frac_lo; 162 163 and128(frac_hi, frac_lo, 164 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 165 &tmp_hi, &tmp_lo); 166 while (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 167 lshift128(frac_hi, frac_lo, 1, &frac_hi, 
&frac_lo); 168 --result.parts.exp; 169 } 170 171 ++result.parts.exp; 172 result.parts.frac_hi = frac_hi; 173 result.parts.frac_lo = frac_lo; 174 } 175 176 return result; 77 177 } 78 178 … … 86 186 87 187 if (isFloat64NaN(a)) { 88 89 result.parts.exp = 0xFF; 188 result.parts.exp = FLOAT32_MAX_EXPONENT; 90 189 91 190 if (isFloat64SigNaN(a)) { 92 result.parts.fraction = 0x400000; /* set first bit of fraction nonzero */ 191 /* set first bit of fraction nonzero */ 192 result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1; 93 193 return result; 94 194 } 95 96 result.parts.fraction = 0x1; /* fraction nonzero but its first bit is zero */ 97 return result; 98 }; 195 196 /* fraction nonzero but its first bit is zero */ 197 result.parts.fraction = 0x1; 198 return result; 199 } 99 200 100 201 if (isFloat64Infinity(a)) { 101 202 result.parts.fraction = 0; 102 result.parts.exp = 0xFF;103 return result; 104 } ;105 106 exp = (int) a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS;107 108 if (exp >= 0xFF) {109 /* FIXME: overflow*/203 result.parts.exp = FLOAT32_MAX_EXPONENT; 204 return result; 205 } 206 207 exp = (int) a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS; 208 209 if (exp >= FLOAT32_MAX_EXPONENT) { 210 /* FIXME: overflow */ 110 211 result.parts.fraction = 0; 111 result.parts.exp = 0xFF; 112 return result; 113 114 } else if (exp <= 0 ) { 115 212 result.parts.exp = FLOAT32_MAX_EXPONENT; 213 return result; 214 } else if (exp <= 0) { 116 215 /* underflow or denormalized */ 117 216 … … 119 218 120 219 exp *= -1; 121 if (exp > FLOAT32_FRACTION_SIZE 220 if (exp > FLOAT32_FRACTION_SIZE) { 122 221 /* FIXME: underflow */ 123 222 result.parts.fraction = 0; 124 223 return result; 125 } ;224 } 126 225 127 226 /* denormalized */ 128 227 129 228 frac = a.parts.fraction; 130 frac |= 0x10000000000000ll; /* denormalize and set hidden bit */229 frac |= FLOAT64_HIDDEN_BIT_MASK; /* denormalize and set hidden bit */ 131 230 132 231 frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1); … … 135 234 
--exp; 136 235 frac >>= 1; 137 } ;236 } 138 237 result.parts.fraction = frac; 139 238 140 239 return result; 141 } ;240 } 142 241 143 242 result.parts.exp = exp; 144 result.parts.fraction = a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE); 145 return result; 146 } 147 148 149 /** Helping procedure for converting float32 to uint32 150 * @param a floating point number in normalized form (no NaNs or Inf are checked ) 151 * @return unsigned integer 243 result.parts.fraction = 244 a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE); 245 return result; 246 } 247 248 float32 convertFloat128ToFloat32(float128 a) 249 { 250 float32 result; 251 int32_t exp; 252 uint64_t frac_hi, frac_lo; 253 254 result.parts.sign = a.parts.sign; 255 256 if (isFloat128NaN(a)) { 257 result.parts.exp = FLOAT32_MAX_EXPONENT; 258 259 if (isFloat128SigNaN(a)) { 260 /* set first bit of fraction nonzero */ 261 result.parts.fraction = FLOAT32_HIDDEN_BIT_MASK >> 1; 262 return result; 263 } 264 265 /* fraction nonzero but its first bit is zero */ 266 result.parts.fraction = 0x1; 267 return result; 268 } 269 270 if (isFloat128Infinity(a)) { 271 result.parts.fraction = 0; 272 result.parts.exp = FLOAT32_MAX_EXPONENT; 273 return result; 274 } 275 276 exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT32_BIAS; 277 278 if (exp >= FLOAT32_MAX_EXPONENT) { 279 /* FIXME: overflow */ 280 result.parts.fraction = 0; 281 result.parts.exp = FLOAT32_MAX_EXPONENT; 282 return result; 283 } else if (exp <= 0) { 284 /* underflow or denormalized */ 285 286 result.parts.exp = 0; 287 288 exp *= -1; 289 if (exp > FLOAT32_FRACTION_SIZE) { 290 /* FIXME: underflow */ 291 result.parts.fraction = 0; 292 return result; 293 } 294 295 /* denormalized */ 296 297 frac_hi = a.parts.frac_hi; 298 frac_lo = a.parts.frac_lo; 299 300 /* denormalize and set hidden bit */ 301 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI; 302 303 rshift128(frac_hi, frac_lo, 304 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1), 305 
&frac_hi, &frac_lo); 306 307 while (exp > 0) { 308 --exp; 309 rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo); 310 } 311 result.parts.fraction = frac_lo; 312 313 return result; 314 } 315 316 result.parts.exp = exp; 317 frac_hi = a.parts.frac_hi; 318 frac_lo = a.parts.frac_lo; 319 rshift128(frac_hi, frac_lo, 320 (FLOAT128_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1), 321 &frac_hi, &frac_lo); 322 result.parts.fraction = frac_lo; 323 return result; 324 } 325 326 float64 convertFloat128ToFloat64(float128 a) 327 { 328 float64 result; 329 int32_t exp; 330 uint64_t frac_hi, frac_lo; 331 332 result.parts.sign = a.parts.sign; 333 334 if (isFloat128NaN(a)) { 335 result.parts.exp = FLOAT64_MAX_EXPONENT; 336 337 if (isFloat128SigNaN(a)) { 338 /* set first bit of fraction nonzero */ 339 result.parts.fraction = FLOAT64_HIDDEN_BIT_MASK >> 1; 340 return result; 341 } 342 343 /* fraction nonzero but its first bit is zero */ 344 result.parts.fraction = 0x1; 345 return result; 346 } 347 348 if (isFloat128Infinity(a)) { 349 result.parts.fraction = 0; 350 result.parts.exp = FLOAT64_MAX_EXPONENT; 351 return result; 352 } 353 354 exp = (int) a.parts.exp - FLOAT128_BIAS + FLOAT64_BIAS; 355 356 if (exp >= FLOAT64_MAX_EXPONENT) { 357 /* FIXME: overflow */ 358 result.parts.fraction = 0; 359 result.parts.exp = FLOAT64_MAX_EXPONENT; 360 return result; 361 } else if (exp <= 0) { 362 /* underflow or denormalized */ 363 364 result.parts.exp = 0; 365 366 exp *= -1; 367 if (exp > FLOAT64_FRACTION_SIZE) { 368 /* FIXME: underflow */ 369 result.parts.fraction = 0; 370 return result; 371 } 372 373 /* denormalized */ 374 375 frac_hi = a.parts.frac_hi; 376 frac_lo = a.parts.frac_lo; 377 378 /* denormalize and set hidden bit */ 379 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI; 380 381 rshift128(frac_hi, frac_lo, 382 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1), 383 &frac_hi, &frac_lo); 384 385 while (exp > 0) { 386 --exp; 387 rshift128(frac_hi, frac_lo, 1, &frac_hi, &frac_lo); 388 } 389 
result.parts.fraction = frac_lo; 390 391 return result; 392 } 393 394 result.parts.exp = exp; 395 frac_hi = a.parts.frac_hi; 396 frac_lo = a.parts.frac_lo; 397 rshift128(frac_hi, frac_lo, 398 (FLOAT128_FRACTION_SIZE - FLOAT64_FRACTION_SIZE + 1), 399 &frac_hi, &frac_lo); 400 result.parts.fraction = frac_lo; 401 return result; 402 } 403 404 405 /** 406 * Helping procedure for converting float32 to uint32. 407 * 408 * @param a Floating point number in normalized form 409 * (NaNs or Inf are not checked). 410 * @return Converted unsigned integer. 152 411 */ 153 412 static uint32_t _float32_to_uint32_helper(float32 a) … … 156 415 157 416 if (a.parts.exp < FLOAT32_BIAS) { 158 /* TODO: rounding*/417 /* TODO: rounding */ 159 418 return 0; 160 419 } … … 175 434 } 176 435 177 /* Convert float to unsigned int32436 /* 178 437 * FIXME: Im not sure what to return if overflow/underflow happens 179 438 * - now its the biggest or the smallest int … … 194 453 } 195 454 196 /* Convert float to signed int32455 /* 197 456 * FIXME: Im not sure what to return if overflow/underflow happens 198 457 * - now its the biggest or the smallest int … … 214 473 215 474 216 /** Helping procedure for converting float64 to uint64 217 * @param a floating point number in normalized form (no NaNs or Inf are checked ) 218 * @return unsigned integer 475 /** 476 * Helping procedure for converting float32 to uint64. 477 * 478 * @param a Floating point number in normalized form 479 * (NaNs or Inf are not checked). 480 * @return Converted unsigned integer. 
481 */ 482 static uint64_t _float32_to_uint64_helper(float32 a) 483 { 484 uint64_t frac; 485 486 if (a.parts.exp < FLOAT32_BIAS) { 487 /*TODO: rounding*/ 488 return 0; 489 } 490 491 frac = a.parts.fraction; 492 493 frac |= FLOAT32_HIDDEN_BIT_MASK; 494 /* shift fraction to left so hidden bit will be the most significant bit */ 495 frac <<= 64 - FLOAT32_FRACTION_SIZE - 1; 496 497 frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1; 498 if ((a.parts.sign == 1) && (frac != 0)) { 499 frac = ~frac; 500 ++frac; 501 } 502 503 return frac; 504 } 505 506 /* 507 * FIXME: Im not sure what to return if overflow/underflow happens 508 * - now its the biggest or the smallest int 509 */ 510 uint64_t float32_to_uint64(float32 a) 511 { 512 if (isFloat32NaN(a)) 513 return UINT64_MAX; 514 515 516 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) { 517 if (a.parts.sign) 518 return UINT64_MIN; 519 520 return UINT64_MAX; 521 } 522 523 return _float32_to_uint64_helper(a); 524 } 525 526 /* 527 * FIXME: Im not sure what to return if overflow/underflow happens 528 * - now its the biggest or the smallest int 529 */ 530 int64_t float32_to_int64(float32 a) 531 { 532 if (isFloat32NaN(a)) 533 return INT64_MAX; 534 535 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) { 536 if (a.parts.sign) 537 return INT64_MIN; 538 539 return INT64_MAX; 540 } 541 542 return _float32_to_uint64_helper(a); 543 } 544 545 546 /** 547 * Helping procedure for converting float64 to uint64. 548 * 549 * @param a Floating point number in normalized form 550 * (NaNs or Inf are not checked). 551 * @return Converted unsigned integer. 
219 552 */ 220 553 static uint64_t _float64_to_uint64_helper(float64 a) 221 554 { 222 555 uint64_t frac; 223 556 224 557 if (a.parts.exp < FLOAT64_BIAS) { 225 558 /*TODO: rounding*/ 226 559 return 0; 227 560 } 228 561 229 562 frac = a.parts.fraction; 230 563 231 564 frac |= FLOAT64_HIDDEN_BIT_MASK; 232 565 /* shift fraction to left so hidden bit will be the most significant bit */ 233 frac <<= 64 - FLOAT64_FRACTION_SIZE - 1; 566 frac <<= 64 - FLOAT64_FRACTION_SIZE - 1; 234 567 235 568 frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1; … … 238 571 ++frac; 239 572 } 240 573 241 574 return frac; 242 575 } 243 576 244 /* Convert float to unsigned int64 577 /* 578 * FIXME: Im not sure what to return if overflow/underflow happens 579 * - now its the biggest or the smallest int 580 */ 581 uint32_t float64_to_uint32(float64 a) 582 { 583 if (isFloat64NaN(a)) 584 return UINT32_MAX; 585 586 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 587 if (a.parts.sign) 588 return UINT32_MIN; 589 590 return UINT32_MAX; 591 } 592 593 return (uint32_t) _float64_to_uint64_helper(a); 594 } 595 596 /* 597 * FIXME: Im not sure what to return if overflow/underflow happens 598 * - now its the biggest or the smallest int 599 */ 600 int32_t float64_to_int32(float64 a) 601 { 602 if (isFloat64NaN(a)) 603 return INT32_MAX; 604 605 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 606 if (a.parts.sign) 607 return INT32_MIN; 608 609 return INT32_MAX; 610 } 611 612 return (int32_t) _float64_to_uint64_helper(a); 613 } 614 615 616 /* 245 617 * FIXME: Im not sure what to return if overflow/underflow happens 246 618 * - now its the biggest or the smallest int … … 251 623 return UINT64_MAX; 252 624 253 254 625 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 255 626 if (a.parts.sign) … … 262 633 } 263 634 264 /* Convert float to signed int64635 /* 265 636 * FIXME: Im not sure what to return if overflow/underflow happens 266 637 * - now its the biggest or 
the smallest int … … 271 642 return INT64_MAX; 272 643 273 274 644 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 275 645 if (a.parts.sign) … … 283 653 284 654 285 286 287 288 /** Helping procedure for converting float32 to uint64 289 * @param a floating point number in normalized form (no NaNs or Inf are checked )290 * @return unsigned integer291 */ 292 static uint64_t _float 32_to_uint64_helper(float32a)293 { 294 uint64_t frac ;295 296 if (a.parts.exp < FLOAT 32_BIAS) {655 /** 656 * Helping procedure for converting float128 to uint64. 657 * 658 * @param a Floating point number in normalized form 659 * (NaNs or Inf are not checked). 660 * @return Converted unsigned integer. 661 */ 662 static uint64_t _float128_to_uint64_helper(float128 a) 663 { 664 uint64_t frac_hi, frac_lo; 665 666 if (a.parts.exp < FLOAT128_BIAS) { 297 667 /*TODO: rounding*/ 298 668 return 0; 299 669 } 300 301 frac = a.parts.fraction; 302 303 frac |= FLOAT32_HIDDEN_BIT_MASK; 670 671 frac_hi = a.parts.frac_hi; 672 frac_lo = a.parts.frac_lo; 673 674 frac_hi |= FLOAT128_HIDDEN_BIT_MASK_HI; 304 675 /* shift fraction to left so hidden bit will be the most significant bit */ 305 frac <<= 64 - FLOAT32_FRACTION_SIZE - 1; 306 307 frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1; 308 if ((a.parts.sign == 1) && (frac != 0)) { 309 frac = ~frac; 310 ++frac; 311 } 312 313 return frac; 314 } 315 316 /* Convert float to unsigned int64 317 * FIXME: Im not sure what to return if overflow/underflow happens 318 * - now its the biggest or the smallest int 319 */ 320 uint64_t float32_to_uint64(float32 a) 321 { 322 if (isFloat32NaN(a)) 676 lshift128(frac_hi, frac_lo, 677 (128 - FLOAT128_FRACTION_SIZE - 1), &frac_hi, &frac_lo); 678 679 rshift128(frac_hi, frac_lo, 680 (128 - (a.parts.exp - FLOAT128_BIAS) - 1), &frac_hi, &frac_lo); 681 if ((a.parts.sign == 1) && !eq128(frac_hi, frac_lo, 0x0ll, 0x0ll)) { 682 not128(frac_hi, frac_lo, &frac_hi, &frac_lo); 683 add128(frac_hi, frac_lo, 0x0ll, 0x1ll, 
&frac_hi, &frac_lo); 684 } 685 686 return frac_lo; 687 } 688 689 /* 690 * FIXME: Im not sure what to return if overflow/underflow happens 691 * - now its the biggest or the smallest int 692 */ 693 uint32_t float128_to_uint32(float128 a) 694 { 695 if (isFloat128NaN(a)) 696 return UINT32_MAX; 697 698 if (isFloat128Infinity(a) || (a.parts.exp >= (32 + FLOAT128_BIAS))) { 699 if (a.parts.sign) 700 return UINT32_MIN; 701 702 return UINT32_MAX; 703 } 704 705 return (uint32_t) _float128_to_uint64_helper(a); 706 } 707 708 /* 709 * FIXME: Im not sure what to return if overflow/underflow happens 710 * - now its the biggest or the smallest int 711 */ 712 int32_t float128_to_int32(float128 a) 713 { 714 if (isFloat128NaN(a)) 715 return INT32_MAX; 716 717 if (isFloat128Infinity(a) || (a.parts.exp >= (32 + FLOAT128_BIAS))) { 718 if (a.parts.sign) 719 return INT32_MIN; 720 721 return INT32_MAX; 722 } 723 724 return (int32_t) _float128_to_uint64_helper(a); 725 } 726 727 728 /* 729 * FIXME: Im not sure what to return if overflow/underflow happens 730 * - now its the biggest or the smallest int 731 */ 732 uint64_t float128_to_uint64(float128 a) 733 { 734 if (isFloat128NaN(a)) 323 735 return UINT64_MAX; 324 325 326 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) { 736 737 if (isFloat128Infinity(a) || (a.parts.exp >= (64 + FLOAT128_BIAS))) { 327 738 if (a.parts.sign) 328 739 return UINT64_MIN; 329 740 330 741 return UINT64_MAX; 331 742 } 332 333 return _float 32_to_uint64_helper(a);334 } 335 336 /* Convert float to signed int64337 * FIXME: Im not sure what to return if overflow/underflow happens 338 * - now its the biggest or the smallest int 339 */ 340 int64_t float 32_to_int64(float32a)341 { 342 if (isFloat 32NaN(a))743 744 return _float128_to_uint64_helper(a); 745 } 746 747 /* 748 * FIXME: Im not sure what to return if overflow/underflow happens 749 * - now its the biggest or the smallest int 750 */ 751 int64_t float128_to_int64(float128 a) 752 { 753 if 
(isFloat128NaN(a)) 343 754 return INT64_MAX; 344 345 if (isFloat 32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) {755 756 if (isFloat128Infinity(a) || (a.parts.exp >= (64 + FLOAT128_BIAS))) { 346 757 if (a.parts.sign) 347 758 return INT64_MIN; 348 759 349 760 return INT64_MAX; 350 761 } 351 352 return _float32_to_uint64_helper(a); 353 } 354 355 356 /* Convert float64 to unsigned int32 357 * FIXME: Im not sure what to return if overflow/underflow happens 358 * - now its the biggest or the smallest int 359 */ 360 uint32_t float64_to_uint32(float64 a) 361 { 362 if (isFloat64NaN(a)) 363 return UINT32_MAX; 364 365 366 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 367 if (a.parts.sign) 368 return UINT32_MIN; 369 370 return UINT32_MAX; 371 } 372 373 return (uint32_t) _float64_to_uint64_helper(a); 374 } 375 376 /* Convert float64 to signed int32 377 * FIXME: Im not sure what to return if overflow/underflow happens 378 * - now its the biggest or the smallest int 379 */ 380 int32_t float64_to_int32(float64 a) 381 { 382 if (isFloat64NaN(a)) 383 return INT32_MAX; 384 385 386 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 387 if (a.parts.sign) 388 return INT32_MIN; 389 390 return INT32_MAX; 391 } 392 393 return (int32_t) _float64_to_uint64_helper(a); 394 } 395 396 /** Convert unsigned integer to float32 397 * 398 * 399 */ 762 763 return _float128_to_uint64_helper(a); 764 } 765 766 400 767 float32 uint32_to_float32(uint32_t i) 401 768 { … … 424 791 roundFloat32(&exp, &i); 425 792 426 result.parts.fraction = i >> 7;793 result.parts.fraction = i >> (32 - FLOAT32_FRACTION_SIZE - 2); 427 794 result.parts.exp = exp; 428 795 … … 435 802 436 803 if (i < 0) { 437 result = uint32_to_float32((uint32_t) (-i));804 result = uint32_to_float32((uint32_t) (-i)); 438 805 } else { 439 result = uint32_to_float32((uint32_t) i);806 result = uint32_to_float32((uint32_t) i); 440 807 } 441 808 … … 465 832 } 466 833 467 /* Shift all to the first 31 bits 
(31 . will be hidden 1)*/834 /* Shift all to the first 31 bits (31st will be hidden 1) */ 468 835 if (counter > 33) { 469 836 i <<= counter - 1 - 32; … … 472 839 } 473 840 474 j = (uint32_t) i;841 j = (uint32_t) i; 475 842 roundFloat32(&exp, &j); 476 843 477 result.parts.fraction = j >> 7;844 result.parts.fraction = j >> (32 - FLOAT32_FRACTION_SIZE - 2); 478 845 result.parts.exp = exp; 479 846 return result; … … 485 852 486 853 if (i < 0) { 487 result = uint64_to_float32((uint64_t) (-i));854 result = uint64_to_float32((uint64_t) (-i)); 488 855 } else { 489 result = uint64_to_float32((uint64_t) i);856 result = uint64_to_float32((uint64_t) i); 490 857 } 491 858 … … 495 862 } 496 863 497 /** Convert unsigned integer to float64498 *499 *500 */501 864 float64 uint32_to_float64(uint32_t i) 502 865 { … … 523 886 roundFloat64(&exp, &frac); 524 887 525 result.parts.fraction = frac >> 10;888 result.parts.fraction = frac >> (64 - FLOAT64_FRACTION_SIZE - 2); 526 889 result.parts.exp = exp; 527 890 … … 534 897 535 898 if (i < 0) { 536 result = uint32_to_float64((uint32_t) (-i));899 result = uint32_to_float64((uint32_t) (-i)); 537 900 } else { 538 result = uint32_to_float64((uint32_t) i);901 result = uint32_to_float64((uint32_t) i); 539 902 } 540 903 … … 571 934 roundFloat64(&exp, &i); 572 935 573 result.parts.fraction = i >> 10;936 result.parts.fraction = i >> (64 - FLOAT64_FRACTION_SIZE - 2); 574 937 result.parts.exp = exp; 575 938 return result; … … 581 944 582 945 if (i < 0) { 583 result = uint64_to_float64((uint64_t) (-i));946 result = uint64_to_float64((uint64_t) (-i)); 584 947 } else { 585 result = uint64_to_float64((uint64_t) i);948 result = uint64_to_float64((uint64_t) i); 586 949 } 587 950 … … 591 954 } 592 955 956 957 float128 uint32_to_float128(uint32_t i) 958 { 959 int counter; 960 int32_t exp; 961 float128 result; 962 uint64_t frac_hi, frac_lo; 963 964 result.parts.sign = 0; 965 result.parts.frac_hi = 0; 966 result.parts.frac_lo = 0; 967 968 counter = 
countZeroes32(i); 969 970 exp = FLOAT128_BIAS + 32 - counter - 1; 971 972 if (counter == 32) { 973 result.binary.hi = 0; 974 result.binary.lo = 0; 975 return result; 976 } 977 978 frac_hi = 0; 979 frac_lo = i; 980 lshift128(frac_hi, frac_lo, (counter + 96 - 1), &frac_hi, &frac_lo); 981 982 roundFloat128(&exp, &frac_hi, &frac_lo); 983 984 rshift128(frac_hi, frac_lo, 985 (128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo); 986 result.parts.frac_hi = frac_hi; 987 result.parts.frac_lo = frac_lo; 988 result.parts.exp = exp; 989 990 return result; 991 } 992 993 float128 int32_to_float128(int32_t i) 994 { 995 float128 result; 996 997 if (i < 0) { 998 result = uint32_to_float128((uint32_t) (-i)); 999 } else { 1000 result = uint32_to_float128((uint32_t) i); 1001 } 1002 1003 result.parts.sign = i < 0; 1004 1005 return result; 1006 } 1007 1008 1009 float128 uint64_to_float128(uint64_t i) 1010 { 1011 int counter; 1012 int32_t exp; 1013 float128 result; 1014 uint64_t frac_hi, frac_lo; 1015 1016 result.parts.sign = 0; 1017 result.parts.frac_hi = 0; 1018 result.parts.frac_lo = 0; 1019 1020 counter = countZeroes64(i); 1021 1022 exp = FLOAT128_BIAS + 64 - counter - 1; 1023 1024 if (counter == 64) { 1025 result.binary.hi = 0; 1026 result.binary.lo = 0; 1027 return result; 1028 } 1029 1030 frac_hi = 0; 1031 frac_lo = i; 1032 lshift128(frac_hi, frac_lo, (counter + 64 - 1), &frac_hi, &frac_lo); 1033 1034 roundFloat128(&exp, &frac_hi, &frac_lo); 1035 1036 rshift128(frac_hi, frac_lo, 1037 (128 - FLOAT128_FRACTION_SIZE - 2), &frac_hi, &frac_lo); 1038 result.parts.frac_hi = frac_hi; 1039 result.parts.frac_lo = frac_lo; 1040 result.parts.exp = exp; 1041 1042 return result; 1043 } 1044 1045 float128 int64_to_float128(int64_t i) 1046 { 1047 float128 result; 1048 1049 if (i < 0) { 1050 result = uint64_to_float128((uint64_t) (-i)); 1051 } else { 1052 result = uint64_to_float128((uint64_t) i); 1053 } 1054 1055 result.parts.sign = i < 0; 1056 1057 return result; 1058 } 1059 593 1060 /** @} 
594 1061 */ -
uspace/lib/softfloat/generic/div.c
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Division functions. 33 34 */ 34 35 … … 40 41 #include <common.h> 41 42 43 /** 44 * Divide two single-precision floats. 45 * 46 * @param a Nominator. 47 * @param b Denominator. 48 * @return Result of division. 49 */ 42 50 float32 divFloat32(float32 a, float32 b) 43 51 { … … 100 108 return result; 101 109 } 102 103 110 104 111 afrac = a.parts.fraction; … … 110 117 if (aexp == 0) { 111 118 if (afrac == 0) { 112 result.parts.exp = 0; 113 result.parts.fraction = 0; 114 return result; 115 } 119 result.parts.exp = 0; 120 result.parts.fraction = 0; 121 return result; 122 } 123 116 124 /* normalize it*/ 117 118 125 afrac <<= 1; 119 120 while (! (afrac & FLOAT32_HIDDEN_BIT_MASK)) {126 /* afrac is nonzero => it must stop */ 127 while (!(afrac & FLOAT32_HIDDEN_BIT_MASK)) { 121 128 afrac <<= 1; 122 129 aexp--; … … 126 133 if (bexp == 0) { 127 134 bfrac <<= 1; 128 129 while (! 
(bfrac & FLOAT32_HIDDEN_BIT_MASK)) {135 /* bfrac is nonzero => it must stop */ 136 while (!(bfrac & FLOAT32_HIDDEN_BIT_MASK)) { 130 137 bfrac <<= 1; 131 138 bexp--; … … 133 140 } 134 141 135 afrac = (afrac | FLOAT32_HIDDEN_BIT_MASK ) << (32 - FLOAT32_FRACTION_SIZE - 1);136 bfrac = (bfrac | FLOAT32_HIDDEN_BIT_MASK ) << (32 - FLOAT32_FRACTION_SIZE);137 138 if ( bfrac <= (afrac << 1)) {142 afrac = (afrac | FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE - 1); 143 bfrac = (bfrac | FLOAT32_HIDDEN_BIT_MASK) << (32 - FLOAT32_FRACTION_SIZE); 144 145 if (bfrac <= (afrac << 1)) { 139 146 afrac >>= 1; 140 147 aexp++; … … 144 151 145 152 cfrac = (afrac << 32) / bfrac; 146 if (( cfrac & 0x3F) == 0) {147 cfrac |= ( bfrac * cfrac != afrac << 32);153 if ((cfrac & 0x3F) == 0) { 154 cfrac |= (bfrac * cfrac != afrac << 32); 148 155 } 149 156 … … 151 158 152 159 /* find first nonzero digit and shift result and detect possibly underflow */ 153 while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7 160 while ((cexp > 0) && (cfrac) && (!(cfrac & (FLOAT32_HIDDEN_BIT_MASK << 7)))) { 154 161 cexp--; 155 162 cfrac <<= 1; 156 157 } ;163 /* TODO: fix underflow */ 164 } 158 165 159 166 cfrac += (0x1 << 6); /* FIXME: 7 is not sure*/ … … 162 169 ++cexp; 163 170 cfrac >>= 1; 164 171 } 165 172 166 173 /* check overflow */ 167 if (cexp >= FLOAT32_MAX_EXPONENT 174 if (cexp >= FLOAT32_MAX_EXPONENT) { 168 175 /* FIXME: overflow, return infinity */ 169 176 result.parts.exp = FLOAT32_MAX_EXPONENT; … … 181 188 cfrac >>= 1; 182 189 while (cexp < 0) { 183 cexp 190 cexp++; 184 191 cfrac >>= 1; 185 } 186 192 } 187 193 } else { 188 result.parts.exp = (uint32_t) cexp;194 result.parts.exp = (uint32_t) cexp; 189 195 } 190 196 … … 194 200 } 195 201 202 /** 203 * Divide two double-precision floats. 204 * 205 * @param a Nominator. 206 * @param b Denominator. 207 * @return Result of division. 
208 */ 196 209 float64 divFloat64(float64 a, float64 b) 197 210 { … … 200 213 uint64_t afrac, bfrac, cfrac; 201 214 uint64_t remlo, remhi; 215 uint64_t tmplo, tmphi; 202 216 203 217 result.parts.sign = a.parts.sign ^ b.parts.sign; 204 218 205 219 if (isFloat64NaN(a)) { 206 207 220 if (isFloat64SigNaN(b)) { 208 221 /*FIXME: SigNaN*/ … … 262 275 } 263 276 264 265 277 afrac = a.parts.fraction; 266 278 aexp = a.parts.exp; … … 275 287 return result; 276 288 } 289 277 290 /* normalize it*/ 278 279 291 aexp++; 280 281 while (! (afrac & FLOAT64_HIDDEN_BIT_MASK)) {292 /* afrac is nonzero => it must stop */ 293 while (!(afrac & FLOAT64_HIDDEN_BIT_MASK)) { 282 294 afrac <<= 1; 283 295 aexp--; … … 287 299 if (bexp == 0) { 288 300 bexp++; 289 290 while (! (bfrac & FLOAT64_HIDDEN_BIT_MASK)) {301 /* bfrac is nonzero => it must stop */ 302 while (!(bfrac & FLOAT64_HIDDEN_BIT_MASK)) { 291 303 bfrac <<= 1; 292 304 bexp--; … … 294 306 } 295 307 296 afrac = (afrac | FLOAT64_HIDDEN_BIT_MASK ) << (64 - FLOAT64_FRACTION_SIZE - 2);297 bfrac = (bfrac | FLOAT64_HIDDEN_BIT_MASK 298 299 if ( bfrac <= (afrac << 1)) {308 afrac = (afrac | FLOAT64_HIDDEN_BIT_MASK) << (64 - FLOAT64_FRACTION_SIZE - 2); 309 bfrac = (bfrac | FLOAT64_HIDDEN_BIT_MASK) << (64 - FLOAT64_FRACTION_SIZE - 1); 310 311 if (bfrac <= (afrac << 1)) { 300 312 afrac >>= 1; 301 313 aexp++; … … 304 316 cexp = aexp - bexp + FLOAT64_BIAS - 2; 305 317 306 cfrac = divFloat64estim(afrac, bfrac); 307 308 if (( cfrac & 0x1FF ) <= 2) { /*FIXME:?? 
*/ 309 mul64integers( bfrac, cfrac, &remlo, &remhi); 310 /* (__u128)afrac << 64 - ( ((__u128)remhi<<64) + (__u128)remlo )*/ 311 remhi = afrac - remhi - ( remlo > 0); 312 remlo = - remlo; 318 cfrac = div128est(afrac, 0x0ll, bfrac); 319 320 if ((cfrac & 0x1FF) <= 2) { 321 mul64(bfrac, cfrac, &tmphi, &tmplo); 322 sub128(afrac, 0x0ll, tmphi, tmplo, &remhi, &remlo); 313 323 314 324 while ((int64_t) remhi < 0) { 315 325 cfrac--; 316 remlo += bfrac; 317 remhi += ( remlo < bfrac ); 318 } 319 cfrac |= ( remlo != 0 ); 326 add128(remhi, remlo, 0x0ll, bfrac, &remhi, &remlo); 327 } 328 cfrac |= (remlo != 0); 320 329 } 321 330 … … 323 332 result = finishFloat64(cexp, cfrac, result.parts.sign); 324 333 return result; 325 326 334 } 327 335 328 uint64_t divFloat64estim(uint64_t a, uint64_t b) 336 /** 337 * Divide two quadruple-precision floats. 338 * 339 * @param a Nominator. 340 * @param b Denominator. 341 * @return Result of division. 342 */ 343 float128 divFloat128(float128 a, float128 b) 329 344 { 330 uint64_t bhi; 331 uint64_t remhi, remlo; 332 uint64_t result; 333 334 if ( b <= a ) { 335 return 0xFFFFFFFFFFFFFFFFull; 336 } 337 338 bhi = b >> 32; 339 result = ((bhi << 32) <= a) ?( 0xFFFFFFFFull << 32) : ( a / bhi) << 32; 340 mul64integers(b, result, &remlo, &remhi); 341 342 remhi = a - remhi - (remlo > 0); 343 remlo = - remlo; 344 345 b <<= 32; 346 while ( (int64_t) remhi < 0 ) { 347 result -= 0x1ll << 32; 348 remlo += b; 349 remhi += bhi + ( remlo < b ); 350 } 351 remhi = (remhi << 32) | (remlo >> 32); 352 if (( bhi << 32) <= remhi) { 353 result |= 0xFFFFFFFF; 354 } else { 355 result |= remhi / bhi; 356 } 357 358 345 float128 result; 346 int64_t aexp, bexp, cexp; 347 uint64_t afrac_hi, afrac_lo, bfrac_hi, bfrac_lo, cfrac_hi, cfrac_lo; 348 uint64_t shift_out; 349 uint64_t rem_hihi, rem_hilo, rem_lohi, rem_lolo; 350 uint64_t tmp_hihi, tmp_hilo, tmp_lohi, tmp_lolo; 351 352 result.parts.sign = a.parts.sign ^ b.parts.sign; 353 354 if (isFloat128NaN(a)) { 355 if 
(isFloat128SigNaN(b)) { 356 /*FIXME: SigNaN*/ 357 return b; 358 } 359 360 if (isFloat128SigNaN(a)) { 361 /*FIXME: SigNaN*/ 362 } 363 /*NaN*/ 364 return a; 365 } 366 367 if (isFloat128NaN(b)) { 368 if (isFloat128SigNaN(b)) { 369 /*FIXME: SigNaN*/ 370 } 371 /*NaN*/ 372 return b; 373 } 374 375 if (isFloat128Infinity(a)) { 376 if (isFloat128Infinity(b) || isFloat128Zero(b)) { 377 /*FIXME: inf / inf */ 378 result.binary.hi = FLOAT128_NAN_HI; 379 result.binary.lo = FLOAT128_NAN_LO; 380 return result; 381 } 382 /* inf / num */ 383 result.parts.exp = a.parts.exp; 384 result.parts.frac_hi = a.parts.frac_hi; 385 result.parts.frac_lo = a.parts.frac_lo; 386 return result; 387 } 388 389 if (isFloat128Infinity(b)) { 390 if (isFloat128Zero(a)) { 391 /* FIXME 0 / inf */ 392 result.parts.exp = 0; 393 result.parts.frac_hi = 0; 394 result.parts.frac_lo = 0; 395 return result; 396 } 397 /* FIXME: num / inf*/ 398 result.parts.exp = 0; 399 result.parts.frac_hi = 0; 400 result.parts.frac_lo = 0; 401 return result; 402 } 403 404 if (isFloat128Zero(b)) { 405 if (isFloat128Zero(a)) { 406 /*FIXME: 0 / 0*/ 407 result.binary.hi = FLOAT128_NAN_HI; 408 result.binary.lo = FLOAT128_NAN_LO; 409 return result; 410 } 411 /* FIXME: division by zero */ 412 result.parts.exp = 0; 413 result.parts.frac_hi = 0; 414 result.parts.frac_lo = 0; 415 return result; 416 } 417 418 afrac_hi = a.parts.frac_hi; 419 afrac_lo = a.parts.frac_lo; 420 aexp = a.parts.exp; 421 bfrac_hi = b.parts.frac_hi; 422 bfrac_lo = b.parts.frac_lo; 423 bexp = b.parts.exp; 424 425 /* denormalized numbers */ 426 if (aexp == 0) { 427 if (eq128(afrac_hi, afrac_lo, 0x0ll, 0x0ll)) { 428 result.parts.exp = 0; 429 result.parts.frac_hi = 0; 430 result.parts.frac_lo = 0; 431 return result; 432 } 433 434 /* normalize it*/ 435 aexp++; 436 /* afrac is nonzero => it must stop */ 437 and128(afrac_hi, afrac_lo, 438 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 439 &tmp_hihi, &tmp_lolo); 440 while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) { 
441 lshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo); 442 aexp--; 443 } 444 } 445 446 if (bexp == 0) { 447 bexp++; 448 /* bfrac is nonzero => it must stop */ 449 and128(bfrac_hi, bfrac_lo, 450 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 451 &tmp_hihi, &tmp_lolo); 452 while (!lt128(0x0ll, 0x0ll, tmp_hihi, tmp_lolo)) { 453 lshift128(bfrac_hi, bfrac_lo, 1, &bfrac_hi, &bfrac_lo); 454 bexp--; 455 } 456 } 457 458 or128(afrac_hi, afrac_lo, 459 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 460 &afrac_hi, &afrac_lo); 461 lshift128(afrac_hi, afrac_lo, 462 (128 - FLOAT128_FRACTION_SIZE - 1), &afrac_hi, &afrac_lo); 463 or128(bfrac_hi, bfrac_lo, 464 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 465 &bfrac_hi, &bfrac_lo); 466 lshift128(bfrac_hi, bfrac_lo, 467 (128 - FLOAT128_FRACTION_SIZE - 1), &bfrac_hi, &bfrac_lo); 468 469 if (le128(bfrac_hi, bfrac_lo, afrac_hi, afrac_lo)) { 470 rshift128(afrac_hi, afrac_lo, 1, &afrac_hi, &afrac_lo); 471 aexp++; 472 } 473 474 cexp = aexp - bexp + FLOAT128_BIAS - 2; 475 476 cfrac_hi = div128est(afrac_hi, afrac_lo, bfrac_hi); 477 478 mul128(bfrac_hi, bfrac_lo, 0x0ll, cfrac_hi, 479 &tmp_lolo /* dummy */, &tmp_hihi, &tmp_hilo, &tmp_lohi); 480 481 /* sub192(afrac_hi, afrac_lo, 0, 482 * tmp_hihi, tmp_hilo, tmp_lohi 483 * &rem_hihi, &rem_hilo, &rem_lohi); */ 484 sub128(afrac_hi, afrac_lo, tmp_hihi, tmp_hilo, &rem_hihi, &rem_hilo); 485 if (tmp_lohi > 0) { 486 sub128(rem_hihi, rem_hilo, 0x0ll, 0x1ll, &rem_hihi, &rem_hilo); 487 } 488 rem_lohi = -tmp_lohi; 489 490 while ((int64_t) rem_hihi < 0) { 491 --cfrac_hi; 492 /* add192(rem_hihi, rem_hilo, rem_lohi, 493 * 0, bfrac_hi, bfrac_lo, 494 * &rem_hihi, &rem_hilo, &rem_lohi); */ 495 add128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo, &rem_hilo, &rem_lohi); 496 if (lt128(rem_hilo, rem_lohi, bfrac_hi, bfrac_lo)) { 497 ++rem_hihi; 498 } 499 } 500 501 cfrac_lo = div128est(rem_hilo, rem_lohi, bfrac_lo); 502 503 if ((cfrac_lo & 0x3FFF) <= 4) { 504 mul128(bfrac_hi, bfrac_lo, 
0x0ll, cfrac_lo, 505 &tmp_hihi /* dummy */, &tmp_hilo, &tmp_lohi, &tmp_lolo); 506 507 /* sub192(rem_hilo, rem_lohi, 0, 508 * tmp_hilo, tmp_lohi, tmp_lolo, 509 * &rem_hilo, &rem_lohi, &rem_lolo); */ 510 sub128(rem_hilo, rem_lohi, tmp_hilo, tmp_lohi, &rem_hilo, &rem_lohi); 511 if (tmp_lolo > 0) { 512 sub128(rem_hilo, rem_lohi, 0x0ll, 0x1ll, &rem_hilo, &rem_lohi); 513 } 514 rem_lolo = -tmp_lolo; 515 516 while ((int64_t) rem_hilo < 0) { 517 --cfrac_lo; 518 /* add192(rem_hilo, rem_lohi, rem_lolo, 519 * 0, bfrac_hi, bfrac_lo, 520 * &rem_hilo, &rem_lohi, &rem_lolo); */ 521 add128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo, &rem_lohi, &rem_lolo); 522 if (lt128(rem_lohi, rem_lolo, bfrac_hi, bfrac_lo)) { 523 ++rem_hilo; 524 } 525 } 526 527 cfrac_lo |= ((rem_hilo | rem_lohi | rem_lolo) != 0 ); 528 } 529 530 shift_out = cfrac_lo << (64 - (128 - FLOAT128_FRACTION_SIZE - 1)); 531 rshift128(cfrac_hi, cfrac_lo, (128 - FLOAT128_FRACTION_SIZE - 1), 532 &cfrac_hi, &cfrac_lo); 533 534 result = finishFloat128(cexp, cfrac_hi, cfrac_lo, result.parts.sign, shift_out); 359 535 return result; 360 536 } -
uspace/lib/softfloat/generic/mul.c
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Multiplication functions. 33 34 */ 34 35 … … 38 39 #include <common.h> 39 40 40 /** Multiply two 32 bit float numbers 41 * 41 /** 42 * Multiply two single-precision floats. 43 * 44 * @param a First input operand. 45 * @param b Second input operand. 46 * @return Result of multiplication. 42 47 */ 43 48 float32 mulFloat32(float32 a, float32 b) … … 49 54 result.parts.sign = a.parts.sign ^ b.parts.sign; 50 55 51 if (isFloat32NaN(a) || isFloat32NaN(b) 56 if (isFloat32NaN(a) || isFloat32NaN(b)) { 52 57 /* TODO: fix SigNaNs */ 53 58 if (isFloat32SigNaN(a)) { … … 55 60 result.parts.exp = a.parts.exp; 56 61 return result; 57 } ;62 } 58 63 if (isFloat32SigNaN(b)) { /* TODO: fix SigNaN */ 59 64 result.parts.fraction = b.parts.fraction; 60 65 result.parts.exp = b.parts.exp; 61 66 return result; 62 } ;67 } 63 68 /* set NaN as result */ 64 69 result.binary = FLOAT32_NAN; 65 70 return result; 66 } ;71 } 67 72 68 73 if (isFloat32Infinity(a)) { … … 98 103 result.parts.sign = a.parts.sign ^ b.parts.sign; 99 104 return result; 100 } ;105 } 101 106 102 107 if (exp < 0) { … … 106 111 result.parts.exp = 0x0; 107 112 return result; 108 } ;113 } 109 114 110 115 frac1 = a.parts.fraction; … … 113 118 } else { 114 119 ++exp; 115 } ;120 } 116 121 117 122 frac2 = b.parts.fraction; … … 121 126 } else { 122 127 ++exp; 123 } ;128 } 124 129 125 130 frac1 <<= 1; /* one bit space for rounding */ 126 131 127 132 frac1 = frac1 * frac2; 128 /* round and return */ 129 130 while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= ( 131 /* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left) */133 134 /* round and return */ 135 while ((exp < FLOAT32_MAX_EXPONENT) && (frac1 >= (1 << (FLOAT32_FRACTION_SIZE + 2)))) { 136 /* 23 bits of fraction + one more for hidden bit (all shifted 1 bit left) */ 132 137 ++exp; 133 138 
frac1 >>= 1; 134 } ;139 } 135 140 136 141 /* rounding */ … … 141 146 ++exp; 142 147 frac1 >>= 1; 143 } ;144 145 if (exp >= FLOAT32_MAX_EXPONENT 148 } 149 150 if (exp >= FLOAT32_MAX_EXPONENT) { 146 151 /* TODO: fix overflow */ 147 152 /* return infinity*/ … … 159 164 frac1 >>= 1; 160 165 ++exp; 161 } ;166 } 162 167 if (frac1 == 0) { 163 168 /* FIXME : underflow */ 164 result.parts.exp = 0;165 result.parts.fraction = 0;166 return result;167 } ;168 } ;169 result.parts.exp = 0; 170 result.parts.fraction = 0; 171 return result; 172 } 173 } 169 174 result.parts.exp = exp; 170 result.parts.fraction = frac1 & ( 175 result.parts.fraction = frac1 & ((1 << FLOAT32_FRACTION_SIZE) - 1); 171 176 172 177 return result; 173 174 178 } 175 179 176 /** Multiply two 64 bit float numbers 177 * 180 /** 181 * Multiply two double-precision floats. 182 * 183 * @param a First input operand. 184 * @param b Second input operand. 185 * @return Result of multiplication. 178 186 */ 179 187 float64 mulFloat64(float64 a, float64 b) … … 185 193 result.parts.sign = a.parts.sign ^ b.parts.sign; 186 194 187 if (isFloat64NaN(a) || isFloat64NaN(b) 195 if (isFloat64NaN(a) || isFloat64NaN(b)) { 188 196 /* TODO: fix SigNaNs */ 189 197 if (isFloat64SigNaN(a)) { … … 191 199 result.parts.exp = a.parts.exp; 192 200 return result; 193 } ;201 } 194 202 if (isFloat64SigNaN(b)) { /* TODO: fix SigNaN */ 195 203 result.parts.fraction = b.parts.fraction; 196 204 result.parts.exp = b.parts.exp; 197 205 return result; 198 } ;206 } 199 207 /* set NaN as result */ 200 208 result.binary = FLOAT64_NAN; 201 209 return result; 202 } ;210 } 203 211 204 212 if (isFloat64Infinity(a)) { … … 233 241 } else { 234 242 ++exp; 235 } ;243 } 236 244 237 245 frac2 = b.parts.fraction; … … 241 249 } else { 242 250 ++exp; 243 } ;251 } 244 252 245 253 frac1 <<= (64 - FLOAT64_FRACTION_SIZE - 1); 246 254 frac2 <<= (64 - FLOAT64_FRACTION_SIZE - 2); 247 255 248 mul64 integers(frac1, frac2, &frac1, &frac2);249 250 frac 2 |= (frac1!= 0);251 if 
(frac 2& (0x1ll << 62)) {252 frac 2<<= 1;256 mul64(frac1, frac2, &frac1, &frac2); 257 258 frac1 |= (frac2 != 0); 259 if (frac1 & (0x1ll << 62)) { 260 frac1 <<= 1; 253 261 exp--; 254 262 } 255 263 256 result = finishFloat64(exp, frac 2, result.parts.sign);264 result = finishFloat64(exp, frac1, result.parts.sign); 257 265 return result; 258 266 } 259 267 260 /** Multiply two 64 bit numbers and return result in two parts 261 * @param a first operand 262 * @param b second operand 263 * @param lo lower part from result 264 * @param hi higher part of result 265 */ 266 void mul64integers(uint64_t a,uint64_t b, uint64_t *lo, uint64_t *hi) 268 /** 269 * Multiply two quadruple-precision floats. 270 * 271 * @param a First input operand. 272 * @param b Second input operand. 273 * @return Result of multiplication. 274 */ 275 float128 mulFloat128(float128 a, float128 b) 267 276 { 268 uint64_t low, high, middle1, middle2; 269 uint32_t alow, blow; 270 271 alow = a & 0xFFFFFFFF; 272 blow = b & 0xFFFFFFFF; 273 274 a >>= 32; 275 b >>= 32; 276 277 low = ((uint64_t)alow) * blow; 278 middle1 = a * blow; 279 middle2 = alow * b; 280 high = a * b; 281 282 middle1 += middle2; 283 high += (((uint64_t)(middle1 < middle2)) << 32) + (middle1 >> 32); 284 middle1 <<= 32; 285 low += middle1; 286 high += (low < middle1); 287 *lo = low; 288 *hi = high; 289 290 return; 277 float128 result; 278 uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo; 279 int32_t exp; 280 281 result.parts.sign = a.parts.sign ^ b.parts.sign; 282 283 if (isFloat128NaN(a) || isFloat128NaN(b)) { 284 /* TODO: fix SigNaNs */ 285 if (isFloat128SigNaN(a)) { 286 result.parts.frac_hi = a.parts.frac_hi; 287 result.parts.frac_lo = a.parts.frac_lo; 288 result.parts.exp = a.parts.exp; 289 return result; 290 } 291 if (isFloat128SigNaN(b)) { /* TODO: fix SigNaN */ 292 result.parts.frac_hi = b.parts.frac_hi; 293 result.parts.frac_lo = b.parts.frac_lo; 294 result.parts.exp = b.parts.exp; 295 return result; 296 } 297 /* set NaN 
as result */ 298 result.binary.hi = FLOAT128_NAN_HI; 299 result.binary.lo = FLOAT128_NAN_LO; 300 return result; 301 } 302 303 if (isFloat128Infinity(a)) { 304 if (isFloat128Zero(b)) { 305 /* FIXME: zero * infinity */ 306 result.binary.hi = FLOAT128_NAN_HI; 307 result.binary.lo = FLOAT128_NAN_LO; 308 return result; 309 } 310 result.parts.frac_hi = a.parts.frac_hi; 311 result.parts.frac_lo = a.parts.frac_lo; 312 result.parts.exp = a.parts.exp; 313 return result; 314 } 315 316 if (isFloat128Infinity(b)) { 317 if (isFloat128Zero(a)) { 318 /* FIXME: zero * infinity */ 319 result.binary.hi = FLOAT128_NAN_HI; 320 result.binary.lo = FLOAT128_NAN_LO; 321 return result; 322 } 323 result.parts.frac_hi = b.parts.frac_hi; 324 result.parts.frac_lo = b.parts.frac_lo; 325 result.parts.exp = b.parts.exp; 326 return result; 327 } 328 329 /* exp is signed so we can easy detect underflow */ 330 exp = a.parts.exp + b.parts.exp - FLOAT128_BIAS - 1; 331 332 frac1_hi = a.parts.frac_hi; 333 frac1_lo = a.parts.frac_lo; 334 335 if (a.parts.exp > 0) { 336 or128(frac1_hi, frac1_lo, 337 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 338 &frac1_hi, &frac1_lo); 339 } else { 340 ++exp; 341 } 342 343 frac2_hi = b.parts.frac_hi; 344 frac2_lo = b.parts.frac_lo; 345 346 if (b.parts.exp > 0) { 347 or128(frac2_hi, frac2_lo, 348 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 349 &frac2_hi, &frac2_lo); 350 } else { 351 ++exp; 352 } 353 354 lshift128(frac2_hi, frac2_lo, 355 128 - FLOAT128_FRACTION_SIZE, &frac2_hi, &frac2_lo); 356 357 tmp_hi = frac1_hi; 358 tmp_lo = frac1_lo; 359 mul128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, 360 &frac1_hi, &frac1_lo, &frac2_hi, &frac2_lo); 361 add128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo); 362 frac2_hi |= (frac2_lo != 0x0ll); 363 364 if ((FLOAT128_HIDDEN_BIT_MASK_HI << 1) <= frac1_hi) { 365 frac2_hi >>= 1; 366 if (frac1_lo & 0x1ll) { 367 frac2_hi |= (0x1ull < 64); 368 } 369 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo); 370 
++exp; 371 } 372 373 result = finishFloat128(exp, frac1_hi, frac1_lo, result.parts.sign, frac2_hi); 374 return result; 291 375 } 292 376 -
uspace/lib/softfloat/generic/other.c
rb7ee0369 rd3e241a 30 30 * @{ 31 31 */ 32 /** @file 32 /** @file Other functions (power, complex). 33 33 */ 34 34 -
uspace/lib/softfloat/generic/softfloat.c
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 32 33 * @{ 33 34 */ 34 /** @file 35 /** @file Softfloat API. 35 36 */ 36 37 … … 83 84 } 84 85 86 long double __addtf3(long double a, long double b) 87 { 88 float128 ta, tb; 89 ta.ld = a; 90 tb.ld = b; 91 if (ta.parts.sign != tb.parts.sign) { 92 if (ta.parts.sign) { 93 ta.parts.sign = 0; 94 return subFloat128(tb, ta).ld; 95 }; 96 tb.parts.sign = 0; 97 return subFloat128(ta, tb).ld; 98 } 99 return addFloat128(ta, tb).ld; 100 } 101 85 102 float __subsf3(float a, float b) 86 103 { … … 107 124 } 108 125 126 long double __subtf3(long double a, long double b) 127 { 128 float128 ta, tb; 129 ta.ld = a; 130 tb.ld = b; 131 if (ta.parts.sign != tb.parts.sign) { 132 tb.parts.sign = !tb.parts.sign; 133 return addFloat128(ta, tb).ld; 134 } 135 return subFloat128(ta, tb).ld; 136 } 137 109 138 float __mulsf3(float a, float b) 110 139 { … … 123 152 } 124 153 154 long double __multf3(long double a, long double b) 155 { 156 float128 ta, tb; 157 ta.ld = a; 158 tb.ld = b; 159 return mulFloat128(ta, tb).ld; 160 } 161 125 162 float __divsf3(float a, float b) 126 163 { … … 139 176 } 140 177 178 long double __divtf3(long double a, long double b) 179 { 180 float128 ta, tb; 181 ta.ld = a; 182 tb.ld = b; 183 return divFloat128(ta, tb).ld; 184 } 185 141 186 float __negsf2(float a) 142 187 { … … 149 194 double __negdf2(double a) 150 195 { 151 float64 fa; 152 fa.d = a; 153 fa.parts.sign = !fa.parts.sign; 154 return fa.d; 196 float64 da; 197 da.d = a; 198 da.parts.sign = !da.parts.sign; 199 return da.d; 200 } 201 202 long double __negtf2(long double a) 203 { 204 float128 ta; 205 ta.ld = a; 206 ta.parts.sign = !ta.parts.sign; 207 return ta.ld; 155 208 } 156 209 … … 164 217 } 165 218 219 long double __extendsftf2(float a) 220 { 221 float32 fa; 222 fa.f = a; 223 return convertFloat32ToFloat128(fa).ld; 224 } 225 226 long double __extenddftf2(double a) 227 { 228 float64 
da; 229 da.d = a; 230 return convertFloat64ToFloat128(da).ld; 231 } 232 166 233 float __truncdfsf2(double a) 167 234 { … … 171 238 } 172 239 240 float __trunctfsf2(long double a) 241 { 242 float128 ta; 243 ta.ld = a; 244 return convertFloat128ToFloat32(ta).f; 245 } 246 247 double __trunctfdf2(long double a) 248 { 249 float128 ta; 250 ta.ld = a; 251 return convertFloat128ToFloat64(ta).d; 252 } 253 173 254 int __fixsfsi(float a) 174 255 { … … 178 259 return float32_to_int(fa); 179 260 } 261 180 262 int __fixdfsi(double a) 181 263 { … … 184 266 185 267 return float64_to_int(da); 268 } 269 270 int __fixtfsi(long double a) 271 { 272 float128 ta; 273 ta.ld = a; 274 275 return float128_to_int(ta); 186 276 } 187 277 … … 193 283 return float32_to_long(fa); 194 284 } 285 195 286 long __fixdfdi(double a) 196 287 { … … 199 290 200 291 return float64_to_long(da); 292 } 293 294 long __fixtfdi(long double a) 295 { 296 float128 ta; 297 ta.ld = a; 298 299 return float128_to_long(ta); 201 300 } 202 301 … … 208 307 return float32_to_longlong(fa); 209 308 } 309 210 310 long long __fixdfti(double a) 211 311 { … … 216 316 } 217 317 318 long long __fixtfti(long double a) 319 { 320 float128 ta; 321 ta.ld = a; 322 323 return float128_to_longlong(ta); 324 } 325 218 326 unsigned int __fixunssfsi(float a) 219 327 { … … 223 331 return float32_to_uint(fa); 224 332 } 333 225 334 unsigned int __fixunsdfsi(double a) 226 335 { … … 229 338 230 339 return float64_to_uint(da); 340 } 341 342 unsigned int __fixunstfsi(long double a) 343 { 344 float128 ta; 345 ta.ld = a; 346 347 return float128_to_uint(ta); 231 348 } 232 349 … … 238 355 return float32_to_ulong(fa); 239 356 } 357 240 358 unsigned long __fixunsdfdi(double a) 241 359 { … … 244 362 245 363 return float64_to_ulong(da); 364 } 365 366 unsigned long __fixunstfdi(long double a) 367 { 368 float128 ta; 369 ta.ld = a; 370 371 return float128_to_ulong(ta); 246 372 } 247 373 … … 253 379 return float32_to_ulonglong(fa); 254 380 } 381 255 382 unsigned 
long long __fixunsdfti(double a) 256 383 { … … 259 386 260 387 return float64_to_ulonglong(da); 388 } 389 390 unsigned long long __fixunstfti(long double a) 391 { 392 float128 ta; 393 ta.ld = a; 394 395 return float128_to_ulonglong(ta); 261 396 } 262 397 … … 268 403 return fa.f; 269 404 } 405 270 406 double __floatsidf(int i) 271 407 { … … 275 411 return da.d; 276 412 } 413 414 long double __floatsitf(int i) 415 { 416 float128 ta; 417 418 ta = int_to_float128(i); 419 return ta.ld; 420 } 277 421 278 422 float __floatdisf(long i) … … 283 427 return fa.f; 284 428 } 429 285 430 double __floatdidf(long i) 286 431 { … … 290 435 return da.d; 291 436 } 437 438 long double __floatditf(long i) 439 { 440 float128 ta; 441 442 ta = long_to_float128(i); 443 return ta.ld; 444 } 292 445 293 446 float __floattisf(long long i) … … 298 451 return fa.f; 299 452 } 453 300 454 double __floattidf(long long i) 301 455 { … … 306 460 } 307 461 462 long double __floattitf(long long i) 463 { 464 float128 ta; 465 466 ta = longlong_to_float128(i); 467 return ta.ld; 468 } 469 308 470 float __floatunsisf(unsigned int i) 309 471 { … … 313 475 return fa.f; 314 476 } 477 315 478 double __floatunsidf(unsigned int i) 316 479 { … … 320 483 return da.d; 321 484 } 485 486 long double __floatunsitf(unsigned int i) 487 { 488 float128 ta; 489 490 ta = uint_to_float128(i); 491 return ta.ld; 492 } 322 493 323 494 float __floatundisf(unsigned long i) … … 328 499 return fa.f; 329 500 } 501 330 502 double __floatundidf(unsigned long i) 331 503 { … … 335 507 return da.d; 336 508 } 509 510 long double __floatunditf(unsigned long i) 511 { 512 float128 ta; 513 514 ta = ulong_to_float128(i); 515 return ta.ld; 516 } 337 517 338 518 float __floatuntisf(unsigned long long i) … … 343 523 return fa.f; 344 524 } 525 345 526 double __floatuntidf(unsigned long long i) 346 527 { … … 351 532 } 352 533 534 long double __floatuntitf(unsigned long long i) 535 { 536 float128 ta; 537 538 ta = ulonglong_to_float128(i); 539 return 
ta.ld; 540 } 541 353 542 /* Comparison functions */ 354 /* Comparison functions */355 356 /* a<b .. -1357 * a=b .. 0358 * a>b .. 1359 * */360 543 361 544 int __cmpsf2(float a, float b) … … 364 547 fa.f = a; 365 548 fb.f = b; 366 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 549 550 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 367 551 return 1; /* no special constant for unordered - maybe signaled? */ 368 }; 369 552 } 370 553 371 554 if (isFloat32eq(fa, fb)) { 372 555 return 0; 373 } ;556 } 374 557 375 558 if (isFloat32lt(fa, fb)) { 376 559 return -1; 377 }; 560 } 561 378 562 return 1; 379 563 } 380 564 565 int __cmpdf2(double a, double b) 566 { 567 float64 da, db; 568 da.d = a; 569 db.d = b; 570 571 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 572 return 1; /* no special constant for unordered - maybe signaled? */ 573 } 574 575 if (isFloat64eq(da, db)) { 576 return 0; 577 } 578 579 if (isFloat64lt(da, db)) { 580 return -1; 581 } 582 583 return 1; 584 } 585 586 int __cmptf2(long double a, long double b) 587 { 588 float128 ta, tb; 589 ta.ld = a; 590 tb.ld = b; 591 592 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 593 return 1; /* no special constant for unordered - maybe signaled? 
*/ 594 } 595 596 if (isFloat128eq(ta, tb)) { 597 return 0; 598 } 599 600 if (isFloat128lt(ta, tb)) { 601 return -1; 602 } 603 604 return 1; 605 } 606 381 607 int __unordsf2(float a, float b) 382 608 { … … 384 610 fa.f = a; 385 611 fb.f = b; 386 return ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ); 387 } 388 389 /** 390 * @return zero, if neither argument is a NaN and are equal 391 * */ 612 return ((isFloat32NaN(fa)) || (isFloat32NaN(fb))); 613 } 614 615 int __unorddf2(double a, double b) 616 { 617 float64 da, db; 618 da.d = a; 619 db.d = b; 620 return ((isFloat64NaN(da)) || (isFloat64NaN(db))); 621 } 622 623 int __unordtf2(long double a, long double b) 624 { 625 float128 ta, tb; 626 ta.ld = a; 627 tb.ld = b; 628 return ((isFloat128NaN(ta)) || (isFloat128NaN(tb))); 629 } 630 392 631 int __eqsf2(float a, float b) 393 632 { … … 395 634 fa.f = a; 396 635 fb.f = b; 397 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb))) {398 /* TODO: sigNaNs */399 return 1; 400 };636 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 637 /* TODO: sigNaNs */ 638 return 1; 639 } 401 640 return isFloat32eq(fa, fb) - 1; 402 641 } 403 642 404 /* strange behavior, but it was in gcc documentation */ 643 int __eqdf2(double a, double b) 644 { 645 float64 da, db; 646 da.d = a; 647 db.d = b; 648 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 649 /* TODO: sigNaNs */ 650 return 1; 651 } 652 return isFloat64eq(da, db) - 1; 653 } 654 655 int __eqtf2(long double a, long double b) 656 { 657 float128 ta, tb; 658 ta.ld = a; 659 tb.ld = b; 660 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 661 /* TODO: sigNaNs */ 662 return 1; 663 } 664 return isFloat128eq(ta, tb) - 1; 665 } 666 405 667 int __nesf2(float a, float b) 406 668 { 669 /* strange behavior, but it was in gcc documentation */ 407 670 return __eqsf2(a, b); 408 671 } 409 672 410 /* return value >= 0 if a>=b and neither is NaN */ 673 int __nedf2(double a, double b) 674 { 675 /* strange behavior, but it was in gcc documentation */ 676 return __eqdf2(a, b); 
677 } 678 679 int __netf2(long double a, long double b) 680 { 681 /* strange behavior, but it was in gcc documentation */ 682 return __eqtf2(a, b); 683 } 684 411 685 int __gesf2(float a, float b) 412 686 { … … 414 688 fa.f = a; 415 689 fb.f = b; 416 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 417 /* TODO: sigNaNs*/ 418 return -1; 419 }; 690 691 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 692 /* TODO: sigNaNs */ 693 return -1; 694 } 420 695 421 696 if (isFloat32eq(fa, fb)) { 422 697 return 0; 423 } ;698 } 424 699 425 700 if (isFloat32gt(fa, fb)) { 426 701 return 1; 427 };702 } 428 703 429 704 return -1; 430 705 } 431 706 432 /** Return negative value, if a<b and neither is NaN*/ 707 int __gedf2(double a, double b) 708 { 709 float64 da, db; 710 da.d = a; 711 db.d = b; 712 713 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 714 /* TODO: sigNaNs */ 715 return -1; 716 } 717 718 if (isFloat64eq(da, db)) { 719 return 0; 720 } 721 722 if (isFloat64gt(da, db)) { 723 return 1; 724 } 725 726 return -1; 727 } 728 729 int __getf2(long double a, long double b) 730 { 731 float128 ta, tb; 732 ta.ld = a; 733 tb.ld = b; 734 735 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 736 /* TODO: sigNaNs */ 737 return -1; 738 } 739 740 if (isFloat128eq(ta, tb)) { 741 return 0; 742 } 743 744 if (isFloat128gt(ta, tb)) { 745 return 1; 746 } 747 748 return -1; 749 } 750 433 751 int __ltsf2(float a, float b) 434 752 { … … 436 754 fa.f = a; 437 755 fb.f = b; 438 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 439 /* TODO: sigNaNs*/ 440 return 1; 441 }; 756 757 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 758 /* TODO: sigNaNs */ 759 return 1; 760 } 761 442 762 if (isFloat32lt(fa, fb)) { 443 763 return -1; 444 }; 764 } 765 445 766 return 0; 446 767 } 447 768 448 /* return value <= 0 if a<=b and neither is NaN */ 769 int __ltdf2(double a, double b) 770 { 771 float64 da, db; 772 da.d = a; 773 db.d = b; 774 775 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 776 /* TODO: sigNaNs */ 777 
return 1; 778 } 779 780 if (isFloat64lt(da, db)) { 781 return -1; 782 } 783 784 return 0; 785 } 786 787 int __lttf2(long double a, long double b) 788 { 789 float128 ta, tb; 790 ta.ld = a; 791 tb.ld = b; 792 793 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 794 /* TODO: sigNaNs */ 795 return 1; 796 } 797 798 if (isFloat128lt(ta, tb)) { 799 return -1; 800 } 801 802 return 0; 803 } 804 449 805 int __lesf2(float a, float b) 450 806 { … … 452 808 fa.f = a; 453 809 fb.f = b; 454 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 455 /* TODO: sigNaNs*/ 456 return 1; 457 }; 810 811 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 812 /* TODO: sigNaNs */ 813 return 1; 814 } 458 815 459 816 if (isFloat32eq(fa, fb)) { 460 817 return 0; 461 } ;818 } 462 819 463 820 if (isFloat32lt(fa, fb)) { 464 821 return -1; 465 };822 } 466 823 467 824 return 1; 468 825 } 469 826 470 /** Return positive value, if a>b and neither is NaN*/ 827 int __ledf2(double a, double b) 828 { 829 float64 da, db; 830 da.d = a; 831 db.d = b; 832 833 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 834 /* TODO: sigNaNs */ 835 return 1; 836 } 837 838 if (isFloat64eq(da, db)) { 839 return 0; 840 } 841 842 if (isFloat64lt(da, db)) { 843 return -1; 844 } 845 846 return 1; 847 } 848 849 int __letf2(long double a, long double b) 850 { 851 float128 ta, tb; 852 ta.ld = a; 853 tb.ld = b; 854 855 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 856 /* TODO: sigNaNs */ 857 return 1; 858 } 859 860 if (isFloat128eq(ta, tb)) { 861 return 0; 862 } 863 864 if (isFloat128lt(ta, tb)) { 865 return -1; 866 } 867 868 return 1; 869 } 870 471 871 int __gtsf2(float a, float b) 472 872 { … … 474 874 fa.f = a; 475 875 fb.f = b; 476 if ( (isFloat32NaN(fa)) || (isFloat32NaN(fb)) ) { 477 /* TODO: sigNaNs*/ 478 return -1; 479 }; 876 877 if ((isFloat32NaN(fa)) || (isFloat32NaN(fb))) { 878 /* TODO: sigNaNs */ 879 return -1; 880 } 881 480 882 if (isFloat32gt(fa, fb)) { 481 883 return 1; 482 }; 884 } 885 483 886 return 0; 484 887 } 485 888 
486 /* Other functions */ 487 488 float __powisf2(float a, int b) 489 { 490 /* TODO: */ 491 float32 fa; 492 fa.binary = FLOAT32_NAN; 493 return fa.f; 494 } 889 int __gtdf2(double a, double b) 890 { 891 float64 da, db; 892 da.d = a; 893 db.d = b; 894 895 if ((isFloat64NaN(da)) || (isFloat64NaN(db))) { 896 /* TODO: sigNaNs */ 897 return -1; 898 } 899 900 if (isFloat64gt(da, db)) { 901 return 1; 902 } 903 904 return 0; 905 } 906 907 int __gttf2(long double a, long double b) 908 { 909 float128 ta, tb; 910 ta.ld = a; 911 tb.ld = b; 912 913 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 914 /* TODO: sigNaNs */ 915 return -1; 916 } 917 918 if (isFloat128gt(ta, tb)) { 919 return 1; 920 } 921 922 return 0; 923 } 924 925 926 927 #ifdef SPARC_SOFTFLOAT 928 929 /* SPARC quadruple-precision wrappers */ 930 931 void _Qp_add(long double *c, long double *a, long double *b) 932 { 933 *c = __addtf3(*a, *b); 934 } 935 936 void _Qp_sub(long double *c, long double *a, long double *b) 937 { 938 *c = __subtf3(*a, *b); 939 } 940 941 void _Qp_mul(long double *c, long double *a, long double *b) 942 { 943 *c = __multf3(*a, *b); 944 } 945 946 void _Qp_div(long double *c, long double *a, long double *b) 947 { 948 *c = __divtf3(*a, *b); 949 } 950 951 void _Qp_neg(long double *c, long double *a) 952 { 953 *c = __negtf2(*a); 954 } 955 956 void _Qp_stoq(long double *c, float a) 957 { 958 *c = __extendsftf2(a); 959 } 960 961 void _Qp_dtoq(long double *c, double a) 962 { 963 *c = __extenddftf2(a); 964 } 965 966 float _Qp_qtos(long double *a) 967 { 968 return __trunctfsf2(*a); 969 } 970 971 double _Qp_qtod(long double *a) 972 { 973 return __trunctfdf2(*a); 974 } 975 976 int _Qp_qtoi(long double *a) 977 { 978 return __fixtfsi(*a); 979 } 980 981 unsigned int _Qp_qtoui(long double *a) 982 { 983 return __fixunstfsi(*a); 984 } 985 986 long _Qp_qtox(long double *a) 987 { 988 return __fixtfdi(*a); 989 } 990 991 unsigned long _Qp_qtoux(long double *a) 992 { 993 return __fixunstfdi(*a); 994 } 995 996 void 
_Qp_itoq(long double *c, int a) 997 { 998 *c = __floatsitf(a); 999 } 1000 1001 void _Qp_uitoq(long double *c, unsigned int a) 1002 { 1003 *c = __floatunsitf(a); 1004 } 1005 1006 void _Qp_xtoq(long double *c, long a) 1007 { 1008 *c = __floatditf(a); 1009 } 1010 1011 void _Qp_uxtoq(long double *c, unsigned long a) 1012 { 1013 *c = __floatunditf(a); 1014 } 1015 1016 int _Qp_cmp(long double *a, long double *b) 1017 { 1018 float128 ta, tb; 1019 ta.ld = *a; 1020 tb.ld = *b; 1021 1022 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1023 return 3; 1024 } 1025 1026 if (isFloat128eq(ta, tb)) { 1027 return 0; 1028 } 1029 1030 if (isFloat128lt(ta, tb)) { 1031 return 1; 1032 } 1033 1034 return 2; 1035 } 1036 1037 int _Qp_cmpe(long double *a, long double *b) 1038 { 1039 /* strange, but is defined this way in SPARC Compliance Definition */ 1040 return _Qp_cmp(a, b); 1041 } 1042 1043 int _Qp_feq(long double *a, long double *b) 1044 { 1045 float128 ta, tb; 1046 ta.ld = *a; 1047 tb.ld = *b; 1048 1049 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1050 return 0; 1051 } 1052 1053 return isFloat128eq(ta, tb); 1054 } 1055 1056 int _Qp_fge(long double *a, long double *b) 1057 { 1058 float128 ta, tb; 1059 ta.ld = *a; 1060 tb.ld = *b; 1061 1062 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1063 return 0; 1064 } 1065 1066 return isFloat128eq(ta, tb) || isFloat128gt(ta, tb); 1067 } 1068 1069 int _Qp_fgt(long double *a, long double *b) 1070 { 1071 float128 ta, tb; 1072 ta.ld = *a; 1073 tb.ld = *b; 1074 1075 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1076 return 0; 1077 } 1078 1079 return isFloat128gt(ta, tb); 1080 } 1081 1082 int _Qp_fle(long double*a, long double *b) 1083 { 1084 float128 ta, tb; 1085 ta.ld = *a; 1086 tb.ld = *b; 1087 1088 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1089 return 0; 1090 } 1091 1092 return isFloat128eq(ta, tb) || isFloat128lt(ta, tb); 1093 } 1094 1095 int _Qp_flt(long double *a, long double *b) 1096 { 1097 float128 ta, tb; 1098 ta.ld = *a; 
1099 tb.ld = *b; 1100 1101 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1102 return 0; 1103 } 1104 1105 return isFloat128lt(ta, tb); 1106 } 1107 1108 int _Qp_fne(long double *a, long double *b) 1109 { 1110 float128 ta, tb; 1111 ta.ld = *a; 1112 tb.ld = *b; 1113 1114 if ((isFloat128NaN(ta)) || (isFloat128NaN(tb))) { 1115 return 1; 1116 } 1117 1118 return !isFloat128eq(ta, tb); 1119 } 1120 1121 #endif 495 1122 496 1123 /** @} -
uspace/lib/softfloat/generic/sub.c
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Substraction functions. 33 34 */ 34 35 … … 36 37 #include <sub.h> 37 38 #include <comparison.h> 38 39 /** Subtract two float32 numbers with same signs 39 #include <common.h> 40 41 /** 42 * Subtract two single-precision floats with the same signs. 43 * 44 * @param a First input operand. 45 * @param b Second input operand. 46 * @return Result of substraction. 40 47 */ 41 48 float32 subFloat32(float32 a, float32 b) … … 52 59 /* TODO: fix SigNaN */ 53 60 if (isFloat32SigNaN(b)) { 54 } ;55 return b; 56 } ;61 } 62 return b; 63 } 57 64 58 65 if (b.parts.exp == FLOAT32_MAX_EXPONENT) { … … 72 79 /* TODO: fix SigNaN */ 73 80 if (isFloat32SigNaN(a) || isFloat32SigNaN(b)) { 74 } ;75 return a; 76 } ;81 } 82 return a; 83 } 77 84 78 85 if (a.parts.exp == FLOAT32_MAX_EXPONENT) { … … 82 89 result.binary = FLOAT32_NAN; 83 90 return result; 84 } ;91 } 85 92 return a; 86 93 } … … 92 99 frac2 = b.parts.fraction; 93 100 exp2 = b.parts.exp; 94 } ;101 } 95 102 96 103 if (exp1 == 0) { 97 104 /* both are denormalized */ 98 result.parts.fraction = frac1 -frac2;105 result.parts.fraction = frac1 - frac2; 99 106 if (result.parts.fraction > frac1) { 100 107 /* TODO: underflow exception */ 101 108 return result; 102 } ;109 } 103 110 result.parts.exp = 0; 104 111 return result; 105 } ;112 } 106 113 107 114 /* add hidden bit */ … … 114 121 /* normalized */ 115 122 frac2 |= FLOAT32_HIDDEN_BIT_MASK; 116 } ;123 } 117 124 118 125 /* create some space for rounding */ … … 121 128 122 129 if (expdiff > FLOAT32_FRACTION_SIZE + 1) { 123 goto done;124 };130 goto done; 131 } 125 132 126 133 frac1 = frac1 - (frac2 >> expdiff); 134 127 135 done: 128 136 /* TODO: find first nonzero digit and shift result and detect possibly underflow */ … … 130 138 --exp1; 131 139 frac1 <<= 1; 132 133 } ;140 /* TODO: fix underflow - frac1 == 0 does not 
necessary means underflow... */ 141 } 134 142 135 143 /* rounding - if first bit after fraction is set then round up */ … … 139 147 ++exp1; 140 148 frac1 >>= 1; 141 } ;142 143 /* Clear hidden bit and shift */149 } 150 151 /* Clear hidden bit and shift */ 144 152 result.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)); 145 153 result.parts.exp = exp1; … … 148 156 } 149 157 150 /** Subtract two float64 numbers with same signs 158 /** 159 * Subtract two double-precision floats with the same signs. 160 * 161 * @param a First input operand. 162 * @param b Second input operand. 163 * @return Result of substraction. 151 164 */ 152 165 float64 subFloat64(float64 a, float64 b) … … 164 177 /* TODO: fix SigNaN */ 165 178 if (isFloat64SigNaN(b)) { 166 } ;167 return b; 168 } ;179 } 180 return b; 181 } 169 182 170 183 if (b.parts.exp == FLOAT64_MAX_EXPONENT) { … … 184 197 /* TODO: fix SigNaN */ 185 198 if (isFloat64SigNaN(a) || isFloat64SigNaN(b)) { 186 } ;187 return a; 188 } ;199 } 200 return a; 201 } 189 202 190 203 if (a.parts.exp == FLOAT64_MAX_EXPONENT) { … … 194 207 result.binary = FLOAT64_NAN; 195 208 return result; 196 } ;209 } 197 210 return a; 198 211 } … … 204 217 frac2 = b.parts.fraction; 205 218 exp2 = b.parts.exp; 206 } ;219 } 207 220 208 221 if (exp1 == 0) { … … 212 225 /* TODO: underflow exception */ 213 226 return result; 214 } ;227 } 215 228 result.parts.exp = 0; 216 229 return result; 217 } ;230 } 218 231 219 232 /* add hidden bit */ … … 226 239 /* normalized */ 227 240 frac2 |= FLOAT64_HIDDEN_BIT_MASK; 228 } ;241 } 229 242 230 243 /* create some space for rounding */ … … 233 246 234 247 if (expdiff > FLOAT64_FRACTION_SIZE + 1) { 235 goto done;236 };248 goto done; 249 } 237 250 238 251 frac1 = frac1 - (frac2 >> expdiff); 252 239 253 done: 240 254 /* TODO: find first nonzero digit and shift result and detect possibly underflow */ … … 242 256 --exp1; 243 257 frac1 <<= 1; 244 245 } ;258 /* TODO: fix underflow - frac1 == 0 does not necessary means 
underflow... */ 259 } 246 260 247 261 /* rounding - if first bit after fraction is set then round up */ … … 251 265 ++exp1; 252 266 frac1 >>= 1; 253 } ;254 255 /* Clear hidden bit and shift */267 } 268 269 /* Clear hidden bit and shift */ 256 270 result.parts.fraction = ((frac1 >> 6) & (~FLOAT64_HIDDEN_BIT_MASK)); 257 271 result.parts.exp = exp1; … … 260 274 } 261 275 276 /** 277 * Subtract two quadruple-precision floats with the same signs. 278 * 279 * @param a First input operand. 280 * @param b Second input operand. 281 * @return Result of substraction. 282 */ 283 float128 subFloat128(float128 a, float128 b) 284 { 285 int expdiff; 286 uint32_t exp1, exp2; 287 uint64_t frac1_hi, frac1_lo, frac2_hi, frac2_lo, tmp_hi, tmp_lo; 288 float128 result; 289 290 result.binary.hi = 0; 291 result.binary.lo = 0; 292 293 expdiff = a.parts.exp - b.parts.exp; 294 if ((expdiff < 0 ) || ((expdiff == 0) && 295 lt128(a.parts.frac_hi, a.parts.frac_lo, b.parts.frac_hi, b.parts.frac_lo))) { 296 if (isFloat128NaN(b)) { 297 /* TODO: fix SigNaN */ 298 if (isFloat128SigNaN(b)) { 299 } 300 return b; 301 } 302 303 if (b.parts.exp == FLOAT128_MAX_EXPONENT) { 304 b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */ 305 return b; 306 } 307 308 result.parts.sign = !a.parts.sign; 309 310 frac1_hi = b.parts.frac_hi; 311 frac1_lo = b.parts.frac_lo; 312 exp1 = b.parts.exp; 313 frac2_hi = a.parts.frac_hi; 314 frac2_lo = a.parts.frac_lo; 315 exp2 = a.parts.exp; 316 expdiff *= -1; 317 } else { 318 if (isFloat128NaN(a)) { 319 /* TODO: fix SigNaN */ 320 if (isFloat128SigNaN(a) || isFloat128SigNaN(b)) { 321 } 322 return a; 323 } 324 325 if (a.parts.exp == FLOAT128_MAX_EXPONENT) { 326 if (b.parts.exp == FLOAT128_MAX_EXPONENT) { 327 /* inf - inf => nan */ 328 /* TODO: fix exception */ 329 result.binary.hi = FLOAT128_NAN_HI; 330 result.binary.lo = FLOAT128_NAN_LO; 331 return result; 332 } 333 return a; 334 } 335 336 result.parts.sign = a.parts.sign; 337 338 frac1_hi = a.parts.frac_hi; 339 frac1_lo = 
a.parts.frac_lo; 340 exp1 = a.parts.exp; 341 frac2_hi = b.parts.frac_hi; 342 frac2_lo = b.parts.frac_lo; 343 exp2 = b.parts.exp; 344 } 345 346 if (exp1 == 0) { 347 /* both are denormalized */ 348 sub128(frac1_hi, frac1_lo, frac2_hi, frac2_lo, &tmp_hi, &tmp_lo); 349 result.parts.frac_hi = tmp_hi; 350 result.parts.frac_lo = tmp_lo; 351 if (lt128(frac1_hi, frac1_lo, result.parts.frac_hi, result.parts.frac_lo)) { 352 /* TODO: underflow exception */ 353 return result; 354 } 355 result.parts.exp = 0; 356 return result; 357 } 358 359 /* add hidden bit */ 360 or128(frac1_hi, frac1_lo, 361 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 362 &frac1_hi, &frac1_lo); 363 364 if (exp2 == 0) { 365 /* denormalized */ 366 --expdiff; 367 } else { 368 /* normalized */ 369 or128(frac2_hi, frac2_lo, 370 FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 371 &frac2_hi, &frac2_lo); 372 } 373 374 /* create some space for rounding */ 375 lshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo); 376 lshift128(frac2_hi, frac2_lo, 6, &frac2_hi, &frac2_lo); 377 378 if (expdiff > FLOAT128_FRACTION_SIZE + 1) { 379 goto done; 380 } 381 382 rshift128(frac2_hi, frac2_lo, expdiff, &tmp_hi, &tmp_lo); 383 sub128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &frac1_hi, &frac1_lo); 384 385 done: 386 /* TODO: find first nonzero digit and shift result and detect possibly underflow */ 387 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6, 388 &tmp_hi, &tmp_lo); 389 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 390 while ((exp1 > 0) && (!lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo))) { 391 --exp1; 392 lshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo); 393 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... 
*/ 394 395 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 6, 396 &tmp_hi, &tmp_lo); 397 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 398 } 399 400 /* rounding - if first bit after fraction is set then round up */ 401 add128(frac1_hi, frac1_lo, 0x0ll, 0x20ll, &frac1_hi, &frac1_lo); 402 403 lshift128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 7, 404 &tmp_hi, &tmp_lo); 405 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 406 if (lt128(0x0ll, 0x0ll, tmp_hi, tmp_lo)) { 407 ++exp1; 408 rshift128(frac1_hi, frac1_lo, 1, &frac1_hi, &frac1_lo); 409 } 410 411 /* Clear hidden bit and shift */ 412 rshift128(frac1_hi, frac1_lo, 6, &frac1_hi, &frac1_lo); 413 not128(FLOAT128_HIDDEN_BIT_MASK_HI, FLOAT128_HIDDEN_BIT_MASK_LO, 414 &tmp_hi, &tmp_lo); 415 and128(frac1_hi, frac1_lo, tmp_hi, tmp_lo, &tmp_hi, &tmp_lo); 416 result.parts.frac_hi = tmp_hi; 417 result.parts.frac_lo = tmp_lo; 418 419 result.parts.exp = exp1; 420 421 return result; 422 } 423 262 424 /** @} 263 425 */ -
uspace/lib/softfloat/include/add.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Addition functions. 33 34 */ 34 35 … … 38 39 extern float32 addFloat32(float32, float32); 39 40 extern float64 addFloat64(float64, float64); 41 extern float128 addFloat128(float128, float128); 40 42 41 43 #endif -
uspace/lib/softfloat/include/common.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Common helper operations. 33 34 */ 34 35 … … 39 40 40 41 extern float64 finishFloat64(int32_t, uint64_t, char); 42 extern float128 finishFloat128(int32_t, uint64_t, uint64_t, char, uint64_t); 41 43 44 extern int countZeroes8(uint8_t); 45 extern int countZeroes32(uint32_t); 42 46 extern int countZeroes64(uint64_t); 43 extern int countZeroes32(uint32_t);44 extern int countZeroes8(uint8_t);45 47 46 48 extern void roundFloat32(int32_t *, uint32_t *); 47 49 extern void roundFloat64(int32_t *, uint64_t *); 50 extern void roundFloat128(int32_t *, uint64_t *, uint64_t *); 51 52 extern void lshift128(uint64_t, uint64_t, int, uint64_t *, uint64_t *); 53 extern void rshift128(uint64_t, uint64_t, int, uint64_t *, uint64_t *); 54 55 extern void and128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *); 56 extern void or128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *); 57 extern void xor128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *); 58 extern void not128(uint64_t, uint64_t, uint64_t *, uint64_t *); 59 60 extern int eq128(uint64_t, uint64_t, uint64_t, uint64_t); 61 extern int le128(uint64_t, uint64_t, uint64_t, uint64_t); 62 extern int lt128(uint64_t, uint64_t, uint64_t, uint64_t); 63 64 extern void add128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *); 65 extern void sub128(uint64_t, uint64_t, uint64_t, uint64_t, uint64_t *, uint64_t *); 66 67 extern void mul64(uint64_t, uint64_t, uint64_t *, uint64_t *); 68 extern void mul128(uint64_t, uint64_t, uint64_t, uint64_t, 69 uint64_t *, uint64_t *, uint64_t *, uint64_t *); 70 71 extern uint64_t div128est(uint64_t, uint64_t, uint64_t); 48 72 49 73 #endif -
uspace/lib/softfloat/include/comparison.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Comparison functions. 33 34 */ 34 35 … … 42 43 extern int isFloat32Zero(float32); 43 44 45 extern int isFloat32eq(float32, float32); 46 extern int isFloat32lt(float32, float32); 47 extern int isFloat32gt(float32, float32); 48 44 49 extern int isFloat64NaN(float64); 45 50 extern int isFloat64SigNaN(float64); … … 48 53 extern int isFloat64Zero(float64); 49 54 50 extern int isFloat32eq(float32, float32); 51 extern int isFloat32lt(float32, float32); 52 extern int isFloat32gt(float32, float32); 55 extern int isFloat64eq(float64, float64); 56 extern int isFloat64lt(float64, float64); 57 extern int isFloat64gt(float64, float64); 58 59 extern int isFloat128NaN(float128); 60 extern int isFloat128SigNaN(float128); 61 62 extern int isFloat128Infinity(float128); 63 extern int isFloat128Zero(float128); 64 65 extern int isFloat128eq(float128, float128); 66 extern int isFloat128lt(float128, float128); 67 extern int isFloat128gt(float128, float128); 53 68 54 69 #endif -
uspace/lib/softfloat/include/conversion.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Conversion of precision and conversion between integers and floats. 33 34 */ 34 35 … … 37 38 38 39 extern float64 convertFloat32ToFloat64(float32); 40 extern float128 convertFloat32ToFloat128(float32); 41 extern float128 convertFloat64ToFloat128(float64); 42 43 39 44 extern float32 convertFloat64ToFloat32(float64); 45 extern float32 convertFloat128ToFloat32(float128); 46 extern float64 convertFloat128ToFloat64(float128); 47 40 48 41 49 extern uint32_t float32_to_uint32(float32); … … 45 53 extern int64_t float32_to_int64(float32); 46 54 55 extern uint32_t float64_to_uint32(float64); 56 extern int32_t float64_to_int32(float64); 57 47 58 extern uint64_t float64_to_uint64(float64); 48 59 extern int64_t float64_to_int64(float64); 49 60 50 extern uint32_t float64_to_uint32(float64); 51 extern int32_t float64_to_int32(float64); 61 extern uint32_t float128_to_uint32(float128); 62 extern int32_t float128_to_int32(float128); 63 64 extern uint64_t float128_to_uint64(float128); 65 extern int64_t float128_to_int64(float128); 66 52 67 53 68 extern float32 uint32_to_float32(uint32_t); … … 63 78 extern float64 int64_to_float64(int64_t); 64 79 80 extern float128 uint32_to_float128(uint32_t); 81 extern float128 int32_to_float128(int32_t); 82 83 extern float128 uint64_to_float128(uint64_t); 84 extern float128 int64_to_float128(int64_t); 85 65 86 #endif 66 87 -
uspace/lib/softfloat/include/div.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Division functions. 33 34 */ 34 35 … … 38 39 extern float32 divFloat32(float32, float32); 39 40 extern float64 divFloat64(float64, float64); 40 41 extern uint64_t divFloat64estim(uint64_t, uint64_t); 41 extern float128 divFloat128(float128, float128); 42 42 43 43 #endif -
uspace/lib/softfloat/include/mul.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Multiplication functions. 33 34 */ 34 35 … … 38 39 extern float32 mulFloat32(float32, float32); 39 40 extern float64 mulFloat64(float64, float64); 40 41 extern void mul64integers(uint64_t, uint64_t, uint64_t *, uint64_t *); 41 extern float128 mulFloat128(float128, float128); 42 42 43 43 #endif -
uspace/lib/softfloat/include/other.h
rb7ee0369 rd3e241a 30 30 * @{ 31 31 */ 32 /** @file 32 /** @file Other functions (power, complex). 33 33 */ 34 34 -
uspace/lib/softfloat/include/sftypes.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Floating point types and constants. 33 34 */ 34 35 … … 77 78 } float64; 78 79 79 #define FLOAT32_MAX 0x7f800000 80 #define FLOAT32_MIN 0xff800000 81 #define FLOAT64_MAX 82 #define FLOAT64_MIN 80 typedef union { 81 long double ld; 82 struct { 83 #if defined(__BE__) 84 uint64_t hi; 85 uint64_t lo; 86 #elif defined(__LE__) 87 uint64_t lo; 88 uint64_t hi; 89 #else 90 #error Unknown endianess 91 #endif 92 } binary; 93 94 struct { 95 #if defined(__BE__) 96 uint64_t sign : 1; 97 uint64_t exp : 15; 98 uint64_t frac_hi : 48; 99 uint64_t frac_lo : 64; 100 #elif defined(__LE__) 101 uint64_t frac_lo : 64; 102 uint64_t frac_hi : 48; 103 uint64_t exp : 15; 104 uint64_t sign : 1; 105 #else 106 #error Unknown endianess 107 #endif 108 } parts __attribute__ ((packed)); 109 } float128; 83 110 84 111 /* 85 * For recognizing NaNs or infinity use isFloat32NaN and is Float32Inf,112 * For recognizing NaNs or infinity use specialized comparison functions, 86 113 * comparing with these constants is not sufficient. 
87 114 */ … … 95 122 #define FLOAT64_INF 0x7FF0000000000000ll 96 123 97 #define FLOAT32_FRACTION_SIZE 23 98 #define FLOAT64_FRACTION_SIZE 52 124 #define FLOAT128_NAN_HI 0x7FFF800000000000ll 125 #define FLOAT128_NAN_LO 0x0000000000000001ll 126 #define FLOAT128_SIGNAN_HI 0x7FFF000000000000ll 127 #define FLOAT128_SIGNAN_LO 0x0000000000000001ll 128 #define FLOAT128_INF_HI 0x7FFF000000000000ll 129 #define FLOAT128_INF_LO 0x0000000000000000ll 99 130 100 #define FLOAT32_HIDDEN_BIT_MASK 0x800000 101 #define FLOAT64_HIDDEN_BIT_MASK 0x10000000000000ll 131 #define FLOAT32_FRACTION_SIZE 23 132 #define FLOAT64_FRACTION_SIZE 52 133 #define FLOAT128_FRACTION_SIZE 112 134 #define FLOAT128_FRAC_HI_SIZE 48 135 #define FLOAT128_FRAC_LO_SIZE 64 136 137 #define FLOAT32_HIDDEN_BIT_MASK 0x800000 138 #define FLOAT64_HIDDEN_BIT_MASK 0x10000000000000ll 139 #define FLOAT128_HIDDEN_BIT_MASK_HI 0x1000000000000ll 140 #define FLOAT128_HIDDEN_BIT_MASK_LO 0x0000000000000000ll 102 141 103 142 #define FLOAT32_MAX_EXPONENT 0xFF 104 143 #define FLOAT64_MAX_EXPONENT 0x7FF 144 #define FLOAT128_MAX_EXPONENT 0x7FFF 105 145 106 146 #define FLOAT32_BIAS 0x7F 107 147 #define FLOAT64_BIAS 0x3FF 108 148 #define FLOAT80_BIAS 0x3FFF 149 #define FLOAT128_BIAS 0x3FFF 109 150 110 151 #endif -
uspace/lib/softfloat/include/softfloat.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Softfloat API. 33 34 */ 34 35 … … 156 157 extern int __ltdf2(double, double); 157 158 extern int __lttf2(long double, long double); 159 158 160 extern int __lesf2(float, float); 159 161 extern int __ledf2(double, double); … … 166 168 /* Not implemented yet */ 167 169 extern float __powisf2(float, int); 170 extern double __powidf2 (double, int); 171 extern long double __powitf2 (long double, int); 172 extern long double __powixf2 (long double, int); 173 174 175 176 /* SPARC quadruple-precision wrappers */ 177 178 extern void _Qp_add(long double *, long double *, long double *); 179 extern void _Qp_sub(long double *, long double *, long double *); 180 extern void _Qp_mul(long double *, long double *, long double *); 181 extern void _Qp_div(long double *, long double *, long double *); 182 extern void _Qp_neg(long double *, long double *); 183 184 extern void _Qp_stoq(long double *, float); 185 extern void _Qp_dtoq(long double *, double); 186 extern float _Qp_qtos(long double *); 187 extern double _Qp_qtod(long double *); 188 189 extern int _Qp_qtoi(long double *); 190 extern unsigned int _Qp_qtoui(long double *); 191 extern long _Qp_qtox(long double *); 192 extern unsigned long _Qp_qtoux(long double *); 193 194 extern void _Qp_itoq(long double *, int); 195 extern void _Qp_uitoq(long double *, unsigned int); 196 extern void _Qp_xtoq(long double *, long); 197 extern void _Qp_uxtoq(long double *, unsigned long); 198 199 extern int _Qp_cmp(long double *, long double *); 200 extern int _Qp_cmpe(long double *, long double *); 201 extern int _Qp_feq(long double *, long double *); 202 extern int _Qp_fge(long double *, long double *); 203 extern int _Qp_fgt(long double *, long double *); 204 extern int _Qp_fle(long double*, long double *); 205 extern int _Qp_flt(long double *, long double *); 206 
extern int _Qp_fne(long double *, long double *); 207 208 /* Not implemented yet */ 209 extern void _Qp_sqrt(long double *, long double *); 168 210 169 211 #endif -
uspace/lib/softfloat/include/sub.h
rb7ee0369 rd3e241a 1 1 /* 2 2 * Copyright (c) 2005 Josef Cejka 3 * Copyright (c) 2011 Petr Koupy 3 4 * All rights reserved. 4 5 * … … 30 31 * @{ 31 32 */ 32 /** @file 33 /** @file Substraction functions. 33 34 */ 34 35 … … 38 39 extern float32 subFloat32(float32, float32); 39 40 extern float64 subFloat64(float64, float64); 41 extern float128 subFloat128(float128, float128); 40 42 41 43 #endif
Note:
See TracChangeset
for help on using the changeset viewer.