00001 /* 00002 * Copyright (C) 2005 Josef Cejka 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * - Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * - Redistributions in binary form must reproduce the above copyright 00012 * notice, this list of conditions and the following disclaimer in the 00013 * documentation and/or other materials provided with the distribution. 00014 * - The name of the author may not be used to endorse or promote products 00015 * derived from this software without specific prior written permission. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 00018 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00019 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 00020 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 00021 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 00022 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00023 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00024 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00025 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 00026 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00027 */ 00028 00035 #include<sftypes.h> 00036 #include<sub.h> 00037 #include<comparison.h> 00038 00041 float32 subFloat32(float32 a, float32 b) 00042 { 00043 int expdiff; 00044 uint32_t exp1, exp2, frac1, frac2; 00045 float32 result; 00046 00047 result.f = 0; 00048 00049 expdiff = a.parts.exp - b.parts.exp; 00050 if ((expdiff < 0 ) || ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) { 00051 if (isFloat32NaN(b)) { 00052 /* TODO: fix SigNaN */ 00053 if (isFloat32SigNaN(b)) { 00054 }; 00055 return b; 00056 }; 00057 00058 if (b.parts.exp == FLOAT32_MAX_EXPONENT) { 00059 b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */ 00060 return b; 00061 } 00062 00063 result.parts.sign = !a.parts.sign; 00064 00065 frac1 = b.parts.fraction; 00066 exp1 = b.parts.exp; 00067 frac2 = a.parts.fraction; 00068 exp2 = a.parts.exp; 00069 expdiff *= -1; 00070 } else { 00071 if (isFloat32NaN(a)) { 00072 /* TODO: fix SigNaN */ 00073 if (isFloat32SigNaN(a) || isFloat32SigNaN(b)) { 00074 }; 00075 return a; 00076 }; 00077 00078 if (a.parts.exp == FLOAT32_MAX_EXPONENT) { 00079 if (b.parts.exp == FLOAT32_MAX_EXPONENT) { 00080 /* inf - inf => nan */ 00081 /* TODO: fix exception */ 00082 result.binary = FLOAT32_NAN; 00083 return result; 00084 }; 00085 return a; 00086 } 00087 00088 result.parts.sign = a.parts.sign; 00089 00090 frac1 = a.parts.fraction; 00091 exp1 = a.parts.exp; 00092 frac2 = b.parts.fraction; 00093 exp2 = b.parts.exp; 00094 }; 00095 00096 if (exp1 == 0) { 00097 /* both are denormalized */ 00098 result.parts.fraction = frac1-frac2; 00099 if (result.parts.fraction > frac1) { 00100 /* TODO: underflow exception */ 00101 return result; 00102 }; 00103 result.parts.exp = 0; 00104 return result; 00105 }; 00106 00107 /* add hidden bit */ 00108 frac1 |= FLOAT32_HIDDEN_BIT_MASK; 00109 00110 if (exp2 == 0) { 00111 /* denormalized */ 00112 --expdiff; 00113 } else { 00114 /* normalized */ 00115 frac2 |= FLOAT32_HIDDEN_BIT_MASK; 00116 }; 00117 00118 /* create some space for rounding */ 00119 frac1 <<= 6; 00120 frac2 <<= 6; 00121 00122 if (expdiff > FLOAT32_FRACTION_SIZE + 1) { 00123 goto done; 00124 }; 00125 00126 frac1 = frac1 - (frac2 >> expdiff); 00127 done: 00128 /* TODO: find first nonzero digit and shift result and detect possibly underflow */ 00129 while ((exp1 > 0) && (!(frac1 & (FLOAT32_HIDDEN_BIT_MASK << 6 )))) { 00130 --exp1; 00131 frac1 <<= 1; 00132 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */ 00133 }; 00134 00135 /* rounding - if first bit after fraction is set then round up */ 00136 frac1 += 0x20; 00137 00138 if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) { 00139 ++exp1; 00140 frac1 >>= 1; 00141 }; 00142 00143 /*Clear hidden bit and shift */ 00144 result.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)); 00145 result.parts.exp = exp1; 00146 00147 return result; 00148 } 00149 00152 float64 subFloat64(float64 a, float64 b) 00153 { 00154 int expdiff; 00155 uint32_t exp1, exp2; 00156 uint64_t frac1, frac2; 00157 float64 result; 00158 00159 result.d = 0; 00160 00161 expdiff = a.parts.exp - b.parts.exp; 00162 if ((expdiff < 0 ) || ((expdiff == 0) && (a.parts.fraction < b.parts.fraction))) { 00163 if (isFloat64NaN(b)) { 00164 /* TODO: fix SigNaN */ 00165 if (isFloat64SigNaN(b)) { 00166 }; 00167 return b; 00168 }; 00169 00170 if (b.parts.exp == FLOAT64_MAX_EXPONENT) { 00171 b.parts.sign = !b.parts.sign; /* num -(+-inf) = -+inf */ 00172 return b; 00173 } 00174 00175 result.parts.sign = !a.parts.sign; 00176 00177 frac1 = b.parts.fraction; 00178 exp1 = b.parts.exp; 00179 frac2 = a.parts.fraction; 00180 exp2 = a.parts.exp; 00181 expdiff *= -1; 00182 } else { 00183 if (isFloat64NaN(a)) { 00184 /* TODO: fix SigNaN */ 00185 if (isFloat64SigNaN(a) || isFloat64SigNaN(b)) { 00186 }; 00187 return a; 00188 }; 00189 00190 if (a.parts.exp == FLOAT64_MAX_EXPONENT) { 00191 if (b.parts.exp == FLOAT64_MAX_EXPONENT) { 00192 /* inf - inf => nan */ 00193 /* TODO: fix exception */ 00194 result.binary = FLOAT64_NAN; 00195 return result; 00196 }; 00197 return a; 00198 } 00199 00200 result.parts.sign = a.parts.sign; 00201 00202 frac1 = a.parts.fraction; 00203 exp1 = a.parts.exp; 00204 frac2 = b.parts.fraction; 00205 exp2 = b.parts.exp; 00206 }; 00207 00208 if (exp1 == 0) { 00209 /* both are denormalized */ 00210 result.parts.fraction = frac1 - frac2; 00211 if (result.parts.fraction > frac1) { 00212 /* TODO: underflow exception */ 00213 return result; 00214 }; 00215 result.parts.exp = 0; 00216 return result; 00217 }; 00218 00219 /* add hidden bit */ 00220 frac1 |= FLOAT64_HIDDEN_BIT_MASK; 00221 00222 if (exp2 == 0) { 00223 /* denormalized */ 00224 --expdiff; 00225 } else { 00226 /* normalized */ 00227 frac2 |= FLOAT64_HIDDEN_BIT_MASK; 00228 }; 00229 00230 /* create some space for rounding */ 00231 frac1 <<= 6; 00232 frac2 <<= 6; 00233 00234 if (expdiff > FLOAT64_FRACTION_SIZE + 1) { 00235 goto done; 00236 }; 00237 00238 frac1 = frac1 - (frac2 >> expdiff); 00239 done: 00240 /* TODO: find first nonzero digit and shift result and detect possibly underflow */ 00241 while ((exp1 > 0) && (!(frac1 & (FLOAT64_HIDDEN_BIT_MASK << 6 )))) { 00242 --exp1; 00243 frac1 <<= 1; 00244 /* TODO: fix underflow - frac1 == 0 does not necessary means underflow... */ 00245 }; 00246 00247 /* rounding - if first bit after fraction is set then round up */ 00248 frac1 += 0x20; 00249 00250 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) { 00251 ++exp1; 00252 frac1 >>= 1; 00253 }; 00254 00255 /*Clear hidden bit and shift */ 00256 result.parts.fraction = ((frac1 >> 6) & (~FLOAT64_HIDDEN_BIT_MASK)); 00257 result.parts.exp = exp1; 00258 00259 return result; 00260 } 00261 00262