00001 /* 00002 * Copyright (C) 2005 Josef Cejka 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * - Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * - Redistributions in binary form must reproduce the above copyright 00012 * notice, this list of conditions and the following disclaimer in the 00013 * documentation and/or other materials provided with the distribution. 00014 * - The name of the author may not be used to endorse or promote products 00015 * derived from this software without specific prior written permission. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 00018 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00019 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 00020 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 00021 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 00022 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00023 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00024 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00025 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 00026 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00027 */ 00028 00035 #include<sftypes.h> 00036 #include<add.h> 00037 #include<comparison.h> 00038 00041 float32 addFloat32(float32 a, float32 b) 00042 { 00043 int expdiff; 00044 uint32_t exp1, exp2,frac1, frac2; 00045 00046 expdiff = a.parts.exp - b.parts.exp; 00047 if (expdiff < 0) { 00048 if (isFloat32NaN(b)) { 00049 /* TODO: fix SigNaN */ 00050 if (isFloat32SigNaN(b)) { 00051 }; 00052 00053 return b; 00054 }; 00055 00056 if (b.parts.exp == FLOAT32_MAX_EXPONENT) { 00057 return b; 00058 } 00059 00060 frac1 = b.parts.fraction; 00061 exp1 = b.parts.exp; 00062 frac2 = a.parts.fraction; 00063 exp2 = a.parts.exp; 00064 expdiff *= -1; 00065 } else { 00066 if ((isFloat32NaN(a)) || (isFloat32NaN(b))) { 00067 /* TODO: fix SigNaN */ 00068 if (isFloat32SigNaN(a) || isFloat32SigNaN(b)) { 00069 }; 00070 return (isFloat32NaN(a)?a:b); 00071 }; 00072 00073 if (a.parts.exp == FLOAT32_MAX_EXPONENT) { 00074 return a; 00075 } 00076 00077 frac1 = a.parts.fraction; 00078 exp1 = a.parts.exp; 00079 frac2 = b.parts.fraction; 00080 exp2 = b.parts.exp; 00081 }; 00082 00083 if (exp1 == 0) { 00084 /* both are denormalized */ 00085 frac1 += frac2; 00086 if (frac1 & FLOAT32_HIDDEN_BIT_MASK ) { 00087 /* result is not denormalized */ 00088 a.parts.exp = 1; 00089 }; 00090 a.parts.fraction = frac1; 00091 return a; 00092 }; 00093 00094 frac1 |= FLOAT32_HIDDEN_BIT_MASK; /* add hidden bit */ 00095 00096 if (exp2 == 0) { 00097 /* second operand is denormalized */ 00098 --expdiff; 00099 } else { 00100 /* add hidden bit to second operand */ 00101 frac2 |= FLOAT32_HIDDEN_BIT_MASK; 00102 }; 00103 00104 /* create some space for rounding */ 00105 frac1 <<= 6; 00106 frac2 <<= 6; 00107 00108 if (expdiff < (FLOAT32_FRACTION_SIZE + 2) ) { 00109 frac2 >>= expdiff; 00110 frac1 += frac2; 00111 } else { 00112 a.parts.exp = exp1; 00113 a.parts.fraction = (frac1 >> 6) & (~(FLOAT32_HIDDEN_BIT_MASK)); 00114 return a; 00115 } 00116 00117 if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7) ) { 00118 ++exp1; 00119 frac1 >>= 1; 00120 }; 00121 00122 /* rounding - if first bit after fraction is set then round up */ 00123 frac1 += (0x1 << 5); 00124 00125 if (frac1 & (FLOAT32_HIDDEN_BIT_MASK << 7)) { 00126 /* rounding overflow */ 00127 ++exp1; 00128 frac1 >>= 1; 00129 }; 00130 00131 00132 if ((exp1 == FLOAT32_MAX_EXPONENT ) || (exp2 > exp1)) { 00133 /* overflow - set infinity as result */ 00134 a.parts.exp = FLOAT32_MAX_EXPONENT; 00135 a.parts.fraction = 0; 00136 return a; 00137 } 00138 00139 a.parts.exp = exp1; 00140 00141 /*Clear hidden bit and shift */ 00142 a.parts.fraction = ((frac1 >> 6) & (~FLOAT32_HIDDEN_BIT_MASK)) ; 00143 return a; 00144 } 00145 00146 00149 float64 addFloat64(float64 a, float64 b) 00150 { 00151 int expdiff; 00152 uint32_t exp1, exp2; 00153 uint64_t frac1, frac2; 00154 00155 expdiff = ((int )a.parts.exp) - b.parts.exp; 00156 if (expdiff < 0) { 00157 if (isFloat64NaN(b)) { 00158 /* TODO: fix SigNaN */ 00159 if (isFloat64SigNaN(b)) { 00160 }; 00161 00162 return b; 00163 }; 00164 00165 /* b is infinity and a not */ 00166 if (b.parts.exp == FLOAT64_MAX_EXPONENT ) { 00167 return b; 00168 } 00169 00170 frac1 = b.parts.fraction; 00171 exp1 = b.parts.exp; 00172 frac2 = a.parts.fraction; 00173 exp2 = a.parts.exp; 00174 expdiff *= -1; 00175 } else { 00176 if (isFloat64NaN(a)) { 00177 /* TODO: fix SigNaN */ 00178 if (isFloat64SigNaN(a) || isFloat64SigNaN(b)) { 00179 }; 00180 return a; 00181 }; 00182 00183 /* a is infinity and b not */ 00184 if (a.parts.exp == FLOAT64_MAX_EXPONENT ) { 00185 return a; 00186 } 00187 00188 frac1 = a.parts.fraction; 00189 exp1 = a.parts.exp; 00190 frac2 = b.parts.fraction; 00191 exp2 = b.parts.exp; 00192 }; 00193 00194 if (exp1 == 0) { 00195 /* both are denormalized */ 00196 frac1 += frac2; 00197 if (frac1 & FLOAT64_HIDDEN_BIT_MASK) { 00198 /* result is not denormalized */ 00199 a.parts.exp = 1; 00200 }; 00201 a.parts.fraction = frac1; 00202 return a; 00203 }; 00204 00205 /* add hidden bit - frac1 is sure not denormalized */ 00206 frac1 |= FLOAT64_HIDDEN_BIT_MASK; 00207 00208 /* second operand ... */ 00209 if (exp2 == 0) { 00210 /* ... is denormalized */ 00211 --expdiff; 00212 } else { 00213 /* is not denormalized */ 00214 frac2 |= FLOAT64_HIDDEN_BIT_MASK; 00215 }; 00216 00217 /* create some space for rounding */ 00218 frac1 <<= 6; 00219 frac2 <<= 6; 00220 00221 if (expdiff < (FLOAT64_FRACTION_SIZE + 2) ) { 00222 frac2 >>= expdiff; 00223 frac1 += frac2; 00224 } else { 00225 a.parts.exp = exp1; 00226 a.parts.fraction = (frac1 >> 6) & (~(FLOAT64_HIDDEN_BIT_MASK)); 00227 return a; 00228 } 00229 00230 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7) ) { 00231 ++exp1; 00232 frac1 >>= 1; 00233 }; 00234 00235 /* rounding - if first bit after fraction is set then round up */ 00236 frac1 += (0x1 << 5); 00237 00238 if (frac1 & (FLOAT64_HIDDEN_BIT_MASK << 7)) { 00239 /* rounding overflow */ 00240 ++exp1; 00241 frac1 >>= 1; 00242 }; 00243 00244 if ((exp1 == FLOAT64_MAX_EXPONENT ) || (exp2 > exp1)) { 00245 /* overflow - set infinity as result */ 00246 a.parts.exp = FLOAT64_MAX_EXPONENT; 00247 a.parts.fraction = 0; 00248 return a; 00249 } 00250 00251 a.parts.exp = exp1; 00252 /*Clear hidden bit and shift */ 00253 a.parts.fraction = ( (frac1 >> 6 ) & (~FLOAT64_HIDDEN_BIT_MASK)); 00254 00255 return a; 00256 } 00257