00001 /* 00002 * Copyright (C) 2005 Josef Cejka 00003 * All rights reserved. 00004 * 00005 * Redistribution and use in source and binary forms, with or without 00006 * modification, are permitted provided that the following conditions 00007 * are met: 00008 * 00009 * - Redistributions of source code must retain the above copyright 00010 * notice, this list of conditions and the following disclaimer. 00011 * - Redistributions in binary form must reproduce the above copyright 00012 * notice, this list of conditions and the following disclaimer in the 00013 * documentation and/or other materials provided with the distribution. 00014 * - The name of the author may not be used to endorse or promote products 00015 * derived from this software without specific prior written permission. 00016 * 00017 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 00018 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 00019 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 00020 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 00021 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 00022 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 00023 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 00024 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 00025 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 00026 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 00027 */ 00028 00035 #include "sftypes.h" 00036 #include "conversion.h" 00037 #include "comparison.h" 00038 #include "common.h" 00039 00040 float64 convertFloat32ToFloat64(float32 a) 00041 { 00042 float64 result; 00043 uint64_t frac; 00044 00045 result.parts.sign = a.parts.sign; 00046 result.parts.fraction = a.parts.fraction; 00047 result.parts.fraction <<= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE ); 00048 00049 if ((isFloat32Infinity(a))||(isFloat32NaN(a))) { 00050 result.parts.exp = 0x7FF; 00051 /* TODO; check if its correct for SigNaNs*/ 00052 return result; 00053 }; 00054 00055 result.parts.exp = a.parts.exp + ( (int)FLOAT64_BIAS - FLOAT32_BIAS ); 00056 if (a.parts.exp == 0) { 00057 /* normalize denormalized numbers */ 00058 00059 if (result.parts.fraction == 0ll) { /* fix zero */ 00060 result.parts.exp = 0ll; 00061 return result; 00062 } 00063 00064 frac = result.parts.fraction; 00065 00066 while (!(frac & (0x10000000000000ll))) { 00067 frac <<= 1; 00068 --result.parts.exp; 00069 }; 00070 00071 ++result.parts.exp; 00072 result.parts.fraction = frac; 00073 }; 00074 00075 return result; 00076 00077 } 00078 00079 float32 convertFloat64ToFloat32(float64 a) 00080 { 00081 float32 result; 00082 int32_t exp; 00083 uint64_t frac; 00084 00085 result.parts.sign = a.parts.sign; 00086 00087 if (isFloat64NaN(a)) { 00088 00089 result.parts.exp = 0xFF; 00090 00091 if (isFloat64SigNaN(a)) { 00092 result.parts.fraction = 0x400000; /* set first bit of fraction nonzero */ 00093 return result; 00094 } 00095 00096 result.parts.fraction = 0x1; /* fraction nonzero but its first bit is zero */ 00097 return result; 00098 }; 00099 00100 if (isFloat64Infinity(a)) { 00101 result.parts.fraction = 0; 00102 result.parts.exp = 0xFF; 00103 return result; 00104 }; 00105 00106 exp = (int)a.parts.exp - FLOAT64_BIAS + FLOAT32_BIAS; 00107 00108 if (exp >= 0xFF) { 00109 /*FIXME: overflow*/ 00110 result.parts.fraction = 0; 00111 result.parts.exp = 0xFF; 00112 return result; 00113 00114 } else if (exp <= 0 ) { 00115 00116 /* underflow or denormalized */ 00117 00118 result.parts.exp = 0; 00119 00120 exp *= -1; 00121 if (exp > FLOAT32_FRACTION_SIZE ) { 00122 /* FIXME: underflow */ 00123 result.parts.fraction = 0; 00124 return result; 00125 }; 00126 00127 /* denormalized */ 00128 00129 frac = a.parts.fraction; 00130 frac |= 0x10000000000000ll; /* denormalize and set hidden bit */ 00131 00132 frac >>= (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE + 1); 00133 00134 while (exp > 0) { 00135 --exp; 00136 frac >>= 1; 00137 }; 00138 result.parts.fraction = frac; 00139 00140 return result; 00141 }; 00142 00143 result.parts.exp = exp; 00144 result.parts.fraction = a.parts.fraction >> (FLOAT64_FRACTION_SIZE - FLOAT32_FRACTION_SIZE); 00145 return result; 00146 } 00147 00148 00153 static uint32_t _float32_to_uint32_helper(float32 a) 00154 { 00155 uint32_t frac; 00156 00157 if (a.parts.exp < FLOAT32_BIAS) { 00158 /*TODO: rounding*/ 00159 return 0; 00160 } 00161 00162 frac = a.parts.fraction; 00163 00164 frac |= FLOAT32_HIDDEN_BIT_MASK; 00165 /* shift fraction to left so hidden bit will be the most significant bit */ 00166 frac <<= 32 - FLOAT32_FRACTION_SIZE - 1; 00167 00168 frac >>= 32 - (a.parts.exp - FLOAT32_BIAS) - 1; 00169 if ((a.parts.sign == 1) && (frac != 0)) { 00170 frac = ~frac; 00171 ++frac; 00172 } 00173 00174 return frac; 00175 } 00176 00177 /* Convert float to unsigned int32 00178 * FIXME: Im not sure what to return if overflow/underflow happens 00179 * - now its the biggest or the smallest int 00180 */ 00181 uint32_t float32_to_uint32(float32 a) 00182 { 00183 if (isFloat32NaN(a)) { 00184 return MAX_UINT32; 00185 } 00186 00187 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) { 00188 if (a.parts.sign) { 00189 return MIN_UINT32; 00190 } 00191 return MAX_UINT32; 00192 } 00193 00194 return _float32_to_uint32_helper(a); 00195 } 00196 00197 /* Convert float to signed int32 00198 * FIXME: Im not sure what to return if overflow/underflow happens 00199 * - now its the biggest or the smallest int 00200 */ 00201 int32_t float32_to_int32(float32 a) 00202 { 00203 if (isFloat32NaN(a)) { 00204 return MAX_INT32; 00205 } 00206 00207 if (isFloat32Infinity(a) || (a.parts.exp >= (32 + FLOAT32_BIAS))) { 00208 if (a.parts.sign) { 00209 return MIN_INT32; 00210 } 00211 return MAX_INT32; 00212 } 00213 return _float32_to_uint32_helper(a); 00214 } 00215 00216 00221 static uint64_t _float64_to_uint64_helper(float64 a) 00222 { 00223 uint64_t frac; 00224 00225 if (a.parts.exp < FLOAT64_BIAS) { 00226 /*TODO: rounding*/ 00227 return 0; 00228 } 00229 00230 frac = a.parts.fraction; 00231 00232 frac |= FLOAT64_HIDDEN_BIT_MASK; 00233 /* shift fraction to left so hidden bit will be the most significant bit */ 00234 frac <<= 64 - FLOAT64_FRACTION_SIZE - 1; 00235 00236 frac >>= 64 - (a.parts.exp - FLOAT64_BIAS) - 1; 00237 if ((a.parts.sign == 1) && (frac != 0)) { 00238 frac = ~frac; 00239 ++frac; 00240 } 00241 00242 return frac; 00243 } 00244 00245 /* Convert float to unsigned int64 00246 * FIXME: Im not sure what to return if overflow/underflow happens 00247 * - now its the biggest or the smallest int 00248 */ 00249 uint64_t float64_to_uint64(float64 a) 00250 { 00251 if (isFloat64NaN(a)) { 00252 return MAX_UINT64; 00253 } 00254 00255 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 00256 if (a.parts.sign) { 00257 return MIN_UINT64; 00258 } 00259 return MAX_UINT64; 00260 } 00261 00262 return _float64_to_uint64_helper(a); 00263 } 00264 00265 /* Convert float to signed int64 00266 * FIXME: Im not sure what to return if overflow/underflow happens 00267 * - now its the biggest or the smallest int 00268 */ 00269 int64_t float64_to_int64(float64 a) 00270 { 00271 if (isFloat64NaN(a)) { 00272 return MAX_INT64; 00273 } 00274 00275 if (isFloat64Infinity(a) || (a.parts.exp >= (64 + FLOAT64_BIAS))) { 00276 if (a.parts.sign) { 00277 return MIN_INT64; 00278 } 00279 return MAX_INT64; 00280 } 00281 return _float64_to_uint64_helper(a); 00282 } 00283 00284 00285 00286 00287 00292 static uint64_t _float32_to_uint64_helper(float32 a) 00293 { 00294 uint64_t frac; 00295 00296 if (a.parts.exp < FLOAT32_BIAS) { 00297 /*TODO: rounding*/ 00298 return 0; 00299 } 00300 00301 frac = a.parts.fraction; 00302 00303 frac |= FLOAT32_HIDDEN_BIT_MASK; 00304 /* shift fraction to left so hidden bit will be the most significant bit */ 00305 frac <<= 64 - FLOAT32_FRACTION_SIZE - 1; 00306 00307 frac >>= 64 - (a.parts.exp - FLOAT32_BIAS) - 1; 00308 if ((a.parts.sign == 1) && (frac != 0)) { 00309 frac = ~frac; 00310 ++frac; 00311 } 00312 00313 return frac; 00314 } 00315 00316 /* Convert float to unsigned int64 00317 * FIXME: Im not sure what to return if overflow/underflow happens 00318 * - now its the biggest or the smallest int 00319 */ 00320 uint64_t float32_to_uint64(float32 a) 00321 { 00322 if (isFloat32NaN(a)) { 00323 return MAX_UINT64; 00324 } 00325 00326 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) { 00327 if (a.parts.sign) { 00328 return MIN_UINT64; 00329 } 00330 return MAX_UINT64; 00331 } 00332 00333 return _float32_to_uint64_helper(a); 00334 } 00335 00336 /* Convert float to signed int64 00337 * FIXME: Im not sure what to return if overflow/underflow happens 00338 * - now its the biggest or the smallest int 00339 */ 00340 int64_t float32_to_int64(float32 a) 00341 { 00342 if (isFloat32NaN(a)) { 00343 return MAX_INT64; 00344 } 00345 00346 if (isFloat32Infinity(a) || (a.parts.exp >= (64 + FLOAT32_BIAS))) { 00347 if (a.parts.sign) { 00348 return (MIN_INT64); 00349 } 00350 return MAX_INT64; 00351 } 00352 return _float32_to_uint64_helper(a); 00353 } 00354 00355 00356 /* Convert float64 to unsigned int32 00357 * FIXME: Im not sure what to return if overflow/underflow happens 00358 * - now its the biggest or the smallest int 00359 */ 00360 uint32_t float64_to_uint32(float64 a) 00361 { 00362 if (isFloat64NaN(a)) { 00363 return MAX_UINT32; 00364 } 00365 00366 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 00367 if (a.parts.sign) { 00368 return MIN_UINT32; 00369 } 00370 return MAX_UINT32; 00371 } 00372 00373 return (uint32_t)_float64_to_uint64_helper(a); 00374 } 00375 00376 /* Convert float64 to signed int32 00377 * FIXME: Im not sure what to return if overflow/underflow happens 00378 * - now its the biggest or the smallest int 00379 */ 00380 int32_t float64_to_int32(float64 a) 00381 { 00382 if (isFloat64NaN(a)) { 00383 return MAX_INT32; 00384 } 00385 00386 if (isFloat64Infinity(a) || (a.parts.exp >= (32 + FLOAT64_BIAS))) { 00387 if (a.parts.sign) { 00388 return MIN_INT32; 00389 } 00390 return MAX_INT32; 00391 } 00392 return (int32_t)_float64_to_uint64_helper(a); 00393 } 00394 00399 float32 uint32_to_float32(uint32_t i) 00400 { 00401 int counter; 00402 int32_t exp; 00403 float32 result; 00404 00405 result.parts.sign = 0; 00406 result.parts.fraction = 0; 00407 00408 counter = countZeroes32(i); 00409 00410 exp = FLOAT32_BIAS + 32 - counter - 1; 00411 00412 if (counter == 32) { 00413 result.binary = 0; 00414 return result; 00415 } 00416 00417 if (counter > 0) { 00418 i <<= counter - 1; 00419 } else { 00420 i >>= 1; 00421 } 00422 00423 roundFloat32(&exp, &i); 00424 00425 result.parts.fraction = i >> 7; 00426 result.parts.exp = exp; 00427 00428 return result; 00429 } 00430 00431 float32 int32_to_float32(int32_t i) 00432 { 00433 float32 result; 00434 00435 if (i < 0) { 00436 result = uint32_to_float32((uint32_t)(-i)); 00437 } else { 00438 result = uint32_to_float32((uint32_t)i); 00439 } 00440 00441 result.parts.sign = i < 0; 00442 00443 return result; 00444 } 00445 00446 00447 float32 uint64_to_float32(uint64_t i) 00448 { 00449 int counter; 00450 int32_t exp; 00451 uint32_t j; 00452 float32 result; 00453 00454 result.parts.sign = 0; 00455 result.parts.fraction = 0; 00456 00457 counter = countZeroes64(i); 00458 00459 exp = FLOAT32_BIAS + 64 - counter - 1; 00460 00461 if (counter == 64) { 00462 result.binary = 0; 00463 return result; 00464 } 00465 00466 /* Shift all to the first 31 bits (31. will be hidden 1)*/ 00467 if (counter > 33) { 00468 i <<= counter - 1 - 32; 00469 } else { 00470 i >>= 1 + 32 - counter; 00471 } 00472 00473 j = (uint32_t)i; 00474 roundFloat32(&exp, &j); 00475 00476 result.parts.fraction = j >> 7; 00477 result.parts.exp = exp; 00478 return result; 00479 } 00480 00481 float32 int64_to_float32(int64_t i) 00482 { 00483 float32 result; 00484 00485 if (i < 0) { 00486 result = uint64_to_float32((uint64_t)(-i)); 00487 } else { 00488 result = uint64_to_float32((uint64_t)i); 00489 } 00490 00491 result.parts.sign = i < 0; 00492 00493 return result; 00494 } 00495 00500 float64 uint32_to_float64(uint32_t i) 00501 { 00502 int counter; 00503 int32_t exp; 00504 float64 result; 00505 uint64_t frac; 00506 00507 result.parts.sign = 0; 00508 result.parts.fraction = 0; 00509 00510 counter = countZeroes32(i); 00511 00512 exp = FLOAT64_BIAS + 32 - counter - 1; 00513 00514 if (counter == 32) { 00515 result.binary = 0; 00516 return result; 00517 } 00518 00519 frac = i; 00520 frac <<= counter + 32 - 1; 00521 00522 roundFloat64(&exp, &frac); 00523 00524 result.parts.fraction = frac >> 10; 00525 result.parts.exp = exp; 00526 00527 return result; 00528 } 00529 00530 float64 int32_to_float64(int32_t i) 00531 { 00532 float64 result; 00533 00534 if (i < 0) { 00535 result = uint32_to_float64((uint32_t)(-i)); 00536 } else { 00537 result = uint32_to_float64((uint32_t)i); 00538 } 00539 00540 result.parts.sign = i < 0; 00541 00542 return result; 00543 } 00544 00545 00546 float64 uint64_to_float64(uint64_t i) 00547 { 00548 int counter; 00549 int32_t exp; 00550 float64 result; 00551 00552 result.parts.sign = 0; 00553 result.parts.fraction = 0; 00554 00555 counter = countZeroes64(i); 00556 00557 exp = FLOAT64_BIAS + 64 - counter - 1; 00558 00559 if (counter == 64) { 00560 result.binary = 0; 00561 return result; 00562 } 00563 00564 if (counter > 0) { 00565 i <<= counter - 1; 00566 } else { 00567 i >>= 1; 00568 } 00569 00570 roundFloat64(&exp, &i); 00571 00572 result.parts.fraction = i >> 10; 00573 result.parts.exp = exp; 00574 return result; 00575 } 00576 00577 float64 int64_to_float64(int64_t i) 00578 { 00579 float64 result; 00580 00581 if (i < 0) { 00582 result = uint64_to_float64((uint64_t)(-i)); 00583 } else { 00584 result = uint64_to_float64((uint64_t)i); 00585 } 00586 00587 result.parts.sign = i < 0; 00588 00589 return result; 00590 } 00591