diff options
Diffstat (limited to 'source/luametatex/source/libraries/softposit/source/p32_sqrt.c')
-rw-r--r-- | source/luametatex/source/libraries/softposit/source/p32_sqrt.c | 137 |
1 files changed, 137 insertions, 0 deletions
diff --git a/source/luametatex/source/libraries/softposit/source/p32_sqrt.c b/source/luametatex/source/libraries/softposit/source/p32_sqrt.c new file mode 100644 index 000000000..e02ec5fd1 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_sqrt.c @@ -0,0 +1,137 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +extern const uint_fast16_t softposit_approxRecipSqrt0[]; +extern const uint_fast16_t softposit_approxRecipSqrt1[]; + + + +posit32_t p32_sqrt( posit32_t pA ) { + + union ui32_p32 uA; + uint_fast32_t index, r0, shift, fracA, expZ, expA; + uint_fast32_t mask, uiA, uiZ; + uint_fast64_t eSqrR0, fracZ, negRem, recipSqrt, shiftedFracZ, sigma0, sqrSigma0; + int_fast32_t eps, shiftZ; + + uA.p = pA; + uiA = uA.ui; + + // If NaR or a negative number, return NaR. + if (uiA & 0x80000000) { + uA.ui = 0x80000000; + return uA.p; + } + // If the argument is zero, return zero. + else if (!uiA) { + return uA.p; + } + // Compute the square root; shiftZ is the power-of-2 scaling of the result. + // Decode regime and exponent; scale the input to be in the range 1 to 4: + if (uiA & 0x40000000) { + shiftZ = -2; + while (uiA & 0x40000000) { + shiftZ += 2; + uiA = (uiA << 1) & 0xFFFFFFFF; + } + } else { + shiftZ = 0; + while (!(uiA & 0x40000000)) { + shiftZ -= 2; + uiA = (uiA << 1) & 0xFFFFFFFF; + } + } + + uiA &= 0x3FFFFFFF; + expA = (uiA >> 28); + shiftZ += (expA >> 1); + expA = (0x1 ^ (expA & 0x1)); + uiA &= 0x0FFFFFFF; + fracA = (uiA | 0x10000000); + + // Use table look-up of first 4 bits for piecewise linear approx. of 1/sqrt: + index = ((fracA >> 24) & 0xE) + expA; + eps = ((fracA >> 9) & 0xFFFF); + r0 = softposit_approxRecipSqrt0[index] + - (((uint_fast32_t) softposit_approxRecipSqrt1[index] * eps) >> 20); + + // Use Newton-Raphson refinement to get 33 bits of accuracy for 1/sqrt: + eSqrR0 = (uint_fast64_t) r0 * r0; + if (!expA) eSqrR0 <<= 1; + sigma0 = 0xFFFFFFFF & (0xFFFFFFFF ^ ((eSqrR0 * (uint64_t)fracA) >> 20)); + recipSqrt = ((uint_fast64_t) r0 << 20) + (((uint_fast64_t) r0 * sigma0) >> 21); + + sqrSigma0 = ((sigma0 * sigma0) >> 35); + recipSqrt += ( (( recipSqrt + (recipSqrt >> 2) - ((uint_fast64_t)r0 << 19) ) * sqrSigma0) >> 46 ); + + + fracZ = (((uint_fast64_t) fracA) * recipSqrt) >> 31; + if (expA) fracZ = (fracZ >> 1); + + // Find the exponent of Z and encode the regime bits. + expZ = shiftZ & 0x3; + if (shiftZ < 0) { + shift = (-1 - shiftZ) >> 2; + uiZ = 0x20000000 >> shift; + } else { + shift = shiftZ >> 2; + uiZ = 0x7FFFFFFF - (0x3FFFFFFF >> shift); + } + + // Trick for eliminating off-by-one cases that only uses one multiply: + fracZ++; + if (!(fracZ & 0xF)) { + shiftedFracZ = fracZ >> 1; + negRem = (shiftedFracZ * shiftedFracZ) & 0x1FFFFFFFF; + if (negRem & 0x100000000) { + fracZ |= 1; + } else { + if (negRem) fracZ--; + } + } + // Strip off the hidden bit and round-to-nearest using last shift+5 bits. + fracZ &= 0xFFFFFFFF; + mask = (1 << (4 + shift)); + if (mask & fracZ) { + if ( ((mask - 1) & fracZ) | ((mask << 1) & fracZ) ) fracZ += (mask << 1); + } + // Assemble the result and return it. + uA.ui = uiZ | (expZ << (27 - shift)) | (fracZ >> (5 + shift)); + return uA.p; +} |