summaryrefslogtreecommitdiff
path: root/source/luametatex/source/libraries/softposit/source/p16_sqrt.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/libraries/softposit/source/p16_sqrt.c')
-rw-r--r--source/luametatex/source/libraries/softposit/source/p16_sqrt.c143
1 files changed, 143 insertions, 0 deletions
diff --git a/source/luametatex/source/libraries/softposit/source/p16_sqrt.c b/source/luametatex/source/libraries/softposit/source/p16_sqrt.c
new file mode 100644
index 000000000..24d33f9c0
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/p16_sqrt.c
@@ -0,0 +1,143 @@
+
+/*============================================================================
+
+This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane) and John Gustafson.
+
+Copyright 2017, 2018 A*STAR. All rights reserved.
+
+This C source file was based on SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+ this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+ this list of conditions, and the following disclaimer in the documentation
+ and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+ be used to endorse or promote products derived from this software without
+ specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include "platform.h"
+#include "internals.h"
+
+extern const uint_fast16_t softposit_approxRecipSqrt0[];
+extern const uint_fast16_t softposit_approxRecipSqrt1[];
+
+posit16_t p16_sqrt( posit16_t pA ) {
+
+ union ui16_p16 uA;
+ uint_fast16_t expA, fracA, index, r0, shift, sigma0, uiA, uiZ;
+ uint_fast32_t eSqrR0, fracZ, negRem, recipSqrt, shiftedFracZ;
+ int_fast16_t kZ;
+ bool bitNPlusOne;
+
+ uA.p = pA;
+ uiA = uA.ui;
+
+ // If sign bit is set, return NaR.
+ if (uiA>>15) {
+ uA.ui = 0x8000;
+ return uA.p;
+ }
+ // If the argument is zero, return zero.
+ if (uiA==0) {
+ uA.ui = 0;
+ return uA.p;
+ }
+ // Compute the square root. Here, kZ is the net power-of-2 scaling of the result.
+ // Decode the regime and exponent bit; scale the input to be in the range 1 to 4:
+ if (uiA >> 14) {
+ kZ = -1;
+ while (uiA & 0x4000) {
+ kZ++;
+ uiA= (uiA<<1) & 0xFFFF;
+ }
+ }
+ else {
+ kZ = 0;
+ while (!(uiA & 0x4000)) {
+ kZ--;
+ uiA= (uiA<<1) & 0xFFFF;
+ }
+
+ }
+ uiA &= 0x3fff;
+ expA = 1 - (uiA >> 13);
+ fracA = (uiA | 0x2000) >> 1;
+
+ // Use table look-up of first four bits for piecewise linear approx. of 1/sqrt:
+ index = ((fracA >> 8) & 0xE) + expA;
+
+ r0 = softposit_approxRecipSqrt0[index]
+ - (((uint_fast32_t) softposit_approxRecipSqrt1[index]
+ * (fracA & 0x1FF)) >> 13);
+ // Use Newton-Raphson refinement to get more accuracy for 1/sqrt:
+ eSqrR0 = ((uint_fast32_t) r0 * r0) >> 1;
+
+ if (expA) eSqrR0 >>= 1;
+ sigma0 = 0xFFFF ^ (0xFFFF & (((uint64_t)eSqrR0 * (uint64_t)fracA) >> 18));//~(uint_fast16_t) ((eSqrR0 * fracA) >> 18);
+ recipSqrt = ((uint_fast32_t) r0 << 2) + (((uint_fast32_t) r0 * sigma0) >> 23);
+
+ // We need 17 bits of accuracy for posit16 square root approximation.
+ // Multiplying 16 bits and 18 bits needs 64-bit scratch before the right shift:
+ fracZ = (((uint_fast64_t) fracA) * recipSqrt) >> 13;
+
+ // Figure out the regime and the resulting right shift of the fraction:
+ if (kZ < 0) {
+ shift = (-1 - kZ) >> 1;
+ uiZ = 0x2000 >> shift;
+ }
+ else {
+ shift = kZ >> 1;
+ uiZ = 0x7fff - (0x7FFF >> (shift + 1));
+ }
+ // Set the exponent bit in the answer, if it is nonzero:
+ if (kZ & 1) uiZ |= (0x1000 >> shift);
+
+ // Right-shift fraction bits, accounting for 1 <= a < 2 versus 2 <= a < 4:
+ fracZ = fracZ >> (expA + shift);
+
+ // Trick for eliminating off-by-one cases that only uses one multiply:
+ fracZ++;
+ if (!(fracZ & 7)) {
+ shiftedFracZ = fracZ >> 1;
+ negRem = (shiftedFracZ * shiftedFracZ) & 0x3FFFF;
+ if (negRem & 0x20000) {
+ fracZ |= 1;
+ } else {
+ if (negRem) fracZ--;
+ }
+ }
+ // Strip off the hidden bit and round-to-nearest using last 4 bits.
+ fracZ -= (0x10000 >> shift);
+ bitNPlusOne = (fracZ >> 3) & 1;
+ if (bitNPlusOne) {
+ if (((fracZ >> 4) & 1) | (fracZ & 7)) fracZ += 0x10;
+ }
+ // Assemble the result and return it.
+ uA.ui = uiZ | (fracZ >> 4);
+ return uA.p;
+
+}