1 files changed, 143 insertions, 0 deletions
diff --git a/source/luametatex/source/libraries/softposit/source/p16_sqrt.c b/source/luametatex/source/libraries/softposit/source/p16_sqrt.c
new file mode 100644
index 000000000..24d33f9c0
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/p16_sqrt.c
@@ -0,0 +1,143 @@
+
+/*============================================================================
+
+This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane) and John Gustafson.
+
+Copyright 2017, 2018 A*STAR.  All rights reserved.
+
+This C source file was based on SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California.  All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include "platform.h"
+#include "internals.h"
+
+extern const uint_fast16_t softposit_approxRecipSqrt0[]; // defined elsewhere; p16_sqrt reads entries 0..15 as the base value of its 1/sqrt estimate
+extern const uint_fast16_t softposit_approxRecipSqrt1[]; // defined elsewhere; p16_sqrt reads entries 0..15 as the slope term subtracted from that base
+
+posit16_t p16_sqrt( posit16_t pA ) { // Square root of a posit16: NaR if the sign bit is set, 0 for 0, otherwise the rounded root.
+    // Outline: decode regime/exponent, estimate 1/sqrt from two tables, refine it once, multiply by the input fraction, correct, round.
+    union ui16_p16 uA; // gives access to the posit's raw 16-bit pattern through .ui
+    uint_fast16_t expA, fracA, index, r0, shift, sigma0, uiA, uiZ;
+    uint_fast32_t eSqrR0, fracZ, negRem, recipSqrt, shiftedFracZ;
+    int_fast16_t kZ;
+    bool bitNPlusOne;
+
+    uA.p = pA;
+    uiA = uA.ui;
+
+    // If the sign bit (bit 15) is set, return NaR (bit pattern 0x8000).
+    if (uiA>>15) {
+        uA.ui = 0x8000;
+        return uA.p;
+    }
+    // If the argument is zero, return zero.
+    if (uiA==0) {
+        uA.ui = 0;
+        return uA.p;
+    }
+    // Compute the square root. Here, kZ is the net power-of-2 scaling of the result.
+    // Decode the regime and exponent bit; scale the input to be in the range 1 to 4:
+	if (uiA >> 14) { // bit 14 set (bit 15 is known to be clear here): the regime is a run of 1 bits
+		kZ = -1; // a run of m ones ends with kZ = m - 1
+		while (uiA & 0x4000) {
+			kZ++;
+			uiA= (uiA<<1) & 0xFFFF; // shift the run out, keeping the value within 16 bits
+		}
+	}
+	else { // bit 14 clear: the regime is a run of 0 bits
+		kZ = 0; // a run of m zeros ends with kZ = -m
+		while (!(uiA & 0x4000)) { // terminates because uiA is nonzero at this point
+			kZ--;
+			uiA= (uiA<<1) & 0xFFFF;
+		}
+
+	}
+	uiA &= 0x3fff; // keep bits 13..0: the exponent bit (bit 13) and the fraction bits below it
+	expA = 1 - (uiA >> 13); // inverted exponent bit: 1 when bit 13 is clear, 0 when it is set
+	fracA = (uiA | 0x2000) >> 1; // force bit 13 to 1 (apparently serving as the hidden bit), then halve
+
+	// Use table look-up of first four bits for piecewise linear approx. of 1/sqrt:
+	index = ((fracA >> 8) & 0xE) + expA; // even offset 0..14 from fracA bits 11..9, plus expA (0 or 1): index is 0..15
+	// r0 = table base value minus a slope term scaled by the low 9 bits of fracA:
+	r0 = softposit_approxRecipSqrt0[index]
+		- (((uint_fast32_t) softposit_approxRecipSqrt1[index]
+			* (fracA & 0x1FF)) >> 13);
+	// Use Newton-Raphson refinement to get more accuracy for 1/sqrt:
+	eSqrR0 = ((uint_fast32_t) r0 * r0) >> 1;
+
+	if (expA) eSqrR0 >>= 1; // one extra halving when the input's exponent bit was clear
+	sigma0 = 0xFFFF ^ (0xFFFF & (((uint64_t)eSqrR0 * (uint64_t)fracA) >> 18)); // 16-bit one's complement of (eSqrR0 * fracA) >> 18, with the product formed in 64 bits
+	recipSqrt = ((uint_fast32_t) r0 << 2) + (((uint_fast32_t) r0 * sigma0) >> 23);
+
+	// We need 17 bits of accuracy for posit16 square root approximation.
+	// Multiplying 16 bits and 18 bits needs 64-bit scratch before the right shift:
+	fracZ = (((uint_fast64_t) fracA) * recipSqrt) >> 13;
+
+	// Figure out the regime and the resulting right shift of the fraction:
+	if (kZ < 0) {
+		shift = (-1 - kZ) >> 1;
+		uiZ = 0x2000 >> shift; // a single 1 bit at position 13 - shift, with only zeros above it
+	}
+	else {
+		shift = kZ >> 1;
+		uiZ = 0x7fff - (0x7FFF >> (shift + 1)); // shift + 1 one bits starting at bit 14, zeros below
+	}
+	// Set the exponent bit in the answer, if it is nonzero:
+	if (kZ & 1) uiZ |= (0x1000 >> shift);
+
+	// Right-shift fraction bits, accounting for 1 <= a < 2 versus 2 <= a < 4:
+	fracZ = fracZ >> (expA + shift);
+
+	// Trick for eliminating off-by-one cases that only uses one multiply:
+	fracZ++;
+	if (!(fracZ & 7)) { // only examined when the low three bits are all zero after the increment
+		shiftedFracZ = fracZ >> 1;
+		negRem = (shiftedFracZ * shiftedFracZ) & 0x3FFFF; // low 18 bits of the square of fracZ / 2
+		if (negRem & 0x20000) { // bit 17 of that square is set: set the lowest bit of fracZ
+			fracZ |= 1;
+		} else {
+			if (negRem) fracZ--; // otherwise, any nonzero residue steps fracZ back down by one
+		}
+	}
+	// Strip off the hidden bit and round-to-nearest using last 4 bits.
+	fracZ -= (0x10000 >> shift);
+	bitNPlusOne = (fracZ >> 3) & 1; // first bit below those kept by the final >> 4
+	if (bitNPlusOne) {
+		if (((fracZ >> 4) & 1) | (fracZ & 7)) fracZ += 0x10; // round up when above half, or on an exact tie if the kept LSB (bit 4) is 1
+	}
+	// Assemble the result and return it.
+	uA.ui = uiZ | (fracZ >> 4); // regime/exponent bits OR-ed with the rounded fraction bits
+	return uA.p;
+
+}