diff options
author | Hans Hagen <pragma@wxs.nl> | 2023-04-27 00:04:07 +0200 |
---|---|---|
committer | Context Git Mirror Bot <phg@phi-gamma.net> | 2023-04-27 00:04:07 +0200 |
commit | 6b25a7a970ba9553adf8077ef2eecb50a5d77818 (patch) | |
tree | f06e40077ba9e55af4bf6cab52313f79f8ea84a8 /source | |
parent | 657457ef4a08c1f000f272e00f654f4064cc37bd (diff) | |
download | context-6b25a7a970ba9553adf8077ef2eecb50a5d77818.tar.gz |
2023-04-26 23:31:00
Diffstat (limited to 'source')
230 files changed, 29168 insertions, 2355 deletions
diff --git a/source/luametatex/CMakeLists.txt b/source/luametatex/CMakeLists.txt index 639c9b140..2fffa9ecb 100644 --- a/source/luametatex/CMakeLists.txt +++ b/source/luametatex/CMakeLists.txt @@ -259,5 +259,6 @@ include(cmake/luaoptional.cmake) include(cmake/pplib.cmake) include(cmake/miniz.cmake) +include(cmake/softposit.cmake) include(cmake/luametatex.cmake) diff --git a/source/luametatex/cmake/luametatex.cmake b/source/luametatex/cmake/luametatex.cmake index eabef915b..e1ce40b63 100644 --- a/source/luametatex/cmake/luametatex.cmake +++ b/source/luametatex/cmake/luametatex.cmake @@ -23,6 +23,7 @@ target_link_libraries(luametatex pplib miniz + softposit ) if (LUAMETATEX_NOLDL) diff --git a/source/luametatex/cmake/mimalloc.cmake b/source/luametatex/cmake/mimalloc.cmake index 02992344e..ba8e2ed49 100644 --- a/source/luametatex/cmake/mimalloc.cmake +++ b/source/luametatex/cmake/mimalloc.cmake @@ -13,7 +13,7 @@ set(mimalloc_sources source/libraries/mimalloc/src/page.c source/libraries/mimalloc/src/random.c source/libraries/mimalloc/src/segment.c - source/libraries/mimalloc/src/segment-cache.c + source/libraries/mimalloc/src/segment-map.c source/libraries/mimalloc/src/stats.c source/libraries/mimalloc/src/prim/prim.c ) diff --git a/source/luametatex/cmake/mp.cmake b/source/luametatex/cmake/mp.cmake index 22680cdb7..4870178fd 100644 --- a/source/luametatex/cmake/mp.cmake +++ b/source/luametatex/cmake/mp.cmake @@ -6,6 +6,7 @@ set(mp_sources source/mp/mpc/mpmathdouble.c source/mp/mpc/mpmathbinary.c source/mp/mpc/mpmathdecimal.c + source/mp/mpc/mpmathposit.c source/libraries/decnumber/decContext.c source/libraries/decnumber/decNumber.c @@ -29,6 +30,7 @@ target_include_directories(mp PRIVATE source/libraries/decnumber source/utilities source/libraries/mimalloc/include + source/libraries/softposit/source/include ) target_compile_definitions(mp PUBLIC diff --git a/source/luametatex/cmake/softposit.cmake b/source/luametatex/cmake/softposit.cmake new file mode 100644 index 
000000000..2964464ba --- /dev/null +++ b/source/luametatex/cmake/softposit.cmake @@ -0,0 +1,126 @@ +set(softposit_sources + +# source/libraries/softposit/source/s_addMagsP8.c +# source/libraries/softposit/source/s_subMagsP8.c +# source/libraries/softposit/source/s_mulAddP8.c +# source/libraries/softposit/source/p8_add.c +# source/libraries/softposit/source/p8_sub.c +# source/libraries/softposit/source/p8_mul.c +# source/libraries/softposit/source/p8_div.c +# source/libraries/softposit/source/p8_sqrt.c +# source/libraries/softposit/source/p8_to_p16.c +# source/libraries/softposit/source/p8_to_p32.c +# source/libraries/softposit/source/p8_to_pX2.c +# source/libraries/softposit/source/p8_to_i32.c +# source/libraries/softposit/source/p8_to_i64.c +# source/libraries/softposit/source/p8_to_ui32.c +# source/libraries/softposit/source/p8_to_ui64.c +# source/libraries/softposit/source/p8_roundToInt.c +# source/libraries/softposit/source/p8_mulAdd.c +# source/libraries/softposit/source/p8_eq.c +# source/libraries/softposit/source/p8_le.c +# source/libraries/softposit/source/p8_lt.c +# source/libraries/softposit/source/quire8_fdp_add.c +# source/libraries/softposit/source/quire8_fdp_sub.c +# source/libraries/softposit/source/ui32_to_p8.c +# source/libraries/softposit/source/ui64_to_p8.c +# source/libraries/softposit/source/i32_to_p8.c +# source/libraries/softposit/source/i64_to_p8.c + +# source/libraries/softposit/source/s_addMagsP16.c +# source/libraries/softposit/source/s_subMagsP16.c +# source/libraries/softposit/source/s_mulAddP16.c +# source/libraries/softposit/source/p16_to_ui32.c +# source/libraries/softposit/source/p16_to_ui64.c +# source/libraries/softposit/source/p16_to_i32.c +# source/libraries/softposit/source/p16_to_i64.c +# source/libraries/softposit/source/p16_to_p8.c +# source/libraries/softposit/source/p16_to_p32.c +# source/libraries/softposit/source/p16_to_pX2.c +# source/libraries/softposit/source/p16_roundToInt.c +# 
source/libraries/softposit/source/p16_add.c +# source/libraries/softposit/source/p16_sub.c +# source/libraries/softposit/source/p16_mul.c +# source/libraries/softposit/source/p16_mulAdd.c +# source/libraries/softposit/source/p16_div.c +# source/libraries/softposit/source/p16_eq.c +# source/libraries/softposit/source/p16_le.c +# source/libraries/softposit/source/p16_lt.c +# source/libraries/softposit/source/p16_sqrt.c +# source/libraries/softposit/source/quire16_fdp_add.c +# source/libraries/softposit/source/quire16_fdp_sub.c +# source/libraries/softposit/source/quire_helper.c +# source/libraries/softposit/source/ui32_to_p16.c +# source/libraries/softposit/source/ui64_to_p16.c +# source/libraries/softposit/source/i32_to_p16.c +# source/libraries/softposit/source/i64_to_p16.c + + source/libraries/softposit/source/s_addMagsP32.c + source/libraries/softposit/source/s_subMagsP32.c + source/libraries/softposit/source/s_mulAddP32.c + source/libraries/softposit/source/p32_to_ui32.c + source/libraries/softposit/source/p32_to_ui64.c + source/libraries/softposit/source/p32_to_i32.c + source/libraries/softposit/source/p32_to_i64.c +# source/libraries/softposit/source/p32_to_p8.c +# source/libraries/softposit/source/p32_to_p16.c +##source/libraries/softposit/source/p32_to_pX2.c + source/libraries/softposit/source/p32_roundToInt.c + source/libraries/softposit/source/p32_add.c + source/libraries/softposit/source/p32_sub.c + source/libraries/softposit/source/p32_mul.c + source/libraries/softposit/source/p32_mulAdd.c + source/libraries/softposit/source/p32_div.c + source/libraries/softposit/source/p32_eq.c + source/libraries/softposit/source/p32_le.c + source/libraries/softposit/source/p32_lt.c + source/libraries/softposit/source/p32_sqrt.c +##source/libraries/softposit/source/quire32_fdp_add.c +##source/libraries/softposit/source/quire32_fdp_sub.c + source/libraries/softposit/source/ui32_to_p32.c + source/libraries/softposit/source/ui64_to_p32.c + 
source/libraries/softposit/source/i32_to_p32.c + source/libraries/softposit/source/i64_to_p32.c + source/libraries/softposit/source/s_approxRecipSqrt_1Ks.c +# source/libraries/softposit/source/c_convertDecToPosit8.c +# source/libraries/softposit/source/c_convertPosit8ToDec.c +# source/libraries/softposit/source/c_convertDecToPosit16.c +# source/libraries/softposit/source/c_convertPosit16ToDec.c +# source/libraries/softposit/source/c_convertQuire8ToPosit8.c +# source/libraries/softposit/source/c_convertQuire16ToPosit16.c +##source/libraries/softposit/source/c_convertQuire32ToPosit32.c + source/libraries/softposit/source/c_convertDecToPosit32.c + source/libraries/softposit/source/c_convertPosit32ToDec.c + source/libraries/softposit/source/c_int.c +##source/libraries/softposit/source/s_addMagsPX2.c +##source/libraries/softposit/source/s_subMagsPX2.c +##source/libraries/softposit/source/s_mulAddPX2.c +##source/libraries/softposit/source/pX2_add.c +##source/libraries/softposit/source/pX2_sub.c +##source/libraries/softposit/source/pX2_mul.c +##source/libraries/softposit/source/pX2_div.c +##source/libraries/softposit/source/pX2_mulAdd.c +##source/libraries/softposit/source/pX2_roundToInt.c +##source/libraries/softposit/source/pX2_sqrt.c +##source/libraries/softposit/source/pX2_eq.c +##source/libraries/softposit/source/pX2_le.c +##source/libraries/softposit/source/pX2_lt.c +##source/libraries/softposit/source/ui32_to_pX2.c +# source/libraries/softposit/source/ui64_to_pX2.c +##source/libraries/softposit/source/i32_to_pX2.c +# source/libraries/softposit/source/i64_to_pX2.c +##source/libraries/softposit/source/c_convertQuireX2ToPositX2.c + +) + +add_library(softposit STATIC ${softposit_sources}) + +target_include_directories(softposit PRIVATE + source/libraries/softposit/source + source/libraries/softposit/source/include + source/libraries/softposit/build/Linux-x86_64-GCC +) + +target_compile_options(softposit PRIVATE + -DSOFTPOSIT_FAST_INT64 +)
\ No newline at end of file diff --git a/source/luametatex/cmake/tex.cmake b/source/luametatex/cmake/tex.cmake index 83820aa1c..4a4c78110 100644 --- a/source/luametatex/cmake/tex.cmake +++ b/source/luametatex/cmake/tex.cmake @@ -6,6 +6,7 @@ set(tex_sources source/utilities/auxsystem.c source/utilities/auxunistring.c source/utilities/auxfile.c + source/utilities/auxposit.c source/libraries/hnj/hnjhyphen.c @@ -39,6 +40,7 @@ set(tex_sources source/luarest/lmtxcomplexlib.c source/luarest/lmtziplib.c source/luarest/lmtsparselib.c + source/luarest/lmtposit.c source/tex/texalign.c source/tex/texarithmetic.c @@ -96,4 +98,5 @@ target_include_directories(tex PRIVATE source/libraries/pplib/util source/luacore/lua54/src source/libraries/mimalloc/include + source/libraries/softposit/source/include ) diff --git a/source/luametatex/source/libraries/mimalloc/CMakeLists.txt b/source/luametatex/source/libraries/mimalloc/CMakeLists.txt index 35d5d6509..2bcd1ef76 100644 --- a/source/luametatex/source/libraries/mimalloc/CMakeLists.txt +++ b/source/luametatex/source/libraries/mimalloc/CMakeLists.txt @@ -50,7 +50,7 @@ set(mi_sources src/page.c src/random.c src/segment.c - src/segment-cache.c + src/segment-map.c src/stats.c src/prim/prim.c) diff --git a/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake b/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake index 855c44d22..a44c121d9 100644 --- a/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake +++ b/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake @@ -1,6 +1,6 @@ set(mi_version_major 2) set(mi_version_minor 1) -set(mi_version_patch 1) +set(mi_version_patch 2) set(mi_version ${mi_version_major}.${mi_version_minor}) set(PACKAGE_VERSION ${mi_version}) diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc.h index 800cfd7e4..f77c2ea17 100644 
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 211 // major + 2 digits minor +#define MI_MALLOC_VERSION 212 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes @@ -284,7 +284,7 @@ mi_decl_export int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; -#if MI_MALLOC_VERSION >= 200 +#if MI_MALLOC_VERSION >= 182 // Create a heap that only allocates in the specified arena mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); #endif @@ -318,35 +318,40 @@ mi_decl_export int mi_reserve_huge_os_pages(size_t pages, double max_secs, size typedef enum mi_option_e { // stable options - mi_option_show_errors, - mi_option_show_stats, - mi_option_verbose, - // some of the following options are experimental - // (deprecated options are kept for binary backward compatibility with v1.x versions) - mi_option_eager_commit, - mi_option_deprecated_eager_region_commit, - mi_option_deprecated_reset_decommits, - mi_option_large_os_pages, // use large (2MiB) OS pages, implies eager commit - mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB) at startup + mi_option_show_errors, // print error messages + mi_option_show_stats, // print statistics on termination + mi_option_verbose, // print verbose messages + // the following options are experimental (see src/options.h) + mi_option_eager_commit, // 
eager commit segments? (after `eager_commit_delay` segments) (=1) + mi_option_arena_eager_commit, // eager commit arenas? Use 2 to enable just on overcommit systems (=2) + mi_option_purge_decommits, // should a memory purge decommit (or only reset) (=1) + mi_option_allow_large_os_pages, // allow large (2MiB) OS pages, implies eager commit + mi_option_reserve_huge_os_pages, // reserve N huge OS pages (1GiB/page) at startup mi_option_reserve_huge_os_pages_at, // reserve huge OS pages at a specific NUMA node - mi_option_reserve_os_memory, // reserve specified amount of OS memory at startup + mi_option_reserve_os_memory, // reserve specified amount of OS memory in an arena at startup mi_option_deprecated_segment_cache, - mi_option_page_reset, - mi_option_abandoned_page_decommit, - mi_option_deprecated_segment_reset, - mi_option_eager_commit_delay, - mi_option_decommit_delay, - mi_option_use_numa_nodes, // 0 = use available numa nodes, otherwise use at most N nodes. - mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only reserved arenas) - mi_option_os_tag, - mi_option_max_errors, - mi_option_max_warnings, - mi_option_max_segment_reclaim, - mi_option_allow_decommit, - mi_option_segment_decommit_delay, - mi_option_decommit_extend_delay, - mi_option_destroy_on_exit, - _mi_option_last + mi_option_deprecated_page_reset, + mi_option_abandoned_page_purge, // immediately purge delayed purges on thread termination + mi_option_deprecated_segment_reset, + mi_option_eager_commit_delay, + mi_option_purge_delay, // memory purging is delayed by N milli seconds; use 0 for immediate purging or -1 for no purging at all. + mi_option_use_numa_nodes, // 0 = use all available numa nodes, otherwise use at most N nodes. 
+ mi_option_limit_os_alloc, // 1 = do not use OS memory for allocation (but only programmatically reserved arenas) + mi_option_os_tag, // tag used for OS logging (macOS only for now) + mi_option_max_errors, // issue at most N error messages + mi_option_max_warnings, // issue at most N warning messages + mi_option_max_segment_reclaim, + mi_option_destroy_on_exit, // if set, release all memory on exit; sometimes used for dynamic unloading but can be unsafe. + mi_option_arena_reserve, // initial memory size in KiB for arena reservation (1GiB on 64-bit) + mi_option_arena_purge_mult, + mi_option_purge_extend_delay, + _mi_option_last, + // legacy option names + mi_option_large_os_pages = mi_option_allow_large_os_pages, + mi_option_eager_region_commit = mi_option_arena_eager_commit, + mi_option_reset_decommits = mi_option_purge_decommits, + mi_option_reset_delay = mi_option_purge_delay, + mi_option_abandoned_page_reset = mi_option_abandoned_page_purge } mi_option_t; @@ -356,8 +361,9 @@ mi_decl_export void mi_option_disable(mi_option_t option); mi_decl_export void mi_option_set_enabled(mi_option_t option, bool enable); mi_decl_export void mi_option_set_enabled_default(mi_option_t option, bool enable); -mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); -mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export long mi_option_get(mi_option_t option); +mi_decl_nodiscard mi_decl_export long mi_option_get_clamp(mi_option_t option, long min, long max); +mi_decl_nodiscard mi_decl_export size_t mi_option_get_size(mi_option_t option); mi_decl_export void mi_option_set(mi_option_t option, long value); mi_decl_export void mi_option_set_default(mi_option_t option, long value); diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc/atomic.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/atomic.h index fe79fbcaf..fe418fab3 100644 --- 
a/source/luametatex/source/libraries/mimalloc/include/mimalloc/atomic.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/atomic.h @@ -39,7 +39,11 @@ terms of the MIT license. A copy of the license can be found in the file #include <stdatomic.h> #define mi_atomic(name) atomic_##name #define mi_memory_order(name) memory_order_##name -#define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#if !defined(ATOMIC_VAR_INIT) || (__STDC_VERSION__ >= 201710L) // c17, see issue #735 + #define MI_ATOMIC_VAR_INIT(x) x +#else + #define MI_ATOMIC_VAR_INIT(x) ATOMIC_VAR_INIT(x) +#endif #endif // Various defines for all used memory orders in mimalloc @@ -113,11 +117,13 @@ static inline void mi_atomic_maxi64_relaxed(volatile int64_t* p, int64_t x) { } // Used by timers -#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) -#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) -#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) -#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_loadi64_acquire(p) mi_atomic(load_explicit)(p,mi_memory_order(acquire)) +#define mi_atomic_loadi64_relaxed(p) mi_atomic(load_explicit)(p,mi_memory_order(relaxed)) +#define mi_atomic_storei64_release(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(release)) +#define mi_atomic_storei64_relaxed(p,x) mi_atomic(store_explicit)(p,x,mi_memory_order(relaxed)) +#define mi_atomic_casi64_strong_acq_rel(p,e,d) mi_atomic_cas_strong_acq_rel(p,e,d) +#define mi_atomic_addi64_acq_rel(p,i) mi_atomic_add_acq_rel(p,i) #elif defined(_MSC_VER) @@ -245,6 +251,21 @@ static inline void mi_atomic_maxi64_relaxed(volatile _Atomic(int64_t)*p, int64_t } while (current < x && _InterlockedCompareExchange64(p, x, current) != current); } +static inline void mi_atomic_addi64_acq_rel(volatile _Atomic(int64_t*)p, int64_t i) { + 
mi_atomic_addi64_relaxed(p, i); +} + +static inline bool mi_atomic_casi64_strong_acq_rel(volatile _Atomic(int64_t*)p, int64_t* exp, int64_t des) { + int64_t read = _InterlockedCompareExchange64(p, des, *exp); + if (read == *exp) { + return true; + } + else { + *exp = read; + return false; + } +} + // The pointer macros cast to `uintptr_t`. #define mi_atomic_load_ptr_acquire(tp,p) (tp*)mi_atomic_load_acquire((_Atomic(uintptr_t)*)(p)) #define mi_atomic_load_ptr_relaxed(tp,p) (tp*)mi_atomic_load_relaxed((_Atomic(uintptr_t)*)(p)) @@ -281,9 +302,20 @@ typedef _Atomic(uintptr_t) mi_atomic_once_t; static inline bool mi_atomic_once( mi_atomic_once_t* once ) { if (mi_atomic_load_relaxed(once) != 0) return false; // quick test uintptr_t expected = 0; - return mi_atomic_cas_strong_acq_rel(once, &expected, 1); // try to set to 1 + return mi_atomic_cas_strong_acq_rel(once, &expected, (uintptr_t)1); // try to set to 1 } +typedef _Atomic(uintptr_t) mi_atomic_guard_t; + +// Allows only one thread to execute at a time +#define mi_atomic_guard(guard) \ + uintptr_t _mi_guard_expected = 0; \ + for(bool _mi_guard_once = true; \ + _mi_guard_once && mi_atomic_cas_strong_acq_rel(guard,&_mi_guard_expected,(uintptr_t)1); \ + (mi_atomic_store_release(guard,(uintptr_t)0), _mi_guard_once = false) ) + + + // Yield #if defined(__cplusplus) #include <thread> @@ -303,7 +335,7 @@ static inline void mi_atomic_yield(void) { } #elif (defined(__GNUC__) || defined(__clang__)) && \ (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__armel__) || defined(__ARMEL__) || \ - defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) + defined(__aarch64__) || defined(__powerpc__) || defined(__ppc__) || defined(__PPC__)) || defined(__POWERPC__) #if defined(__x86_64__) || defined(__i386__) static inline void mi_atomic_yield(void) { __asm__ volatile ("pause" ::: "memory"); @@ -316,10 +348,16 @@ static inline void mi_atomic_yield(void) { static inline void 
mi_atomic_yield(void) { __asm__ volatile("yield" ::: "memory"); } -#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) +#elif defined(__powerpc__) || defined(__ppc__) || defined(__PPC__) || defined(__POWERPC__) +#ifdef __APPLE__ +static inline void mi_atomic_yield(void) { + __asm__ volatile ("or r27,r27,r27" ::: "memory"); +} +#else static inline void mi_atomic_yield(void) { __asm__ __volatile__ ("or 27,27,27" ::: "memory"); } +#endif #elif defined(__armel__) || defined(__ARMEL__) static inline void mi_atomic_yield(void) { __asm__ volatile ("nop" ::: "memory"); diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc/internal.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/internal.h index a4495c161..00d262609 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc/internal.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/internal.h @@ -80,49 +80,52 @@ extern mi_decl_cache_align mi_stats_t _mi_stats_main; extern mi_decl_cache_align const mi_page_t _mi_page_empty; bool _mi_is_main_thread(void); size_t _mi_current_thread_count(void); -bool _mi_preloading(void); // true while the C runtime is not ready +bool _mi_preloading(void); // true while the C runtime is not initialized yet mi_threadid_t _mi_thread_id(void) mi_attr_noexcept; -mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap +mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing heap void _mi_thread_done(mi_heap_t* heap); +void _mi_thread_data_collect(void); // os.c -void _mi_os_init(void); // called from process init -void* _mi_os_alloc(size_t size, mi_stats_t* stats); // to allocate thread local data -void _mi_os_free(void* p, size_t size, mi_stats_t* stats); // to free thread local data +void _mi_os_init(void); // called from process init +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* 
stats); +void _mi_os_free_ex(void* p, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* stats); + size_t _mi_os_page_size(void); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); +bool _mi_os_has_virtual_reserve(void); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); bool _mi_os_protect(void* addr, size_t size); bool _mi_os_unprotect(void* addr, size_t size); +bool _mi_os_purge(void* p, size_t size, mi_stats_t* stats); +bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats); + +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* stats); +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats); -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); -void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size); bool _mi_os_use_large_page(size_t size, size_t alignment); size_t _mi_os_large_page_size(void); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize, 
mi_memid_t* memid); // arena.c mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id); -bool _mi_arena_is_os_allocated(size_t arena_memid); - -// "segment-cache.c" -void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); -void _mi_segment_cache_free_all(mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t still_committed_size, mi_memid_t memid, mi_stats_t* stats); +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld); +bool _mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id); +bool _mi_arena_contains(const void* p); +void _mi_arena_collect(bool force_purge, mi_stats_t* stats); +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats); + +// "segment-map.c" void 
_mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); @@ -170,8 +173,8 @@ uint8_t _mi_bin(size_t size); // for stats void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); -bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); -void _mi_heap_destroy_all(void); +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid); +void _mi_heap_unsafe_destroy_all(void); // "stats.c" void _mi_stats_done(mi_stats_t* stats); @@ -266,6 +269,10 @@ bool _mi_page_is_valid(mi_page_t* page); #define MI_INIT256(x) MI_INIT128(x),MI_INIT128(x) +#include <string.h> +// initialize a local variable to zero; use memset as compilers optimize constant sized memset's +#define _mi_memzero_var(x) memset(&x,0,sizeof(x)) + // Is `x` a power of two? (0 is considered a power of two) static inline bool _mi_is_power_of_two(uintptr_t x) { return ((x & (x - 1)) == 0); @@ -308,7 +315,7 @@ static inline uintptr_t _mi_divide_up(uintptr_t size, size_t divider) { } // Is memory zero initialized? 
-static inline bool mi_mem_is_zero(void* p, size_t size) { +static inline bool mi_mem_is_zero(const void* p, size_t size) { for (size_t i = 0; i < size; i++) { if (((uint8_t*)p)[i] != 0) return false; } @@ -727,6 +734,29 @@ size_t _mi_commit_mask_next_run(const mi_commit_mask_t* cm, size_t* idx); +/* ----------------------------------------------------------- + memory id's +----------------------------------------------------------- */ + +static inline mi_memid_t _mi_memid_create(mi_memkind_t memkind) { + mi_memid_t memid; + _mi_memzero_var(memid); + memid.memkind = memkind; + return memid; +} + +static inline mi_memid_t _mi_memid_none(void) { + return _mi_memid_create(MI_MEM_NONE); +} + +static inline mi_memid_t _mi_memid_create_os(bool committed, bool is_zero, bool is_large) { + mi_memid_t memid = _mi_memid_create(MI_MEM_OS); + memid.initially_committed = committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return memid; +} + // ------------------------------------------------------------------- // Fast "random" shuffle @@ -887,7 +917,6 @@ static inline size_t mi_bsr(uintptr_t x) { #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) #include <intrin.h> -#include <string.h> extern bool _mi_cpu_has_fsrm; static inline void _mi_memcpy(void* dst, const void* src, size_t n) { if (_mi_cpu_has_fsrm) { @@ -906,7 +935,6 @@ static inline void _mi_memzero(void* dst, size_t n) { } } #else -#include <string.h> static inline void _mi_memcpy(void* dst, const void* src, size_t n) { memcpy(dst, src, n); } @@ -915,7 +943,6 @@ static inline void _mi_memzero(void* dst, size_t n) { } #endif - // ------------------------------------------------------------------------------- // The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned // This is used for example in `mi_realloc`. 
@@ -923,7 +950,6 @@ static inline void _mi_memzero(void* dst, size_t n) { #if (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // On GCC/CLang we provide a hint that the pointers are word aligned. -#include <string.h> static inline void _mi_memcpy_aligned(void* dst, const void* src, size_t n) { mi_assert_internal(((uintptr_t)dst % MI_INTPTR_SIZE == 0) && ((uintptr_t)src % MI_INTPTR_SIZE == 0)); void* adst = __builtin_assume_aligned(dst, MI_INTPTR_SIZE); diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc/prim.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/prim.h index 10378c922..9e560696f 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc/prim.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/prim.h @@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file // Each OS/host needs to implement these primitives, see `src/prim` // for implementations on Window, macOS, WASI, and Linux/Unix. // -// note: on all primitive functions, we always get: +// note: on all primitive functions, we always have result parameters != NUL, and: // addr != NULL and page aligned // size > 0 and page aligned // return value is an error code an int where 0 is success. @@ -22,11 +22,12 @@ terms of the MIT license. A copy of the license can be found in the file // OS memory configuration typedef struct mi_os_mem_config_s { - size_t page_size; // 4KiB - size_t large_page_size; // 2MiB - size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) - bool has_overcommit; // can we reserve more memory than can be actually committed? - bool must_free_whole; // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc) + size_t page_size; // 4KiB + size_t large_page_size; // 2MiB + size_t alloc_granularity; // smallest allocation size (on Windows 64KiB) + bool has_overcommit; // can we reserve more memory than can be actually committed? 
+ bool must_free_whole; // must allocated blocks be freed as a whole (false for mmap, true for VirtualAlloc) + bool has_virtual_reserve; // supports virtual address space reservation? (if true we can reserve virtual address space without using commit or physical memory) } mi_os_mem_config_t; // Initialize @@ -37,12 +38,23 @@ int _mi_prim_free(void* addr, size_t size ); // Allocate OS memory. Return NULL on error. // The `try_alignment` is just a hint and the returned pointer does not have to be aligned. +// If `commit` is false, the virtual memory range only needs to be reserved (with no access) +// which will later be committed explicitly using `_mi_prim_commit`. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) // pre: !commit => !allow_large // try_alignment >= _mi_os_page_size() and a power of 2 -int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr); +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr); // Commit memory. Returns error code or 0 on success. -int _mi_prim_commit(void* addr, size_t size, bool commit); +// For example, on Linux this would make the memory PROT_READ|PROT_WRITE. +// `is_zero` is set to true if the memory was zero initialized (e.g. on Windows) +int _mi_prim_commit(void* addr, size_t size, bool* is_zero); + +// Decommit memory. Returns error code or 0 on success. The `needs_recommit` result is true +// if the memory would need to be re-committed. For example, on Windows this is always true, +// but on Linux we could use MADV_DONTNEED to decommit which does not need a recommit. +// pre: needs_recommit != NULL +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit); // Reset memory. The range keeps being accessible but the content might be reset. // Returns error code or 0 on success. 
@@ -52,10 +64,10 @@ int _mi_prim_reset(void* addr, size_t size); int _mi_prim_protect(void* addr, size_t size, bool protect); // Allocate huge (1GiB) pages possibly associated with a NUMA node. +// `is_zero` is set to true if the memory was zero initialized (as on most OS's) // pre: size > 0 and a multiple of 1GiB. -// addr is either NULL or an address hint. // numa_node is either negative (don't care), or a numa node number. -int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr); +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr); // Return the current NUMA node size_t _mi_prim_numa_node(void); diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc/track.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/track.h index f78e8daa7..9545f7507 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc/track.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/track.h @@ -79,7 +79,7 @@ defined, undefined, or not accessible at all: // windows event tracing #define MI_TRACK_ENABLED 1 -#define MI_TRACK_HEAP_DESTROY 0 +#define MI_TRACK_HEAP_DESTROY 1 #define MI_TRACK_TOOL "ETW" #define WIN32_LEAN_AND_MEAN diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc/types.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/types.h index c7ddaaaef..2005238a6 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc/types.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/types.h @@ -172,7 +172,7 @@ typedef int32_t mi_ssize_t; // Derived constants #define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT) #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_SEGMENT_MASK (MI_SEGMENT_ALIGN - 1) +#define MI_SEGMENT_MASK ((uintptr_t)(MI_SEGMENT_ALIGN - 1)) #define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT) #define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / 
MI_SEGMENT_SLICE_SIZE) // 1024 @@ -291,16 +291,15 @@ typedef uintptr_t mi_thread_free_t; typedef struct mi_page_s { // "owned" by the segment uint32_t slice_count; // slices in this page (0 if not a page) - uint32_t slice_offset; // distance from the actual page data slice (0 if a page) - uint8_t is_reset : 1; // `true` if the page memory was reset + uint32_t slice_offset; // distance from the actual page data slice (0 if a page) uint8_t is_committed : 1; // `true` if the page virtual memory is committed - uint8_t is_zero_init : 1; // `true` if the page was zero initialized + uint8_t is_zero_init : 1; // `true` if the page was initially zero initialized // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized + uint8_t free_is_zero : 1; // `true` if the blocks in the free list are zero initialized uint8_t retire_expire : 7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) @@ -326,6 +325,10 @@ typedef struct mi_page_s { +// ------------------------------------------------------ +// Mimalloc segments contain mimalloc pages +// ------------------------------------------------------ + typedef enum mi_page_kind_e { MI_PAGE_SMALL, // small blocks go into 64KiB pages inside a segment MI_PAGE_MEDIUM, // medium blocks go into medium pages inside a segment @@ -350,7 +353,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB +#define MI_MINIMAL_COMMIT_SIZE (1*MI_SEGMENT_SLICE_SIZE) #define MI_COMMIT_SIZE 
(MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS @@ -368,20 +371,57 @@ typedef mi_page_t mi_slice_t; typedef int64_t mi_msecs_t; +// Memory can reside in arena's, direct OS allocated, or statically allocated. The memid keeps track of this. +typedef enum mi_memkind_e { + MI_MEM_NONE, // not allocated + MI_MEM_EXTERNAL, // not owned by mimalloc but provided externally (via `mi_manage_os_memory` for example) + MI_MEM_STATIC, // allocated in a static area and should not be freed (for arena meta data for example) + MI_MEM_OS, // allocated from the OS + MI_MEM_OS_HUGE, // allocated as huge os pages + MI_MEM_OS_REMAP, // allocated in a remapable area (i.e. using `mremap`) + MI_MEM_ARENA // allocated from an arena (the usual case) +} mi_memkind_t; + +static inline bool mi_memkind_is_os(mi_memkind_t memkind) { + return (memkind >= MI_MEM_OS && memkind <= MI_MEM_OS_REMAP); +} + +typedef struct mi_memid_os_info { + void* base; // actual base address of the block (used for offset aligned allocations) + size_t alignment; // alignment at allocation +} mi_memid_os_info_t; + +typedef struct mi_memid_arena_info { + size_t block_index; // index in the arena + mi_arena_id_t id; // arena id (>= 1) + bool is_exclusive; // the arena can only be used for specific arena allocations +} mi_memid_arena_info_t; + +typedef struct mi_memid_s { + union { + mi_memid_os_info_t os; // only used for MI_MEM_OS + mi_memid_arena_info_t arena; // only used for MI_MEM_ARENA + } mem; + bool is_pinned; // `true` if we cannot decommit/reset/protect in this memory (e.g. when allocated using large OS pages) + bool initially_committed;// `true` if the memory was originally allocated as committed + bool initially_zero; // `true` if the memory was originally zero initialized + mi_memkind_t memkind; +} mi_memid_t; + + // Segments are large allocated memory blocks (8mb on 64 bit) from // the OS. 
Inside segments we allocated fixed size _pages_ that // contain blocks. typedef struct mi_segment_s { - size_t memid; // memory id for arena allocation - bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) - bool mem_is_large; // in large/huge os pages? - bool mem_is_committed; // `true` if the whole segment is eagerly committed - size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) - size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) - - bool allow_decommit; - mi_msecs_t decommit_expire; - mi_commit_mask_t decommit_mask; + // constant fields + mi_memid_t memid; // memory id for arena allocation + bool allow_decommit; + bool allow_purge; + size_t segment_size; + + // segment fields + mi_msecs_t purge_expire; + mi_commit_mask_t purge_mask; mi_commit_mask_t commit_mask; _Atomic(struct mi_segment_s*) abandoned_next; @@ -540,6 +580,7 @@ typedef struct mi_stats_s { mi_stat_count_t reserved; mi_stat_count_t committed; mi_stat_count_t reset; + mi_stat_count_t purged; mi_stat_count_t page_committed; mi_stat_count_t segments_abandoned; mi_stat_count_t pages_abandoned; @@ -552,6 +593,8 @@ typedef struct mi_stats_s { mi_stat_counter_t pages_extended; mi_stat_counter_t mmap_calls; mi_stat_counter_t commit_calls; + mi_stat_counter_t reset_calls; + mi_stat_counter_t purge_calls; mi_stat_counter_t page_no_retire; mi_stat_counter_t searches; mi_stat_counter_t normal_count; diff --git a/source/luametatex/source/libraries/mimalloc/readme.md b/source/luametatex/source/libraries/mimalloc/readme.md index 408f3cb1f..85d3563fb 100644 --- a/source/luametatex/source/libraries/mimalloc/readme.md +++ b/source/luametatex/source/libraries/mimalloc/readme.md @@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac Initially developed by Daan Leijen for the runtime systems of the 
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages. -Latest release tag: `v2.1.1` (2023-04-03). -Latest stable tag: `v1.8.1` (2023-04-03). +Latest release tag: `v2.1.2` (2023-04-24). +Latest stable tag: `v1.8.2` (2023-04-24). mimalloc is a drop-in replacement for `malloc` and can be used in other programs without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as: @@ -43,7 +43,7 @@ It also includes a robust way to override the default allocator in [Windows](#ov and the chance of contending on a single location will be low -- this is quite similar to randomized algorithms like skip lists where adding a random oracle removes the need for a more complex algorithm. -- __eager page reset__: when a "page" becomes empty (with increased chance +- __eager page purging__: when a "page" becomes empty (with increased chance due to free list sharding) the memory is marked to the OS as unused (reset or decommitted) reducing (real) memory pressure and fragmentation, especially in long running programs. @@ -78,6 +78,10 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance (see [below](#performance)); please report if you observe any significant performance regression. +* 2023-04-24, `v1.8.2`, `v2.1.2`: Fixes build issues on freeBSD, musl, and C17 (UE 5.1.1). Reduce code size/complexity + by removing regions and segment-cache's and only use arenas with improved memory purging -- this may improve memory + usage as well for larger services. Renamed options for consistency. Improved Valgrind and ASAN checking. + * 2023-04-03, `v1.8.1`, `v2.1.1`: Fixes build issues on some platforms. * 2023-03-29, `v1.8.0`, `v2.1.0`: Improved support dynamic overriding on Windows 11. 
Improved tracing precision @@ -104,20 +108,6 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page improved wasm support, faster aligned allocation, various small fixes. -* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including - M1), improved performance for v2 for large objects, Python integration improvements, more standard - installation directories, various small fixes. - -* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix - thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. - -* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). - -* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. - -* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, - improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. - * [Older release notes](#older-release-notes) Special thanks to: @@ -279,43 +269,48 @@ completely and redirect all calls to the _mimalloc_ library instead . ## Environment Options -You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), -or via environment variables: +You can set further options either programmatically (using [`mi_option_set`](https://microsoft.github.io/mimalloc/group__options.html)), or via environment variables: - `MIMALLOC_SHOW_STATS=1`: show statistics when the program terminates. - `MIMALLOC_VERBOSE=1`: show verbose messages. - `MIMALLOC_SHOW_ERRORS=1`: show error and warning messages. 
-- `MIMALLOC_PAGE_RESET=0`: by default, mimalloc will reset (or purge) OS pages that are not in use, to signal to the OS - that the underlying physical memory can be reused. This can reduce memory fragmentation in long running (server) - programs. By setting it to `0` this will no longer be done which can improve performance for batch-like programs. - As an alternative, the `MIMALLOC_RESET_DELAY=`<msecs> can be set higher (100ms by default) to make the page - reset occur less frequently instead of turning it off completely. + +Advanced options: + +- `MIMALLOC_PURGE_DELAY=N`: the delay in `N` milli-seconds (by default `10`) after which mimalloc will purge + OS pages that are not in use. This signals to the OS that the underlying physical memory can be reused which + can reduce memory fragmentation especially in long running (server) programs. Setting `N` to `0` purges immediately when + a page becomes unused which can improve memory usage but also decreases performance. Setting `N` to a higher + value like `100` can improve performance (sometimes by a lot) at the cost of potentially using more memory at times. + Setting it to `-1` disables purging completely. +- `MIMALLOC_ARENA_EAGER_COMMIT=1`: turns on eager commit for the large arenas (usually 1GiB) from which mimalloc + allocates segments and pages. This is by default + only enabled on overcommit systems (e.g. Linux) but enabling it explicitly on other systems (like Windows or macOS) + may improve performance. Note that eager commit only increases the commit but not the actual the peak resident set + (rss) so it is generally ok to enable this. + +Further options for large workloads and services: + - `MIMALLOC_USE_NUMA_NODES=N`: pretend there are at most `N` NUMA nodes. If not set, the actual NUMA nodes are detected at runtime. Setting `N` to 1 may avoid problems in some virtual environments. 
Also, setting it to a lower number than the actual NUMA nodes is fine and will only cause threads to potentially allocate more memory across actual NUMA nodes (but this can happen in any case as NUMA local allocation is always a best effort but not guaranteed). -- `MIMALLOC_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly +- `MIMALLOC_ALLOW_LARGE_OS_PAGES=1`: use large OS pages (2MiB) when available; for some workloads this can significantly improve performance. Use `MIMALLOC_VERBOSE` to check if the large OS pages are enabled -- usually one needs to explicitly allow large OS pages (as on [Windows][windows-huge] and [Linux][linux-huge]). However, sometimes the OS is very slow to reserve contiguous physical memory for large OS pages so use with care on systems that - can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). - <!-- - - `MIMALLOC_EAGER_REGION_COMMIT=1`: on Windows, commit large (256MiB) regions eagerly. On Windows, these regions - show in the working set even though usually just a small part is committed to physical memory. This is why it - turned off by default on Windows as it looks not good in the task manager. However, turning it on has no - real drawbacks and may improve performance by a little. - --> -- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where N is the number of 1GiB _huge_ OS pages. This reserves the huge pages at + can have fragmented memory (for that reason, we generally recommend to use `MIMALLOC_RESERVE_HUGE_OS_PAGES` instead whenever possible). +- `MIMALLOC_RESERVE_HUGE_OS_PAGES=N`: where `N` is the number of 1GiB _huge_ OS pages. This reserves the huge pages at startup and sometimes this can give a large (latency) performance improvement on big workloads. - Usually it is better to not use - `MIMALLOC_LARGE_OS_PAGES` in combination with this setting. 
Just like large OS pages, use with care as reserving + Usually it is better to not use `MIMALLOC_ALLOW_LARGE_OS_PAGES=1` in combination with this setting. Just like large + OS pages, use with care as reserving contiguous physical memory can take a long time when memory is fragmented (but reserving the huge pages is done at startup only once). Note that we usually need to explicitly enable huge OS pages (as on [Windows][windows-huge] and [Linux][linux-huge])). With huge OS pages, it may be beneficial to set the setting `MIMALLOC_EAGER_COMMIT_DELAY=N` (`N` is 1 by default) to delay the initial `N` segments (of 4MiB) of a thread to not allocate in the huge OS pages; this prevents threads that are short lived - and allocate just a little to take up space in the huge OS page area (which cannot be reset). + and allocate just a little to take up space in the huge OS page area (which cannot be purged). The huge pages are usually allocated evenly among NUMA nodes. We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all the huge pages at a specific numa node instead. @@ -517,7 +512,7 @@ Adress sanitizer support is in its initial development -- please report any issu ### ETW Event tracing for Windows ([ETW]) provides a high performance way to capture all allocations though -mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACE_ETW=ON` cmake option. +mimalloc and analyze them later. To build with ETW support, use the `-DMI_TRACK_ETW=ON` cmake option. You can then capture an allocation trace using the Windows performance recorder (WPR), using the `src/prim/windows/etw-mimalloc.wprp` profile. In an admin prompt, you can use: @@ -793,6 +788,16 @@ provided by the bot. 
You will only need to do this once across all repos using o # Older Release Notes +* 2021-11-14, `v1.7.3`, `v2.0.3` (beta): improved WASM support, improved macOS support and performance (including + M1), improved performance for v2 for large objects, Python integration improvements, more standard + installation directories, various small fixes. +* 2021-06-17, `v1.7.2`, `v2.0.2` (beta): support M1, better installation layout on Linux, fix + thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes. +* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental). +* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages. +* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics, + improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes. + * 2020-09-24, `v1.6.7`: stable release 1.6: using standard C atomics, passing tsan testing, improved handling of failing to commit on Windows, add [`mi_process_info`](https://github.com/microsoft/mimalloc/blob/master/include/mimalloc.h#L156) api call. * 2020-08-06, `v1.6.4`: stable release 1.6: improved error recovery in low-memory situations, @@ -814,6 +819,7 @@ provided by the bot. You will only need to do this once across all repos using o more eager concurrent free, addition of STL allocator, fixed potential memory leak. * 2020-01-15, `v1.3.0`: stable release 1.3: bug fixes, improved randomness and [stronger free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af43a0793910a9f2d01ac7863/include/mimalloc-internal.h#L396) in secure mode. + * 2019-12-22, `v1.2.2`: stable release 1.2: minor updates. 
* 2019-11-22, `v1.2.0`: stable release 1.2: bug fixes, improved secure mode (free list corruption checks, double free mitigation). Improved dynamic overriding on Windows. * 2019-10-07, `v1.1.0`: stable release 1.1. diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c b/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c index e79a22208..1cd809f15 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c @@ -79,7 +79,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* // for the tracker, on huge aligned allocations only from the start of the large block is defined mi_track_mem_undefined(aligned_p, size); if (zero) { - _mi_memzero(aligned_p, mi_usable_size(aligned_p)); + _mi_memzero_aligned(aligned_p, mi_usable_size(aligned_p)); } } @@ -93,21 +93,13 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept { // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. 
- mi_assert(alignment > 0); if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } - /* - if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) - #if MI_DEBUG > 0 - _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); - #endif - return NULL; - } - */ + if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); @@ -147,9 +139,9 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* he } mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { + if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) return NULL; #if !MI_PADDING // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) - if (!_mi_is_power_of_two(alignment)) return NULL; if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) #else // with padding, we can only guarantee this for fixed alignments @@ -165,6 +157,11 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, } } +// ensure a definition is emitted +#if defined(__cplusplus) +static void* _mi_heap_malloc_aligned = (void*)&mi_heap_malloc_aligned; +#endif + // ------------------------------------------------------ // Aligned Allocation // 
------------------------------------------------------ @@ -226,19 +223,13 @@ static void* mi_heap_realloc_zero_aligned_at(mi_heap_t* heap, void* p, size_t ne return p; // reallocation still fits, is aligned and not more than 50% waste } else { + // note: we don't zero allocate upfront so we only zero initialize the expanded part void* newp = mi_heap_malloc_aligned_at(heap,newsize,alignment,offset); if (newp != NULL) { if (zero && newsize > size) { - const mi_page_t* page = _mi_ptr_page(newp); - if (page->is_zero) { - // already zero initialized - mi_assert_expensive(mi_mem_is_zero(newp,newsize)); - } - else { - // also set last word in the previous allocation to zero to ensure any padding is zero-initialized - size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); - } + // also set last word in the previous allocation to zero to ensure any padding is zero-initialized + size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); + _mi_memzero((uint8_t*)newp + start, newsize - start); } _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); mi_free(p); // only free if successful diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-override.c b/source/luametatex/source/libraries/mimalloc/src/alloc-override.c index 40098ac58..873065dc6 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-override.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-override.c @@ -245,11 +245,13 @@ extern "C" { int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); } // `aligned_alloc` is only available when __USE_ISOC11 is defined. + // Note: it seems __USE_ISOC11 is not defined in musl (and perhaps other libc's) so we only check + // for it if using glibc. 
// Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot // override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9. // Fortunately, in the case where `aligned_alloc` is declared as `static inline` it // uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves. - #if __USE_ISOC11 + #if !defined(__GLIBC__) || __USE_ISOC11 void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); } #endif #endif diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c b/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c index b6f09d1a1..225752fd8 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c @@ -56,7 +56,8 @@ int mi_posix_memalign(void** p, size_t alignment, size_t size) mi_attr_noexcept // Note: The spec dictates we should not modify `*p` on an error. 
(issue#27) // <http://man7.org/linux/man-pages/man3/posix_memalign.3.html> if (p == NULL) return EINVAL; - if (alignment % sizeof(void*) != 0) return EINVAL; // natural alignment + if ((alignment % sizeof(void*)) != 0) return EINVAL; // natural alignment + // it is also required that alignment is a power of 2 and > 0; this is checked in `mi_malloc_aligned` if (alignment==0 || !_mi_is_power_of_two(alignment)) return EINVAL; // not a power of 2 void* q = mi_malloc_aligned(size, alignment); if (q==NULL && size != 0) return ENOMEM; diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc.c b/source/luametatex/source/libraries/mimalloc/src/alloc.c index 147e11094..ffc1747d5 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc.c @@ -37,6 +37,11 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz page->used++; page->free = mi_block_next(page, block); mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); + #if MI_DEBUG>3 + if (page->free_is_zero) { + mi_assert_expensive(mi_mem_is_zero(block+1,size - sizeof(*block))); + } + #endif // allow use of the block internally // note: when tracking we need to avoid ever touching the MI_PADDING since @@ -46,12 +51,18 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // zero the block? note: we need to zero the full block size (issue #63) if mi_unlikely(zero) { mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) - const size_t zsize = (page->is_zero ? 
sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size); - _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); + mi_assert_internal(page->xblock_size >= MI_PADDING_SIZE); + if (page->free_is_zero) { + block->next = 0; + mi_track_mem_defined(block, page->xblock_size - MI_PADDING_SIZE); + } + else { + _mi_memzero_aligned(block, page->xblock_size - MI_PADDING_SIZE); + } } #if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN - if (!page->is_zero && !zero && !mi_page_is_huge(page)) { + if (!zero && !mi_page_is_huge(page)) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } #elif (MI_SECURE!=0) @@ -110,6 +121,11 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif return p; } @@ -139,6 +155,11 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif + #if MI_DEBUG>3 + if (p != NULL && zero) { + mi_assert_expensive(mi_mem_is_zero(p, size)); + } + #endif return p; } } @@ -691,6 +712,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_assert_internal(p!=NULL); // todo: do not track as the usable size is still the same in the free; adjust potential padding? // mi_track_resize(p,size,newsize) + // if (newsize < size) { mi_track_mem_noaccess((uint8_t*)p + newsize, size - newsize); } return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_malloc(heap,newsize); @@ -698,14 +720,15 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized const size_t start = (size >= sizeof(intptr_t) ? 
size - sizeof(intptr_t) : 0); - memset((uint8_t*)newp + start, 0, newsize - start); + _mi_memzero((uint8_t*)newp + start, newsize - start); + } + else if (newsize == 0) { + ((uint8_t*)newp)[0] = 0; // work around for applications that expect zero-reallocation to be zero initialized (issue #725) } if mi_likely(p != NULL) { - if mi_likely(_mi_is_aligned(p, sizeof(uintptr_t))) { // a client may pass in an arbitrary pointer `p`.. - const size_t copysize = (newsize > size ? size : newsize); - mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. - _mi_memcpy_aligned(newp, p, copysize); - } + const size_t copysize = (newsize > size ? size : newsize); + mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. + _mi_memcpy(newp, p, copysize); mi_free(p); // only free the original pointer if successful } } @@ -1030,7 +1053,7 @@ void* _mi_externs[] = { (void*)&mi_zalloc_small, (void*)&mi_heap_malloc, (void*)&mi_heap_zalloc, - (void*)&mi_heap_malloc_small + (void*)&mi_heap_malloc_small, // (void*)&mi_heap_alloc_new, // (void*)&mi_heap_alloc_new_n }; diff --git a/source/luametatex/source/libraries/mimalloc/src/arena.c b/source/luametatex/source/libraries/mimalloc/src/arena.c index 35cbcde6a..a04a04c8f 100644 --- a/source/luametatex/source/libraries/mimalloc/src/arena.c +++ b/source/luametatex/source/libraries/mimalloc/src/arena.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2022, Microsoft Research, Daan Leijen +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. 
@@ -23,7 +23,7 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo #include "mimalloc/atomic.h" #include <string.h> // memset -#include <errno.h> // ENOMEM +#include <errno.h> // ENOMEM #include "bitmap.h" // atomic bitmap @@ -36,22 +36,25 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo typedef uintptr_t mi_block_info_t; #define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB -#define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) +#define MI_MAX_ARENAS (112) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) // A memory arena descriptor typedef struct mi_arena_s { mi_arena_id_t id; // arena id; 0 for non-specific - bool exclusive; // only allow allocations if specifically for this arena + mi_memid_t memid; // memid of the memory area _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) + size_t meta_size; // size of the arena structure itself (including its bitmaps) + mi_memid_t meta_memid; // memid of the arena structure itself (OS or static allocation) int numa_node; // associated NUMA node - bool is_zero_init; // is the arena zero initialized? - bool allow_decommit; // is decommit allowed? 
if true, is_large should be false and blocks_committed != NULL - bool is_large; // large- or huge OS pages (always committed) + bool exclusive; // only allow allocations if specifically for this arena + bool is_large; // memory area consists of large- or huge OS pages (always committed) _Atomic(size_t) search_idx; // optimization to start the search for free blocks + _Atomic(mi_msecs_t) purge_expire; // expiration time when blocks should be decommitted from `blocks_decommit`. mi_bitmap_field_t* blocks_dirty; // are the blocks potentially non-zero? mi_bitmap_field_t* blocks_committed; // are the blocks committed? (can be NULL for memory that cannot be decommitted) + mi_bitmap_field_t* blocks_purge; // blocks that can be (reset) decommitted. (can be NULL for memory that cannot be (reset) decommitted) mi_bitmap_field_t blocks_inuse[1]; // in-place bitmap of in-use blocks (of size `field_count`) } mi_arena_t; @@ -61,9 +64,10 @@ static mi_decl_cache_align _Atomic(mi_arena_t*) mi_arenas[MI_MAX_ARENAS]; static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 +//static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept; + /* ----------------------------------------------------------- Arena id's - 0 is used for non-arena's (like OS memory) id = arena_index + 1 ----------------------------------------------------------- */ @@ -73,10 +77,7 @@ static size_t mi_arena_id_index(mi_arena_id_t id) { static mi_arena_id_t mi_arena_id_create(size_t arena_index) { mi_assert_internal(arena_index < MI_MAX_ARENAS); - mi_assert_internal(MI_MAX_ARENAS <= 126); - int id = (int)arena_index + 1; - mi_assert_internal(id >= 1 && id <= 127); - return id; + return (int)arena_index + 1; } mi_arena_id_t _mi_arena_id_none(void) { @@ -88,50 +89,123 @@ static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclus (arena_id == req_arena_id)); } +bool 
_mi_arena_memid_is_suitable(mi_memid_t memid, mi_arena_id_t request_arena_id) { + if (memid.memkind == MI_MEM_ARENA) { + return mi_arena_id_is_suitable(memid.mem.arena.id, memid.mem.arena.is_exclusive, request_arena_id); + } + else { + return mi_arena_id_is_suitable(0, false, request_arena_id); + } +} + +bool _mi_arena_memid_is_os_allocated(mi_memid_t memid) { + return (memid.memkind == MI_MEM_OS); +} /* ----------------------------------------------------------- - Arena allocations get a memory id where the lower 8 bits are - the arena id, and the upper bits the block index. + Arena allocations get a (currently) 16-bit memory id where the + lower 8 bits are the arena id, and the upper bits the block index. ----------------------------------------------------------- */ -// Use `0` as a special id for direct OS allocated memory. -#define MI_MEMID_OS 0 +static size_t mi_block_count_of_size(size_t size) { + return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); +} -static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_index_t bitmap_index) { - mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? - mi_assert_internal(id >= 0 && id <= 0x7F); - return ((bitmap_index << 8) | ((uint8_t)id & 0x7F) | (exclusive ? 
0x80 : 0)); +static size_t mi_arena_block_size(size_t bcount) { + return (bcount * MI_ARENA_BLOCK_SIZE); } -static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - *bitmap_index = (arena_memid >> 8); - mi_arena_id_t id = (int)(arena_memid & 0x7F); - *arena_index = mi_arena_id_index(id); - return ((arena_memid & 0x80) != 0); +static size_t mi_arena_size(mi_arena_t* arena) { + return mi_arena_block_size(arena->block_count); } -bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { - mi_arena_id_t id = (int)(arena_memid & 0x7F); - bool exclusive = ((arena_memid & 0x80) != 0); - return mi_arena_id_is_suitable(id, exclusive, request_arena_id); +static mi_memid_t mi_memid_create_arena(mi_arena_id_t id, bool is_exclusive, mi_bitmap_index_t bitmap_index) { + mi_memid_t memid = _mi_memid_create(MI_MEM_ARENA); + memid.mem.arena.id = id; + memid.mem.arena.block_index = bitmap_index; + memid.mem.arena.is_exclusive = is_exclusive; + return memid; } -bool _mi_arena_is_os_allocated(size_t arena_memid) { - return (arena_memid == MI_MEMID_OS); +static bool mi_arena_memid_indices(mi_memid_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { + mi_assert_internal(memid.memkind == MI_MEM_ARENA); + *arena_index = mi_arena_id_index(memid.mem.arena.id); + *bitmap_index = memid.mem.arena.block_index; + return memid.mem.arena.is_exclusive; } -static size_t mi_block_count_of_size(size_t size) { - return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); + + +/* ----------------------------------------------------------- + Special static area for mimalloc internal structures + to avoid OS calls (for example, for the arena metadata) +----------------------------------------------------------- */ + +#define MI_ARENA_STATIC_MAX (MI_INTPTR_SIZE*MI_KiB) // 8 KiB on 64-bit + +static uint8_t mi_arena_static[MI_ARENA_STATIC_MAX]; +static _Atomic(size_t) mi_arena_static_top; + +static void* 
mi_arena_static_zalloc(size_t size, size_t alignment, mi_memid_t* memid) { + *memid = _mi_memid_none(); + if (size == 0 || size > MI_ARENA_STATIC_MAX) return NULL; + if ((mi_atomic_load_relaxed(&mi_arena_static_top) + size) > MI_ARENA_STATIC_MAX) return NULL; + + // try to claim space + if (alignment == 0) { alignment = 1; } + const size_t oversize = size + alignment - 1; + if (oversize > MI_ARENA_STATIC_MAX) return NULL; + const size_t oldtop = mi_atomic_add_acq_rel(&mi_arena_static_top, oversize); + size_t top = oldtop + oversize; + if (top > MI_ARENA_STATIC_MAX) { + // try to roll back, ok if this fails + mi_atomic_cas_strong_acq_rel(&mi_arena_static_top, &top, oldtop); + return NULL; + } + + // success + *memid = _mi_memid_create(MI_MEM_STATIC); + const size_t start = _mi_align_up(oldtop, alignment); + uint8_t* const p = &mi_arena_static[start]; + _mi_memzero(p, size); + return p; +} + +static void* mi_arena_meta_zalloc(size_t size, mi_memid_t* memid, mi_stats_t* stats) { + *memid = _mi_memid_none(); + + // try static + void* p = mi_arena_static_zalloc(size, MI_ALIGNMENT_MAX, memid); + if (p != NULL) return p; + + // or fall back to the OS + return _mi_os_alloc(size, memid, stats); } +static void mi_arena_meta_free(void* p, mi_memid_t memid, size_t size, mi_stats_t* stats) { + if (mi_memkind_is_os(memid.memkind)) { + _mi_os_free(p, size, memid, stats); + } + else { + mi_assert(memid.memkind == MI_MEM_STATIC); + } +} + +static void* mi_arena_block_start(mi_arena_t* arena, mi_bitmap_index_t bindex) { + return (arena->start + mi_arena_block_size(mi_bitmap_index_bit(bindex))); +} + + /* ----------------------------------------------------------- Thread safe allocation in an arena ----------------------------------------------------------- */ -static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) + +// claim the `blocks_inuse` bits +static bool mi_arena_try_claim(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* bitmap_idx) { 
size_t idx = 0; // mi_atomic_load_relaxed(&arena->search_idx); // start from last search; ok to be relaxed as the exact start does not matter if (_mi_bitmap_try_find_from_claim_across(arena->blocks_inuse, arena->field_count, idx, blocks, bitmap_idx)) { - mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around + mi_atomic_store_relaxed(&arena->search_idx, mi_bitmap_index_field(*bitmap_idx)); // start search from found location next time around return true; }; return false; @@ -142,92 +216,116 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* Arena Allocation ----------------------------------------------------------- */ -static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_try_alloc_at(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, + bool commit, mi_memid_t* memid, mi_os_tld_t* tld) { MI_UNUSED(arena_index); mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); - if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; mi_bitmap_index_t bitmap_index; - if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; - - // claimed it! set the dirty bits (todo: no need for an atomic op here?) - void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index); - *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); - *large = arena->is_large; - *is_pinned = (arena->is_large || !arena->allow_decommit); + if (!mi_arena_try_claim(arena, needed_bcount, &bitmap_index)) return NULL; + + // claimed it! 
+ void* p = mi_arena_block_start(arena, bitmap_index); + *memid = mi_memid_create_arena(arena->id, arena->exclusive, bitmap_index); + memid->is_pinned = arena->memid.is_pinned; + + // none of the claimed blocks should be scheduled for a decommit + if (arena->blocks_purge != NULL) { + // this is thread safe as a potential purge only decommits parts that are not yet claimed as used (in `blocks_inuse`). + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, needed_bcount, bitmap_index); + } + + // set the dirty bits (todo: no need for an atomic op here?) + if (arena->memid.initially_zero && arena->blocks_dirty != NULL) { + memid->initially_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); + } + + // set commit state if (arena->blocks_committed == NULL) { // always committed - *commit = true; + memid->initially_committed = true; } - else if (*commit) { - // arena not committed as a whole, but commit requested: ensure commit now + else if (commit) { + // commit requested, but the range may not be committed as a whole: ensure it is committed now + memid->initially_committed = true; bool any_uncommitted; _mi_bitmap_claim_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index, &any_uncommitted); if (any_uncommitted) { - bool commit_zero; - _mi_os_commit(p, needed_bcount * MI_ARENA_BLOCK_SIZE, &commit_zero, tld->stats); - if (commit_zero) *is_zero = true; + bool commit_zero = false; + if (!_mi_os_commit(p, mi_arena_block_size(needed_bcount), &commit_zero, tld->stats)) { + memid->initially_committed = false; + } + else { + if (commit_zero) { memid->initially_zero = true; } + } } } else { // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, bitmap_index); + memid->initially_committed = _mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, needed_bcount, 
bitmap_index); } + return p; } +// allocate in a speficic arena +static void* mi_arena_try_alloc_at_id(mi_arena_id_t arena_id, bool match_numa_node, int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) +{ + MI_UNUSED_RELEASE(alignment); + mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); + const size_t bcount = mi_block_count_of_size(size); + const size_t arena_index = mi_arena_id_index(arena_id); + mi_assert_internal(arena_index < mi_atomic_load_relaxed(&mi_arena_count)); + mi_assert_internal(size <= mi_arena_block_size(bcount)); + + // Check arena suitability + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); + if (arena == NULL) return NULL; + if (!allow_large && arena->is_large) return NULL; + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; + if (req_arena_id == _mi_arena_id_none()) { // in not specific, check numa affinity + const bool numa_suitable = (numa_node < 0 || arena->numa_node < 0 || arena->numa_node == numa_node); + if (match_numa_node) { if (!numa_suitable) return NULL; } + else { if (numa_suitable) return NULL; } + } + + // try to allocate + void* p = mi_arena_try_alloc_at(arena, arena_index, bcount, commit, memid, tld); + mi_assert_internal(p == NULL || _mi_is_aligned(p, alignment)); + return p; +} + + // allocate from an arena with fallback to the OS -static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, - bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld ) +static mi_decl_noinline void* mi_arena_try_alloc(int numa_node, size_t size, size_t alignment, + bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld ) { MI_UNUSED(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); const size_t max_arena = 
mi_atomic_load_relaxed(&mi_arena_count); - const size_t bcount = mi_block_count_of_size(size); if mi_likely(max_arena == 0) return NULL; - mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE); - - size_t arena_index = mi_arena_id_index(req_arena_id); - if (arena_index < MI_MAX_ARENAS) { - // try a specific arena if requested - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); - if ((arena != NULL) && - (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); + + if (req_arena_id != _mi_arena_id_none()) { + // try a specific arena if requested + if (mi_arena_id_index(req_arena_id) < max_arena) { + void* p = mi_arena_try_alloc_at_id(req_arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } else { // try numa affine allocation - for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; // end reached - if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) return p; - } + for (size_t i = 0; i < max_arena; i++) { + void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; } // try from another numa node instead.. 
- for (size_t i = 0; i < max_arena; i++) { - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); - if (arena == NULL) break; // end reached - if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local! - (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages - { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - mi_assert_internal((uintptr_t)p % alignment == 0); + if (numa_node >= 0) { // if numa_node was < 0 (no specific affinity requested), all arena's have been tried already + for (size_t i = 0; i < max_arena; i++) { + void* p = mi_arena_try_alloc_at_id(mi_arena_id_create(i), false /* only proceed if not numa local */, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); if (p != NULL) return p; } } @@ -235,75 +333,294 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size return NULL; } -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) +// try to reserve a fresh arena space +static bool mi_arena_reserve(size_t req_size, bool allow_large, mi_arena_id_t req_arena_id, mi_arena_id_t *arena_id) +{ + if (_mi_preloading()) return false; // use OS only while pre loading + if (req_arena_id != _mi_arena_id_none()) return false; + + const size_t arena_count = mi_atomic_load_acquire(&mi_arena_count); + if (arena_count > (MI_MAX_ARENAS - 4)) return false; + + size_t arena_reserve = mi_option_get_size(mi_option_arena_reserve); + if (arena_reserve == 0) return false; + + if (!_mi_os_has_virtual_reserve()) { + arena_reserve = arena_reserve/4; // be conservative if virtual reserve is not supported (for some embedded systems for example) + } + arena_reserve = _mi_align_up(arena_reserve, MI_ARENA_BLOCK_SIZE); + if (arena_count >= 8 && 
arena_count <= 128) { + arena_reserve = ((size_t)1<<(arena_count/8)) * arena_reserve; // scale up the arena sizes exponentially + } + if (arena_reserve < req_size) return false; // should be able to at least handle the current allocation size + + // commit eagerly? + bool arena_commit = false; + if (mi_option_get(mi_option_arena_eager_commit) == 2) { arena_commit = _mi_os_has_overcommit(); } + else if (mi_option_get(mi_option_arena_eager_commit) == 1) { arena_commit = true; } + + return (mi_reserve_os_memory_ex(arena_reserve, arena_commit, allow_large, false /* exclusive */, arena_id) == 0); +} + + +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool commit, bool allow_large, + mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { - mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); + mi_assert_internal(memid != NULL && tld != NULL); mi_assert_internal(size > 0); - *memid = MI_MEMID_OS; - *is_zero = false; - *is_pinned = false; + *memid = _mi_memid_none(); - bool default_large = false; - if (large == NULL) large = &default_large; // ensure `large != NULL` const int numa_node = _mi_os_numa_node(tld); // current numa node // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); - if (p != NULL) return p; + void* p = mi_arena_try_alloc(numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + + // otherwise, try to first eagerly reserve a new arena + if (req_arena_id == _mi_arena_id_none()) { + mi_arena_id_t arena_id = 0; + if (mi_arena_reserve(size, allow_large, req_arena_id, &arena_id)) { + // and try allocate in there + 
mi_assert_internal(req_arena_id == _mi_arena_id_none()); + p = mi_arena_try_alloc_at_id(arena_id, true, numa_node, size, alignment, commit, allow_large, req_arena_id, memid, tld); + if (p != NULL) return p; + } + } } - // finally, fall back to the OS + // if we cannot use OS allocation, return NULL if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } - *is_zero = true; - *memid = MI_MEMID_OS; - void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats); - if (p != NULL) { *is_pinned = *large; } - return p; + + // finally, fall back to the OS + if (align_offset > 0) { + return _mi_os_alloc_aligned_at_offset(size, alignment, align_offset, commit, allow_large, memid, tld->stats); + } + else { + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld->stats); + } } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool commit, bool allow_large, mi_arena_id_t req_arena_id, mi_memid_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, allow_large, req_arena_id, memid, tld); } + void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) { if (size != NULL) *size = 0; size_t arena_index = mi_arena_id_index(arena_id); if (arena_index >= MI_MAX_ARENAS) return NULL; - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[arena_index]); if (arena == NULL) return NULL; - if (size != NULL) *size = arena->block_count * MI_ARENA_BLOCK_SIZE; + if (size != NULL) { *size = mi_arena_block_size(arena->block_count); } return arena->start; } + +/* 
----------------------------------------------------------- + Arena purge +----------------------------------------------------------- */ + +static long mi_arena_purge_delay(void) { + // <0 = no purging allowed, 0=immediate purging, >0=milli-second delay + return (mi_option_get(mi_option_purge_delay) * mi_option_get(mi_option_arena_purge_mult)); +} + +// reset or decommit in an arena and update the committed/decommit bitmaps +// assumes we own the area (i.e. blocks_in_use is claimed by us) +static void mi_arena_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { + mi_assert_internal(arena->blocks_committed != NULL); + mi_assert_internal(arena->blocks_purge != NULL); + mi_assert_internal(!arena->memid.is_pinned); + const size_t size = mi_arena_block_size(blocks); + void* const p = mi_arena_block_start(arena, bitmap_idx); + bool needs_recommit; + if (_mi_bitmap_is_claimed_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx)) { + // all blocks are committed, we can purge freely + needs_recommit = _mi_os_purge(p, size, stats); + } + else { + // some blocks are not committed -- this can happen when a partially committed block is freed + // in `_mi_arena_free` and it is conservatively marked as uncommitted but still scheduled for a purge + // we need to ensure we do not try to reset (as that may be invalid for uncommitted memory), + // and also undo the decommit stats (as it was already adjusted) + mi_assert_internal(mi_option_is_enabled(mi_option_purge_decommits)); + needs_recommit = _mi_os_purge_ex(p, size, false /* allow reset? */, stats); + _mi_stat_increase(&stats->committed, size); + } + + // clear the purged blocks + _mi_bitmap_unclaim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx); + // update committed bitmap + if (needs_recommit) { + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + } +} + +// Schedule a purge. 
This is usually delayed to avoid repeated decommit/commit calls. +// Note: assumes we (still) own the area as we may purge immediately +static void mi_arena_schedule_purge(mi_arena_t* arena, size_t bitmap_idx, size_t blocks, mi_stats_t* stats) { + mi_assert_internal(arena->blocks_purge != NULL); + const long delay = mi_arena_purge_delay(); + if (delay < 0) return; // is purging allowed at all? + + if (_mi_preloading() || delay == 0) { + // decommit directly + mi_arena_purge(arena, bitmap_idx, blocks, stats); + } + else { + // schedule decommit + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (expire != 0) { + mi_atomic_addi64_acq_rel(&arena->purge_expire, delay/10); // add smallish extra delay + } + else { + mi_atomic_storei64_release(&arena->purge_expire, _mi_clock_now() + delay); + } + _mi_bitmap_claim_across(arena->blocks_purge, arena->field_count, blocks, bitmap_idx, NULL); + } +} + +// purge a range of blocks +// return true if the full range was purged. +// assumes we own the area (i.e. 
blocks_in_use is claimed by us) +static bool mi_arena_purge_range(mi_arena_t* arena, size_t idx, size_t startidx, size_t bitlen, size_t purge, mi_stats_t* stats) { + const size_t endidx = startidx + bitlen; + size_t bitidx = startidx; + bool all_purged = false; + while (bitidx < endidx) { + // count consequetive ones in the purge mask + size_t count = 0; + while (bitidx + count < endidx && (purge & ((size_t)1 << (bitidx + count))) != 0) { + count++; + } + if (count > 0) { + // found range to be purged + const mi_bitmap_index_t range_idx = mi_bitmap_index_create(idx, bitidx); + mi_arena_purge(arena, range_idx, count, stats); + if (count == bitlen) { + all_purged = true; + } + } + bitidx += (count+1); // +1 to skip the zero bit (or end) + } + return all_purged; +} + +// returns true if anything was purged +static bool mi_arena_try_purge(mi_arena_t* arena, mi_msecs_t now, bool force, mi_stats_t* stats) +{ + if (arena->memid.is_pinned || arena->blocks_purge == NULL) return false; + mi_msecs_t expire = mi_atomic_loadi64_relaxed(&arena->purge_expire); + if (expire == 0) return false; + if (!force && expire > now) return false; + + // reset expire (if not already set concurrently) + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire, &expire, 0); + + // potential purges scheduled, walk through the bitmap + bool any_purged = false; + bool full_purge = true; + for (size_t i = 0; i < arena->field_count; i++) { + size_t purge = mi_atomic_load_relaxed(&arena->blocks_purge[i]); + if (purge != 0) { + size_t bitidx = 0; + while (bitidx < MI_BITMAP_FIELD_BITS) { + // find consequetive range of ones in the purge mask + size_t bitlen = 0; + while (bitidx + bitlen < MI_BITMAP_FIELD_BITS && (purge & ((size_t)1 << (bitidx + bitlen))) != 0) { + bitlen++; + } + // try to claim the longest range of corresponding in_use bits + const mi_bitmap_index_t bitmap_index = mi_bitmap_index_create(i, bitidx); + while( bitlen > 0 ) { + if (_mi_bitmap_try_claim(arena->blocks_inuse, 
arena->field_count, bitlen, bitmap_index)) { + break; + } + bitlen--; + } + // actual claimed bits at `in_use` + if (bitlen > 0) { + // read purge again now that we have the in_use bits + purge = mi_atomic_load_acquire(&arena->blocks_purge[i]); + if (!mi_arena_purge_range(arena, i, bitidx, bitlen, purge, stats)) { + full_purge = false; + } + any_purged = true; + // release the claimed `in_use` bits again + _mi_bitmap_unclaim(arena->blocks_inuse, arena->field_count, bitlen, bitmap_index); + } + bitidx += (bitlen+1); // +1 to skip the zero (or end) + } // while bitidx + } // purge != 0 + } + // if not fully purged, make sure to purge again in the future + if (!full_purge) { + const long delay = mi_arena_purge_delay(); + mi_msecs_t expected = 0; + mi_atomic_casi64_strong_acq_rel(&arena->purge_expire,&expected,_mi_clock_now() + delay); + } + return any_purged; +} + +static void mi_arenas_try_purge( bool force, bool visit_all, mi_stats_t* stats ) { + if (_mi_preloading() || mi_arena_purge_delay() <= 0) return; // nothing will be scheduled + + const size_t max_arena = mi_atomic_load_acquire(&mi_arena_count); + if (max_arena == 0) return; + + // allow only one thread to purge at a time + static mi_atomic_guard_t purge_guard; + mi_atomic_guard(&purge_guard) + { + mi_msecs_t now = _mi_clock_now(); + size_t max_purge_count = (visit_all ? 
max_arena : 1); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL) { + if (mi_arena_try_purge(arena, now, force, stats)) { + if (max_purge_count <= 1) break; + max_purge_count--; + } + } + } + } +} + + /* ----------------------------------------------------------- Arena free ----------------------------------------------------------- */ -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats) { +void _mi_arena_free(void* p, size_t size, size_t committed_size, mi_memid_t memid, mi_stats_t* stats) { mi_assert_internal(size > 0 && stats != NULL); + mi_assert_internal(committed_size <= size); if (p==NULL) return; if (size==0) return; - - if (memid == MI_MEMID_OS) { + const bool all_committed = (committed_size == size); + + if (mi_memkind_is_os(memid.memkind)) { // was a direct OS allocation, pass through - _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats); + if (!all_committed && committed_size > 0) { + // if partially committed, adjust the committed stats (as `_mi_os_free` will increase decommit by the full size) + _mi_stat_decrease(&stats->committed, committed_size); + } + _mi_os_free(p, size, memid, stats); } - else { + else if (memid.memkind == MI_MEM_ARENA) { // allocated in an arena - mi_assert_internal(align_offset == 0); size_t arena_idx; size_t bitmap_idx; mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); - mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); const size_t blocks = mi_block_count_of_size(size); + // checks if (arena == NULL) { _mi_error_message(EINVAL, "trying to free from non-existent arena: %p, size %zu, memid: 0x%zx\n", p, size, memid); @@ -314,24 
+631,100 @@ void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, _mi_error_message(EINVAL, "trying to free from non-existent arena block: %p, size %zu, memid: 0x%zx\n", p, size, memid); return; } + + // need to set all memory to undefined as some parts may still be marked as no_access (like padding etc.) + mi_track_mem_undefined(p,size); + // potentially decommit - if (!arena->allow_decommit || arena->blocks_committed == NULL) { - mi_assert_internal(all_committed); // note: may be not true as we may "pretend" to be not committed (in segment.c) + if (arena->memid.is_pinned || arena->blocks_committed == NULL) { + mi_assert_internal(all_committed); } else { mi_assert_internal(arena->blocks_committed != NULL); - _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails - _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_assert_internal(arena->blocks_purge != NULL); + + if (!all_committed) { + // mark the entire range as no longer committed (so we recommit the full range when re-using) + _mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx); + mi_track_mem_noaccess(p,size); + if (committed_size > 0) { + // if partially committed, adjust the committed stats (is it will be recommitted when re-using) + // in the delayed purge, we now need to not count a decommit if the range is not marked as committed. + _mi_stat_decrease(&stats->committed, committed_size); + } + // note: if not all committed, it may be that the purge will reset/decommit the entire range + // that contains already decommitted parts. Since purge consistently uses reset or decommit that + // works (as we should never reset decommitted parts). 
+ } + // (delay) purge the entire range + mi_arena_schedule_purge(arena, bitmap_idx, blocks, stats); } + // and make it available to others again bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx); if (!all_inuse) { - _mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size); + _mi_error_message(EAGAIN, "trying to free an already freed arena block: %p, size %zu\n", p, size); return; }; } + else { + // arena was none, external, or static; nothing to do + mi_assert_internal(memid.memkind < MI_MEM_OS); + } + + // purge expired decommits + mi_arenas_try_purge(false, false, stats); +} + +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +static void mi_arenas_unsafe_destroy(void) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + size_t new_max_arena = 0; + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL) { + if (arena->start != NULL && mi_memkind_is_os(arena->memid.memkind)) { + mi_atomic_store_ptr_release(mi_arena_t, &mi_arenas[i], NULL); + _mi_os_free(arena->start, mi_arena_size(arena), arena->memid, &_mi_stats_main); + } + else { + new_max_arena = i; + } + mi_arena_meta_free(arena, arena->meta_memid, arena->meta_size, &_mi_stats_main); + } + } + + // try to lower the max arena. 
+ size_t expected = max_arena; + mi_atomic_cas_strong_acq_rel(&mi_arena_count, &expected, new_max_arena); +} + +// Purge the arenas; if `force_purge` is true, amenable parts are purged even if not yet expired +void _mi_arena_collect(bool force_purge, mi_stats_t* stats) { + mi_arenas_try_purge(force_purge, true /* visit all */, stats); +} + +// destroy owned arenas; this is unsafe and should only be done using `mi_option_destroy_on_exit` +// for dynamic libraries that are unloaded and need to release all their allocated memory. +void _mi_arena_unsafe_destroy_all(mi_stats_t* stats) { + mi_arenas_unsafe_destroy(); + _mi_arena_collect(true /* force purge */, stats); // purge non-owned arenas } +// Is a pointer inside any of our arenas? +bool _mi_arena_contains(const void* p) { + const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); + for (size_t i = 0; i < max_arena; i++) { + mi_arena_t* arena = mi_atomic_load_ptr_acquire(mi_arena_t, &mi_arenas[i]); + if (arena != NULL && arena->start <= (const uint8_t*)p && arena->start + mi_arena_block_size(arena->block_count) > (const uint8_t*)p) { + return true; + } + } + return false; +} + + /* ----------------------------------------------------------- Add an arena. 
----------------------------------------------------------- */ @@ -340,53 +733,58 @@ static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); - if (arena_id != NULL) *arena_id = -1; + if (arena_id != NULL) { *arena_id = -1; } size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { mi_atomic_decrement_acq_rel(&mi_arena_count); return false; } - mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); arena->id = mi_arena_id_create(i); - if (arena_id != NULL) *arena_id = arena->id; + mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); + if (arena_id != NULL) { *arena_id = arena->id; } return true; } -bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept +static bool mi_manage_os_memory_ex2(void* start, size_t size, bool is_large, int numa_node, bool exclusive, mi_memid_t memid, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); if (size < MI_ARENA_BLOCK_SIZE) return false; if (is_large) { - mi_assert_internal(is_committed); - is_committed = true; + mi_assert_internal(memid.initially_committed && memid.is_pinned); } const size_t bcount = size / MI_ARENA_BLOCK_SIZE; const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS); - const size_t bitmaps = (is_committed ? 2 : 3); + const size_t bitmaps = (memid.is_pinned ? 2 : 4); const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t)); - mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? 
+ mi_memid_t meta_memid; + mi_arena_t* arena = (mi_arena_t*)mi_arena_meta_zalloc(asize, &meta_memid, &_mi_stats_main); // TODO: can we avoid allocating from the OS? if (arena == NULL) return false; - + + // already zero'd due to os_alloc + // _mi_memzero(arena, asize); arena->id = _mi_arena_id_none(); + arena->memid = memid; arena->exclusive = exclusive; + arena->meta_size = asize; + arena->meta_memid = meta_memid; arena->block_count = bcount; arena->field_count = fields; arena->start = (uint8_t*)start; arena->numa_node = numa_node; // TODO: or get the current numa node if -1? (now it allows anyone to allocate on -1) arena->is_large = is_large; - arena->is_zero_init = is_zero; - arena->allow_decommit = !is_large && !is_committed; // only allow decommit for initially uncommitted memory + arena->purge_expire = 0; arena->search_idx = 0; arena->blocks_dirty = &arena->blocks_inuse[fields]; // just after inuse bitmap - arena->blocks_committed = (!arena->allow_decommit ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap - // the bitmaps are already zero initialized due to os_alloc + arena->blocks_committed = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[2*fields]); // just after dirty bitmap + arena->blocks_purge = (arena->memid.is_pinned ? NULL : &arena->blocks_inuse[3*fields]); // just after committed bitmap // initialize committed bitmap? 
- if (arena->blocks_committed != NULL && is_committed) { + if (arena->blocks_committed != NULL && arena->memid.initially_committed) { memset((void*)arena->blocks_committed, 0xFF, fields*sizeof(mi_bitmap_field_t)); // cast to void* to avoid atomic warning } + // and claim leftover blocks if needed (so we never allocate there) ptrdiff_t post = (fields * MI_BITMAP_FIELD_BITS) - bcount; mi_assert_internal(post >= 0); @@ -395,32 +793,42 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is mi_bitmap_index_t postidx = mi_bitmap_index_create(fields - 1, MI_BITMAP_FIELD_BITS - post); _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } - return mi_arena_add(arena, arena_id); } +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + mi_memid_t memid = _mi_memid_create(MI_MEM_EXTERNAL); + memid.initially_committed = is_committed; + memid.initially_zero = is_zero; + memid.is_pinned = is_large; + return mi_manage_os_memory_ex2(start,size,is_large,numa_node,exclusive,memid, arena_id); +} + // Reserve a range of regular OS memory -int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept -{ +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block - bool large = allow_large; - void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main); - if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory_ex(start, size, (large || commit), large, true, -1, exclusive, arena_id)) { - _mi_os_free_ex(start, size, commit, &_mi_stats_main); - _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); + 
mi_memid_t memid; + void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, allow_large, &memid, &_mi_stats_main); + if (start == NULL) return ENOMEM; + const bool is_large = memid.is_pinned; // todo: use separate is_large field? + if (!mi_manage_os_memory_ex2(start, size, is_large, -1 /* numa node */, exclusive, memid, arena_id)) { + _mi_os_free_ex(start, size, commit, memid, &_mi_stats_main); + _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size, 1024)); return ENOMEM; } - _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size,1024), large ? " (in large os pages)" : ""); + _mi_verbose_message("reserved %zu KiB memory%s\n", _mi_divide_up(size, 1024), is_large ? " (in large os pages)" : ""); return 0; } + +// Manage a range of regular OS memory bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { - return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false, NULL); + return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false /* exclusive? 
*/, NULL); } +// Reserve a range of regular OS memory int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); } @@ -470,15 +878,16 @@ int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_m if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); size_t hsize = 0; size_t pages_reserved = 0; - void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize); + mi_memid_t memid; + void* p = _mi_os_alloc_huge_os_pages(pages, numa_node, timeout_msecs, &pages_reserved, &hsize, &memid); if (p==NULL || pages_reserved==0) { _mi_warning_message("failed to reserve %zu GiB huge pages\n", pages); return ENOMEM; } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory_ex(p, hsize, true, true, true, numa_node, exclusive, arena_id)) { - _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); + if (!mi_manage_os_memory_ex2(p, hsize, true, numa_node, exclusive, memid, arena_id)) { + _mi_os_free(p, hsize, memid, &_mi_stats_main); return ENOMEM; } return 0; @@ -524,3 +933,4 @@ int mi_reserve_huge_os_pages(size_t pages, double max_secs, size_t* pages_reserv if (err==0 && pages_reserved!=NULL) *pages_reserved = pages; return err; } + diff --git a/source/luametatex/source/libraries/mimalloc/src/bitmap.c b/source/luametatex/source/libraries/mimalloc/src/bitmap.c index ee94edb98..a13dbe15b 100644 --- a/source/luametatex/source/libraries/mimalloc/src/bitmap.c +++ b/source/luametatex/source/libraries/mimalloc/src/bitmap.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2021 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. 
A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -11,7 +11,6 @@ represeted as an array of fields where each field is a machine word (`size_t`) There are two api's; the standard one cannot have sequences that cross between the bitmap fields (and a sequence must be <= MI_BITMAP_FIELD_BITS). -(this is used in region allocation) The `_across` postfixed functions do allow sequences that can cross over between the fields. (This is used in arena allocation) @@ -63,12 +62,12 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons // scan linearly for a free range of zero bits while (bitidx <= bitidx_max) { - const size_t mapm = map & m; + const size_t mapm = (map & m); if (mapm == 0) { // are the mask bits free at bitidx? mi_assert_internal((m >> bitidx) == mask); // no overflow? - const size_t newmap = map | m; + const size_t newmap = (map | m); mi_assert_internal((newmap^map) >> bitidx == mask); - if (!mi_atomic_cas_weak_acq_rel(field, &map, newmap)) { // TODO: use strong cas here? + if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { // TODO: use weak cas here? // no success, another thread claimed concurrently.. keep going (with updated `map`) continue; } @@ -81,7 +80,8 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons else { // on to the next bit range #ifdef MI_HAVE_FAST_BITSCAN - const size_t shift = (count == 1 ? 1 : mi_bsr(mapm) - bitidx + 1); + mi_assert_internal(mapm != 0); + const size_t shift = (count == 1 ? 
1 : (MI_INTPTR_BITS - mi_clz(mapm) - bitidx)); mi_assert_internal(shift > 0 && shift <= count); #else const size_t shift = 1; @@ -100,7 +100,7 @@ inline bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, cons bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap + if (idx >= bitmap_fields) { idx = 0; } // wrap if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } @@ -127,14 +127,6 @@ bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap return false; } -/* -// Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. -// For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. -bool _mi_bitmap_try_find_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t count, mi_bitmap_index_t* bitmap_idx) { - return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, 0, count, bitmap_idx); -} -*/ - // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. 
bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { @@ -143,7 +135,7 @@ bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); // mi_assert_internal((bitmap[idx] & mask) == mask); - size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); + const size_t prev = mi_atomic_and_acq_rel(&bitmap[idx], ~mask); return ((prev & mask) == mask); } @@ -157,7 +149,7 @@ bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); //mi_assert_internal(any_zero != NULL || (bitmap[idx] & mask) == 0); size_t prev = mi_atomic_or_acq_rel(&bitmap[idx], mask); - if (any_zero != NULL) *any_zero = ((prev & mask) != mask); + if (any_zero != NULL) { *any_zero = ((prev & mask) != mask); } return ((prev & mask) == 0); } @@ -167,11 +159,28 @@ static bool mi_bitmap_is_claimedx(mi_bitmap_t bitmap, size_t bitmap_fields, size const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); const size_t mask = mi_bitmap_mask_(count, bitidx); mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); - size_t field = mi_atomic_load_relaxed(&bitmap[idx]); - if (any_ones != NULL) *any_ones = ((field & mask) != 0); + const size_t field = mi_atomic_load_relaxed(&bitmap[idx]); + if (any_ones != NULL) { *any_ones = ((field & mask) != 0); } return ((field & mask) == mask); } +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. 
+bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { + const size_t idx = mi_bitmap_index_field(bitmap_idx); + const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); + const size_t mask = mi_bitmap_mask_(count, bitidx); + mi_assert_internal(bitmap_fields > idx); MI_UNUSED(bitmap_fields); + size_t expected = mi_atomic_load_relaxed(&bitmap[idx]); + do { + if ((expected & mask) != 0) return false; + } + while (!mi_atomic_cas_strong_acq_rel(&bitmap[idx], &expected, expected | mask)); + mi_assert_internal((expected & mask) == 0); + return true; +} + + bool _mi_bitmap_is_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx) { return mi_bitmap_is_claimedx(bitmap, bitmap_fields, count, bitmap_idx, NULL); } @@ -190,6 +199,7 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t // Try to atomically claim a sequence of `count` bits starting from the field // at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success. 
+// Only needs to consider crossing into the next fields (see `mi_bitmap_try_find_from_claim_across`) static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(bitmap_idx != NULL); @@ -200,9 +210,9 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit const size_t initial = mi_clz(map); // count of initial zeros starting at idx mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS); if (initial == 0) return false; - if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields + if (initial >= count) return _mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx); // no need to cross fields (this case won't happen for us) if (_mi_divide_up(count - initial, MI_BITMAP_FIELD_BITS) >= (bitmap_fields - idx)) return false; // not enough entries - + // scan ahead size_t found = initial; size_t mask = 0; // mask bits for the final field @@ -210,25 +220,27 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit field++; map = mi_atomic_load_relaxed(field); const size_t mask_bits = (found + MI_BITMAP_FIELD_BITS <= count ? 
MI_BITMAP_FIELD_BITS : (count - found)); + mi_assert_internal(mask_bits > 0 && mask_bits <= MI_BITMAP_FIELD_BITS); mask = mi_bitmap_mask_(mask_bits, 0); - if ((map & mask) != 0) return false; + if ((map & mask) != 0) return false; // some part is already claimed found += mask_bits; } mi_assert_internal(field < &bitmap[bitmap_fields]); - // found range of zeros up to the final field; mask contains mask in the final field - // now claim it atomically + // we found a range of contiguous zeros up to the final field; mask contains mask in the final field + // now try to claim the range atomically mi_bitmap_field_t* const final_field = field; const size_t final_mask = mask; mi_bitmap_field_t* const initial_field = &bitmap[idx]; - const size_t initial_mask = mi_bitmap_mask_(initial, MI_BITMAP_FIELD_BITS - initial); + const size_t initial_idx = MI_BITMAP_FIELD_BITS - initial; + const size_t initial_mask = mi_bitmap_mask_(initial, initial_idx); // initial field size_t newmap; field = initial_field; map = mi_atomic_load_relaxed(field); do { - newmap = map | initial_mask; + newmap = (map | initial_mask); if ((map & initial_mask) != 0) { goto rollback; }; } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); @@ -243,31 +255,32 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit mi_assert_internal(field == final_field); map = mi_atomic_load_relaxed(field); do { - newmap = map | final_mask; + newmap = (map | final_mask); if ((map & final_mask) != 0) { goto rollback; } } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); // claimed! 
- *bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial); + *bitmap_idx = mi_bitmap_index_create(idx, initial_idx); return true; rollback: // roll back intermediate fields + // (we just failed to claim `field` so decrement first) while (--field > initial_field) { newmap = 0; map = MI_BITMAP_FIELD_FULL; mi_assert_internal(mi_atomic_load_relaxed(field) == map); mi_atomic_store_release(field, newmap); } - if (field == initial_field) { + if (field == initial_field) { // (if we failed on the initial field, `field + 1 == initial_field`) map = mi_atomic_load_relaxed(field); do { mi_assert_internal((map & initial_mask) == initial_mask); - newmap = map & ~initial_mask; + newmap = (map & ~initial_mask); } while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)); } // retry? (we make a recursive call instead of goto to be able to use const declarations) - if (retries < 4) { + if (retries <= 2) { return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx); } else { @@ -280,17 +293,22 @@ rollback: // Starts at idx, and wraps around to search in all `bitmap_fields` fields. 
bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx) { mi_assert_internal(count > 0); - if (count==1) return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + if (count <= 2) { + // we don't bother with crossover fields for small counts + return _mi_bitmap_try_find_from_claim(bitmap, bitmap_fields, start_field_idx, count, bitmap_idx); + } + + // visit the fields size_t idx = start_field_idx; for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { - if (idx >= bitmap_fields) idx = 0; // wrap - // try to claim inside the field + if (idx >= bitmap_fields) { idx = 0; } // wrap + // first try to claim inside a field if (count <= MI_BITMAP_FIELD_BITS) { if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { return true; } } - // try to claim across fields + // if that fails, then try to claim across fields if (mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, 0, bitmap_idx)) { return true; } @@ -333,14 +351,14 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask); bool all_one = true; mi_bitmap_field_t* field = &bitmap[idx]; - size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); + size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask); // clear first part if ((prev & pre_mask) != pre_mask) all_one = false; while(mid_count-- > 0) { - prev = mi_atomic_and_acq_rel(field++, ~mid_mask); + prev = mi_atomic_and_acq_rel(field++, ~mid_mask); // clear mid part if ((prev & mid_mask) != mid_mask) all_one = false; } if (post_mask!=0) { - prev = mi_atomic_and_acq_rel(field, ~post_mask); + prev = mi_atomic_and_acq_rel(field, ~post_mask); // clear end part if ((prev & post_mask) != post_mask) all_one = false; } return all_one; @@ 
-370,7 +388,7 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co if ((prev & post_mask) != 0) all_zero = false; if ((prev & post_mask) != post_mask) any_zero = true; } - if (pany_zero != NULL) *pany_zero = any_zero; + if (pany_zero != NULL) { *pany_zero = any_zero; } return all_zero; } @@ -399,7 +417,7 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field if ((prev & post_mask) != post_mask) all_ones = false; if ((prev & post_mask) != 0) any_ones = true; } - if (pany_ones != NULL) *pany_ones = any_ones; + if (pany_ones != NULL) { *pany_ones = any_ones; } return all_ones; } diff --git a/source/luametatex/source/libraries/mimalloc/src/bitmap.h b/source/luametatex/source/libraries/mimalloc/src/bitmap.h index 3476ea46b..0a765c714 100644 --- a/source/luametatex/source/libraries/mimalloc/src/bitmap.h +++ b/source/luametatex/source/libraries/mimalloc/src/bitmap.h @@ -80,6 +80,10 @@ bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); +// Try to set `count` bits at `bitmap_idx` from 0 to 1 atomically. +// Returns `true` if successful when all previous `count` bits were 0. +bool _mi_bitmap_try_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); + // Set `count` bits at `bitmap_idx` to 1 atomically // Returns `true` if all `count` bits were 0 previously. `any_zero` is `true` if there was at least one zero bit. 
bool _mi_bitmap_claim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* any_zero); diff --git a/source/luametatex/source/libraries/mimalloc/src/heap.c b/source/luametatex/source/libraries/mimalloc/src/heap.c index 7103281f0..58520ddf6 100644 --- a/source/luametatex/source/libraries/mimalloc/src/heap.c +++ b/source/luametatex/source/libraries/mimalloc/src/heap.c @@ -154,8 +154,8 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_collect, &collect, NULL); mi_assert_internal( collect != MI_ABANDON || mi_atomic_load_ptr_acquire(mi_block_t,&heap->thread_delayed_free) == NULL ); - // collect abandoned segments (in particular, decommit expired parts of segments in the abandoned segment list) - // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment + // collect abandoned segments (in particular, purge expired parts of segments in the abandoned segment list) + // note: forced purge can be quite expensive if many threads are created/destroyed so we do not force on abandonment _mi_abandoned_collect(heap, collect == MI_FORCE /* force? 
*/, &heap->tld->segments); // collect segment local caches @@ -163,13 +163,10 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) _mi_segment_thread_collect(&heap->tld->segments); } - // decommit in global segment caches - // note: forced decommit can be quite expensive if many threads are created/destroyed so we do not force on abandonment - _mi_segment_cache_collect( collect == MI_FORCE, &heap->tld->os); - // collect regions on program-exit (or shared library unload) if (force && _mi_is_main_thread() && mi_heap_is_backing(heap)) { - //_mi_mem_collect(&heap->tld->os); + _mi_thread_data_collect(); // collect thread data cache + _mi_arena_collect(true /* force purge */, &heap->tld->stats); } } @@ -209,16 +206,16 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena( mi_arena_id_t arena_id ) { +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
- if (heap==NULL) return NULL; + if (heap == NULL) return NULL; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); heap->arena_id = arena_id; _mi_random_split(&bheap->random, &heap->random); - heap->cookie = _mi_heap_random_next(heap) | 1; + heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); heap->keys[1] = _mi_heap_random_next(heap); heap->no_reclaim = true; // don't reclaim abandoned pages or otherwise destroy is unsafe @@ -232,7 +229,7 @@ mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { return mi_heap_new_in_arena(_mi_arena_id_none()); } -bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, mi_memid_t memid) { return _mi_arena_memid_is_suitable(memid, heap->arena_id); } @@ -365,7 +362,8 @@ void mi_heap_destroy(mi_heap_t* heap) { } } -void _mi_heap_destroy_all(void) { +// forcefully destroy all heaps in the current thread +void _mi_heap_unsafe_destroy_all(void) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* curr = bheap->tld->heaps; while (curr != NULL) { diff --git a/source/luametatex/source/libraries/mimalloc/src/init.c b/source/luametatex/source/libraries/mimalloc/src/init.c index 51d42acd9..b1db14c5f 100644 --- a/source/luametatex/source/libraries/mimalloc/src/init.c +++ b/source/luametatex/source/libraries/mimalloc/src/init.c @@ -14,7 +14,7 @@ terms of the MIT license. 
A copy of the license can be found in the file // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { - 0, false, false, false, false, + 0, false, false, false, 0, // capacity 0, // reserved capacity { 0 }, // flags @@ -37,6 +37,7 @@ const mi_page_t _mi_page_empty = { #define MI_PAGE_EMPTY() ((mi_page_t*)&_mi_page_empty) +#if (MI_SMALL_WSIZE_MAX==128) #if (MI_PADDING>0) && (MI_INTPTR_SIZE >= 8) #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY(), MI_PAGE_EMPTY() } #elif (MI_PADDING>0) @@ -44,7 +45,9 @@ const mi_page_t _mi_page_empty = { #else #define MI_SMALL_PAGES_EMPTY { MI_INIT128(MI_PAGE_EMPTY), MI_PAGE_EMPTY() } #endif - +#else +#error "define right initialization sizes corresponding to MI_SMALL_WSIZE_MAX" +#endif // Empty page queues for every bin #define QNULL(sz) { NULL, NULL, (sz)*sizeof(uintptr_t) } @@ -79,8 +82,9 @@ const mi_page_t _mi_page_empty = { MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ MI_STAT_COUNT_NULL(), MI_STAT_COUNT_NULL(), \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ - { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ + MI_STAT_COUNT_NULL(), \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, \ + { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 }, { 0, 0 } \ MI_STAT_COUNT_END_NULL() @@ -199,6 +203,7 @@ mi_heap_t* _mi_heap_main_get(void) { typedef struct mi_thread_data_s { mi_heap_t heap; // must come first due to cast in `_mi_heap_done` mi_tld_t tld; + mi_memid_t memid; } mi_thread_data_t; @@ -207,30 +212,44 @@ typedef struct mi_thread_data_s { // destroy many OS threads, this may causes too much overhead // per thread so we maintain a small cache of recently freed metadata. 
-#define TD_CACHE_SIZE (8) +#define TD_CACHE_SIZE (16) static _Atomic(mi_thread_data_t*) td_cache[TD_CACHE_SIZE]; -static mi_thread_data_t* mi_thread_data_alloc(void) { +static mi_thread_data_t* mi_thread_data_zalloc(void) { // try to find thread metadata in the cache - mi_thread_data_t* td; + bool is_zero = false; + mi_thread_data_t* td = NULL; for (int i = 0; i < TD_CACHE_SIZE; i++) { td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td != NULL) { + // found cached allocation, try use it td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - return td; + break; } } } - // if that fails, allocate directly from the OS - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + + // if that fails, allocate as meta data if (td == NULL) { - // if this fails, try once more. (issue #257) - td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &_mi_stats_main); + mi_memid_t memid; + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); if (td == NULL) { - // really out of memory - _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + // if this fails, try once more. 
(issue #257) + td = (mi_thread_data_t*)_mi_os_alloc(sizeof(mi_thread_data_t), &memid, &_mi_stats_main); + if (td == NULL) { + // really out of memory + _mi_error_message(ENOMEM, "unable to allocate thread local heap metadata (%zu bytes)\n", sizeof(mi_thread_data_t)); + } } + if (td != NULL) { + td->memid = memid; + is_zero = memid.initially_zero; + } + } + + if (td != NULL && !is_zero) { + _mi_memzero_aligned(td, sizeof(*td)); } return td; } @@ -247,17 +266,17 @@ static void mi_thread_data_free( mi_thread_data_t* tdfree ) { } } // if that fails, just free it directly - _mi_os_free(tdfree, sizeof(mi_thread_data_t), &_mi_stats_main); + _mi_os_free(tdfree, sizeof(mi_thread_data_t), tdfree->memid, &_mi_stats_main); } -static void mi_thread_data_collect(void) { +void _mi_thread_data_collect(void) { // free all thread metadata from the cache for (int i = 0; i < TD_CACHE_SIZE; i++) { mi_thread_data_t* td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]); if (td != NULL) { td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL); if (td != NULL) { - _mi_os_free( td, sizeof(mi_thread_data_t), &_mi_stats_main ); + _mi_os_free(td, sizeof(mi_thread_data_t), td->memid, &_mi_stats_main); } } } @@ -275,10 +294,9 @@ static bool _mi_heap_init(void) { } else { // use `_mi_os_alloc` to allocate directly from the OS - mi_thread_data_t* td = mi_thread_data_alloc(); + mi_thread_data_t* td = mi_thread_data_zalloc(); if (td == NULL) return false; - // OS allocated so already zero initialized mi_tld_t* tld = &td->tld; mi_heap_t* heap = &td->heap; _mi_memcpy_aligned(tld, &tld_empty, sizeof(*tld)); @@ -340,7 +358,6 @@ static bool _mi_heap_done(mi_heap_t* heap) { mi_thread_data_free((mi_thread_data_t*)heap); } else { - mi_thread_data_collect(); // free cached thread metadata #if 0 // never free the main thread even in debug mode; if a dll is linked statically with mimalloc, // there may still be delete/free calls after the mi_fls_done is called. 
Issue #207 @@ -548,6 +565,9 @@ static void mi_detect_cpu_features(void) { void mi_process_init(void) mi_attr_noexcept { // ensure we are called once static mi_atomic_once_t process_init; + #if _MSC_VER < 1920 + mi_heap_main_init(); // vs2017 can dynamically re-initialize _mi_heap_main + #endif if (!mi_atomic_once(&process_init)) return; _mi_process_is_initialized = true; _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); @@ -606,7 +626,7 @@ static void mi_cdecl mi_process_done(void) { _mi_prim_thread_done_auto_done(); #ifndef MI_SKIP_COLLECT_ON_EXIT - #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) + #if (MI_DEBUG || !defined(MI_SHARED_LIB)) // free all memory if possible on process exit. This is not needed for a stand-alone process // but should be done if mimalloc is statically linked into another shared library which // is repeatedly loaded/unloaded, see issue #281. @@ -618,8 +638,9 @@ static void mi_cdecl mi_process_done(void) { // since after process_done there might still be other code running that calls `free` (like at_exit routines, // or C-runtime termination code. if (mi_option_is_enabled(mi_option_destroy_on_exit)) { - _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) - _mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os); // release all cached segments + mi_collect(true /* force */); + _mi_heap_unsafe_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!) 
+ _mi_arena_unsafe_destroy_all(& _mi_heap_main_get()->tld->stats); } if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) { diff --git a/source/luametatex/source/libraries/mimalloc/src/options.c b/source/luametatex/source/libraries/mimalloc/src/options.c index 450bc2f3f..345b560e3 100644 --- a/source/luametatex/source/libraries/mimalloc/src/options.c +++ b/source/luametatex/source/libraries/mimalloc/src/options.c @@ -41,7 +41,7 @@ typedef struct mi_option_desc_s { mi_init_t init; // is it initialized yet? (from the environment) mi_option_t option; // for debugging: the option index should match the option const char* name; // option name without `mimalloc_` prefix - const char* legacy_name; // potential legacy v1.x option name + const char* legacy_name; // potential legacy option name } mi_option_desc_t; #define MI_OPTION(opt) mi_option_##opt, #opt, NULL @@ -58,36 +58,38 @@ static mi_option_desc_t options[_mi_option_last] = { 0, UNINIT, MI_OPTION(show_stats) }, { 0, UNINIT, MI_OPTION(verbose) }, - // Some of the following options are experimental and not all combinations are valid. Use with care. - { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (8MiB) (but see also `eager_commit_delay`) - { 0, UNINIT, MI_OPTION(deprecated_eager_region_commit) }, - { 0, UNINIT, MI_OPTION(deprecated_reset_decommits) }, - { 0, UNINIT, MI_OPTION(large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's - { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages - { -1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N + // the following options are experimental and not all combinations make sense. + { 1, UNINIT, MI_OPTION(eager_commit) }, // commit per segment directly (4MiB) (but see also `eager_commit_delay`) + { 2, UNINIT, MI_OPTION_LEGACY(arena_eager_commit,eager_region_commit) }, // eager commit arena's? 
2 is used to enable this only on an OS that has overcommit (i.e. linux) + { 1, UNINIT, MI_OPTION_LEGACY(purge_decommits,reset_decommits) }, // purge decommits memory (instead of reset) (note: on linux this uses MADV_DONTNEED for decommit) + { 0, UNINIT, MI_OPTION_LEGACY(allow_large_os_pages,large_os_pages) }, // use large OS pages, use only with eager commit to prevent fragmentation of VMA's + { 0, UNINIT, MI_OPTION(reserve_huge_os_pages) }, // per 1GiB huge pages + {-1, UNINIT, MI_OPTION(reserve_huge_os_pages_at) }, // reserve huge pages at node N { 0, UNINIT, MI_OPTION(reserve_os_memory) }, - { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread - { 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free - { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates - { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, - #if defined(__NetBSD__) - { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed - #elif defined(_WIN32) - { 4, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + { 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread + { 0, UNINIT, MI_OPTION(deprecated_page_reset) }, // reset page memory on free + { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_purge,abandoned_page_reset) }, // reset free page memory when a thread terminates + { 0, UNINIT, MI_OPTION(deprecated_segment_reset) }, // reset segment memory on free (needs eager commit) +#if defined(__NetBSD__) + { 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed +#else + { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) +#endif + { 10, UNINIT, 
MI_OPTION_LEGACY(purge_delay,reset_delay) }, // purge delay in milli-seconds + { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. + { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) + { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose + { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output + { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output + { 8, UNINIT, MI_OPTION(max_segment_reclaim)}, // max. number of segment reclaims from the abandoned segments per try. + { 0, UNINIT, MI_OPTION(destroy_on_exit)}, // release all OS memory on process exit; careful with dangling pointer or after-exit frees! + #if (MI_INTPTR_SIZE>4) + { 1024L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, // reserve memory N KiB at a time #else - { 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand) + { 128L * 1024L, UNINIT, MI_OPTION(arena_reserve) }, #endif - { 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds - { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes. - { 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas) - { 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose - { 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output - { 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output - { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try. 
- { 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds) - { 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments - { 1, UNINIT, MI_OPTION(decommit_extend_delay) }, - { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees! + { 10, UNINIT, MI_OPTION(arena_purge_mult) }, // purge delay multiplier for arena's + { 1, UNINIT, MI_OPTION_LEGACY(purge_extend_delay, decommit_extend_delay) }, }; static void mi_option_init(mi_option_desc_t* desc); @@ -125,6 +127,12 @@ mi_decl_nodiscard long mi_option_get_clamp(mi_option_t option, long min, long ma return (x < min ? min : (x > max ? max : x)); } +mi_decl_nodiscard size_t mi_option_get_size(mi_option_t option) { + mi_assert_internal(option == mi_option_reserve_os_memory || option == mi_option_arena_reserve); + long x = mi_option_get(option); + return (x < 0 ? 0 : (size_t)x * MI_KiB); +} + void mi_option_set(mi_option_t option, long value) { mi_assert(option >= 0 && option < _mi_option_last); if (option < 0 || option >= _mi_option_last) return; @@ -241,7 +249,7 @@ void mi_register_output(mi_output_fun* out, void* arg) mi_attr_noexcept { } // add stderr to the delayed output after the module is loaded -static void mi_add_stderr_output() { +static void mi_add_stderr_output(void) { mi_assert_internal(mi_out_default == NULL); mi_out_buf_flush(&mi_out_stderr, false, NULL); // flush current contents to stderr mi_out_default = &mi_out_buf_stderr; // and add stderr to the delayed output @@ -496,27 +504,27 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { static void mi_option_init(mi_option_desc_t* desc) { // Read option value from the environment - char s[64+1]; + char s[64 + 1]; char buf[64+1]; _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); _mi_strlcat(buf, desc->name, sizeof(buf)); - bool found = 
mi_getenv(buf,s,sizeof(s)); + bool found = mi_getenv(buf, s, sizeof(s)); if (!found && desc->legacy_name != NULL) { _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); _mi_strlcat(buf, desc->legacy_name, sizeof(buf)); - found = mi_getenv(buf,s,sizeof(s)); + found = mi_getenv(buf, s, sizeof(s)); if (found) { - _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name ); - } + _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name); + } } if (found) { - size_t len = _mi_strnlen(s,sizeof(buf)-1); + size_t len = _mi_strnlen(s, sizeof(buf) - 1); for (size_t i = 0; i < len; i++) { buf[i] = _mi_toupper(s[i]); } buf[len] = 0; - if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { + if (buf[0] == 0 || strstr("1;TRUE;YES;ON", buf) != NULL) { desc->value = 1; desc->init = INITIALIZED; } @@ -527,7 +535,7 @@ static void mi_option_init(mi_option_desc_t* desc) { else { char* end = buf; long value = strtol(buf, &end, 10); - if (desc->option == mi_option_reserve_os_memory) { + if (desc->option == mi_option_reserve_os_memory || desc->option == mi_option_arena_reserve) { // this option is interpreted in KiB to prevent overflow of `long` if (*end == 'K') { end++; } else if (*end == 'M') { value *= MI_KiB; end++; } @@ -547,11 +555,11 @@ static void mi_option_init(mi_option_desc_t* desc) { // if the 'mimalloc_verbose' env var has a bogus value we'd never know // (since the value defaults to 'off') so in that case briefly enable verbose desc->value = 1; - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name); desc->value = 0; } else { - _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", desc->name ); + _mi_warning_message("environment option mimalloc_%s has an invalid value.\n", 
desc->name); } } } diff --git a/source/luametatex/source/libraries/mimalloc/src/os.c b/source/luametatex/source/libraries/mimalloc/src/os.c index 6145ccb36..b4f02ba37 100644 --- a/source/luametatex/source/libraries/mimalloc/src/os.c +++ b/source/luametatex/source/libraries/mimalloc/src/os.c @@ -21,13 +21,19 @@ static mi_os_mem_config_t mi_os_mem_config = { 0, // large page size (usually 2MiB) 4096, // allocation granularity true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems) - false // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) + false, // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) + true // has virtual reserve? (if true we can reserve virtual address space without using commit or physical memory) }; bool _mi_os_has_overcommit(void) { return mi_os_mem_config.has_overcommit; } +bool _mi_os_has_virtual_reserve(void) { + return mi_os_mem_config.has_virtual_reserve; +} + + // OS (small) page size size_t _mi_os_page_size(void) { return mi_os_mem_config.page_size; @@ -40,7 +46,7 @@ size_t _mi_os_large_page_size(void) { bool _mi_os_use_large_page(size_t size, size_t alignment) { // if we have access, check the size and alignment requirements - if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; + if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_allow_large_os_pages)) return false; return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0); } @@ -131,7 +137,9 @@ void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { Free memory -------------------------------------------------------------- */ -static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* 
stats); + +static void mi_os_prim_free(void* addr, size_t size, bool still_committed, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_assert_internal((size % _mi_os_page_size()) == 0); if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) @@ -140,18 +148,38 @@ static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } mi_stats_t* stats = &_mi_stats_main; - if (was_committed) { _mi_stat_decrease(&stats->committed, size); } + if (still_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); } - -void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { - const size_t csize = _mi_os_good_alloc_size(size); - mi_os_mem_free(addr,csize,was_committed,tld_stats); +void _mi_os_free_ex(void* addr, size_t size, bool still_committed, mi_memid_t memid, mi_stats_t* tld_stats) { + if (mi_memkind_is_os(memid.memkind)) { + size_t csize = _mi_os_good_alloc_size(size); + void* base = addr; + // different base? 
(due to alignment) + if (memid.mem.os.base != NULL) { + mi_assert(memid.mem.os.base <= addr); + mi_assert((uint8_t*)memid.mem.os.base + memid.mem.os.alignment >= (uint8_t*)addr); + base = memid.mem.os.base; + csize += ((uint8_t*)addr - (uint8_t*)memid.mem.os.base); + } + // free it + if (memid.memkind == MI_MEM_OS_HUGE) { + mi_assert(memid.is_pinned); + mi_os_free_huge_os_pages(base, csize, tld_stats); + } + else { + mi_os_prim_free(base, csize, still_committed, tld_stats); + } + } + else { + // nothing to do + mi_assert(memid.memkind < MI_MEM_OS); + } } -void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { - _mi_os_free_ex(p, size, true, tld_stats); +void _mi_os_free(void* p, size_t size, mi_memid_t memid, mi_stats_t* tld_stats) { + _mi_os_free_ex(p, size, true, memid, tld_stats); } @@ -160,31 +188,31 @@ void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { -------------------------------------------------------------- */ // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. 
-static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { +static void* mi_os_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, mi_stats_t* stats) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(is_zero != NULL); + mi_assert_internal(is_large != NULL); if (size == 0) return NULL; - if (!commit) allow_large = false; - if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning + if (!commit) { allow_large = false; } + if (try_alignment == 0) { try_alignment = 1; } // avoid 0 to ensure there will be no divide by zero when aligning + *is_zero = false; void* p = NULL; - int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p); + int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, is_zero, &p); if (err != 0) { _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); } - /* - if (commit && allow_large) { - p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); - if (p != NULL) { - *is_large = true; - return p; - } - } - */ - mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); - if (commit) { _mi_stat_increase(&stats->committed, size); } + if (commit) { + _mi_stat_increase(&stats->committed, size); + // seems needed for asan (or `mimalloc-test-api` fails) + #ifdef MI_TRACK_ASAN + if (*is_zero) { mi_track_mem_defined(p,size); } + else { mi_track_mem_undefined(p,size); } + #endif + } } return p; } @@ -192,33 +220,40 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo // Primitive aligned allocation from the OS. // This function guarantees the allocated memory is aligned. 
-static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, mi_stats_t* stats) { +static void* mi_os_prim_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** base, mi_stats_t* stats) { mi_assert_internal(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0)); mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(is_large != NULL); + mi_assert_internal(is_zero != NULL); + mi_assert_internal(base != NULL); if (!commit) allow_large = false; if (!(alignment >= _mi_os_page_size() && ((alignment & (alignment - 1)) == 0))) return NULL; size = _mi_align_up(size, _mi_os_page_size()); // try first with a hint (this will be aligned directly on Win 10+ or BSD) - void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); + void* p = mi_os_prim_alloc(size, alignment, commit, allow_large, is_large, is_zero, stats); if (p == NULL) return NULL; - // if not aligned, free it, overallocate, and unmap around it - if (((uintptr_t)p % alignment != 0)) { - mi_os_mem_free(p, size, commit, stats); + // aligned already? + if (((uintptr_t)p % alignment) == 0) { + *base = p; + } + else { + // if not aligned, free it, overallocate, and unmap around it _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); + mi_os_prim_free(p, size, commit, stats); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block // over-allocate uncommitted (virtual) memory - p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); + p = mi_os_prim_alloc(over_size, 1 /*alignment*/, false /* commit? 
*/, false /* allow_large */, is_large, is_zero, stats); if (p == NULL) return NULL; - + // set p to the aligned part in the full region - // note: this is dangerous on Windows as VirtualFree needs the actual region pointer - // but in mi_os_mem_free we handle this (hopefully exceptional) situation. + // note: this is dangerous on Windows as VirtualFree needs the actual base pointer + // this is handled though by having the `base` field in the memid's + *base = p; // remember the base p = mi_align_up_ptr(p, alignment); // explicitly commit only the aligned part @@ -228,22 +263,24 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, } else { // mmap can free inside an allocation // overallocate... - p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats); + p = mi_os_prim_alloc(over_size, 1, commit, false, is_large, is_zero, stats); if (p == NULL) return NULL; + // and selectively unmap parts around the over-allocated area. (noop on sbrk) void* aligned_p = mi_align_up_ptr(p, alignment); size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; size_t mid_size = _mi_align_up(size, _mi_os_page_size()); size_t post_size = over_size - pre_size - mid_size; mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); + if (pre_size > 0) { mi_os_prim_free(p, pre_size, commit, stats); } + if (post_size > 0) { mi_os_prim_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); } // we can return the aligned pointer on `mmap` (and sbrk) systems p = aligned_p; + *base = aligned_p; // since we freed the pre part, `*base == p`. 
} } - mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0)); + mi_assert_internal(p == NULL || (p != NULL && *base != NULL && ((uintptr_t)p % alignment) == 0)); return p; } @@ -252,28 +289,40 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, OS API: alloc and alloc_aligned ----------------------------------------------------------- */ -void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { +void* _mi_os_alloc(size_t size, mi_memid_t* memid, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); + *memid = _mi_memid_none(); mi_stats_t* stats = &_mi_stats_main; if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); - bool is_large = false; - return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); + bool os_is_large = false; + bool os_is_zero = false; + void* p = mi_os_prim_alloc(size, 0, true, false, &os_is_large, &os_is_zero, stats); + if (p != NULL) { + *memid = _mi_memid_create_os(true, os_is_zero, os_is_large); + } + return p; } -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) +void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(tld_stats); + *memid = _mi_memid_none(); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); alignment = _mi_align_up(alignment, _mi_os_page_size()); - bool allow_large = false; - if (large != NULL) { - allow_large = *large; - *large = false; + + bool os_is_large = false; + bool os_is_zero = false; + void* os_base = NULL; + void* p = mi_os_prim_alloc_aligned(size, alignment, commit, allow_large, &os_is_large, &os_is_zero, &os_base, &_mi_stats_main /*tld->stats*/ ); + if (p != NULL) { + *memid = _mi_memid_create_os(commit, os_is_zero, os_is_large); + memid->mem.os.base = os_base; + memid->mem.os.alignment = alignment; } - return 
mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); + return p; } /* ----------------------------------------------------------- @@ -284,22 +333,24 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar to use the actual start of the memory region. ----------------------------------------------------------- */ -void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) { +void* _mi_os_alloc_aligned_at_offset(size_t size, size_t alignment, size_t offset, bool commit, bool allow_large, mi_memid_t* memid, mi_stats_t* tld_stats) { mi_assert(offset <= MI_SEGMENT_SIZE); mi_assert(offset <= size); mi_assert((alignment % _mi_os_page_size()) == 0); + *memid = _mi_memid_none(); if (offset > MI_SEGMENT_SIZE) return NULL; if (offset == 0) { // regular aligned allocation - return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats); + return _mi_os_alloc_aligned(size, alignment, commit, allow_large, memid, tld_stats); } else { // overallocate to align at an offset const size_t extra = _mi_align_up(offset, alignment) - offset; const size_t oversize = size + extra; - void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats); + void* const start = _mi_os_alloc_aligned(oversize, alignment, commit, allow_large, memid, tld_stats); if (start == NULL) return NULL; - void* p = (uint8_t*)start + extra; + + void* const p = (uint8_t*)start + extra; mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); // decommit the overallocation at the start if (commit && extra > _mi_os_page_size()) { @@ -309,14 +360,6 @@ void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, } } -void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { - mi_assert(align_offset <= MI_SEGMENT_SIZE); - const 
size_t extra = _mi_align_up(align_offset, alignment) - align_offset; - void* start = (uint8_t*)p - extra; - _mi_os_free_ex(start, size + extra, was_committed, tld_stats); -} - - /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. ----------------------------------------------------------- */ @@ -345,63 +388,75 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -// Commit/Decommit memory. -// Usually commit is aligned liberal, while decommit is aligned conservative. -// (but not for the reset version where we want commit to be conservative as well) -static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservative, bool* is_zero, mi_stats_t* stats) { - // page align in the range, commit liberally, decommit conservative +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_stats_t* stats = &_mi_stats_main; if (is_zero != NULL) { *is_zero = false; } + _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit + _mi_stat_counter_increase(&stats->commit_calls, 1); + + // page align range size_t csize; - void* start = mi_os_page_align_areax(conservative, addr, size, &csize); - if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) - if (commit) { - _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit - _mi_stat_counter_increase(&stats->commit_calls, 1); - } - else { - _mi_stat_decrease(&stats->committed, size); - } + void* start = mi_os_page_align_areax(false /* conservative? 
*/, addr, size, &csize); + if (csize == 0) return true; - int err = _mi_prim_commit(start, csize, commit); + // commit + bool os_is_zero = false; + int err = _mi_prim_commit(start, csize, &os_is_zero); if (err != 0) { - _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize); + _mi_warning_message("cannot commit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + return false; } - mi_assert_internal(err == 0); - return (err == 0); + if (os_is_zero && is_zero != NULL) { + *is_zero = true; + mi_assert_expensive(mi_mem_is_zero(start, csize)); + } + // note: the following seems required for asan (otherwise `mimalloc-test-stress` fails) + #ifdef MI_TRACK_ASAN + if (os_is_zero) { mi_track_mem_defined(start,csize); } + else { mi_track_mem_undefined(start,csize); } + #endif + return true; } -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { +static bool mi_os_decommit_ex(void* addr, size_t size, bool* needs_recommit, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - return mi_os_commitx(addr, size, true, false /* liberal */, is_zero, stats); + mi_assert_internal(needs_recommit!=NULL); + _mi_stat_decrease(&stats->committed, size); + + // page align + size_t csize; + void* start = mi_os_page_align_area_conservative(addr, size, &csize); + if (csize == 0) return true; + + // decommit + *needs_recommit = true; + int err = _mi_prim_decommit(start,csize,needs_recommit); + if (err != 0) { + _mi_warning_message("cannot decommit OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); + } + mi_assert_internal(err == 0); + return (err == 0); } bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - bool is_zero; - return mi_os_commitx(addr, size, false, true /* 
conservative */, &is_zero, stats); + bool needs_recommit; + return mi_os_decommit_ex(addr, size, &needs_recommit, tld_stats); } -/* -static bool mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats) { - return mi_os_commitx(addr, size, true, true // conservative - , is_zero, stats); -} -*/ // Signal to the OS that the address range is no longer in use // but may be used later again. This will release physical memory // pages and reduce swapping while keeping the memory committed. // We page align to a conservative area inside the range to reset. -static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) { +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* stats) { // page align conservatively within the range size_t csize; void* start = mi_os_page_align_area_conservative(addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr) - if (reset) _mi_stat_increase(&stats->reset, csize); - else _mi_stat_decrease(&stats->reset, csize); - if (!reset) return true; // nothing to do on unreset! + _mi_stat_increase(&stats->reset, csize); + _mi_stat_counter_increase(&stats->reset_calls, 1); #if (MI_DEBUG>1) && !MI_SECURE && !MI_TRACK_ENABLED // && !MI_TSAN memset(start, 0, csize); // pretend it is eagerly reset @@ -414,24 +469,35 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) return (err == 0); } -// Signal to the OS that the address range is no longer in use -// but may be used later again. This will release physical memory -// pages and reduce swapping while keeping the memory committed. -// We page align to a conservative area inside the range to reset. -bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - return mi_os_resetx(addr, size, true, stats); + +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. 
+bool _mi_os_purge_ex(void* p, size_t size, bool allow_reset, mi_stats_t* stats) +{ + if (mi_option_get(mi_option_purge_delay) < 0) return false; // is purging allowed? + _mi_stat_counter_increase(&stats->purge_calls, 1); + _mi_stat_increase(&stats->purged, size); + + if (mi_option_is_enabled(mi_option_purge_decommits) && // should decommit? + !_mi_preloading()) // don't decommit during preloading (unsafe) + { + bool needs_recommit = true; + mi_os_decommit_ex(p, size, &needs_recommit, stats); + return needs_recommit; + } + else { + if (allow_reset) { // this can sometimes be not allowed if the range is not fully committed + _mi_os_reset(p, size, stats); + } + return false; // needs no recommit + } } -/* -bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); +// either resets or decommits memory, returns true if the memory needs +// to be recommitted if it is to be re-used later on. +bool _mi_os_purge(void* p, size_t size, mi_stats_t * stats) { + return _mi_os_purge_ex(p, size, true, stats); } -*/ // Protect a region in memory to be not accessible. 
static bool mi_os_protectx(void* addr, size_t size, bool protect) { @@ -506,7 +572,8 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { #endif // Allocate MI_SEGMENT_SIZE aligned huge pages -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize) { +void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_msecs, size_t* pages_reserved, size_t* psize, mi_memid_t* memid) { + *memid = _mi_memid_none(); if (psize != NULL) *psize = 0; if (pages_reserved != NULL) *pages_reserved = 0; size_t size = 0; @@ -518,11 +585,14 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // or to at least allocate as many as available on the system. mi_msecs_t start_t = _mi_clock_start(); size_t page = 0; + bool all_zero = true; while (page < pages) { // allocate a page + bool is_zero = false; void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); void* p = NULL; - int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p); + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &is_zero, &p); + if (!is_zero) { all_zero = false; } if (err != 0) { _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); break; @@ -533,7 +603,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // no success, issue a warning and break if (p != NULL) { _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); - _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); + mi_os_prim_free(p, MI_HUGE_OS_PAGE_SIZE, true, &_mi_stats_main); } break; } @@ -561,16 +631,25 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse mi_assert_internal(page*MI_HUGE_OS_PAGE_SIZE <= size); if (pages_reserved != NULL) { *pages_reserved = page; } if (psize != NULL) 
{ *psize = page * MI_HUGE_OS_PAGE_SIZE; } + if (page != 0) { + mi_assert(start != NULL); + *memid = _mi_memid_create_os(true /* is committed */, all_zero, true /* is_large */); + memid->memkind = MI_MEM_OS_HUGE; + mi_assert(memid->is_pinned); + #ifdef MI_TRACK_ASAN + if (all_zero) { mi_track_mem_defined(start,size); } + #endif + } return (page == 0 ? NULL : start); } // free every huge page in a range individually (as we allocated per page) // note: needed with VirtualAlloc but could potentially be done in one go on mmap'd systems. -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { +static void mi_os_free_huge_os_pages(void* p, size_t size, mi_stats_t* stats) { if (p==NULL || size==0) return; uint8_t* base = (uint8_t*)p; while (size >= MI_HUGE_OS_PAGE_SIZE) { - _mi_os_free(base, MI_HUGE_OS_PAGE_SIZE, stats); + mi_os_prim_free(base, MI_HUGE_OS_PAGE_SIZE, true, stats); size -= MI_HUGE_OS_PAGE_SIZE; base += MI_HUGE_OS_PAGE_SIZE; } diff --git a/source/luametatex/source/libraries/mimalloc/src/page.c b/source/luametatex/source/libraries/mimalloc/src/page.c index cae6b5813..8ac0a715e 100644 --- a/source/luametatex/source/libraries/mimalloc/src/page.c +++ b/source/luametatex/source/libraries/mimalloc/src/page.c @@ -66,6 +66,14 @@ static bool mi_page_list_is_valid(mi_page_t* page, mi_block_t* p) { if (p < start || p >= end) return false; p = mi_block_next(page, p); } +#if MI_DEBUG>3 // generally too expensive to check this + if (page->free_is_zero) { + const size_t ubsize = mi_page_usable_block_size(page); + for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page, block)) { + mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); + } + } +#endif return true; } @@ -84,7 +92,7 @@ static bool mi_page_is_valid_init(mi_page_t* page) { mi_assert_internal(mi_page_list_is_valid(page,page->local_free)); #if MI_DEBUG>3 // generally too expensive to check this - if (page->is_zero) { + if (page->free_is_zero) { const 
size_t ubsize = mi_page_usable_block_size(page); for(mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { mi_assert_expensive(mi_mem_is_zero(block + 1, ubsize - sizeof(mi_block_t))); @@ -221,7 +229,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // usual case page->free = page->local_free; page->local_free = NULL; - page->is_zero = false; + page->free_is_zero = false; } else if (force) { // append -- only on shutdown (force) as this is a linear operation @@ -233,7 +241,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { mi_block_set_next(page, tail, page->free); page->free = page->local_free; page->local_free = NULL; - page->is_zero = false; + page->free_is_zero = false; } } @@ -255,7 +263,7 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) { #if MI_HUGE_PAGE_ABANDON mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE); #endif - mi_assert_internal(!page->is_reset); + // TODO: push on full queue immediately if it is full? 
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page)); mi_page_queue_push(heap, pq, page); @@ -421,7 +429,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) { // Retire parameters #define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX) -#define MI_RETIRE_CYCLES (8) +#define MI_RETIRE_CYCLES (16) // Retire a page with no more used blocks // Important to not retire too quickly though as new @@ -641,11 +649,6 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld) // enable the new free list page->capacity += (uint16_t)extend; mi_stat_increase(tld->stats.page_committed, extend * bsize); - - // extension into zero initialized memory preserves the zero'd free list - if (!page->is_zero_init) { - page->is_zero = false; - } mi_assert_expensive(mi_page_is_valid_init(page)); } @@ -671,14 +674,15 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); #endif - #if MI_DEBUG > 0 - page->is_zero = false; // ensure in debug mode we initialize with MI_DEBUG_UNINIT, see issue #501 - #else - page->is_zero = page->is_zero_init; + page->free_is_zero = page->is_zero_init; + #if MI_DEBUG>2 + if (page->is_zero_init) { + mi_track_mem_defined(page_start, page_size); + mi_assert_expensive(mi_mem_is_zero(page_start, page_size)); + } #endif - + mi_assert_internal(page->is_committed); - mi_assert_internal(!page->is_reset); mi_assert_internal(page->capacity == 0); mi_assert_internal(page->free == NULL); mi_assert_internal(page->used == 0); diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c b/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c index 80bcfa939..0e0a99d93 100644 --- a/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c +++ b/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c @@ -195,7 
+195,7 @@ static malloc_introspection_t mi_introspect = { .log = &intro_log, .force_lock = &intro_force_lock, .force_unlock = &intro_force_unlock, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) .statistics = &intro_statistics, .zone_locked = &intro_zone_locked, #endif @@ -216,7 +216,7 @@ static malloc_zone_t mi_malloc_zone = { .batch_malloc = &zone_batch_malloc, .batch_free = &zone_batch_free, .introspect = &mi_introspect, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) && !defined(__ppc__) #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) .version = 10, #else diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c index 0c1fbb3e2..314281fe8 100644 --- a/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c +++ b/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c @@ -134,6 +134,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? config->has_overcommit = unix_detect_overcommit(); config->must_free_whole = false; // mmap can free in parts + config->has_virtual_reserve = true; // todo: check if this true for NetBSD? 
(for anonymous mmap with PROT_NONE) } @@ -169,7 +170,7 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { int err = errno; - _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx)\n", err, err, size, try_alignment); + _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, addr); } if (p!=MAP_FAILED) return p; // fall back to regular mmap @@ -189,7 +190,11 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p if (hint != NULL) { p = mmap(hint, size, protect_flags, flags, fd, 0); if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + #if MI_TRACK_ENABLED // asan sometimes does not instrument errno correctly? + int err = 0; + #else int err = errno; + #endif _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); } if (p!=MAP_FAILED) return p; @@ -204,28 +209,33 @@ static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int p return NULL; } +static int unix_mmap_fd(void) { + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. 
(All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) { os_tag = 100; } + return VM_MAKE_TAG(os_tag); + #else + return -1; + #endif +} + static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { - void* p = NULL; #if !defined(MAP_ANONYMOUS) #define MAP_ANONYMOUS MAP_ANON #endif #if !defined(MAP_NORESERVE) #define MAP_NORESERVE 0 #endif + void* p = NULL; + const int fd = unix_mmap_fd(); int flags = MAP_PRIVATE | MAP_ANONYMOUS; - int fd = -1; if (_mi_os_has_overcommit()) { flags |= MAP_NORESERVE; } #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD #endif - #if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) { os_tag = 100; } - fd = VM_MAKE_TAG(os_tag); - #endif // huge page allocation if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) { static _Atomic(size_t) large_page_try_ok; // = 0; @@ -313,12 +323,13 @@ static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protec } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. -int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + *is_zero = true; + int protect_flags = (commit ? 
(PROT_WRITE | PROT_READ) : PROT_NONE); *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); return (*addr != NULL ? 0 : errno); } @@ -340,46 +351,46 @@ static void unix_mprotect_hint(int err) { #endif } - -int _mi_prim_commit(void* start, size_t size, bool commit) { - /* - #if 0 && defined(MAP_FIXED) && !defined(__APPLE__) - // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) - if (commit) { - // commit: just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) - const int fd = mi_unix_mmap_fd(); - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } +int _mi_prim_commit(void* start, size_t size, bool* is_zero) { + // commit: ensure we can access the area + // note: we may think that *is_zero can be true since the memory + // was either from mmap PROT_NONE, or from decommit MADV_DONTNEED, but + // we sometimes call commit on a range with still partially committed + // memory and `mprotect` does not zero the range. 
+ *is_zero = false; + int err = mprotect(start, size, (PROT_READ | PROT_WRITE)); + if (err != 0) { + err = errno; + unix_mprotect_hint(err); } + return err; +} + +int _mi_prim_decommit(void* start, size_t size, bool* needs_recommit) { + int err = 0; + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + err = unix_madvise(start, size, MADV_DONTNEED); + #if !MI_DEBUG && !MI_SECURE + *needs_recommit = false; #else + *needs_recommit = true; + mprotect(start, size, PROT_NONE); + #endif + /* + // decommit: use mmap with MAP_FIXED and PROT_NONE to discard the existing memory (and reduce rss) + *needs_recommit = true; + const int fd = unix_mmap_fd(); + void* p = mmap(start, size, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } */ - int err = 0; - if (commit) { - // commit: ensure we can access the area - err = mprotect(start, size, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) - err = unix_madvise(start, size, MADV_DONTNEED); - #else - // decommit: just disable access (also used in debug and secure mode to trap on illegal access) - err = mprotect(start, size, PROT_NONE); - if (err != 0) { err = errno; } - #endif - } - unix_mprotect_hint(err); return err; } int _mi_prim_reset(void* start, size_t size) { + // We try to use `MADV_FREE` as that is the fastest. A drawback though is that it + // will not reduce the `rss` stats in tools like `top` even though the memory is available + // to other processes. With the default `MIMALLOC_PURGE_DECOMMITS=1` we ensure that by + // default `MADV_DONTNEED` is used though. 
#if defined(MADV_FREE) static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); int oadvice = (int)mi_atomic_load_relaxed(&advice); @@ -426,8 +437,9 @@ static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, co } #endif -int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { bool is_large = true; + *is_zero = true; *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes unsigned long numa_mask = (1UL << numa_node); @@ -445,8 +457,9 @@ int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, vo #else -int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = false; *addr = NULL; return ENOMEM; } @@ -610,11 +623,19 @@ void _mi_prim_process_info(mi_process_info_t* pinfo) pinfo->page_faults = 0; #elif defined(__APPLE__) pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes + #ifdef MACH_TASK_BASIC_INFO struct mach_task_basic_info info; mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { pinfo->current_rss = (size_t)info.resident_size; } + #else + struct task_basic_info info; + mach_msg_type_number_t infoCount = TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } + #endif #else pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB #endif diff --git 
a/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c index cb3ce1a7f..50511f0b5 100644 --- a/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c +++ b/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c @@ -21,6 +21,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->alloc_granularity = 16; config->has_overcommit = false; config->must_free_whole = true; + config->has_virtual_reserve = false; } //--------------------------------------------- @@ -114,9 +115,10 @@ static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { } // Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. -int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { MI_UNUSED(allow_large); MI_UNUSED(commit); *is_large = false; + *is_zero = false; *addr = mi_prim_mem_grow(size, try_alignment); return (*addr != NULL ? 
0 : ENOMEM); } @@ -126,8 +128,15 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la // Commit/Reset/Protect //--------------------------------------------- -int _mi_prim_commit(void* addr, size_t size, bool commit) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(commit); +int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { + MI_UNUSED(addr); MI_UNUSED(size); + *is_zero = false; + return 0; +} + +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { + MI_UNUSED(addr); MI_UNUSED(size); + *needs_recommit = false; return 0; } @@ -146,8 +155,9 @@ int _mi_prim_protect(void* addr, size_t size, bool protect) { // Huge pages and NUMA nodes //--------------------------------------------- -int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *is_zero = true; *addr = NULL; return ENOSYS; } diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c index e3dc33e32..e6b610792 100644 --- a/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c @@ -113,6 +113,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) { config->has_overcommit = false; config->must_free_whole = true; + config->has_virtual_reserve = true; // get the page size SYSTEM_INFO si; GetSystemInfo(&si); @@ -142,7 +143,7 @@ void _mi_prim_mem_init( mi_os_mem_config_t* config ) pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); FreeLibrary(hDll); } - if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + if 
(mi_option_is_enabled(mi_option_allow_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { win_enable_large_os_pages(&config->large_page_size); } } @@ -239,10 +240,11 @@ static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DW return p; } -int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, bool* is_zero, void** addr) { mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); mi_assert_internal(commit || !allow_large); mi_assert_internal(try_alignment > 0); + *is_zero = true; int flags = MEM_RESERVE; if (commit) { flags |= MEM_COMMIT; } *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); @@ -257,26 +259,38 @@ int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_la #pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit) #endif -int _mi_prim_commit(void* addr, size_t size, bool commit) { - if (commit) { - void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); - return (p == addr ? 0 : (int)GetLastError()); - } - else { - BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); - return (ok ? 0 : (int)GetLastError()); +int _mi_prim_commit(void* addr, size_t size, bool* is_zero) { + *is_zero = false; + /* + // zero'ing only happens on an initial commit... but checking upfront seems expensive.. 
+ _MEMORY_BASIC_INFORMATION meminfo; _mi_memzero_var(meminfo); + if (VirtualQuery(addr, &meminfo, size) > 0) { + if ((meminfo.State & MEM_COMMIT) == 0) { + *is_zero = true; + } } + */ + // commit + void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); + if (p == NULL) return (int)GetLastError(); + return 0; +} + +int _mi_prim_decommit(void* addr, size_t size, bool* needs_recommit) { + BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); + *needs_recommit = true; // for safety, assume always decommitted even in the case of an error. + return (ok ? 0 : (int)GetLastError()); } int _mi_prim_reset(void* addr, size_t size) { void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); mi_assert_internal(p == addr); - #if 1 - if (p == addr && addr != NULL) { - VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory from the working set + #if 0 + if (p != NULL) { + VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory directly from the working set } #endif - return (p == addr ? 0 : (int)GetLastError()); + return (p != NULL ? 0 : (int)GetLastError()); } int _mi_prim_protect(void* addr, size_t size, bool protect) { @@ -331,7 +345,8 @@ static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int num return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); } -int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, bool* is_zero, void** addr) { + *is_zero = true; *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); return (*addr != NULL ? 
0 : (int)GetLastError()); } diff --git a/source/luametatex/source/libraries/mimalloc/src/region.c b/source/luametatex/source/libraries/mimalloc/src/region.c deleted file mode 100644 index 6c8ffb79c..000000000 --- a/source/luametatex/source/libraries/mimalloc/src/region.c +++ /dev/null @@ -1,501 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- -This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..) -and the segment and huge object allocation by mimalloc. There may be multiple -implementations of this (one could be the identity going directly to the OS, -another could be a simple cache etc), but the current one uses large "regions". -In contrast to the rest of mimalloc, the "regions" are shared between threads and -need to be accessed using atomic operations. -We need this memory layer between the raw OS calls because of: -1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order - to reuse memory effectively. -2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of - an OS allocation/free is still (much) too expensive relative to the accesses - in that object :-( (`malloc-large` tests this). This means we need a cheaper - way to reuse memory. -3. This layer allows for NUMA aware allocation. - -Possible issues: -- (2) can potentially be addressed too with a small cache per thread which is much - simpler. Generally though that requires shrinking of huge pages, and may overuse - memory per thread. (and is not compatible with `sbrk`). 
-- Since the current regions are per-process, we need atomic operations to - claim blocks which may be contended -- In the worst case, we need to search the whole region map (16KiB for 256GiB) - linearly. At what point will direct OS calls be faster? Is there a way to - do this better without adding too much complexity? ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc/internal.h" -#include "mimalloc/atomic.h" - -#include <string.h> // memset - -#include "bitmap.h" - -// os.c -bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); - -// Constants -#if (MI_INTPTR_SIZE==8) -#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map -#elif (MI_INTPTR_SIZE==4) -#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map -#else -#error "define the maximum heap space allowed for regions on this platform" -#endif - -#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS -#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits) -#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits) -#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB -#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE) - -// Region info -typedef union mi_region_info_u { - size_t value; - struct { - bool valid; // initialized? - bool is_large:1; // allocated in fixed large/huge OS pages - bool is_pinned:1; // pinned memory cannot be decommitted - short numa_node; // the associated NUMA node (where -1 means no associated node) - } x; -} mi_region_info_t; - - -// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with -// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block. 
-typedef struct mem_region_s { - _Atomic(size_t) info; // mi_region_info_t.value - _Atomic(void*) start; // start of the memory area - mi_bitmap_field_t in_use; // bit per in-use block - mi_bitmap_field_t dirty; // track if non-zero per block - mi_bitmap_field_t commit; // track if committed per block - mi_bitmap_field_t reset; // track if reset per block - _Atomic(size_t) arena_memid; // if allocated from a (huge page) arena - _Atomic(size_t) padding; // round to 8 fields (needs to be atomic for msvc, see issue #508) -} mem_region_t; - -// The region map -static mem_region_t regions[MI_REGION_MAX]; - -// Allocated regions -static _Atomic(size_t) regions_count; // = 0; - - -/* ---------------------------------------------------------------------------- -Utility functions ------------------------------------------------------------------------------*/ - -// Blocks (of 4MiB) needed for the given size. -static size_t mi_region_block_count(size_t size) { - return _mi_divide_up(size, MI_SEGMENT_SIZE); -} - -/* -// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones. -static size_t mi_good_commit_size(size_t size) { - if (size > (SIZE_MAX - _mi_os_large_page_size())) return size; - return _mi_align_up(size, _mi_os_large_page_size()); -} -*/ - -// Return if a pointer points into a region reserved by us. 
-mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - if (p==NULL) return false; - size_t count = mi_atomic_load_relaxed(®ions_count); - for (size_t i = 0; i < count; i++) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, ®ions[i].start); - if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true; - } - return false; -} - - -static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) { - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start); - mi_assert_internal(start != NULL); - return (start + (bit_idx * MI_SEGMENT_SIZE)); -} - -static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) { - mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS); - size_t idx = region - regions; - mi_assert_internal(®ions[idx] == region); - return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1; -} - -static size_t mi_memid_create_from_arena(size_t arena_memid) { - return (arena_memid << 1) | 1; -} - - -static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) { - if ((id&1)==1) { - if (arena_memid != NULL) *arena_memid = (id>>1); - return true; - } - else { - size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS; - *bit_idx = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS; - *region = ®ions[idx]; - return false; - } -} - - -/* ---------------------------------------------------------------------------- - Allocate a region is allocated from the OS (or an arena) ------------------------------------------------------------------------------*/ - -static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // not out of regions yet? 
- if (mi_atomic_load_relaxed(®ions_count) >= MI_REGION_MAX - 1) return false; - - // try to allocate a fresh region from the OS - bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit)); - bool region_large = (commit && allow_large); - bool is_zero = false; - bool is_pinned = false; - size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); - if (start == NULL) return false; - mi_assert_internal(!(region_large && !allow_large)); - mi_assert_internal(!region_large || region_commit); - - // claim a fresh slot - const size_t idx = mi_atomic_increment_acq_rel(®ions_count); - if (idx >= MI_REGION_MAX) { - mi_atomic_decrement_acq_rel(®ions_count); - _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats); - _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB)); - return false; - } - - // allocated, initialize and claim the initial blocks - mem_region_t* r = ®ions[idx]; - r->arena_memid = arena_memid; - mi_atomic_store_release(&r->in_use, (size_t)0); - mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL)); - mi_atomic_store_release(&r->commit, (region_commit ? 
MI_BITMAP_FIELD_FULL : 0)); - mi_atomic_store_release(&r->reset, (size_t)0); - *bit_idx = 0; - _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL); - mi_atomic_store_ptr_release(void,&r->start, start); - - // and share it - mi_region_info_t info; - info.value = 0; // initialize the full union to zero - info.x.valid = true; - info.x.is_large = region_large; - info.x.is_pinned = is_pinned; - info.x.numa_node = (short)_mi_os_numa_node(tld); - mi_atomic_store_release(&r->info, info.value); // now make it available to others - *region = r; - return true; -} - -/* ---------------------------------------------------------------------------- - Try to claim blocks in suitable regions ------------------------------------------------------------------------------*/ - -static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large ) { - // initialized at all? - mi_region_info_t info; - info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info); - if (info.value==0) return false; - - // numa correct - if (numa_node >= 0) { // use negative numa node to always succeed - int rnode = info.x.numa_node; - if (rnode >= 0 && rnode != numa_node) return false; - } - - // check allow-large - if (!allow_large && info.x.is_large) return false; - - return true; -} - - -static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld) -{ - // try all regions for a free slot - const size_t count = mi_atomic_load_relaxed(®ions_count); // monotonic, so ok to be relaxed - size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? 
Starting at 0 seems to increase latency though - for (size_t visited = 0; visited < count; visited++, idx++) { - if (idx >= count) idx = 0; // wrap around - mem_region_t* r = &regions[idx]; - // if this region suits our demand (numa node matches, large OS page matches) - if (mi_region_is_suitable(r, numa_node, allow_large)) { - // then try to atomically claim a segment(s) in this region - if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) { - tld->region_idx = idx; // remember the last found position - *region = r; - return true; - } - } - } - return false; -} - - -static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS); - mem_region_t* region; - mi_bitmap_index_t bit_idx; - const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld)); - // try to claim in existing regions - if (!mi_region_try_claim(numa_node, blocks, *large, &region, &bit_idx, tld)) { - // otherwise try to allocate a fresh region and claim in there - if (!mi_region_try_alloc_os(blocks, *commit, *large, &region, &bit_idx, tld)) { - // out of regions or memory - return NULL; - } - } - - // ------------------------------------------------ - // found a region and claimed `blocks` at `bit_idx`, initialize them now - mi_assert_internal(region != NULL); - mi_assert_internal(_mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx)); - - mi_region_info_t info; - info.value = mi_atomic_load_acquire(&region->info); - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&region->start); - mi_assert_internal(!(info.x.is_large && !*large)); - mi_assert_internal(start != NULL); - - *is_zero = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL); - *large = info.x.is_large; - *is_pinned = info.x.is_pinned; - *memid = mi_memid_create(region, bit_idx); - void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE); - - // 
commit - if (*commit) { - // ensure commit - bool any_uncommitted; - _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted); - if (any_uncommitted) { - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - bool commit_zero = false; - if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) { - // failed to commit! unclaim and return - mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx); - return NULL; - } - if (commit_zero) *is_zero = true; - } - } - else { - // no need to commit, but check if already fully committed - *commit = _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx); - } - mi_assert_internal(!*commit || _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx)); - - // unreset reset blocks - if (_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) { - // some blocks are still reset - mi_assert_internal(!info.x.is_large && !info.x.is_pinned); - mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0); - mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx); - if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed - bool reset_zero = false; - _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld); - if (reset_zero) *is_zero = true; - } - } - mi_assert_internal(!_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)); - - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN - if (*commit) { ((uint8_t*)p)[0] = 0; } - #endif - - // and return the allocation - mi_assert_internal(p != NULL); - return p; -} - - -/* ---------------------------------------------------------------------------- - Allocation ------------------------------------------------------------------------------*/ - -// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`. 
-// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`) -void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) -{ - mi_assert_internal(memid != NULL && tld != NULL); - mi_assert_internal(size > 0); - *memid = 0; - *is_zero = false; - *is_pinned = false; - bool default_large = false; - if (large==NULL) large = &default_large; // ensure `large != NULL` - if (size == 0) return NULL; - size = _mi_align_up(size, _mi_os_page_size()); - - // allocate from regions if possible - void* p = NULL; - size_t arena_memid; - const size_t blocks = mi_region_block_count(size); - if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) { - p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld); - if (p == NULL) { - _mi_warning_message("unable to allocate from region: size %zu\n", size); - } - } - if (p == NULL) { - // and otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); - *memid = mi_memid_create_from_arena(arena_memid); - } - - if (p != NULL) { - mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN - if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed - #endif - } - return p; -} - - - -/* ---------------------------------------------------------------------------- -Free ------------------------------------------------------------------------------*/ - -// Free previously allocated memory with a given id. 
-void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) { - mi_assert_internal(size > 0 && tld != NULL); - if (p==NULL) return; - if (size==0) return; - size = _mi_align_up(size, _mi_os_page_size()); - - size_t arena_memid = 0; - mi_bitmap_index_t bit_idx; - mem_region_t* region; - if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) { - // was a direct arena allocation, pass through - _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats); - } - else { - // allocated in a region - mi_assert_internal(align_offset == 0); - mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return; - const size_t blocks = mi_region_block_count(size); - mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS); - mi_region_info_t info; - info.value = mi_atomic_load_acquire(&region->info); - mi_assert_internal(info.value != 0); - void* blocks_start = mi_region_blocks_start(region, bit_idx); - mi_assert_internal(blocks_start == p); // not a pointer in our area? - mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS); - if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`? - - // committed? - if (full_commit && (size % MI_SEGMENT_SIZE) == 0) { - _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL); - } - - if (any_reset) { - // set the is_reset bits if any pages were reset - _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL); - } - - // reset the blocks to reduce the working set. 
- if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset) - && (mi_option_is_enabled(mi_option_eager_commit) || - mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead - { - bool any_unreset; - _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset); - if (any_unreset) { - _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit) - _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld); - } - } - - // and unclaim - bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx); - mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed); - } -} - - -/* ---------------------------------------------------------------------------- - collection ------------------------------------------------------------------------------*/ -void _mi_mem_collect(mi_os_tld_t* tld) { - // free every region that has no segments in use. - size_t rcount = mi_atomic_load_relaxed(&regions_count); - for (size_t i = 0; i < rcount; i++) { - mem_region_t* region = &regions[i]; - if (mi_atomic_load_relaxed(&region->info) != 0) { - // if no segments used, try to claim the whole region - size_t m = mi_atomic_load_relaxed(&region->in_use); - while (m == 0 && !mi_atomic_cas_weak_release(&region->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ }; - if (m == 0) { - // on success, free the whole region - uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&regions[i].start); - size_t arena_memid = mi_atomic_load_relaxed(&regions[i].arena_memid); - size_t commit = mi_atomic_load_relaxed(&regions[i].commit); - memset((void*)&regions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning - // and release the whole region - mi_atomic_store_release(&region->info, (size_t)0); - if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) { - _mi_abandoned_await_readers(); // ensure no pending reads - _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, 
arena_memid, (~commit == 0), tld->stats); - } - } - } - } -} - - -/* ---------------------------------------------------------------------------- - Other ------------------------------------------------------------------------------*/ - -bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_decommit(p, size, tld->stats); - } - else { - return _mi_os_reset(p, size, tld->stats); - } -} - -bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return _mi_os_commit(p, size, is_zero, tld->stats); - } - else { - return _mi_os_unreset(p, size, is_zero, tld->stats); - } -} - -bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) { - return _mi_os_commit(p, size, is_zero, tld->stats); -} - -bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) { - return _mi_os_decommit(p, size, tld->stats); -} - -bool _mi_mem_protect(void* p, size_t size) { - return _mi_os_protect(p, size); -} - -bool _mi_mem_unprotect(void* p, size_t size) { - return _mi_os_unprotect(p, size); -} diff --git a/source/luametatex/source/libraries/mimalloc/src/segment-cache.c b/source/luametatex/source/libraries/mimalloc/src/segment-cache.c deleted file mode 100644 index eeae1b508..000000000 --- a/source/luametatex/source/libraries/mimalloc/src/segment-cache.c +++ /dev/null @@ -1,423 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2020, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. 
------------------------------------------------------------------------------*/ - -/* ---------------------------------------------------------------------------- - Implements a cache of segments to avoid expensive OS calls and to reuse - the commit_mask to optimize the commit/decommit calls. - The full memory map of all segments is also implemented here. ------------------------------------------------------------------------------*/ -#include "mimalloc.h" -#include "mimalloc/internal.h" -#include "mimalloc/atomic.h" - -#include "./bitmap.h" // atomic bitmap - -//#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache - -#define MI_CACHE_FIELDS (16) -#define MI_CACHE_MAX (MI_BITMAP_FIELD_BITS*MI_CACHE_FIELDS) // 1024 on 64-bit - -#define BITS_SET() MI_ATOMIC_VAR_INIT(UINTPTR_MAX) -#define MI_CACHE_BITS_SET MI_INIT16(BITS_SET) // note: update if MI_CACHE_FIELDS changes - -typedef struct mi_cache_slot_s { - void* p; - size_t memid; - bool is_pinned; - mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; - _Atomic(mi_msecs_t) expire; -} mi_cache_slot_t; - -static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 - -static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! 
-static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; -static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free - -static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { - mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); -} - -mi_decl_noinline static void* mi_segment_cache_pop_ex( - bool all_suitable, - size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* decommit_mask, bool large_allowed, - bool* large, bool* is_pinned, bool* is_zero, - mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return NULL; -#else - - // only segment blocks - if (size != MI_SEGMENT_SIZE) return NULL; - - // numa node determines start field - const int numa_node = _mi_os_numa_node(tld); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot and make it unavailable - mi_bitmap_index_t bitidx = 0; - bool claimed = false; - mi_arena_id_t req_arena_id = _req_arena_id; - mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? - - if (large_allowed) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = true; - } - if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim_pred (cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); - if (claimed) *large = false; - } - - if (!claimed) return NULL; - - // no longer available but still in-use - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - - // found a slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - void* p = slot->p; - *memid = slot->memid; - *is_pinned = slot->is_pinned; - *is_zero = false; - *commit_mask = slot->commit_mask; - *decommit_mask = slot->decommit_mask; - slot->p = NULL; - mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); - - // mark the slot as free again - _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); - return p; -#endif -} - - -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) -{ - return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); -} - -static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) -{ - if (mi_commit_mask_is_empty(cmask)) { - // nothing - } - else if (mi_commit_mask_is_full(cmask)) { - // decommit the whole in one call - _mi_os_decommit(p, total, stats); - } - else { - // decommit parts - mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); - size_t part = 
total/MI_COMMIT_MASK_BITS; - size_t idx; - size_t count; - mi_commit_mask_foreach(cmask, idx, count) { - void* start = (uint8_t*)p + (idx*part); - size_t size = count*part; - _mi_os_decommit(start, size, stats); - } - mi_commit_mask_foreach_end() - } - mi_commit_mask_create_empty(cmask); -} - -#define MI_MAX_PURGE_PER_PUSH (4) - -static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld) -{ - MI_UNUSED(tld); - if (!mi_option_is_enabled(mi_option_allow_decommit)) return; - mi_msecs_t now = _mi_clock_now(); - size_t purged = 0; - const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */); - size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ ); - for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots - if (idx >= MI_CACHE_MAX) idx = 0; // wrap - mi_cache_slot_t* slot = &cache[idx]; - mi_msecs_t expire = mi_atomic_loadi64_relaxed(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // racy read - // seems expired, first claim it from available - purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // no need to check large as those cannot be decommitted anyways - // it was available, we claimed it (and made it unavailable) - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); - // we can now access it safely - expire = mi_atomic_loadi64_acquire(&slot->expire); - if (expire != 0 && (force || now >= expire)) { // safe read - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); - // still expired, decommit it - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - 
mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask)); - _mi_abandoned_await_readers(); // wait until safe to decommit - // decommit committed parts - // TODO: instead of decommit, we could also free to the OS? - mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); - } - _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop - } - if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push - } - } -} - -void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) { - if (force) { - // called on `mi_collect(true)` but not on thread termination - _mi_segment_cache_free_all(tld); - } - else { - mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld); - } -} - -void _mi_segment_cache_free_all(mi_os_tld_t* tld) { - mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; - bool is_pinned; - bool is_zero; - bool is_large; - size_t memid; - const size_t size = MI_SEGMENT_SIZE; - void* p; - do { - // keep popping and freeing the memory - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, - true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); - if (p != NULL) { - size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); - if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } - _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } - } while (p != NULL); -} - -mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) -{ -#ifdef MI_CACHE_DISABLE - return false; -#else - - // purge expired entries - 
mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); - - // only cache normal segment blocks - if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; - - // Also do not cache arena allocated segments that cannot be decommitted. (as arena allocation is fast) - // This is a common case with reserved huge OS pages. - // - // (note: we could also allow segments that are already fully decommitted but that never happens - // as the first slice is always committed (for the segment metadata)) - if (!_mi_arena_is_os_allocated(memid) && is_pinned) return false; - - // numa node determines start field - int numa_node = _mi_os_numa_node(NULL); - size_t start_field = 0; - if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node; - if (start_field >= MI_CACHE_FIELDS) start_field = 0; - } - - // find an available slot - mi_bitmap_index_t bitidx; - bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); - if (!claimed) return false; - - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); -#if MI_DEBUG>1 - if (is_pinned || is_large) { - mi_assert_internal(mi_commit_mask_is_full(commit_mask)); - } -#endif - - // set the slot - mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; - slot->p = start; - slot->memid = memid; - slot->is_pinned = is_pinned; - mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - slot->commit_mask = *commit_mask; - slot->decommit_mask = *decommit_mask; - if (!mi_commit_mask_is_empty(commit_mask) && !is_large && !is_pinned && mi_option_is_enabled(mi_option_allow_decommit)) { - long delay = mi_option_get(mi_option_segment_decommit_delay); - if (delay == 0) { - _mi_abandoned_await_readers(); // wait until safe to decommit - 
mi_commit_mask_decommit(&slot->commit_mask, start, MI_SEGMENT_SIZE, tld->stats); - mi_commit_mask_create_empty(&slot->decommit_mask); - } - else { - mi_atomic_storei64_release(&slot->expire, _mi_clock_now() + delay); - } - } - - // make it available - _mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx); - return true; -#endif -} - - -/* ----------------------------------------------------------- - The following functions are to reliably find the segment or - block that encompasses any pointer p (or NULL if it is not - in any of our segments). - We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) - set to 1 if it contains the segment meta data. ------------------------------------------------------------ */ - - -#if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)40 << 40) // 20TB -#else -#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb -#endif - -#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) -#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) -#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) - -static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments - -static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { - mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? 
- if ((uintptr_t)segment >= MI_MAX_ADDRESS) { - *bitidx = 0; - return MI_SEGMENT_MAP_WSIZE; - } - else { - const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; - *bitidx = segindex % MI_INTPTR_BITS; - const size_t mapindex = segindex / MI_INTPTR_BITS; - mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); - return mapindex; - } -} - -void _mi_segment_map_allocated_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); - if (index==MI_SEGMENT_MAP_WSIZE) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask | ((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -void _mi_segment_map_freed_at(const mi_segment_t* segment) { - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); - if (index == MI_SEGMENT_MAP_WSIZE) return; - uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - uintptr_t newmask; - do { - newmask = (mask & ~((uintptr_t)1 << bitidx)); - } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); -} - -// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. 
-static mi_segment_t* _mi_segment_of(const void* p) { - if (p == NULL) return NULL; - mi_segment_t* segment = _mi_ptr_segment(p); - mi_assert_internal(segment != NULL); - size_t bitidx; - size_t index = mi_segment_map_index_of(segment, &bitidx); - // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge - const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { - return segment; // yes, allocated by us - } - if (index==MI_SEGMENT_MAP_WSIZE) return NULL; - - // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? - - // search downwards for the first segment in case it is an interior pointer - // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough - // valid huge objects - // note: we could maintain a lowest index to speed up the path for invalid pointers? - size_t lobitidx; - size_t loindex; - uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); - if (lobits != 0) { - loindex = index; - lobitidx = mi_bsr(lobits); // lobits != 0 - } - else if (index == 0) { - return NULL; - } - else { - mi_assert_internal(index > 0); - uintptr_t lomask = mask; - loindex = index; - do { - loindex--; - lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); - } while (lomask != 0 && loindex > 0); - if (lomask == 0) return NULL; - lobitidx = mi_bsr(lomask); // lomask != 0 - } - mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); - // take difference as the addresses could be larger than the MAX_ADDRESS space. 
- size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; - segment = (mi_segment_t*)((uint8_t*)segment - diff); - - if (segment == NULL) return NULL; - mi_assert_internal((void*)segment < p); - bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); - mi_assert_internal(cookie_ok); - if mi_unlikely(!cookie_ok) return NULL; - if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range - mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); - return segment; -} - -// Is this a valid pointer in our heap? -static bool mi_is_valid_pointer(const void* p) { - return (_mi_segment_of(p) != NULL); -} - -mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { - return mi_is_valid_pointer(p); -} - -/* -// Return the full segment range belonging to a pointer -static void* mi_segment_range_of(const void* p, size_t* size) { - mi_segment_t* segment = _mi_segment_of(p); - if (segment == NULL) { - if (size != NULL) *size = 0; - return NULL; - } - else { - if (size != NULL) *size = segment->segment_size; - return segment; - } - mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); - mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 
0 : _mi_os_page_size())) >= block_size); - mi_reset_delayed(tld); - mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); - return page; -} -*/ diff --git a/source/luametatex/source/libraries/mimalloc/src/segment-map.c b/source/luametatex/source/libraries/mimalloc/src/segment-map.c new file mode 100644 index 000000000..4c2104bd8 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/segment-map.c @@ -0,0 +1,153 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2019-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +/* ----------------------------------------------------------- + The following functions are to reliably find the segment or + block that encompasses any pointer p (or NULL if it is not + in any of our segments). + We maintain a bitmap of all memory with 1 bit per MI_SEGMENT_SIZE (64MiB) + set to 1 if it contains the segment meta data. 
+----------------------------------------------------------- */ +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" + +#if (MI_INTPTR_SIZE==8) +#define MI_MAX_ADDRESS ((size_t)40 << 40) // 40TB (to include huge page areas) +#else +#define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb +#endif + +#define MI_SEGMENT_MAP_BITS (MI_MAX_ADDRESS / MI_SEGMENT_SIZE) +#define MI_SEGMENT_MAP_SIZE (MI_SEGMENT_MAP_BITS / 8) +#define MI_SEGMENT_MAP_WSIZE (MI_SEGMENT_MAP_SIZE / MI_INTPTR_SIZE) + +static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments + +static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) { + mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE? + if ((uintptr_t)segment >= MI_MAX_ADDRESS) { + *bitidx = 0; + return MI_SEGMENT_MAP_WSIZE; + } + else { + const uintptr_t segindex = ((uintptr_t)segment) / MI_SEGMENT_SIZE; + *bitidx = segindex % MI_INTPTR_BITS; + const size_t mapindex = segindex / MI_INTPTR_BITS; + mi_assert_internal(mapindex < MI_SEGMENT_MAP_WSIZE); + return mapindex; + } +} + +void _mi_segment_map_allocated_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index==MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask | ((uintptr_t)1 << bitidx)); + } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +void _mi_segment_map_freed_at(const mi_segment_t* segment) { + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + mi_assert_internal(index <= MI_SEGMENT_MAP_WSIZE); + if (index == MI_SEGMENT_MAP_WSIZE) return; + uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + uintptr_t newmask; + do { + newmask = (mask & ~((uintptr_t)1 << bitidx)); 
+ } while (!mi_atomic_cas_weak_release(&mi_segment_map[index], &mask, newmask)); +} + +// Determine the segment belonging to a pointer or NULL if it is not in a valid segment. +static mi_segment_t* _mi_segment_of(const void* p) { + if (p == NULL) return NULL; + mi_segment_t* segment = _mi_ptr_segment(p); + mi_assert_internal(segment != NULL); + size_t bitidx; + size_t index = mi_segment_map_index_of(segment, &bitidx); + // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge + const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); + if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { + return segment; // yes, allocated by us + } + if (index==MI_SEGMENT_MAP_WSIZE) return NULL; + + // TODO: maintain max/min allocated range for efficiency for more efficient rejection of invalid pointers? + + // search downwards for the first segment in case it is an interior pointer + // could be slow but searches in MI_INTPTR_SIZE * MI_SEGMENT_SIZE (512MiB) steps trough + // valid huge objects + // note: we could maintain a lowest index to speed up the path for invalid pointers? + size_t lobitidx; + size_t loindex; + uintptr_t lobits = mask & (((uintptr_t)1 << bitidx) - 1); + if (lobits != 0) { + loindex = index; + lobitidx = mi_bsr(lobits); // lobits != 0 + } + else if (index == 0) { + return NULL; + } + else { + mi_assert_internal(index > 0); + uintptr_t lomask = mask; + loindex = index; + do { + loindex--; + lomask = mi_atomic_load_relaxed(&mi_segment_map[loindex]); + } while (lomask != 0 && loindex > 0); + if (lomask == 0) return NULL; + lobitidx = mi_bsr(lomask); // lomask != 0 + } + mi_assert_internal(loindex < MI_SEGMENT_MAP_WSIZE); + // take difference as the addresses could be larger than the MAX_ADDRESS space. 
+ size_t diff = (((index - loindex) * (8*MI_INTPTR_SIZE)) + bitidx - lobitidx) * MI_SEGMENT_SIZE; + segment = (mi_segment_t*)((uint8_t*)segment - diff); + + if (segment == NULL) return NULL; + mi_assert_internal((void*)segment < p); + bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); + mi_assert_internal(cookie_ok); + if mi_unlikely(!cookie_ok) return NULL; + if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range + mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); + return segment; +} + +// Is this a valid pointer in our heap? +static bool mi_is_valid_pointer(const void* p) { + return ((_mi_segment_of(p) != NULL) || (_mi_arena_contains(p))); +} + +mi_decl_nodiscard mi_decl_export bool mi_is_in_heap_region(const void* p) mi_attr_noexcept { + return mi_is_valid_pointer(p); +} + +/* +// Return the full segment range belonging to a pointer +static void* mi_segment_range_of(const void* p, size_t* size) { + mi_segment_t* segment = _mi_segment_of(p); + if (segment == NULL) { + if (size != NULL) *size = 0; + return NULL; + } + else { + if (size != NULL) *size = segment->segment_size; + return segment; + } + mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); + mi_assert_internal(page == NULL || (mi_segment_page_size(_mi_page_segment(page)) - (MI_SECURE == 0 ? 0 : _mi_os_page_size())) >= block_size); + mi_reset_delayed(tld); + mi_assert_internal(page == NULL || mi_page_not_in_queue(page, tld)); + return page; +} +*/ diff --git a/source/luametatex/source/libraries/mimalloc/src/segment.c b/source/luametatex/source/libraries/mimalloc/src/segment.c index 3e56d50f5..28685f21c 100644 --- a/source/luametatex/source/libraries/mimalloc/src/segment.c +++ b/source/luametatex/source/libraries/mimalloc/src/segment.c @@ -11,9 +11,9 @@ terms of the MIT license. 
A copy of the license can be found in the file #include <string.h> // memset #include <stdio.h> -#define MI_PAGE_HUGE_ALIGN (256*1024) +#define MI_PAGE_HUGE_ALIGN (256*1024) -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats); +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats); // ------------------------------------------------------------------- @@ -257,7 +257,7 @@ static bool mi_segment_is_valid(mi_segment_t* segment, mi_segments_tld_t* tld) { mi_assert_internal(_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(segment->abandoned <= segment->used); mi_assert_internal(segment->thread_id == 0 || segment->thread_id == _mi_thread_id()); - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // can only decommit committed blocks + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // can only decommit committed blocks //mi_assert_internal(segment->segment_info_size % MI_SEGMENT_SLICE_SIZE == 0); mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); @@ -389,21 +389,14 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { _mi_os_unprotect(end, os_pagesize); } - // purge delayed decommits now? (no, leave it to the cache) - // mi_segment_delayed_decommit(segment,true,tld->stats); + // purge delayed decommits now? 
(no, leave it to the arena) + // mi_segment_try_purge(segment,true,tld->stats); - // _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats); const size_t size = mi_segment_size(segment); - if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache - !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) - { - if (!segment->mem_is_pinned) { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - if (csize > 0) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } - } - _mi_abandoned_await_readers(); // wait until safe to free - _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); + + _mi_abandoned_await_readers(); // wait until safe to free + _mi_arena_free(segment, mi_segment_size(segment), csize, segment->memid, tld->stats); } // called by threads that are terminating @@ -467,61 +460,81 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin mi_commit_mask_create(bitidx, bitcount, cm); } +static bool mi_segment_commit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); -static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - - // commit liberal, but decommit conservative + // commit liberal uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t mask; - mi_segment_commit_mask(segment, 
!commit/*conservative*/, p, size, &start, &full_size, &mask); - if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + mi_segment_commit_mask(segment, false /* conservative? */, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size == 0) return true; - if (commit && !mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + if (!mi_commit_mask_all_set(&segment->commit_mask, &mask)) { + // committing bool is_zero = false; mi_commit_mask_t cmask; mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); _mi_stat_decrease(&_mi_stats_main.committed, _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (!_mi_os_commit(start,full_size,&is_zero,stats)) return false; - mi_commit_mask_set(&segment->commit_mask, &mask); + if (!_mi_os_commit(start, full_size, &is_zero, stats)) return false; + mi_commit_mask_set(&segment->commit_mask, &mask); + } + + // increase purge expiration when using part of delayed purges -- we assume more allocations are coming soon. 
+ if (mi_commit_mask_any_set(&segment->purge_mask, &mask)) { + segment->purge_expire = _mi_clock_now() + mi_option_get(mi_option_purge_delay); } - else if (!commit && mi_commit_mask_any_set(&segment->commit_mask, &mask)) { - mi_assert_internal((void*)start != (void*)segment); - //mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &mask)); - mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); - _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for overlap - if (segment->allow_decommit) { - _mi_os_decommit(start, full_size, stats); // ok if this fails - } - mi_commit_mask_clear(&segment->commit_mask, &mask); - } - // increase expiration of reusing part of the delayed decommit - if (commit && mi_commit_mask_any_set(&segment->decommit_mask, &mask)) { - segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - } - // always undo delayed decommits - mi_commit_mask_clear(&segment->decommit_mask, &mask); + // always clear any delayed purges in our range (as they are either committed now) + mi_commit_mask_clear(&segment->purge_mask, &mask); return true; } static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow - if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed + if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->purge_mask)) return true; // fully committed mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); - return 
mi_segment_commitx(segment,true,p,size,stats); + return mi_segment_commit(segment, p, size, stats); +} + +static bool mi_segment_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->purge_mask)); + if (!segment->allow_purge) return true; + + // purge conservative + uint8_t* start = NULL; + size_t full_size = 0; + mi_commit_mask_t mask; + mi_segment_commit_mask(segment, true /* conservative? */, p, size, &start, &full_size, &mask); + if (mi_commit_mask_is_empty(&mask) || full_size==0) return true; + + if (mi_commit_mask_any_set(&segment->commit_mask, &mask)) { + // purging + mi_assert_internal((void*)start != (void*)segment); + mi_assert_internal(segment->allow_decommit); + const bool decommitted = _mi_os_purge(start, full_size, stats); // reset or decommit + if (decommitted) { + mi_commit_mask_t cmask; + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); + _mi_stat_increase(&_mi_stats_main.committed, full_size - _mi_commit_mask_committed_size(&cmask, MI_SEGMENT_SIZE)); // adjust for double counting + mi_commit_mask_clear(&segment->commit_mask, &mask); + } + } + + // always clear any scheduled purges in our range + mi_commit_mask_clear(&segment->purge_mask, &mask); + return true; } -static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { - if (!segment->allow_decommit) return; - if (mi_option_get(mi_option_decommit_delay) == 0) { - mi_segment_commitx(segment, false, p, size, stats); +static void mi_segment_schedule_purge(mi_segment_t* segment, uint8_t* p, size_t size, mi_stats_t* stats) { + if (!segment->allow_purge) return; + + if (mi_option_get(mi_option_purge_delay) == 0) { + mi_segment_purge(segment, p, size, stats); } else { - // register for future decommit in the decommit mask + // register for future purge in the purge mask uint8_t* start = NULL; size_t full_size = 0; mi_commit_mask_t 
mask; @@ -529,39 +542,39 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_ if (mi_commit_mask_is_empty(&mask) || full_size==0) return; // update delayed commit - mi_assert_internal(segment->decommit_expire > 0 || mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(segment->purge_expire > 0 || mi_commit_mask_is_empty(&segment->purge_mask)); mi_commit_mask_t cmask; - mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only decommit what is committed; span_free may try to decommit more - mi_commit_mask_set(&segment->decommit_mask, &cmask); + mi_commit_mask_create_intersect(&segment->commit_mask, &mask, &cmask); // only purge what is committed; span_free may try to decommit more + mi_commit_mask_set(&segment->purge_mask, &cmask); mi_msecs_t now = _mi_clock_now(); - if (segment->decommit_expire == 0) { - // no previous decommits, initialize now - segment->decommit_expire = now + mi_option_get(mi_option_decommit_delay); + if (segment->purge_expire == 0) { + // no previous purgess, initialize now + segment->purge_expire = now + mi_option_get(mi_option_purge_delay); } - else if (segment->decommit_expire <= now) { - // previous decommit mask already expired - if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) { - mi_segment_delayed_decommit(segment, true, stats); + else if (segment->purge_expire <= now) { + // previous purge mask already expired + if (segment->purge_expire + mi_option_get(mi_option_purge_extend_delay) <= now) { + mi_segment_try_purge(segment, true, stats); } else { - segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's + segment->purge_expire = now + mi_option_get(mi_option_purge_extend_delay); // (mi_option_get(mi_option_purge_delay) / 8); // wait a tiny bit longer in case there is a series of free's } } else 
{ - // previous decommit mask is not yet expired, increase the expiration by a bit. - segment->decommit_expire += mi_option_get(mi_option_decommit_extend_delay); + // previous purge mask is not yet expired, increase the expiration by a bit. + segment->purge_expire += mi_option_get(mi_option_purge_extend_delay); } } } -static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_stats_t* stats) { - if (!segment->allow_decommit || mi_commit_mask_is_empty(&segment->decommit_mask)) return; +static void mi_segment_try_purge(mi_segment_t* segment, bool force, mi_stats_t* stats) { + if (!segment->allow_purge || mi_commit_mask_is_empty(&segment->purge_mask)) return; mi_msecs_t now = _mi_clock_now(); - if (!force && now < segment->decommit_expire) return; + if (!force && now < segment->purge_expire) return; - mi_commit_mask_t mask = segment->decommit_mask; - segment->decommit_expire = 0; - mi_commit_mask_create_empty(&segment->decommit_mask); + mi_commit_mask_t mask = segment->purge_mask; + segment->purge_expire = 0; + mi_commit_mask_create_empty(&segment->purge_mask); size_t idx; size_t count; @@ -570,11 +583,11 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st if (count > 0) { uint8_t* p = (uint8_t*)segment + (idx*MI_COMMIT_SIZE); size_t size = count * MI_COMMIT_SIZE; - mi_segment_commitx(segment, false, p, size, stats); + mi_segment_purge(segment, p, size, stats); } } mi_commit_mask_foreach_end() - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); } @@ -587,7 +600,7 @@ static bool mi_segment_is_abandoned(mi_segment_t* segment) { } // note: can be called on abandoned segments -static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) { +static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_purge, 
mi_segments_tld_t* tld) { mi_assert_internal(slice_index < segment->slice_entries); mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment) ? NULL : mi_span_queue_for(slice_count,tld)); @@ -607,8 +620,8 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size } // perhaps decommit - if (allow_decommit) { - mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); + if (allow_purge) { + mi_segment_schedule_purge(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats); } // and push it on the free page queue (if it was not a huge page) @@ -726,7 +739,6 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i } // and initialize the page - page->is_reset = false; page->is_committed = true; segment->used++; return page; @@ -740,7 +752,7 @@ static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, siz mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); size_t next_index = mi_slice_index(slice) + slice_count; size_t next_count = slice->slice_count - slice_count; - mi_segment_span_free(segment, next_index, next_count, false /* don't decommit left-over part */, tld); + mi_segment_span_free(segment, next_index, next_count, false /* don't purge left-over part */, tld); slice->slice_count = (uint32_t)slice_count; } @@ -783,16 +795,13 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren Segment allocation ----------------------------------------------------------- */ -static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delay, mi_arena_id_t req_arena_id, +static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delayed, mi_arena_id_t req_arena_id, size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices, - mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* pdecommit_mask, - bool* 
is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) + bool commit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { - // Allocate the segment from the OS - bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy - bool is_pinned = false; - size_t memid = 0; + mi_memid_t memid; + bool allow_large = (!eager_delayed && (MI_SECURE == 0)); // only allow large OS pages once we are no longer lazy size_t align_offset = 0; size_t alignment = MI_SEGMENT_ALIGN; @@ -806,48 +815,40 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment // recalculate due to potential guard pages *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); } - const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; - mi_segment_t* segment = NULL; - // get from cache? - if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; + mi_segment_t* segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, commit, allow_large, req_arena_id, &memid, os_tld); + if (segment == NULL) { + return NULL; // failed to allocate } - - // get from OS - if (segment==NULL) { - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, pcommit, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); - if (segment == NULL) return NULL; // failed to allocate - if (*pcommit) { - mi_commit_mask_create_full(pcommit_mask); - } - else { - mi_commit_mask_create_empty(pcommit_mask); - } - } - mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); - const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); - mi_assert_internal(commit_needed>0); - 
mi_commit_mask_t commit_needed_mask; - mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - if (!mi_commit_mask_all_set(pcommit_mask, &commit_needed_mask)) { + // ensure metadata part of the segment is committed + mi_commit_mask_t commit_mask; + if (memid.initially_committed) { + mi_commit_mask_create_full(&commit_mask); + } + else { // at least commit the info slices + const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); + mi_assert_internal(commit_needed>0); + mi_commit_mask_create(0, commit_needed, &commit_mask); mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE); - bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, is_zero, tld->stats); - if (!ok) return NULL; // failed to commit - mi_commit_mask_set(pcommit_mask, &commit_needed_mask); - } - else if (*is_zero) { - // track zero initialization for valgrind - mi_track_mem_defined(segment, commit_needed * MI_COMMIT_SIZE); + if (!_mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, NULL, tld->stats)) { + _mi_arena_free(segment,segment_size,0,memid,tld->stats); + return NULL; + } } + mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0); + segment->memid = memid; - segment->mem_is_pinned = is_pinned; - segment->mem_is_large = mem_large; - segment->mem_is_committed = mi_commit_mask_is_full(pcommit_mask); - segment->mem_alignment = alignment; - segment->mem_align_offset = align_offset; + segment->allow_decommit = !memid.is_pinned; + segment->allow_purge = segment->allow_decommit && (mi_option_get(mi_option_purge_delay) >= 0); + segment->segment_size = segment_size; + segment->commit_mask = commit_mask; + segment->purge_expire = 0; + mi_commit_mask_create_empty(&segment->purge_mask); + mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan + mi_segments_track_size((long)(segment_size), tld); _mi_segment_map_allocated_at(segment); return segment; @@ -870,49 
+871,21 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay)); const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit); bool commit = eager || (required > 0); - bool is_zero = false; - - mi_commit_mask_t commit_mask; - mi_commit_mask_t decommit_mask; - mi_commit_mask_create_empty(&commit_mask); - mi_commit_mask_create_empty(&decommit_mask); - + // Allocate the segment from the OS mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id, - &segment_slices, &pre_size, &info_slices, &commit_mask, &decommit_mask, - &is_zero, &commit, tld, os_tld); + &segment_slices, &pre_size, &info_slices, commit, tld, os_tld); if (segment == NULL) return NULL; - // zero the segment info? -- not always needed as it may be zero initialized from the OS - mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - { + // zero the segment info? 
-- not always needed as it may be zero initialized from the OS + if (!segment->memid.initially_zero) { ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more - if (!is_zero) { - memset((uint8_t*)segment + ofs, 0, zsize); - } + size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more + _mi_memzero((uint8_t*)segment + ofs, zsize); } - segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed - segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large); - if (segment->allow_decommit) { - segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay); - segment->decommit_mask = decommit_mask; - mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); - #if MI_DEBUG>2 - const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE); - mi_commit_mask_t commit_needed_mask; - mi_commit_mask_create(0, commit_needed, &commit_needed_mask); - mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); - #endif - } - else { - segment->decommit_expire = 0; - mi_commit_mask_create_empty( &segment->decommit_mask ); - } - - // initialize segment info + // initialize the rest of the segment info const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices); segment->segment_slices = segment_slices; segment->segment_info_slices = info_slices; @@ -921,7 +894,7 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi segment->slice_entries = slice_entries; segment->kind = (required == 0 ? 
MI_SEGMENT_NORMAL : MI_SEGMENT_HUGE); - // memset(segment->slices, 0, sizeof(mi_slice_t)*(info_slices+1)); + // _mi_memzero(segment->slices, sizeof(mi_slice_t)*(info_slices+1)); _mi_stat_increase(&tld->stats->page_committed, mi_segment_info_size(segment)); // set up guard pages @@ -948,11 +921,11 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // initialize initial free pages if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page mi_assert_internal(huge_page==NULL); - mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld); + mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't purge */, tld); } else { mi_assert_internal(huge_page!=NULL); - mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask)); + mi_assert_internal(mi_commit_mask_is_empty(&segment->purge_mask)); mi_assert_internal(mi_commit_mask_is_full(&segment->commit_mask)); *huge_page = mi_segment_span_allocate(segment, info_slices, segment_slices - info_slices - guard_slices, tld); mi_assert_internal(*huge_page != NULL); // cannot fail as we commit in advance @@ -1015,17 +988,16 @@ static mi_slice_t* mi_segment_page_clear(mi_page_t* page, mi_segments_tld_t* tld _mi_stat_decrease(&tld->stats->pages, 1); // reset the page memory to reduce memory pressure? 
- if (!segment->mem_is_pinned && !page->is_reset && mi_option_is_enabled(mi_option_page_reset)) { + if (segment->allow_decommit && mi_option_is_enabled(mi_option_deprecated_page_reset)) { size_t psize; - uint8_t* start = _mi_page_start(segment, page, &psize); - page->is_reset = true; + uint8_t* start = _mi_page_start(segment, page, &psize); _mi_os_reset(start, psize, tld->stats); } // zero the page data, but not the segment fields page->is_zero_init = false; ptrdiff_t ofs = offsetof(mi_page_t, capacity); - memset((uint8_t*)page + ofs, 0, sizeof(*page) - ofs); + _mi_memzero((uint8_t*)page + ofs, sizeof(*page) - ofs); page->xblock_size = 1; // and free it @@ -1256,8 +1228,8 @@ static void mi_segment_abandon(mi_segment_t* segment, mi_segments_tld_t* tld) { slice = slice + slice->slice_count; } - // perform delayed decommits - mi_segment_delayed_decommit(segment, mi_option_is_enabled(mi_option_abandoned_page_decommit) /* force? */, tld->stats); + // perform delayed decommits (forcing is much slower on mstress) + mi_segment_try_purge(segment, mi_option_is_enabled(mi_option_abandoned_page_purge) /* force? */, tld->stats); // all pages in the segment are abandoned; add it to the abandoned list _mi_stat_increase(&tld->stats->segments_abandoned, 1); @@ -1365,7 +1337,6 @@ static mi_segment_t* mi_segment_reclaim(mi_segment_t* segment, mi_heap_t* heap, if (mi_slice_is_used(slice)) { // in use: reclaim the page in our heap mi_page_t* page = mi_slice_to_page(slice); - mi_assert_internal(!page->is_reset); mi_assert_internal(page->is_committed); mi_assert_internal(mi_page_thread_free_flag(page)==MI_NEVER_DELAYED_FREE); mi_assert_internal(mi_page_heap(page) == NULL); @@ -1446,7 +1417,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice } else { // otherwise, push on the visited list so it gets not looked at too quickly again - mi_segment_delayed_decommit(segment, true /* force? 
*/, tld->stats); // forced decommit if needed as we may not visit soon again + mi_segment_try_purge(segment, true /* force? */, tld->stats); // force purge if needed as we may not visit soon again mi_abandoned_visited_push(segment); } } @@ -1470,9 +1441,9 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld) mi_segment_reclaim(segment, heap, 0, NULL, tld); } else { - // otherwise, decommit if needed and push on the visited list - // note: forced decommit can be expensive if many threads are destroyed/created as in mstress. - mi_segment_delayed_decommit(segment, force, tld->stats); + // otherwise, purge if needed and push on the visited list + // note: forced purge can be expensive if many threads are destroyed/created as in mstress. + mi_segment_try_purge(segment, force, tld->stats); mi_abandoned_visited_push(segment); } } @@ -1530,7 +1501,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki } mi_assert_internal(page != NULL && page->slice_count*MI_SEGMENT_SLICE_SIZE == page_size); mi_assert_internal(_mi_ptr_segment(page)->thread_id == _mi_thread_id()); - mi_segment_delayed_decommit(_mi_ptr_segment(page), false, tld->stats); + mi_segment_try_purge(_mi_ptr_segment(page), false, tld->stats); return page; } @@ -1564,7 +1535,7 @@ static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_assert_internal(psize - (aligned_p - start) >= size); uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list ptrdiff_t decommit_size = aligned_p - decommit_start; - _mi_os_decommit(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + _mi_os_reset(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments } return page; @@ -1607,9 +1578,12 @@ void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_bloc mi_assert_internal(page->used == 1); // this is called just before 
the free mi_assert_internal(page->free == NULL); if (segment->allow_decommit) { - const size_t csize = mi_usable_size(block) - sizeof(mi_block_t); - uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); - _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + size_t csize = mi_usable_size(block); + if (csize > sizeof(mi_block_t)) { + csize = csize - sizeof(mi_block_t); + uint8_t* p = (uint8_t*)block + sizeof(mi_block_t); + _mi_os_reset(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments + } } } #endif diff --git a/source/luametatex/source/libraries/mimalloc/src/static.c b/source/luametatex/source/libraries/mimalloc/src/static.c index d992f4daf..bc05dd72f 100644 --- a/source/luametatex/source/libraries/mimalloc/src/static.c +++ b/source/luametatex/source/libraries/mimalloc/src/static.c @@ -32,7 +32,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "page.c" // includes page-queue.c #include "random.c" #include "segment.c" -#include "segment-cache.c" +#include "segment-map.c" #include "stats.c" #include "prim/prim.c" #if MI_OSX_ZONE diff --git a/source/luametatex/source/libraries/mimalloc/src/stats.c b/source/luametatex/source/libraries/mimalloc/src/stats.c index d2a316818..300956ce1 100644 --- a/source/luametatex/source/libraries/mimalloc/src/stats.c +++ b/source/luametatex/source/libraries/mimalloc/src/stats.c @@ -96,6 +96,7 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_add(&stats->reserved, &src->reserved, 1); mi_stat_add(&stats->committed, &src->committed, 1); mi_stat_add(&stats->reset, &src->reset, 1); + mi_stat_add(&stats->purged, &src->purged, 1); mi_stat_add(&stats->page_committed, &src->page_committed, 1); mi_stat_add(&stats->pages_abandoned, &src->pages_abandoned, 1); @@ -111,6 +112,8 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) { mi_stat_counter_add(&stats->pages_extended, 
&src->pages_extended, 1); mi_stat_counter_add(&stats->mmap_calls, &src->mmap_calls, 1); mi_stat_counter_add(&stats->commit_calls, &src->commit_calls, 1); + mi_stat_counter_add(&stats->reset_calls, &src->reset_calls, 1); + mi_stat_counter_add(&stats->purge_calls, &src->purge_calls, 1); mi_stat_counter_add(&stats->page_no_retire, &src->page_no_retire, 1); mi_stat_counter_add(&stats->searches, &src->searches, 1); @@ -143,7 +146,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* const int64_t pos = (n < 0 ? -n : n); if (pos < base) { if (n!=1 || suffix[0] != 'B') { // skip printing 1 B for the unit column - snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix)); + snprintf(buf, len, "%d %-3s", (int)n, (n==0 ? "" : suffix)); } } else { @@ -158,7 +161,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* snprintf(unitdesc, 8, "%s%s%s", magnitude, (base==1024 ? "i" : ""), suffix); snprintf(buf, len, "%ld.%ld %-3s", whole, (frac1 < 0 ? -frac1 : frac1), unitdesc); } - _mi_fprintf(out, arg, (fmt==NULL ? "%11s" : fmt), buf); + _mi_fprintf(out, arg, (fmt==NULL ? "%12s" : fmt), buf); } @@ -167,7 +170,7 @@ static void mi_print_amount(int64_t n, int64_t unit, mi_output_fun* out, void* a } static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* arg) { - if (unit==1) _mi_fprintf(out, arg, "%11s"," "); + if (unit==1) _mi_fprintf(out, arg, "%12s"," "); else mi_print_amount(n,0,out,arg); } @@ -182,7 +185,7 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64 mi_print_count(stat->allocated, unit, out, arg); if (stat->allocated > stat->freed) { _mi_fprintf(out, arg, " "); - _mi_fprintf(out, arg, (notok == NULL ? "not all freed!" : notok)); + _mi_fprintf(out, arg, (notok == NULL ? 
"not all freed" : notok)); _mi_fprintf(out, arg, "\n"); } else { @@ -195,7 +198,7 @@ static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64 mi_print_amount(stat->freed, -1, out, arg); mi_print_amount(stat->current, -1, out, arg); if (unit==-1) { - _mi_fprintf(out, arg, "%22s", ""); + _mi_fprintf(out, arg, "%24s", ""); } else { mi_print_amount(-unit, 1, out, arg); @@ -219,12 +222,19 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t mi_stat_print_ex(stat, msg, unit, out, arg, NULL); } +static void mi_stat_peak_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) { + _mi_fprintf(out, arg, "%10s:", msg); + mi_print_amount(stat->peak, unit, out, arg); + _mi_fprintf(out, arg, "\n"); +} + static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) { _mi_fprintf(out, arg, "%10s:", msg); mi_print_amount(stat->total, -1, out, arg); _mi_fprintf(out, arg, "\n"); } + static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) { const int64_t avg_tens = (stat->count == 0 ? 
0 : (stat->total*10 / stat->count)); const long avg_whole = (long)(avg_tens/10); @@ -234,7 +244,7 @@ static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* static void mi_print_header(mi_output_fun* out, void* arg ) { - _mi_fprintf(out, arg, "%10s: %10s %10s %10s %10s %10s %10s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); + _mi_fprintf(out, arg, "%10s: %11s %11s %11s %11s %11s %11s\n", "heap stats", "peak ", "total ", "freed ", "current ", "unit ", "count "); } #if MI_STAT>1 @@ -321,7 +331,8 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) #endif mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, ""); mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, ""); - mi_stat_print(&stats->reset, "reset", 1, out, arg); + mi_stat_peak_print(&stats->reset, "reset", 1, out, arg ); + mi_stat_peak_print(&stats->purged, "purged", 1, out, arg ); mi_stat_print(&stats->page_committed, "touched", 1, out, arg); mi_stat_print(&stats->segments, "segments", -1, out, arg); mi_stat_print(&stats->segments_abandoned, "-abandoned", -1, out, arg); @@ -332,9 +343,11 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print(&stats->page_no_retire, "-noretire", out, arg); mi_stat_counter_print(&stats->mmap_calls, "mmaps", out, arg); mi_stat_counter_print(&stats->commit_calls, "commits", out, arg); + mi_stat_counter_print(&stats->reset_calls, "resets", out, arg); + mi_stat_counter_print(&stats->purge_calls, "purges", out, arg); mi_stat_print(&stats->threads, "threads", -1, out, arg); mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); - _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); + _mi_fprintf(out, arg, "%10s: %5zu\n", "numa nodes", _mi_os_numa_node_count()); size_t elapsed; size_t user_time; @@ -345,7 +358,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) 
size_t peak_commit; size_t page_faults; mi_process_info(&elapsed, &user_time, &sys_time, &current_rss, &peak_rss, &current_commit, &peak_commit, &page_faults); - _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); + _mi_fprintf(out, arg, "%10s: %5ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); mi_printf_amount((int64_t)peak_rss, 1, out, arg, "%s"); @@ -431,7 +444,7 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { mi_process_info_t pinfo; - _mi_memzero(&pinfo,sizeof(pinfo)); + _mi_memzero_var(pinfo); pinfo.elapsed = _mi_clock_end(mi_process_start); pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); diff --git a/source/luametatex/source/libraries/readme.txt b/source/luametatex/source/libraries/readme.txt index d41ecb58a..f249eae99 100644 --- a/source/luametatex/source/libraries/readme.txt +++ b/source/luametatex/source/libraries/readme.txt @@ -24,4 +24,14 @@ In decNumber.c this got added: # define malloc lmt_memory_malloc # define free lmt_memory_free +In softposit/source/include/softposit_types.h we have to comment out the initializations in the unions +because the compiler complains about it (we're not using c++).
So: + +uint32_t ui; // =0; // patched by HH because the compilers don't like this +uint64_t ui[2]; // ={0,0}; // idem +uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idem +uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idem +uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idem + + + Hans
\ No newline at end of file diff --git a/source/luametatex/source/libraries/softposit/LICENSE b/source/luametatex/source/libraries/softposit/LICENSE new file mode 100644 index 000000000..88173ab8d --- /dev/null +++ b/source/luametatex/source/libraries/softposit/LICENSE @@ -0,0 +1,34 @@ +BSD 3-Clause License + +This is part of the SoftPosit Posit Arithmetic Package by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +Many of the C source files design were based on SoftFloat IEEE Floating-Point +Arithmetic Package, Release 3d, by John R. Hauser. + + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +* Redistributions of source code must retain the above copyright notice, this + list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +* Neither the name of the copyright holder nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/source/luametatex/source/libraries/softposit/README.md b/source/luametatex/source/libraries/softposit/README.md new file mode 100644 index 000000000..b50b271c5 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/README.md @@ -0,0 +1,551 @@ +# <img src="https://posithub.org/images/NGA_logo.png" width="90" height="50"/> SoftPosit + +This version (0.4.1) supports: + + 32-bit with two exponent bit (posit32_t). -> Not exhaustively tested + + 16-bit with one exponent bit (posit16_t). + + 8-bit with zero exponent bit (posit8_t). + + 2-bit to 32-bit with two exponent bits (posit_2_t) -> Not fast : Using 32-bits in the background to store all sizes. + Exhaustively tested for X=(2:32) : pX2_rint, pX2_to_pX2, pX2_to_i32/64, pX2_to_ui32/64, pX2_sqrt, ui/i32_to_pX2 + Exhaustively tested for X=(2:13) : ui64_to_pX2, i64_to_pX2 + Exhaustively tested for X=(2:20) : pX2_add, pX2_sub, pX2_mul, pX2_div + Exhaustively tested for X=(2:21) : pX2_mul + Exhaustively tested for X=(2:15) : pX2_mulAdd + Exhaustively tested for X=(2:14) : quireX2_fdp_add, quireX2_fdp_sub (using quire32 as the underlying code) + + + +This code is tested on + +* GNU gcc (SUSE Linux) 4.8.5 +* Apple LLVM version 9.1.0 (clang-902.0.39.2) +* Windows 10 (Mingw-w64) + +Please note that the same Makefile in build/Linux-x86_64-GCC is used for all 3 operating systems. 
+ + + All posit8_t and posit16_t operations are exhaustively tested with exception of p16_mulAdd and q16_fdp_add/sub operations. + + **posit32_t operations are still being tested exhaustively for correctness. It will take weeks to months before these tests complete.** + + + Versions are offered + + * [Fast C version](#cversion) : The main source code where all other versions are based on. + * [User friendly C++ version](#cppversion) : Documentation can be found below. + * [User friendly Python version](https://gitlab.com/cerlane/SoftPosit-Python/) : https://gitlab.com/cerlane/SoftPosit-Python/ + * [Julia](#jversion) : Currently only simple .so support. Documentation can be found below. + * [Others](#known) + + +## <a name="cversion"/>Fast C version + + +### Examples + +#### A 8-bit example on how to use the code to add: + + +``` +#include "softposit.h" + +int main (int argc, char *argv[]){ + + posit8_t pA, pB, pZ; + pA = castP8(0xF2); + pB = castP8(0x23); + + pZ = p8_add(pA, pB); + + //To check answer by converting it to double + double dZ = convertP8ToDouble(pZ); + printf("dZ: %.15f\n", dZ); + + //To print result in binary + uint8_t uiZ = castUI(pZ); + printBinary((uint64_t*)&uiZ, 8); + + return 0; + +} +``` + + +#### A 16-bit example on how to use the code to multiply: + +``` +#include "softposit.h" + +int main (int argc, char *argv[]){ + + posit16_t pA, pB, pZ; + pA = castP16(0x0FF2); + pB = castP16(0x2123); + + pZ = p16_mul(pA, pB); + + //To check answer by converting it to double + double dZ = convertP16ToDouble(pZ); + printf("dZ: %.15f\n", dZ); + + //To print result in binary + uint16_t uiZ = castUI(pZ); + printBinary((uint64_t*)&uiZ, 16); + + return 0; +} +``` + +#### A 24-bit (es=2) example on how to use the code: + + +``` +#include "softposit.h" + +int main (int argc, char *argv[]){ + + posit_2_t pA, pB, pZ; + pA.v = 0xF2; //this is to set the bits (method 1) + pB = castPX2(0x23); //this is to set the bits (method 2) + + pZ = pX2_add(pA, pB, 24); + + //To 
check answer by converting it to double + double dZ = convertPX2ToDouble(pZ); + printf("dZ: %.40f\n", dZ); + + //To print result in binary + printBinaryPX((uint32_t*)&pZ.v, 24); + + //To print result as double + printf("result: %.40f\n", convertPX2ToDouble(pZ)); + + return 0; + +} +``` + +#### For deep learning, please use quire. + + +``` +//Convert double to posit +posit16_t pA = convertDoubleToP16(1.02783203125 ); +posit16_t pB = convertDoubleToP16(0.987060546875); +posit16_t pC = convertDoubleToP16(0.4998779296875); +posit16_t pD = convertDoubleToP16(0.8797607421875); + +quire16_t qZ; + +//Set quire to 0 +qZ = q16_clr(qZ); + +//accumulate products without roundings +qZ = q16_fdp_add(qZ, pA, pB); +qZ = q16_fdp_add(qZ, pC, pD); + +//Convert back to posit +posit16_t pZ = q16_to_p16(qZ); + +//To check answer +double dZ = convertP16ToDouble(pZ); +``` + +### Build and link + +#### Build - softposit.a + + +Please note that only 64-bit systems are supported. For Mac OSX and Linux, the same Makefile is used. + +Note that architecture specific optimisation is removed. To get maximum speed, please update OPTIMISATION flag in build/Linux-x86_64-GCC/Makefile. + + +``` +cd SoftPosit/build/Linux-x86_64-GCC +make -j6 all + +``` + +#### Link - softposit.a + + +If your source code is for example "main.c" and you want to create an executable "main". +Assume that SoftPosit is installed and installed in the same directory (installing in the same directory is NOT recommended). 
+ +``` +gcc -lm -o main \ + main.c SoftPosit/build/Linux-x86_64-GCC/softposit.a -ISoftPosit/source/include -O2 + +``` + +### Features + + +#### Main Posit Functionalities: + + +Add : + + posit16_t p16_add(posit16_t, posit16_t) + + posit8_t p8_add(posit8_t, posit8_t) + +Subtract : + + posit16_t p16_sub(posit16_t, posit16_t) + + posit8_t p8_sub(posit8_t, posit8_t) + + +Divide : + + posit16_t p16_div(posit16_t, posit16_t) + + posit8_t p8_div(posit8_t, posit8_t) + +Multiply : + + posit16_t p16_mul(posit16_t, posit16_t) + + posit8_t p8_mul(posit8_t, posit8_t) + + +Fused Multiply Add : + + posit16_t p16_mulAdd(posit16_t, posit16_t, posit16_t) + + posit8_t p8_mulAdd(posit8_t, posit8_t, posit8_t) + + + Note: p16_mulAdd(a, b, c) <=> a*b + c + + +#### Main Quire Functionalities + + +Fused dot product-add : + + quire16_t q16_fdp_add(quire16_t, posit16_t, posit16_t) + + quire8_t q8_fdp_add(quire8_t, posit8_t, posit8_t) + + Note: q8_fdp_add (a, b, c) <=> a + b*c + +Fused dot product-subtract : + + quire16_t q16_fdp_sub(quire16_t, posit16_t, posit16_t) + + quire8_t q8_fdp_sub(quire8_t, posit8_t, posit8_t) + +Set quire variable to zero : + + quire16_t q16_clr(quire16_t) + + quire8_t q8_clr(quire8_t) + +Convert quire to posit : + + posit16_t q16_to_p16(quire16_t) + + posit8_t q8_to_p8(quire8_t) + + +#### Functionalities in Posit Standard + + +Square root : + + posit16_t p16_sqrt(posit16_t) + + posit8_t p8_sqrt(posit8_t) + +Round to nearest integer : + + posit16_t p16_roundToInt(posit16_t) + + posit8_t p8_roundToInt(posit8_t) + +Check equal : + + bool p16_eq( posit16_t, posit16_t ) + + bool p8_eq( posit8_t, posit8_t ) + +Check less than or equal : + + bool p16_le( posit16_t, posit16_t ) + + bool p8_le( posit8_t, posit8_t ) + +Check less than : + + bool p16_lt( posit16_t, posit16_t ) + + bool p8_lt( posit8_t, posit8_t ) + +Convert posit to integer (32 bits) : + + int_fast32_t p16_to_i32( posit16_t ) + + int_fast32_t p8_to_i32( posit8_t ) + +Convert posit to long long integer (64 bits)
: + + int_fast64_t p16_to_i64( posit16_t) + + int_fast64_t p8_to_i64( posit8_t) + +Convert unsigned integer (32 bits) to posit: + + posit16_t ui32_to_p16( uint32_t a ) + + posit8_t ui32_to_p8( uint32_t a ) + +Convert unsigned long long int (64 bits) to posit: + + posit16_t ui64_to_p16( uint64_t a ) + + posit8_t ui64_to_p8( uint64_t a ) + +Convert integer (32 bits) to posit: + + posit16_t i32_to_p16( int32_t a ) + + posit8_t i32_to_p8( int32_t a ) + +Convert long integer (64 bits) to posit: + + posit16_t i64_to_p16( int64_t a ) + + posit8_t i64_to_p8( int64_t a ) + +Convert posit to unsigned integer (32 bits) : + + uint_fast32_t p16_to_ui32( posit16_t ) + + uint_fast32_t p8_to_ui32( posit8_t ) + +Convert posit to unsigned long long integer (64 bits) : + + uint_fast64_t p16_to_ui64( posit16_t) + + uint_fast64_t p8_to_ui64( posit8_t) + +Convert posit to integer (32 bits) : + + int_fast32_t p16_to_i32( posit16_t ) + + int_fast32_t p8_to_i32( posit8_t ) + +Convert posit to long long integer (64 bits) : + + int_fast64_t p16_to_i64( posit16_t) + + int_fast64_t p8_to_i64( posit8_t) + +Convert posit to posit of another size : + + posit8_t p16_to_p8( posit16_t ) + + posit32_t p16_to_p32( posit16_t ) + + posit16_t p8_to_p16( posit8_t ) + + posit32_t p8_to_p32( posit8_t ) + + + +#### Helper Functionalities (NOT in Posit Standard) + +Convert posit to double (64 bits) : + + double convertP16ToDouble(posit16_t) + + double convertP8ToDouble(posit8_t) + +Convert double (64 bits) to posit : + + posit16_t convertDoubleToP16(double) + + posit8_t convertDoubleToP8(double) + +Cast binary expressed in unsigned integer to posit : + + posit16_t castP16(uint16_t) + + posit8_t castP8(uint8_t) + +Cast posit into binary expressed in unsigned integer + + uint16_t castUI(posit16_t) + + uint8_t castUI(posit8_t) + + +## <a name="cppversion"/>Easy to use C++ version + + +### Build and Link + +**Build and link your C++ program to SoftPosit.a (C)** + +Please compile your executable with g++ and
not gcc. + +``` +g++ -std=gnu++11 -o main \ + ../source/testmain.cpp \ + ../../SoftPosit/source/../build/Linux-x86_64-GCC/softposit.a \ + -I../../SoftPosit/source/../build/Linux-x86_64-GCC -O2 +``` + +### Example + +#### Example of testmain.cpp + +``` +#include "softposit_cpp.h" + +int main(int argc, char *argv[]){ + posit16 x = 1; + posit16 y = 1.5; + posit8 x8 = 1; + quire16 q; + quire8 q8; + + x += p16(1.5)*5.1; + + printf("%.13f sizeof: %d\n", x.toDouble(), sizeof(posit16)); + + x = q.qma(4, 1.2).toPosit(); + printf("%.13f sizeof: %d\n", x.toDouble(), sizeof(quire16)); + + x8 = q8.qma(4, 1.2).toPosit(); + printf("%.13f sizeof: %d\n", x8.toDouble(), sizeof(quire8)); + + std::cout << x; + + return 0; +} + + +``` + +### Functionalities + +#### Main functionalities + +* Posit types: posit16, posit8 +* Fused-multiply-add: + * posit16 fma(posit16, posit16, posit16) + * posit8 fma(posit8, posit8, posit8) +* Square root: + * posit16 sqrt(posit16) + * posit8 sqrt(posit8) +* roundToInt: + * posit16 rint(posit16) + * posit8 rint(posit8) +* Supported operators + * \+ + * += + * \- + * \-= + * * + * *= + * / + * /= + * << + * <<= + * >> + * >>= + * & + * &= + * | + * |= + * ^ + * ^= + * && + * || + * ++ + * -- + * == + * ~ + * !
+ * != + * > + * < + * >= + * <= +* Posit to Double: + * double (instance of posit).toDouble() +* Double to Posit: + * posit16 p16(double) + * posit8 p8(double) +* Posit to NaR: + * posit16 (instance of posit16).toNaR() + * posit8 (instance of posit8).toNaR() + +#### Quire functionalities (particularly for deep learning) + +* Quire types: quire16, quire8 (when declared, quire is initialized to zero) +* Clear quire to zero: + * (instance of quire16).clr() +* Quire multiply add (fused) + * (instance of quire16).fma(quire16) + * (instance of quire8).fma(quire8) +* Quire multiply subtract (fused) + * (instance of quire16).fms(quire16) + * (instance of quire8).fms(quire8) +* Convert quire to Posit + * posit16 (instance of quire16).toPosit() + * posit8 (instance of quire8).toPosit() +* Check if quire is NaR + * bool (instance of quire).isNaR() + +## <a name="jversion"/>Julia + +* [Julia implementation](https://github.com/milankl/SoftPosit.jl) on top of SoftPosit + +### Install via Julia package manager + +``` +> add https://github.com/milankl/SoftPosit.jl + +``` + +Credits to Milan Klöwer.
+ +### Behind the scenes + +#### Build shared library + +``` +cd SoftPosit/build/Linux-x86_64-GCC/ +make -j6 julia +``` + +#### Simple Tests + +``` +julia> t = ccall((:convertDoubleToP16, "/path/to/SoftPosit/build/Linux-x86_64-GCC/softposit.so"), UInt16, (Float64,),1.0) +0x4000 + +julia> t = ccall((:convertDoubleToP16, "/path/to/SoftPosit/build/Linux-x86_64-GCC/softposit.so"), UInt16, (Float64,),-1.0) +0xc000 + +``` + +## <a name="known"/>Known implementations on top of SoftPosit + +* [Andrey Zgarbul's Rust implementation](https://crates.io/crates/softposit) +* [Milan Klöwer's Julia implementation](https://github.com/milankl/SoftPosit.jl) +* [SpeedGo Computing's TensorFlow](https://github.com/xman/tensorflow/tree/posit) +* [SpeedGo Computing's Numpy](https://github.com/xman/numpy-posit) +* [Cerlane Leong's SoftPosit-Python](https://gitlab.com/cerlane/SoftPosit-Python) +* [David Thien's SoftPosit bindings Racket](https://github.com/DavidThien/softposit-rkt) +* [Bill Zorn's SoftPosit and SoftFloat Python](https://pypi.org/project/sfpy/) + + diff --git a/source/luametatex/source/libraries/softposit/build/Linux-x86_64-GCC/Makefile b/source/luametatex/source/libraries/softposit/build/Linux-x86_64-GCC/Makefile new file mode 100644 index 000000000..d8e76b157 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/build/Linux-x86_64-GCC/Makefile @@ -0,0 +1,245 @@ +#============================================================================ +# +#This C source file is part of the SoftPosit Posit Arithmetic Package +#by S. H. Leong (Cerlane). +# +#Copyright 2017, 2018 A*STAR. All rights reserved. +# +#This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +#Package, Release 3d, by John R. Hauser. +# +# +# Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +# University of California. All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions, and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions, and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# 3. Neither the name of the University nor the names of its contributors +# may be used to endorse or promote products derived from this software +# without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +# DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +# DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF +# THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +#============================================================================= + +SOURCE_DIR ?= ../../source +PYTHON_DIR ?= ../../python +SPECIALIZE_TYPE ?= 8086-SSE +COMPILER ?= gcc + +SOFTPOSIT_OPTS ?= \ + -DINLINE_LEVEL=5 #\ + -DSOFTPOSIT_QUAD -lquadmath + +COMPILE_PYTHON = \ + $(COMPILER) -fPIC -c $(PYTHON_DIR)/softposit_python_wrap.c \ + -I/usr/include/python \ + -I$(SOURCE_DIR)/include -I. 
+COMPILE_PYTHON3 = \ + $(COMPILER) -fPIC -c $(PYTHON_DIR)/softposit_python_wrap.c \ + -I/usr/include/python3 \ + -I$(SOURCE_DIR)/include -I. +LINK_PYTHON = \ + ld -shared *.o -o $(PYTHON_DIR)/_softposit.so + + +ifeq ($(OS),Windows_NT) + DELETE = del /Q /F +else + DELETE = rm -f +endif + +C_INCLUDES = -I. -I$(SOURCE_DIR)/$(SPECIALIZE_TYPE) -I$(SOURCE_DIR)/include +OPTIMISATION = -O2 #-march=core-avx2 +COMPILE_C = \ + $(COMPILER) -c -Werror-implicit-function-declaration -DSOFTPOSIT_FAST_INT64 \ + $(SOFTPOSIT_OPTS) $(C_INCLUDES) $(OPTIMISATION) \ + -o $@ +MAKELIB = ar crs $@ +MAKESLIB = $(COMPILER) -shared $^ + +OBJ = .o +LIB = .a +SLIB = .so + +.PHONY: all +all: softposit$(LIB) + +quad: SOFTPOSIT_OPTS+= -DSOFTPOSIT_QUAD -lquadmath +quad: all + +python2: SOFTPOSIT_OPTS+= -fPIC +python2: all + $(COMPILE_PYTHON) + $(LINK_PYTHON) + +python3: SOFTPOSIT_OPTS+= -fPIC +python3: all + $(COMPILE_PYTHON3) + $(LINK_PYTHON) + +julia: SOFTPOSIT_OPTS+= -fPIC +julia: softposit$(SLIB) + + + +OBJS_PRIMITIVES = + +OBJS_SPECIALIZE = + +OBJS_OTHERS = \ + s_addMagsP8$(OBJ) \ + s_subMagsP8$(OBJ) \ + s_mulAddP8$(OBJ) \ + p8_add$(OBJ) \ + p8_sub$(OBJ) \ + p8_mul$(OBJ) \ + p8_div$(OBJ) \ + p8_sqrt$(OBJ) \ + p8_to_p16$(OBJ) \ + p8_to_p32$(OBJ) \ + p8_to_pX2$(OBJ) \ + p8_to_i32$(OBJ) \ + p8_to_i64$(OBJ) \ + p8_to_ui32$(OBJ) \ + p8_to_ui64$(OBJ) \ + p8_roundToInt$(OBJ) \ + p8_mulAdd$(OBJ) \ + p8_eq$(OBJ) \ + p8_le$(OBJ) \ + p8_lt$(OBJ) \ + quire8_fdp_add$(OBJ) \ + quire8_fdp_sub$(OBJ) \ + ui32_to_p8$(OBJ) \ + ui64_to_p8$(OBJ) \ + i32_to_p8$(OBJ) \ + i64_to_p8$(OBJ) \ + s_addMagsP16$(OBJ) \ + s_subMagsP16$(OBJ) \ + s_mulAddP16$(OBJ) \ + p16_to_ui32$(OBJ) \ + p16_to_ui64$(OBJ) \ + p16_to_i32$(OBJ) \ + p16_to_i64$(OBJ) \ + p16_to_p8$(OBJ) \ + p16_to_p32$(OBJ) \ + p16_to_pX2$(OBJ) \ + p16_roundToInt$(OBJ) \ + p16_add$(OBJ) \ + p16_sub$(OBJ) \ + p16_mul$(OBJ) \ + p16_mulAdd$(OBJ) \ + p16_div$(OBJ) \ + p16_eq$(OBJ) \ + p16_le$(OBJ) \ + p16_lt$(OBJ) \ + p16_sqrt$(OBJ) \ + quire16_fdp_add$(OBJ) \ + 
quire16_fdp_sub$(OBJ) \ + quire_helper$(OBJ) \ + ui32_to_p16$(OBJ) \ + ui64_to_p16$(OBJ) \ + i32_to_p16$(OBJ) \ + i64_to_p16$(OBJ) \ + s_addMagsP32$(OBJ) \ + s_subMagsP32$(OBJ) \ + s_mulAddP32$(OBJ) \ + p32_to_ui32$(OBJ) \ + p32_to_ui64$(OBJ) \ + p32_to_i32$(OBJ) \ + p32_to_i64$(OBJ) \ + p32_to_p8$(OBJ) \ + p32_to_p16$(OBJ) \ + p32_to_pX2$(OBJ) \ + p32_roundToInt$(OBJ) \ + p32_add$(OBJ) \ + p32_sub$(OBJ) \ + p32_mul$(OBJ) \ + p32_mulAdd$(OBJ) \ + p32_div$(OBJ) \ + p32_eq$(OBJ) \ + p32_le$(OBJ) \ + p32_lt$(OBJ) \ + p32_sqrt$(OBJ) \ + quire32_fdp_add$(OBJ) \ + quire32_fdp_sub$(OBJ) \ + ui32_to_p32$(OBJ) \ + ui64_to_p32$(OBJ) \ + i32_to_p32$(OBJ) \ + i64_to_p32$(OBJ) \ + s_approxRecipSqrt_1Ks$(OBJ) \ + c_convertDecToPosit8$(OBJ) \ + c_convertPosit8ToDec$(OBJ) \ + c_convertDecToPosit16$(OBJ) \ + c_convertPosit16ToDec$(OBJ) \ + c_convertQuire8ToPosit8$(OBJ) \ + c_convertQuire16ToPosit16$(OBJ) \ + c_convertQuire32ToPosit32$(OBJ) \ + c_convertDecToPosit32$(OBJ) \ + c_convertPosit32ToDec$(OBJ) \ + c_int$(OBJ) \ + s_addMagsPX2$(OBJ) \ + s_subMagsPX2$(OBJ) \ + s_mulAddPX2$(OBJ) \ + pX2_add$(OBJ) \ + pX2_sub$(OBJ) \ + pX2_mul$(OBJ) \ + pX2_div$(OBJ) \ + pX2_mulAdd$(OBJ) \ + pX2_roundToInt$(OBJ) \ + pX2_sqrt$(OBJ) \ + pX2_eq$(OBJ) \ + pX2_le$(OBJ) \ + pX2_lt$(OBJ) \ + ui32_to_pX2$(OBJ) \ + ui64_to_pX2$(OBJ) \ + i32_to_pX2$(OBJ) \ + i64_to_pX2$(OBJ) \ + c_convertQuireX2ToPositX2$(OBJ) + + +OBJS_ALL := $(OBJS_PRIMITIVES) $(OBJS_SPECIALIZE) $(OBJS_OTHERS) + +$(OBJS_ALL): \ + platform.h \ + $(SOURCE_DIR)/include/primitives.h + +$(OBJS_SPECIALIZE) $(OBJS_OTHERS): \ + $(SOURCE_DIR)/include/softposit_types.h $(SOURCE_DIR)/include/internals.h \ + $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/specialize.h \ + $(SOURCE_DIR)/include/softposit.h + +$(OBJS_PRIMITIVES) $(OBJS_OTHERS): %$(OBJ): $(SOURCE_DIR)/%.c + $(COMPILE_C) $(SOURCE_DIR)/$*.c + +$(OBJS_SPECIALIZE): %$(OBJ): $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/%.c + $(COMPILE_C) $(SOURCE_DIR)/$(SPECIALIZE_TYPE)/$*.c + +softposit$(LIB): $(OBJS_ALL) + 
$(MAKELIB) $^ + +softposit$(SLIB): $(OBJS_ALL) + $(MAKESLIB) -o $@ + + +.PHONY: clean +clean: + $(DELETE) $(OBJS_ALL) softposit_python_wrap.o softposit$(LIB) softposit$(SLIB) + diff --git a/source/luametatex/source/libraries/softposit/build/Linux-x86_64-GCC/platform.h b/source/luametatex/source/libraries/softposit/build/Linux-x86_64-GCC/platform.h new file mode 100644 index 000000000..c6c752ffc --- /dev/null +++ b/source/luametatex/source/libraries/softposit/build/Linux-x86_64-GCC/platform.h @@ -0,0 +1,48 @@ + +/*============================================================================ + +This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +/*---------------------------------------------------------------------------- +*----------------------------------------------------------------------------*/ +#define LITTLEENDIAN 1 + +/*---------------------------------------------------------------------------- +*----------------------------------------------------------------------------*/ +#ifdef __GNUC_STDC_INLINE__ +#define INLINE inline +#else +#define INLINE extern inline +#endif + diff --git a/source/luametatex/source/libraries/softposit/build/Linux-x86_64-GCC/softposit_test.a b/source/luametatex/source/libraries/softposit/build/Linux-x86_64-GCC/softposit_test.a Binary files differnew file mode 100644 index 000000000..7bab40bc7 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/build/Linux-x86_64-GCC/softposit_test.a diff --git a/source/luametatex/source/libraries/softposit/source/8086-SSE/specialize.h b/source/luametatex/source/libraries/softposit/source/8086-SSE/specialize.h new file mode 100644 index 000000000..fcf057113 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/8086-SSE/specialize.h @@ -0,0 +1,50 @@ + +/*============================================================================ + +This C header file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. 
+ +This C header file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#ifndef specialize_h +#define specialize_h 1 + +#include <stdbool.h> +#include <stdint.h> + +#include "softposit_types.h" + +#endif diff --git a/source/luametatex/source/libraries/softposit/source/c_convertDecToPosit16.c b/source/luametatex/source/libraries/softposit/source/c_convertDecToPosit16.c new file mode 100644 index 000000000..86a8383bc --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertDecToPosit16.c @@ -0,0 +1,447 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <math.h> + +#include "platform.h" +#include "internals.h" + +void checkExtraTwoBitsP16(double f16, double temp, bool * bitsNPlusOne, bool * bitsMore ){ + temp /= 2; + if (temp<=f16){ + *bitsNPlusOne = 1; + f16-=temp; + } + if (f16>0) + *bitsMore = 1; +} +uint_fast16_t convertFractionP16(double f16, uint_fast8_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ + + uint_fast16_t frac=0; + + if(f16==0) return 0; + else if(f16==INFINITY) return 0x8000; + + f16 -= 1; //remove hidden bit + if (fracLength==0) + checkExtraTwoBitsP16(f16, 1.0, bitsNPlusOne, bitsMore); + else{ + double temp = 1; + while (true){ + temp /= 2; + if (temp<=f16){ + f16-=temp; + fracLength--; + frac = (frac<<1) + 1; //shift in one + if (f16==0){ + //put in the rest of the bits + frac <<= (uint_fast8_t)fracLength; + break; + } + + if (fracLength == 0){ + checkExtraTwoBitsP16(f16, temp, bitsNPlusOne, bitsMore); + + break; + } + } + else{ + frac <<= 1; //shift in a zero + fracLength--; + if (fracLength == 0){ + checkExtraTwoBitsP16(f16, temp, bitsNPlusOne, bitsMore); + break; + } + } + } + } + + return frac; +} +posit16_t convertFloatToP16(float a){ + return convertDoubleToP16((double) a); +} + +posit16_t convertDoubleToP16(double f16){ + union ui16_p16 uZ; + bool sign, regS; + uint_fast16_t reg, frac=0; + int_fast8_t exp=0; + bool bitNPlusOne=0, bitsMore=0; + + (f16>=0) ? 
(sign=0) : (sign=1); + + if (f16 == 0 ){ + uZ.ui = 0; + return uZ.p; + } + else if(f16 == INFINITY || f16 == -INFINITY || f16 == NAN){ + uZ.ui = 0x8000; + return uZ.p; + } + else if (f16 == 1) { + uZ.ui = 16384; + return uZ.p; + } + else if (f16 == -1){ + uZ.ui = 49152; + return uZ.p; + } + else if (f16 >= 268435456){ + //maxpos + uZ.ui = 32767; + return uZ.p; + } + else if (f16 <= -268435456){ + // -maxpos + uZ.ui = 32769; + return uZ.p; + } + else if(f16 <= 3.725290298461914e-9 && !sign){ + //minpos + uZ.ui = 1; + return uZ.p; + } + else if(f16 >= -3.725290298461914e-9 && sign){ + //-minpos + uZ.ui = 65535; + return uZ.p; + } + else if (f16>1 || f16<-1){ + if (sign){ + //Make negative numbers positive for easier computation + f16 = -f16; + } + + regS = 1; + reg = 1; //because k = m-1; so need to add back 1 + // minpos + if (f16 <= 3.725290298461914e-9){ + uZ.ui = 1; + } + else{ + //regime + while (f16>=4){ + f16 *=0.25; + reg++; + } + if (f16>=2){ + f16*=0.5; + exp++; + } + + int fracLength = 13-reg; + + if (fracLength<0){ + //reg == 14, means rounding bits is exp and just the rest. + if (f16>1) bitsMore = 1; + + } + else + frac = convertFractionP16 (f16, fracLength, &bitNPlusOne, &bitsMore); + + + if (reg==14 && frac>0) { + bitsMore = 1; + frac=0; + } + if (reg>14) + (regS) ? (uZ.ui= 32767): (uZ.ui=0x1); + else{ + uint_fast16_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + uZ.ui = ((uint16_t) (regime) << (14-reg)) + ((uint16_t) (exp)<< (13-reg)) + ((uint16_t)(frac)); + //n+1 frac bit is 1. 
Need to check if another bit is 1 too if not round to even + if (reg==14 && exp) bitNPlusOne = 1; + uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFF; + } + } + else if (f16 < 1 || f16 > -1 ){ + + if (sign){ + //Make negative numbers positive for easier computation + f16 = -f16; + } + regS = 0; + reg = 0; + + //regime + while (f16<1){ + f16 *= 4; + reg++; + } + if (f16>=2){ + f16/=2; + exp++; + } + if (reg==14){ + bitNPlusOne = exp; + if (frac>1) bitsMore = 1; + } + else{ + //only possible combination for reg=15 to reach here is 7FFF (maxpos) and FFFF (-minpos) + //but since it should be caught on top, so no need to handle + int_fast8_t fracLength = 13-reg; + frac = convertFractionP16 (f16, fracLength, &bitNPlusOne, &bitsMore); + } + + if (reg==14 && frac>0) { + bitsMore = 1; + frac=0; + } + if (reg>14) + (regS) ? (uZ.ui= 32767): (uZ.ui=0x1); + else{ + uint_fast16_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + uZ.ui = ((uint16_t) (regime) << (14-reg)) + ((uint16_t) (exp)<< (13-reg)) + ((uint16_t)(frac)); + //n+1 frac bit is 1. 
Need to check if another bit is 1 too if not round to even + if (reg==14 && exp) bitNPlusOne = 1; + uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFF; + } + else { + //NaR - for NaN, INF and all other combinations + uZ.ui = 0x8000; + } + return uZ.p; +} + + +#ifdef SOFTPOSIT_QUAD + void checkQuadExtraTwoBitsP16(__float128 f16, double temp, bool * bitsNPlusOne, bool * bitsMore ){ + temp /= 2; + if (temp<=f16){ + *bitsNPlusOne = 1; + f16-=temp; + } + if (f16>0) + *bitsMore = 1; + } + uint_fast16_t convertQuadFractionP16(__float128 f16, uint_fast8_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ + + uint_fast16_t frac=0; + + if(f16==0) return 0; + else if(f16==INFINITY) return 0x8000; + + f16 -= 1; //remove hidden bit + if (fracLength==0) + checkQuadExtraTwoBitsP16(f16, 1.0, bitsNPlusOne, bitsMore); + else{ + __float128 temp = 1; + while (true){ + temp /= 2; + if (temp<=f16){ + f16-=temp; + fracLength--; + frac = (frac<<1) + 1; //shift in one + if (f16==0){ + //put in the rest of the bits + frac <<= (uint_fast8_t)fracLength; + break; + } + + if (fracLength == 0){ + checkQuadExtraTwoBitsP16(f16, temp, bitsNPlusOne, bitsMore); + + break; + } + } + else{ + frac <<= 1; //shift in a zero + fracLength--; + if (fracLength == 0){ + checkQuadExtraTwoBitsP16(f16, temp, bitsNPlusOne, bitsMore); + break; + } + } + } + } + + return frac; + } + + + posit16_t convertQuadToP16(__float128 f16){ + union ui16_p16 uZ; + bool sign, regS; + uint_fast16_t reg, frac=0; + int_fast8_t exp=0; + bool bitNPlusOne=0, bitsMore=0; + + (f16>=0) ? 
(sign=0) : (sign=1); + + if (f16 == 0 ){ + uZ.ui = 0; + return uZ.p; + } + else if(f16 == INFINITY || f16 == -INFINITY || f16 == NAN){ + uZ.ui = 0x8000; + return uZ.p; + } + else if (f16 == 1) { + uZ.ui = 16384; + return uZ.p; + } + else if (f16 == -1){ + uZ.ui = 49152; + return uZ.p; + } + else if (f16 >= 268435456){ + //maxpos + uZ.ui = 32767; + return uZ.p; + } + else if (f16 <= -268435456){ + // -maxpos + uZ.ui = 32769; + return uZ.p; + } + else if(f16 <= 3.725290298461914e-9 && !sign){ + //minpos + uZ.ui = 1; + return uZ.p; + } + else if(f16 >= -3.725290298461914e-9 && sign){ + //-minpos + uZ.ui = 65535; + return uZ.p; + } + else if (f16>1 || f16<-1){ + if (sign){ + //Make negative numbers positive for easier computation + f16 = -f16; + } + regS = 1; + reg = 1; //because k = m-1; so need to add back 1 + // minpos + if (f16 <= 3.725290298461914e-9){ + uZ.ui = 1; + } + else{ + //regime + while (f16>=4){ + f16 *=0.25; + reg++; + } + if (f16>=2){ + f16*=0.5; + exp++; + } + + int8_t fracLength = 13-reg; + if (fracLength<0){ + //reg == 14, means rounding bits is exp and just the rest. + if (f16>1) bitsMore = 1; + } + else + frac = convertQuadFractionP16 (f16, fracLength, &bitNPlusOne, &bitsMore); + + if (reg==14 && frac>0) { + bitsMore = 1; + frac=0; + } + + if (reg>14) + (regS) ? (uZ.ui= 32767): (uZ.ui=0x1); + else{ + uint_fast16_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + uZ.ui = ((uint16_t) (regime) << (14-reg)) + ((uint16_t) (exp)<< (13-reg)) + ((uint16_t)(frac)); + //n+1 frac bit is 1. 
Need to check if another bit is 1 too if not round to even + if (reg==14 && exp) bitNPlusOne = 1; + uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFF; + } + } + else if (f16 < 1 || f16 > -1 ){ + if (sign){ + //Make negative numbers positive for easier computation + f16 = -f16; + } + regS = 0; + reg = 0; + + //regime + while (f16<1){ + f16 *= 4; + reg++; + } + if (f16>=2){ + f16/=2; + exp++; + } + if (reg==14){ + bitNPlusOne = exp; + if (frac>1) bitsMore = 1; + } + else{ + //only possible combination for reg=15 to reach here is 7FFF (maxpos) and FFFF (-minpos) + //but since it should be caught on top, so no need to handle + int_fast8_t fracLength = 13-reg; + frac = convertQuadFractionP16 (f16, fracLength, &bitNPlusOne, &bitsMore); + } + + if (reg==14 && frac>0) { + bitsMore = 1; + frac=0; + } + if (reg>14) + (regS) ? (uZ.ui= 32767): (uZ.ui=0x1); + else{ + uint_fast16_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + uZ.ui = ((uint16_t) (regime) << (14-reg)) + ((uint16_t) (exp)<< (13-reg)) + ((uint16_t)(frac)); + //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + if (reg==14 && exp) bitNPlusOne = 1; + uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFF; + + } + else { + //NaR - for NaN, INF and all other combinations + uZ.ui = 0x8000; + } + return uZ.p; + } +#endif + + + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertDecToPosit32.c b/source/luametatex/source/libraries/softposit/source/c_convertDecToPosit32.c new file mode 100644 index 000000000..9fdf83b7d --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertDecToPosit32.c @@ -0,0 +1,878 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <math.h> + +#ifdef SOFTPOSIT_QUAD +#include <quadmath.h> +#endif + +#include "platform.h" +#include "internals.h" + +#ifdef SOFTPOSIT_QUAD + +void checkQuadExtraP32TwoBits(__float128 f32, __float128 temp, bool * bitsNPlusOne, bool * bitsMore ){ + temp /= 2; + if (temp<=f32){ + *bitsNPlusOne = 1; + f32-=temp; + } + if (f32>0) + *bitsMore = 1; +} + +uint_fast32_t convertQuadFractionP32(__float128 f32, uint_fast16_t fracLength, bool * bitNPlusOne, bool * bitsMore ){ + + uint_fast32_t frac=0; + + if(f32==0) return 0; + else if(f32==INFINITY) return 0x80000000; + + f32 -= 1; //remove hidden bit + if (fracLength==0) + checkQuadExtraP32TwoBits(f32, 1.0, bitNPlusOne, bitsMore); + else{ + __float128 temp = 1; + while (true){ + temp /= 2; + if (temp<=f32){ + + f32-=temp; + + fracLength--; + frac = (frac<<1) + 1; //shift in one + + if (f32==0){ + frac <<= (uint_fast32_t)fracLength; + break; + } + + if (fracLength == 0){ + checkQuadExtraP32TwoBits(f32, temp, bitNPlusOne, bitsMore); + break; + } + } + else{ + frac <<= 1; //shift in a zero + fracLength--; + if (fracLength == 0){ + checkQuadExtraP32TwoBits(f32, temp, bitNPlusOne, bitsMore); + break; + } + } + + } + } + + return frac; +} +posit32_t convertQuadToP32(__float128 f32){ + union ui32_p32 uZ; + bool sign, regS; + uint_fast32_t reg, frac=0; + int_fast32_t exp=0; + bool bitNPlusOne=0, bitsMore=0; + + (f32>=0) ? 
(sign=0) : (sign=1); + + if (f32 == 0 ){ + uZ.ui = 0; + return uZ.p; + } + else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ + uZ.ui = 0x80000000; + return uZ.p; + } + else if (f32 == 1) { + uZ.ui = 0x40000000; + return uZ.p; + } + else if (f32 == -1){ + uZ.ui = 0xC0000000; + return uZ.p; + } + else if (f32 >= 1.329227995784916e+36){ + //maxpos + uZ.ui = 0x7FFFFFFF; + return uZ.p; + } + else if (f32 <= -1.329227995784916e+36){ + // -maxpos + uZ.ui = 0x80000001; + return uZ.p; + } + else if(f32 <= 7.52316384526264e-37 && !sign){ + //minpos + uZ.ui = 0x1; + return uZ.p; + } + else if(f32 >= -7.52316384526264e-37 && sign){ + //-minpos + uZ.ui = 0xFFFFFFFF; + return uZ.p; + } + else if (f32>1 || f32<-1){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + + regS = 1; + reg = 1; //because k = m-1; so need to add back 1 + // minpos + if (f32 <= 7.52316384526264e-37){ + uZ.ui = 1; + } + else{ + //regime + while (f32>=16){ + f32 *=0.0625; // f32/=16; + reg++; + } + + while (f32>=2){ + f32*=0.5; + exp++; + } + + int fracLength = 28-reg; + if (fracLength<0){ + //remove hidden bit + + //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits + if(reg==29){ + bitNPlusOne = exp&0x1; + exp>>=1; //taken care of by the pack algo + } + else{//reg=30 + bitNPlusOne=exp>>1; + bitsMore=exp&0x1; + exp=0; + } + if (f32!=1){ + bitsMore =1; + frac=0; + } + } + else + frac = convertQuadFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); + + if (reg>30 ){ + (regS) ? 
(uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); + } + //rounding off fraction bits + else{ + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + if (reg<=28) exp<<= (28-reg); + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac)); + uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + /*if(sign) + uZ.ui = -uZ.ui;*/ + } + } + else if (f32 < 1 || f32 > -1 ){ + + + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + regS = 0; + reg = 0; + + while (f32<1){ + f32 *= 16; + reg++; + } + + while (f32>=2){ + f32*=0.5; + exp++; + } + + //only possible combination for reg=15 to reach here is 7FFF (maxpos) and FFFF (-minpos) + //but since it should be caught on top, so no need to handle + int fracLength = 28-reg; + if (fracLength<0){ + //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits + if(reg==29){ + bitNPlusOne = exp&0x1; + exp>>=1; //taken care of by the pack algo + } + else{//reg=30 + bitNPlusOne=exp>>1; + bitsMore=exp&0x1; + exp=0; + } + if (f32!=1){//because of hidden bit + bitsMore =1; + frac=0; + } + } + else + frac = convertQuadFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); + + if (reg>30 ){ + (regS) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); + } + //rounding off fraction bits + else{ + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + if (reg<=28) exp<<= (28-reg); + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac)); + uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + } + else { + //NaR - for NaN, INF and all other combinations + uZ.ui = 0x80000000; + } + return uZ.p; + +} + +posit_2_t convertQuadToPX2(__float128 f32, int x){ + + union ui32_pX2 uZ; + bool sign, regS; + uint_fast32_t reg, frac=0; + int_fast32_t exp=0; + bool bitNPlusOne=0, bitsMore=0; + + (f32>=0) ? 
(sign=0) : (sign=1); + + if (f32 == 0 ){ + uZ.ui = 0; + return uZ.p; + } + else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ + uZ.ui = 0x80000000; + return uZ.p; + } + else if (f32 == 1) { + uZ.ui = 0x40000000; + return uZ.p; + } + else if (f32 == -1){ + uZ.ui = 0xC0000000; + return uZ.p; + } + /*else if (f32 >= 1.329227995784916e+36){ + //maxpos + uZ.ui = 0x7FFFFFFF; + return uZ.p; + } + else if (f32 <= -1.329227995784916e+36){ + // -maxpos + uZ.ui = 0x80000001; + return uZ.p; + } + else if(f32 <= 7.52316384526264e-37 && !sign){ + //minpos + uZ.ui = 0x1; + return uZ.p; + } + else if(f32 >= -7.52316384526264e-37 && sign){ + //-minpos + uZ.ui = 0xFFFFFFFF; + return uZ.p; + }*/ + else if (f32>1 || f32<-1){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + + regS = 1; + reg = 1; //because k = m-1; so need to add back 1 + // minpos + if (x==32 && f32 <= 7.52316384526264e-37){ + uZ.ui = 1; + } + else{ + //regime + while (f32>=16){ + f32 *=0.0625; // f32/=16; + reg++; + } + while (f32>=2){ + f32*=0.5; + exp++; + } + + int fracLength = x-4-reg; + if (fracLength<0){ + //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits + if(reg==x-3){ + bitNPlusOne = exp&0x1; + //exp>>=1; //taken care of by the pack algo + exp&=0x2; + } + else{//reg=30 + bitNPlusOne=exp>>1; + bitsMore=exp&0x1; + exp=0; + } + if (f32!=1){//because of hidden bit + bitsMore =1; + frac=0; + } + } + else + frac = convertQuadFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); + + + if (reg>(x-2) ){ + uZ.ui=(regS) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + //rounding off fraction bits + else{ + + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + if (reg<=28) exp<<= (28-reg); + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); + //minpos + if (uZ.ui==0 && frac>0){ + uZ.ui = 0x1 << (32-x); + } + if (bitNPlusOne) + uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + } + else if (f32 < 1 || f32 > -1 ){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + regS = 0; + reg = 0; + + //regime + while (f32<1){ + f32 *= 16; + reg++; + } + + while (f32>=2){ + f32*=0.5; + exp++; + } + + + int fracLength = x-4-reg; + if (fracLength<0){ + //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits + if(reg==x-3){ + bitNPlusOne = exp&0x1; + //exp>>=1; //taken care of by the pack algo + exp&=0x2; + } + else{//reg=30 + bitNPlusOne=exp>>1; + bitsMore=exp&0x1; + exp=0; + } + if (f32!=1){//because of hidden bit + bitsMore =1; + frac=0; + } + } + else + frac = convertQuadFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); + + if (reg>(x-2) ){ + uZ.ui=(regS) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + //rounding off fraction bits + else{ + + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + + if (reg<=28) exp<<= (28-reg); + + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); + //minpos + if (uZ.ui==0 && frac>0){ + uZ.ui = 0x1 << (32-x); + } + + if (bitNPlusOne){ + uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); + } + + + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + else { + //NaR - for NaN, INF and all other combinations + uZ.ui = 0x80000000; + } + return uZ.p; +} + +#endif + +void checkExtraP32TwoBits(double f32, double temp, bool * bitsNPlusOne, bool * bitsMore ){ + temp /= 2; + if (temp<=f32){ + *bitsNPlusOne = 1; + f32-=temp; + } + if (f32>0) + *bitsMore = 1; +} +uint_fast32_t convertFractionP32(double f32, uint_fast16_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ + + uint_fast32_t frac=0; + + if(f32==0) return 0; + else if(f32==INFINITY) return 0x80000000; + + f32 -= 1; //remove hidden bit + if (fracLength==0) + checkExtraP32TwoBits(f32, 1.0, bitsNPlusOne, bitsMore); + else{ + double temp = 1; + while (true){ + temp /= 2; + if (temp<=f32){ + f32-=temp; + fracLength--; + frac = (frac<<1) + 1; //shift in one + if (f32==0){ + frac <<= (uint_fast16_t)fracLength; + break; + } + + if (fracLength == 0){ + checkExtraP32TwoBits(f32, temp, bitsNPlusOne, bitsMore); + break; + } + } + else{ + + frac <<= 1; //shift in a zero + fracLength--; + if (fracLength == 0){ + checkExtraP32TwoBits(f32, temp, bitsNPlusOne, bitsMore); + break; + } + } + } + } + + return frac; +} + +posit32_t convertDoubleToP32(double f32){ + + union ui32_p32 uZ; + bool sign, regS; + uint_fast32_t reg, frac=0; + int_fast32_t exp=0; + bool bitNPlusOne=0, bitsMore=0; + + (f32>=0) ? 
(sign=0) : (sign=1); + + if (f32 == 0 ){ + uZ.ui = 0; + return uZ.p; + } + else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ + uZ.ui = 0x80000000; + return uZ.p; + } + else if (f32 == 1) { + uZ.ui = 0x40000000; + return uZ.p; + } + else if (f32 == -1){ + uZ.ui = 0xC0000000; + return uZ.p; + } + else if (f32 >= 1.329227995784916e+36){ + //maxpos + uZ.ui = 0x7FFFFFFF; + return uZ.p; + } + else if (f32 <= -1.329227995784916e+36){ + // -maxpos + uZ.ui = 0x80000001; + return uZ.p; + } + else if(f32 <= 7.52316384526264e-37 && !sign){ + //minpos + uZ.ui = 0x1; + return uZ.p; + } + else if(f32 >= -7.52316384526264e-37 && sign){ + //-minpos + uZ.ui = 0xFFFFFFFF; + return uZ.p; + } + else if (f32>1 || f32<-1){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + + regS = 1; + reg = 1; //because k = m-1; so need to add back 1 + // minpos + if (f32 <= 7.52316384526264e-37){ + uZ.ui = 1; + } + else{ + //regime + while (f32>=16){ + f32 *=0.0625; // f32/=16; + reg++; + } + while (f32>=2){ + f32*=0.5; + exp++; + } + + int8_t fracLength = 28-reg; + + if (fracLength<0){ + //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits + if(reg==29){ + bitNPlusOne = exp&0x1; + exp>>=1; //taken care of by the pack algo + } + else{//reg=30 + bitNPlusOne=exp>>1; + bitsMore=exp&0x1; + exp=0; + } + if (f32!=1){//because of hidden bit + bitsMore =1; + frac=0; + } + } + else + frac = convertFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); + + + if (reg>30 ){ + (regS) ? 
(uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); + } + //rounding off fraction bits + else{ + + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + if (reg<=28) exp<<= (28-reg); + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac)); + uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + } + else if (f32 < 1 || f32 > -1 ){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + regS = 0; + reg = 0; + + //regime + while (f32<1){ + f32 *= 16; + reg++; + } + + while (f32>=2){ + f32*=0.5; + exp++; + } + + + //only possible combination for reg=15 to reach here is 7FFF (maxpos) and FFFF (-minpos) + //but since it should be caught on top, so no need to handle + int_fast8_t fracLength = 28-reg; + if (fracLength<0){ + //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits + if(reg==29){ + bitNPlusOne = exp&0x1; + exp>>=1; //taken care of by the pack algo + } + else{//reg=30 + bitNPlusOne=exp>>1; + bitsMore=exp&0x1; + exp=0; + } + if (f32!=1){//because of hidden bit + bitsMore =1; + frac=0; + } + } + else + frac = convertFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); + + + if (reg>30 ){ + (regS) ? 
(uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); + } + //rounding off fraction bits + else{ + + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + if (reg<=28) exp<<= (28-reg); + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac)); + uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + else { + //NaR - for NaN, INF and all other combinations + uZ.ui = 0x80000000; + } + return uZ.p; +} + + +posit32_t convertFloatToP32(float a){ + return convertDoubleToP32((double) a ); +} + + +posit_2_t convertDoubleToPX2(double f32, int x){ + + union ui32_pX2 uZ; + bool sign, regS; + uint_fast32_t reg, frac=0; + int_fast32_t exp=0; + bool bitNPlusOne=0, bitsMore=0; + + (f32>=0) ? (sign=0) : (sign=1); + + if (f32 == 0 ){ + uZ.ui = 0; + return uZ.p; + } + else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ + uZ.ui = 0x80000000; + return uZ.p; + } + else if (f32 == 1) { + uZ.ui = 0x40000000; + return uZ.p; + } + else if (f32 == -1){ + uZ.ui = 0xC0000000; + return uZ.p; + } + /*else if (f32 >= 1.329227995784916e+36){ + //maxpos + uZ.ui = 0x7FFFFFFF; + return uZ.p; + } + else if (f32 <= -1.329227995784916e+36){ + // -maxpos + uZ.ui = 0x80000001; + return uZ.p; + } + else if(f32 <= 7.52316384526264e-37 && !sign){ + //minpos + uZ.ui = 0x1; + return uZ.p; + } + else if(f32 >= -7.52316384526264e-37 && sign){ + //-minpos + uZ.ui = 0xFFFFFFFF; + return uZ.p; + }*/ + else if (f32>1 || f32<-1){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + + regS = 1; + reg = 1; //because k = m-1; so need to add back 1 + // minpos + if (x==32 && f32 <= 7.52316384526264e-37){ + uZ.ui = 1; + } + else{ + //regime + while (f32>=16){ + f32 *=0.0625; // f32/=16; + reg++; + } + while (f32>=2){ + f32*=0.5; + exp++; + } + + int fracLength = x-4-reg; + if (fracLength<0){ + //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits + if(reg==x-3){ + bitNPlusOne = 
exp&0x1; + //exp>>=1; //taken care of by the pack algo + exp&=0x2; + } + else{//reg=30 + bitNPlusOne=exp>>1; + bitsMore=exp&0x1; + exp=0; + } + if (f32!=1){//because of hidden bit + bitsMore =1; + frac=0; + } + } + else + frac = convertFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); + + + if (reg>(x-2) ){ + uZ.ui=(regS) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + //rounding off fraction bits + else{ + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + + if (x==32 && reg==29) exp>>=1; + else if (reg<=28) exp<<= (28-reg); + + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); + //minpos + if (uZ.ui==0 && frac>0){ + uZ.ui = 0x1 << (32-x); + } + if (bitNPlusOne) + uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + } + else if (f32 < 1 || f32 > -1 ){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + regS = 0; + reg = 0; + + //regime + while (f32<1){ + f32 *= 16; + reg++; + } + + while (f32>=2){ + f32*=0.5; + exp++; + } + + int fracLength = x-4-reg; + if (fracLength<0){ + //in both cases, reg=29 and 30, e is n+1 bit and frac are sticky bits + if(reg==x-3){ + bitNPlusOne = exp&0x1; + //exp>>=1; //taken care of by the pack algo + exp&=0x2; + } + else{//reg=30 + bitNPlusOne=exp>>1; + bitsMore=exp&0x1; + exp=0; + } + if (f32!=1){//because of hidden bit + bitsMore =1; + frac=0; + } + } + else + frac = convertFractionP32 (f32, fracLength, &bitNPlusOne, &bitsMore); + + if (reg>(x-2) ){ + uZ.ui=(regS) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + //rounding off fraction bits + else{ + + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + + if (x==32 && reg==29) exp>>=1; + else if (reg<=28) exp<<= (28-reg); + + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); + //minpos + if (uZ.ui==0 && frac>0){ + uZ.ui = 0x1 << (32-x); + } + + if (bitNPlusOne){ + uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); + } + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + else { + //NaR - for NaN, INF and all other combinations + uZ.ui = 0x80000000; + } + return uZ.p; +} + + + + + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertDecToPosit8.c b/source/luametatex/source/libraries/softposit/source/c_convertDecToPosit8.c new file mode 100644 index 000000000..16d5f527f --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertDecToPosit8.c @@ -0,0 +1,211 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include <math.h> + +#include "platform.h" +#include "internals.h" + +void checkExtraTwoBitsP8(double f8, double temp, bool * bitsNPlusOne, bool * bitsMore ){ + temp /= 2; + if (temp<=f8){ + *bitsNPlusOne = 1; + f8-=temp; + } + if (f8>0) + *bitsMore = 1; +} +uint_fast16_t convertFractionP8(double f8, uint_fast8_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ + + uint_fast8_t frac=0; + + if(f8==0) return 0; + else if(f8==INFINITY) return 0x80; + + f8 -= 1; //remove hidden bit + if (fracLength==0) + checkExtraTwoBitsP8(f8, 1.0, bitsNPlusOne, bitsMore); + else{ + double temp = 1; + while (true){ + temp /= 2; + if (temp<=f8){ + f8-=temp; + fracLength--; + frac = (frac<<1) + 1; //shift in one + if (f8==0){ + //put in the rest of the bits + frac <<= (uint_fast8_t)fracLength; + break; + } + + if (fracLength == 0){ + checkExtraTwoBitsP8(f8, temp, bitsNPlusOne, bitsMore); + + break; + } + } + else{ + frac <<= 1; //shift in a zero + fracLength--; + if (fracLength == 0){ + checkExtraTwoBitsP8(f8, temp, bitsNPlusOne, bitsMore); + break; + } + } + } + } + + + //printf("convertfloat: frac:%d f16: %.26f bitsNPlusOne: %d, bitsMore: %d\n", frac, f16, 
bitsNPlusOne, bitsMore); + + return frac; +} +posit8_t convertDoubleToP8(double f8){ + union ui8_p8 uZ; + bool sign; + uint_fast8_t reg, frac=0; + bool bitNPlusOne=0, bitsMore=0; + + (f8>=0) ? (sign=0) : (sign=1); + // sign: 1 bit, frac: 8 bits, mantisa: 23 bits + //sign = a.parts.sign; + //frac = a.parts.fraction; + //exp = a.parts.exponent; + + if (f8 == 0 ){ + uZ.ui = 0; + return uZ.p; + } + else if(f8 == INFINITY || f8 == -INFINITY || f8 == NAN){ + uZ.ui = 0x80; + return uZ.p; + } + else if (f8 == 1) { + uZ.ui = 0x40; + return uZ.p; + } + else if (f8 == -1){ + uZ.ui = 0xC0; + return uZ.p; + } + else if (f8 >= 64){ + //maxpos + uZ.ui = 0x7F; + return uZ.p; + } + else if (f8 <= -64){ + // -maxpos + uZ.ui = 0x81; + return uZ.p; + } + else if(f8 <= 0.015625 && !sign){ + //minpos + uZ.ui = 0x1; + return uZ.p; + } + else if(f8 >= -0.015625 && sign){ + //-minpos + uZ.ui = 0xFF; + return uZ.p; + } + else if (f8>1 || f8<-1){ + if (sign){ + //Make negative numbers positive for easier computation + f8 = -f8; + } + reg = 1; //because k = m-1; so need to add back 1 + // minpos + if (f8 <= 0.015625){ + uZ.ui = 1; + } + else{ + //regime + while (f8>=2){ + f8 *=0.5; + reg++; + } + + //rounding off regime bits + if (reg>6 ) + uZ.ui= 0x7F; + else{ + int8_t fracLength = 6-reg; + frac = convertFractionP8 (f8, fracLength, &bitNPlusOne, &bitsMore); + uint_fast8_t regime = 0x7F - (0x7F>>reg); + uZ.ui = packToP8UI(regime, frac); + if (bitNPlusOne) + uZ.ui += ((uZ.ui&1) | bitsMore ); + } + if(sign) uZ.ui = -uZ.ui & 0xFF; + } + } + else if (f8 < 1 || f8 > -1 ){ + + if (sign){ + //Make negative numbers positive for easier computation + f8 = -f8; + } + reg = 0; + + //regime + //printf("here we go\n"); + while (f8<1){ + f8 *= 2; + reg++; + } + //rounding off regime bits + if (reg>6 ) + uZ.ui=0x1; + else{ + int_fast8_t fracLength = 6-reg; + frac = convertFractionP8 (f8, fracLength, &bitNPlusOne, &bitsMore); + uint_fast8_t regime = 0x40>>reg; + uZ.ui = packToP8UI(regime, frac); + if 
(bitNPlusOne) + uZ.ui += ((uZ.ui&1) | bitsMore ); + } + if(sign) uZ.ui = -uZ.ui & 0xFF; + + } + else { + //NaR - for NaN, INF and all other combinations + uZ.ui = 0x80; + } + return uZ.p; +} + + + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertDecToPositX1.c b/source/luametatex/source/libraries/softposit/source/c_convertDecToPositX1.c new file mode 100644 index 000000000..62550f141 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertDecToPositX1.c @@ -0,0 +1,461 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <math.h> + +#ifdef SOFTPOSIT_QUAD +#include <quadmath.h> +#endif + +#include "platform.h" +#include "internals.h" + +#ifdef SOFTPOSIT_QUAD + +void checkQuadExtraPX1TwoBits(__float128 f32, __float128 temp, bool * bitsNPlusOne, bool * bitsMore ){ + temp /= 2; + if (temp<=f32){ + *bitsNPlusOne = 1; + f32-=temp; + } + if (f32>0) + *bitsMore = 1; +} + +uint_fast32_t convertQuadFractionPX1(__float128 f32, uint_fast16_t fracLength, bool * bitNPlusOne, bool * bitsMore ){ + + uint_fast32_t frac=0; + + if(f32==0) return 0; + else if(f32==INFINITY) return 0x80000000; + + f32 -= 1; //remove hidden bit + if (fracLength==0) + checkQuadExtraPX1TwoBits(f32, 1.0, bitNPlusOne, bitsMore); + else{ + __float128 temp = 1; + while (true){ + temp /= 2; + if (temp<=f32){ + + f32-=temp; + + fracLength--; + frac = (frac<<1) + 1; //shift in one + + if (f32==0){ + frac <<= (uint_fast32_t)fracLength; + break; + } + + if (fracLength == 0){ + checkQuadExtraPX1TwoBits(f32, temp, bitNPlusOne, bitsMore); + break; + } + } + else{ + frac <<= 1; //shift in a zero + fracLength--; + if (fracLength == 0){ + checkQuadExtraPX1TwoBits(f32, temp, bitNPlusOne, bitsMore); + break; + } + } + + } + } + + return frac; +} + +posit_1_t convertQuadToPX1(__float128 f32, int x){ + + union ui32_pX1 uZ; + bool sign, regS; + uint_fast32_t reg, frac=0; + int_fast32_t exp=0; + bool bitNPlusOne=0, bitsMore=0; + + (f32>=0) ? 
(sign=0) : (sign=1); + + if (f32 == 0 ){ + uZ.ui = 0; + return uZ.p; + } + else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ + uZ.ui = 0x80000000; + return uZ.p; + } + else if (f32 == 1) { + uZ.ui = 0x40000000; + return uZ.p; + } + else if (f32 == -1){ + uZ.ui = 0xC0000000; + return uZ.p; + } + else if (f32>1 || f32<-1){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + + regS = 1; + reg = 1; //because k = m-1; so need to add back 1 + // minpos + if (x==32 && f32 <= 8.673617379884035e-19){ + uZ.ui = 1; + } + else{ + //regime + while (f32>=4){ + f32 *=0.25; // f32/=4; + reg++; + } + if (f32>=2){ + f32*=0.5; + exp++; + } + + int fracLength = x-3-reg; + if (fracLength<0){ + if (reg==x-2){ + bitNPlusOne=exp; + exp=0; + } + if(f32>1) bitsMore=1; + } + else + frac = convertQuadFractionPX1 (f32, fracLength, &bitNPlusOne, &bitsMore); + + if (reg==30 && frac>0){ + bitsMore = 1; + frac = 0; + } + + if (reg>(x-2) ){ + uZ.ui=(regS) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + //rounding off fraction bits + else{ + + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) (exp)<< (29-reg)) + ((uint32_t)(frac<<(32-x))); + //uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); + //minpos + if (uZ.ui==0 && frac>0){ + uZ.ui = 0x1 << (32-x); + } + if (bitNPlusOne) + uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + } + else if (f32 < 1 || f32 > -1 ){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + regS = 0; + reg = 0; + + //regime + while (f32<1){ + f32 *= 4; + reg++; + } + + if (f32>=2){ + f32*=0.5; + exp++; + } + + + int fracLength = x-3-reg; + if (fracLength<0){ + if (reg==x-2){ + bitNPlusOne=exp; + exp=0; + } + if(f32>1) bitsMore=1; + } + else + frac = convertQuadFractionPX1 (f32, fracLength, 
&bitNPlusOne, &bitsMore); + + if (reg==30 && frac>0){ + bitsMore = 1; + frac = 0; + } + + if (reg>(x-2) ){ + uZ.ui=(regS) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + //rounding off fraction bits + else{ + + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) (exp)<< (29-reg)) + ((uint32_t)(frac<<(32-x))); + //uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); + //minpos + if (uZ.ui==0 && frac>0){ + uZ.ui = 0x1 << (32-x); + } + if (bitNPlusOne) + uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + else { + //NaR - for NaN, INF and all other combinations + uZ.ui = 0x80000000; + } + return uZ.p; +} + +#endif + +void checkExtraPX1TwoBits(double f32, double temp, bool * bitsNPlusOne, bool * bitsMore ){ + temp /= 2; + if (temp<=f32){ + *bitsNPlusOne = 1; + f32-=temp; + } + if (f32>0) + *bitsMore = 1; +} +uint_fast32_t convertFractionPX1(double f32, uint_fast16_t fracLength, bool * bitsNPlusOne, bool * bitsMore ){ + + uint_fast32_t frac=0; + + if(f32==0) return 0; + else if(f32==INFINITY) return 0x80000000; + + f32 -= 1; //remove hidden bit + if (fracLength==0) + checkExtraPX1TwoBits(f32, 1.0, bitsNPlusOne, bitsMore); + else{ + double temp = 1; + while (true){ + temp /= 2; + if (temp<=f32){ + f32-=temp; + fracLength--; + frac = (frac<<1) + 1; //shift in one + if (f32==0){ + frac <<= (uint_fast16_t)fracLength; + break; + } + + if (fracLength == 0){ + checkExtraPX1TwoBits(f32, temp, bitsNPlusOne, bitsMore); + break; + } + } + else{ + + frac <<= 1; //shift in a zero + fracLength--; + if (fracLength == 0){ + checkExtraPX1TwoBits(f32, temp, bitsNPlusOne, bitsMore); + break; + } + } + } + } + + return frac; +} + + +posit_1_t convertDoubleToPX1(double f32, int x){ + + union ui32_pX1 uZ; + bool sign, regS; + uint_fast32_t reg, frac=0; + int_fast32_t exp=0; + bool bitNPlusOne=0, 
bitsMore=0; + + (f32>=0) ? (sign=0) : (sign=1); + + if (f32 == 0 ){ + uZ.ui = 0; + return uZ.p; + } + else if(f32 == INFINITY || f32 == -INFINITY || f32 == NAN){ + uZ.ui = 0x80000000; + return uZ.p; + } + else if (f32 == 1) { + uZ.ui = 0x40000000; + return uZ.p; + } + else if (f32 == -1){ + uZ.ui = 0xC0000000; + return uZ.p; + } + else if (f32>1 || f32<-1){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + regS = 1; + reg = 1; //because k = m-1; so need to add back 1 + // minpos + if (x==32 && f32 <= 8.673617379884035e-19){ + uZ.ui = 1; + } + else{ + //regime + while (f32>=4){ + f32 *=0.25; // f32/=4; + reg++; + } + if (f32>=2){ + f32*=0.5; + exp++; + } +//printf("reg: %d, exp: %d f32: %.26lf\n", reg, exp, f32); + int fracLength = x-3-reg; + if (fracLength<0){ + if (reg==x-2){ + bitNPlusOne=exp; + exp=0; + } + if(f32>1) bitsMore=1; + } + else + frac = convertFractionPX1(f32, fracLength, &bitNPlusOne, &bitsMore); + + if (reg==30 && frac>0){ + bitsMore = 1; + frac = 0; + } + + if (reg>(x-2) ){ + uZ.ui=(regS) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + //rounding off fraction bits + else{ + + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; +//printf("reg: %d, exp: %d bitNPlusOne: %d bitsMore: %d\n", reg, exp, bitNPlusOne, bitsMore); +//printBinary(®ime, 32); + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) (exp)<< (29-reg)) + ((uint32_t)(frac<<(32-x))); + //uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); + //minpos + if (uZ.ui==0 && frac>0){ + uZ.ui = 0x1 << (32-x); + } +//printBinary(&uZ.ui, 32); +//uint32_t tt = (uZ.ui>>(32-x)); +//printBinary(&tt, 32); + if (bitNPlusOne) + uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + } + else if (f32 < 1 || f32 > -1 ){ + if (sign){ + //Make negative numbers positive for easier computation + f32 = -f32; + } + regS = 0; + reg = 0; + + //regime + while (f32<1){ + f32 *= 4; + reg++; + } + + if (f32>=2){ + f32*=0.5; + exp++; + } + + int fracLength = x-3-reg; + if (fracLength<0){ + if (reg==x-2){ + bitNPlusOne=exp; + exp=0; + } + + if(f32>1) bitsMore=1; + } + else + frac = convertFractionPX1 (f32, fracLength, &bitNPlusOne, &bitsMore); + + if (reg==30 && frac>0){ + bitsMore = 1; + frac = 0; + } + + if (reg>(x-2) ){ + uZ.ui=(regS) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + //rounding off fraction bits + else{ + uint_fast32_t regime = 1; + if (regS) regime = ( (1<<reg)-1 ) <<1; + uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) (exp)<< (29-reg)) + ((uint32_t)(frac<<(32-x))); + //uZ.ui = ((uint32_t) (regime) << (30-reg)) + ((uint32_t) exp ) + ((uint32_t)(frac<<(32-x))); + //minpos + if (uZ.ui==0 && frac>0){ + uZ.ui = 0x1 << (32-x); + } + if (bitNPlusOne) + uZ.ui += ( ((uZ.ui>>(32-x)) & 0x1) | bitsMore ) << (32-x); + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + } + else { + //NaR - for NaN, INF and all other combinations + uZ.ui = 0x80000000; + } + return uZ.p; +} + + + + + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertPosit16ToDec.c b/source/luametatex/source/libraries/softposit/source/c_convertPosit16ToDec.c new file mode 100644 index 000000000..9befa42ed --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertPosit16ToDec.c @@ -0,0 +1,172 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <math.h> + +#include "platform.h" +#include "internals.h" + + +double convertP16ToDouble(posit16_t a){ + union ui16_p16 uZ; + //posit16 p16; + double d16; + uZ.p = a; + + if (uZ.ui==0){ + return 0; + } + else if(uZ.ui==0x7FFF){ //maxpos -> 32767 + return 268435456; + } + else if (uZ.ui==0x8001){ //-maxpos -> 32769 + return -268435456; + } + else if (uZ.ui == 0x8000){ //NaR -> 32768 + return NAN; + } + + bool regS, sign; + uint_fast16_t reg, shift=2, frac; + int_fast16_t k=0; + int_fast8_t exp; + double fraction_max; + + sign = signP16UI( uZ.ui ); + if (sign) + uZ.ui = -uZ.ui & 0xFFFF; + regS = signregP16UI( uZ.ui ); + + uint_fast16_t tmp = (uZ.ui<<2) & 0xFFFF; + if (regS){ + while (tmp>>15){ + k++; + shift++; + tmp= (tmp<<1) & 0xFFFF; + } + reg = k+1; + } + else{ + k=-1; + while (!(tmp>>15)){ + k--; + shift++; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + reg =-k; + } + exp = tmp>>14; + frac = (tmp & 0x3FFF) >> shift; + + + fraction_max = pow(2, 13-reg) ; + d16 = (double)( pow(4, k)* pow(2, exp) * (1+((double)frac/fraction_max)) ); + + if (sign) + d16 = -d16; + + return d16; +} + +#ifdef SOFTPOSIT_QUAD + __float128 
convertP16ToQuadDec(posit16_t a){ + + union ui16_p16 uZ; + __float128 p16; + uZ.p = a; + + if (uZ.ui==0){ + p16 = 0; + return p16; + } + else if(uZ.ui==0x7FFF){ //maxpos -> 32767 + p16 = 268435456; + return p16; + } + else if (uZ.ui==0x8001){ //-maxpos -> 32769 + p16 = -268435456; + return p16; + } + else if (uZ.ui == 0x8000){ //NaR -> 32768 + p16 = INFINITY; + return p16; + } + + bool regS, sign; + uint_fast16_t reg, shift=2, frac; + int_fast16_t k=0; + int_fast8_t exp; + __float128 fraction_max; + + sign = signP16UI( uZ.ui ); + if (sign) + uZ.ui = -uZ.ui & 0xFFFF; + regS = signregP16UI( uZ.ui ); + + uint_fast16_t tmp = (uZ.ui<<2) & 0xFFFF; + if (regS){ + while (tmp>>15){ + k++; + shift++; + tmp= (tmp<<1) & 0xFFFF; + } + reg = k+1; + } + else{ + k=-1; + while (!(tmp>>15)){ + k--; + shift++; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + reg =-k; + } + exp = tmp>>14; + frac = (tmp & 0x3FFF) >> shift; + + + fraction_max = pow(2, 13-reg) ; + p16 = (__float128)( pow(4, k)* pow(2, exp) * (1+((__float128)frac/fraction_max)) ); + + if (sign) + p16 = -p16; + + return p16; + + } +#endif + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertPosit32ToDec.c b/source/luametatex/source/libraries/softposit/source/c_convertPosit32ToDec.c new file mode 100644 index 000000000..f04b23d21 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertPosit32ToDec.c @@ -0,0 +1,360 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <math.h> + +#ifdef SOFTPOSIT_QUAD +#include <quadmath.h> +#endif + +#include "platform.h" +#include "internals.h" + + + +#ifdef SOFTPOSIT_QUAD +__float128 convertP32ToQuad(posit32_t a){ + + union ui32_p32 uZ; + __float128 p32; + uZ.p = a; + + if (uZ.ui==0){ + p32 = 0; + return p32; + } + else if(uZ.ui==0x7FFFFFFF){ //maxpos + p32 = 1.329227995784916e+36; + return p32; + } + else if (uZ.ui==0x80000001){ //-maxpos + p32 = -1.329227995784916e+36; + return p32; + } + else if (uZ.ui == 0x80000000){ + p32 = NAN; + return p32; + } + + bool regS, sign; + uint_fast32_t reg, shift=2, frac, tmp; + int_fast32_t k=0; + int_fast8_t exp; + __float128 fraction_max; + + sign = signP32UI( uZ.ui ); + if (sign) + uZ.ui = -uZ.ui & 0xFFFFFFFF; + regS = signregP32UI( uZ.ui ); + + tmp = tmp = (uZ.ui<<2)&0xFFFFFFFF; + if (regS){ + while (tmp>>31){ + k++; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + reg = k+1; + } + else{ + k=-1; + while (!(tmp>>31)){ + k--; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + reg =-k; + } + exp = tmp>>29; + frac = (tmp & 0x1FFFFFFF) >> shift; + + (reg>28) ? 
(fraction_max=1) : (fraction_max = pow(2, 28-reg) ) ; + + p32 = (__float128)( pow(16, k)* pow(2, exp) * (1+((__float128)frac/fraction_max)) ); + + if (sign) + p32 = -p32; + + return p32; + +} + +__float128 convertPX2ToQuad(posit_2_t a){ + + union ui32_pX2 uZ; + __float128 p32; + uZ.p = a; + + if (uZ.ui==0){ + p32 = 0; + return p32; + } + else if(uZ.ui==0x7FFFFFFF){ //maxpos + p32 = 1.329227995784916e+36; + return p32; + } + else if (uZ.ui==0x80000001){ //-maxpos + p32 = -1.329227995784916e+36; + return p32; + } + else if (uZ.ui == 0x80000000){ + p32 = INFINITY; + return p32; + } + + bool regS, sign; + uint_fast32_t reg, shift=2, frac, tmp; + int_fast32_t k=0; + int_fast8_t exp; + __float128 fraction_max; + + sign = signP32UI( uZ.ui ); + if (sign) + uZ.ui = -uZ.ui & 0xFFFFFFFF; + regS = signregP32UI( uZ.ui ); + + tmp = tmp = (uZ.ui<<2)&0xFFFFFFFF; + if (regS){ + while (tmp>>31){ + k++; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + reg = k+1; + } + else{ + k=-1; + while (!(tmp>>31)){ + k--; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + reg =-k; + } + exp = tmp>>29; + frac = (tmp & 0x1FFFFFFF) >> shift; + + (reg>28) ? 
(fraction_max=1) : (fraction_max = pow(2, 28-reg) ) ; + + p32 = (__float128)( pow(16, k)* pow(2, exp) * (1+((__float128)frac/fraction_max)) ); + + if (sign) + p32 = -p32; + + return p32; + +} + + +#endif + + +double convertP32ToDouble(posit32_t pA){ + union ui32_p32 uA; + union ui64_double uZ; + uint_fast32_t uiA, tmp=0; + uint_fast64_t expA=0, uiZ, fracA=0; + bool signA=0, regSA; + int_fast32_t scale, kA=0; + + uA.p = pA; + uiA = uA.ui; + + if (uA.ui == 0) + return 0; + else if(uA.ui == 0x80000000) + return NAN; + + else{ + signA = signP32UI( uiA ); + if(signA) uiA = (-uiA & 0xFFFFFFFF); + regSA = signregP32UI(uiA); + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA = tmp>>29; //to get 2 bits + + fracA = (((uint64_t)tmp<<3) & 0xFFFFFFFF)<<20; + + expA = (((kA<<2)+expA) + 1023) << 52; + uiZ = expA + fracA + (((uint64_t)signA&0x1)<<63); + + uZ.ui = uiZ; + return uZ.d; + } +} +/*double convertP32ToDoubleOld(posit32_t pA){ + union ui32_p32 uZ; + double d32; + uZ.p = pA; + + if (uZ.ui==0){ + return 0; + } + else if(uZ.ui==0x7FFFFFFF){ //maxpos + return 1.329227995784916e+36; + } + else if (uZ.ui==0x80000001){ //-maxpos + return -1.329227995784916e+36; + } + else if (uZ.ui == 0x80000000){ + return INFINITY; + } + + bool regS, sign; + uint_fast32_t reg, shift=2, frac, tmp; + int_fast32_t k=0; + int_fast8_t exp; + double fraction_max; + + sign = signP32UI( uZ.ui ); + if (sign) + uZ.ui = -uZ.ui & 0xFFFFFFFF; + regS = signregP32UI( uZ.ui ); + + tmp = (uZ.ui<<2)&0xFFFFFFFF; + if (regS){ + while (tmp>>31){ + k++; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + reg = k+1; + } + else{ + k=-1; + while (!(tmp>>31)){ + k--; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + reg =-k; + } + exp = tmp>>29; + + frac = (tmp & 0x1FFFFFFF) >> shift; + + (reg>28) ? 
(fraction_max=1) : (fraction_max = pow(2, 28-reg) ) ; + + + d32 = (double)( pow(16, k)* pow(2, exp) * (1+((double)frac/fraction_max)) ); + if (sign) + d32 = -d32; + + return d32; + +}*/ + + +double convertPX2ToDouble(posit_2_t a){ + union ui32_pX2 uZ; + double d32; + uZ.p = a; + + if (uZ.ui==0){ + return 0; + } + else if(uZ.ui==0x7FFFFFFF){ //maxpos + return 1.329227995784916e+36; + } + else if (uZ.ui==0x80000001){ //-maxpos + return -1.329227995784916e+36; + } + else if (uZ.ui == 0x80000000){ + return INFINITY; + } + + bool regS, sign; + uint_fast32_t reg, shift=2, frac, tmp; + int_fast32_t k=0; + int_fast8_t exp; + double fraction_max; + + sign = signP32UI( uZ.ui ); + if (sign) + uZ.ui = -uZ.ui & 0xFFFFFFFF; + regS = signregP32UI( uZ.ui ); + + tmp = (uZ.ui<<2)&0xFFFFFFFF; + if (regS){ + while (tmp>>31){ + k++; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + reg = k+1; + } + else{ + k=-1; + while (!(tmp>>31)){ + k--; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + reg =-k; + } + exp = tmp>>29; + + frac = (tmp & 0x1FFFFFFF) >> shift; + + (reg>28) ? (fraction_max=1) : (fraction_max = pow(2, 28-reg) ) ; + + + d32 = (double)( pow(16, k)* pow(2, exp) * (1+((double)frac/fraction_max)) ); + if (sign) + d32 = -d32; + + return d32; + +} + + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertPosit8ToDec.c b/source/luametatex/source/libraries/softposit/source/c_convertPosit8ToDec.c new file mode 100644 index 000000000..f8af0b97f --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertPosit8ToDec.c @@ -0,0 +1,98 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <math.h> + +#include "platform.h" +#include "internals.h" + + +double convertP8ToDouble(posit8_t a){ + union ui8_p8 uZ; + double d8; + uZ.p = a; + + if (uZ.ui==0){ + return 0; + } + else if(uZ.ui==0x7F){ //maxpos + return 64; + } + else if (uZ.ui==0x81){ //-maxpos + return -64; + } + else if (uZ.ui == 0x80){ //NaR + return NAN; + } + + bool regS, sign; + uint_fast8_t reg, shift=2, frac; + int_fast8_t k=0; + double fraction_max; + + sign = signP8UI( uZ.ui ); + if (sign) uZ.ui = -uZ.ui & 0xFF; + regS = signregP8UI( uZ.ui ); + + uint_fast8_t tmp = (uZ.ui<<2) & 0xFF; + if (regS){ + while (tmp>>7){ + k++; + shift++; + tmp= (tmp<<1) & 0xFF; + } + reg = k+1; + } + else{ + k=-1; + while (!(tmp>>7)){ + k--; + shift++; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + reg =-k; + } + frac = (tmp & 0x7F) >> shift; + + + fraction_max = pow(2, 6-reg) ; + d8 = (double)( pow(2, k) * (1+((double)frac/fraction_max)) ); + + if (sign) + d8 = -d8; + + return d8; +} diff --git a/source/luametatex/source/libraries/softposit/source/c_convertPositX1ToDec.c b/source/luametatex/source/libraries/softposit/source/c_convertPositX1ToDec.c new file mode 100644 index 000000000..cf757a321 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertPositX1ToDec.c @@ -0,0 +1,184 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <math.h> + +#ifdef SOFTPOSIT_QUAD +#include <quadmath.h> +#endif + +#include "platform.h" +#include "internals.h" + + + +#ifdef SOFTPOSIT_QUAD + +__float128 convertPX1ToQuad(posit_1_t a){ + + union ui32_pX1 uZ; + __float128 p32; + uZ.p = a; + + if (uZ.ui==0){ + p32 = 0; + return p32; + } + else if(uZ.ui==0x7FFFFFFF){ //maxpos + p32 = 1152921504606847000; + return p32; + } + else if (uZ.ui==0x80000001){ //-maxpos + p32 = -1152921504606847000; + return p32; + } + else if (uZ.ui == 0x80000000){ + p32 = NAN; + return p32; + } + + bool regS, sign; + uint_fast32_t reg, shift=2, frac, tmp; + int_fast32_t k=0; + int_fast8_t exp; + __float128 fraction_max; + + sign = signP32UI( uZ.ui ); + if (sign) + uZ.ui = -uZ.ui & 0xFFFFFFFF; + regS = signregP32UI( uZ.ui ); + + tmp = tmp = (uZ.ui<<2)&0xFFFFFFFF; + if (regS){ + while (tmp>>31){ + k++; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + reg = k+1; + } + else{ + k=-1; + while (!(tmp>>31)){ + k--; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + reg =-k; + } + exp = tmp>>30; + frac = (tmp & 0x1FFFFFFF) >> shift; + + (reg>29) ? 
(fraction_max=1) : (fraction_max = pow(2, 29-reg) ) ; + + p32 = (__float128)( pow(16, k)* pow(2, exp) * (1+((__float128)frac/fraction_max)) ); + + if (sign) + p32 = -p32; + + return p32; + +} + + +#endif + + + + +double convertPX1ToDouble(posit_1_t a){ + union ui32_pX1 uZ; + double d32; + uZ.p = a; + + if (uZ.ui==0){ + return 0; + } + else if(uZ.ui==0x7FFFFFFF){ //maxpos + return 1152921504606847000; + } + else if (uZ.ui==0x80000001){ //-maxpos + return -1152921504606847000; + } + else if (uZ.ui == 0x80000000){ + return NAN; + } + + bool regS, sign; + uint_fast32_t reg, shift=2, frac, tmp; + int_fast32_t k=0; + int_fast8_t exp; + double fraction_max; + + sign = signP32UI( uZ.ui ); + if (sign) + uZ.ui = -uZ.ui & 0xFFFFFFFF; + regS = signregP32UI( uZ.ui ); + + tmp = (uZ.ui<<2)&0xFFFFFFFF; + if (regS){ + while (tmp>>31){ + k++; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + reg = k+1; + } + else{ + k=-1; + while (!(tmp>>31)){ + k--; + shift++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + reg =-k; + } + exp = tmp>>30; + + frac = (tmp & 0x3FFFFFFF) >> shift; + + (reg>29) ? (fraction_max=1) : (fraction_max = pow(2, 29-reg) ) ; + + d32 = (double)( pow(4, k)* pow(2, exp) * (1+((double)frac/fraction_max)) ); + if (sign) + d32 = -d32; + + return d32; + +} + + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertQuire16ToPosit16.c b/source/luametatex/source/libraries/softposit/source/c_convertQuire16ToPosit16.c new file mode 100644 index 000000000..80deccc4c --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertQuire16ToPosit16.c @@ -0,0 +1,159 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <math.h> + +#include "platform.h" +#include "internals.h" + +//TODO DEPRECATED +posit16_t convertQ16ToP16(quire16_t qA){ + return q16_to_p16(qA); +} + + +posit16_t q16_to_p16(quire16_t qA){ + union ui128_q16 uZ; + union ui16_p16 uA; + uint_fast16_t regA, fracA = 0, shift=0, regime; + uint_fast64_t frac64A; + bool sign, regSA=0, bitNPlusOne=0, bitsMore=0; + int_fast8_t expA = 0; + + + if (isQ16Zero(qA)){ + uA.ui=0; + return uA.p; + } + //handle NaR + else if (isNaRQ16(qA)){ + uA.ui=0x8000; + return uA.p; + } + + uZ.q = qA; + + sign = uZ.ui[0]>>63; + + if(sign){ + //probably need to do two's complement here before the rest. + if (uZ.ui[1]==0){ + uZ.ui[0] = -uZ.ui[0]; + } + else{ + uZ.ui[1] = -uZ.ui[1]; + uZ.ui[0] = ~(uZ.ui[0]); + } + } + + + int noLZ =0; + + if (uZ.ui[0] == 0){ + noLZ+=64; + uint_fast64_t tmp = uZ.ui[1]; + + while(!(tmp>>63)){ + noLZ++; + tmp<<=1; + } + frac64A = tmp; + } + else{ + uint_fast64_t tmp = uZ.ui[0]; + int noLZtmp = 0; + + while(!(tmp>>63)){ + noLZtmp++; + tmp<<=1; + } + noLZ+=noLZtmp; + frac64A = tmp; + frac64A+= ( uZ.ui[1]>>(64-noLZtmp) ); + if (uZ.ui[1]<<noLZtmp)bitsMore = 1; + } + + //default dot is between bit 71 and 72, extreme left bit is bit 0. Last right bit is bit 127. + //Equations derived from quire16_mult last_pos = 71 - (kA<<1) - expA and first_pos = last_pos - frac_len + int kA=(71-noLZ) >> 1; + expA = 71 - noLZ - (kA<<1) ; + + if(kA<0){ + regA = (-kA & 0xFFFF); + regSA = 0; + regime = 0x4000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFF - (0x7FFF>>regA); + } + + if(regA>14){ + //max or min pos. exp and frac does not matter. + (regSA) ? 
(uA.ui= 0x7FFF): (uA.ui=0x1); + } + else{ + + //remove hidden bit + frac64A&=0x7FFFFFFFFFFFFFFF; + shift = regA+50; //1 es bit, 1 sign bit and 1 r terminating bit , 16+31+3 + fracA = frac64A>>shift; + + if (regA!=14){ + bitNPlusOne = (frac64A>>(shift-1)) & 0x1; + unsigned long long tmp = frac64A<<(65-shift); + if(frac64A<<(65-shift)) bitsMore = 1; + } + else if (frac64A>0){ + fracA=0; + bitsMore=1; + } + + if (regA==14 && expA) bitNPlusOne = 1; + + uA.ui = packToP16UI(regime, regA, expA, fracA); + + if (bitNPlusOne){ + uA.ui += (uA.ui&1) | bitsMore; + } + } + + if (sign) uA.ui = -uA.ui & 0xFFFF; + return uA.p; +} + + + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertQuire32ToPosit32.c b/source/luametatex/source/libraries/softposit/source/c_convertQuire32ToPosit32.c new file mode 100644 index 000000000..c3de4b768 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertQuire32ToPosit32.c @@ -0,0 +1,183 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <math.h> + +#include "platform.h" +#include "internals.h" + + + +posit32_t q32_to_p32(quire32_t qA){ + + union ui512_q32 uZ; + union ui32_p32 uA; + uint_fast32_t regA, fracA = 0, shift=0, regime; + uint_fast64_t frac64A; + bool sign, regSA=0, bitNPlusOne=0, bitsMore=0; + int_fast32_t expA = 0; + int i; + + if (isQ32Zero(qA)){ + uA.ui=0; + return uA.p; + } + //handle NaR + else if (isNaRQ32(qA)){ + uA.ui=0x80000000; + return uA.p; + } + + uZ.q = qA; + + sign = uZ.ui[0]>>63; + + if(sign){ + for (i=7; i>=0; i--){ + if (uZ.ui[i]>0){ + uZ.ui[i] = - uZ.ui[i]; + i--; + while(i>=0){ + uZ.ui[i] = ~uZ.ui[i]; + i--; + } + break; + } + } + } + //minpos and maxpos + + int noLZ =0; + + for (i=0; i<8; i++){ + if (uZ.ui[i]==0){ + noLZ+=64; + } + else{ + uint_fast64_t tmp = uZ.ui[i]; + int noLZtmp = 0; + + while (!(tmp>>63)){ + noLZtmp++; + tmp= (tmp<<1); + } + + noLZ+=noLZtmp; + frac64A = tmp; + if (i!=7 && noLZtmp!=0){ + frac64A+= ( uZ.ui[i+1]>>(64-noLZtmp) ); + if( uZ.ui[i+1] & (((uint64_t)0x1<<(64-noLZtmp))-1) ) + bitsMore=1; + i++; + } + i++; + while(i<8){ + if (uZ.ui[i]>0){ + bitsMore = 1;; + break; + } + i++; + } + break; + } + } + + //default 
dot is between bit 271 and 272, extreme left bit is bit 0. Last right bit is bit 511. + //Equations derived from quire32_mult last_pos = 271 - (kA<<2) - expA and first_pos = last_pos - frac_len + int kA=(271-noLZ) >> 2; + expA = 271 - noLZ - (kA<<2) ; + + + if(kA<0){ + //regA = (-kA & 0xFFFF); + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + + + if(regA>30){ + //max or min pos. exp and frac does not matter. + (regSA) ? (uA.ui= 0x7FFFFFFF): (uA.ui=0x1); + } + else{ + + //remove hidden bit + frac64A&=0x7FFFFFFFFFFFFFFF; + + shift = regA+35; //2 es bit, 1 sign bit and 1 r terminating bit , 31+4 + + fracA = frac64A>>shift; + + if (regA<=28){ + bitNPlusOne = (frac64A>>(shift-1)) & 0x1; + expA<<= (28-regA); + if (frac64A<<(65-shift)) bitsMore=1; + + } + else { + if (regA==30){ + bitNPlusOne = expA&0x2; + bitsMore = (expA&0x1); + expA = 0; + } + else if (regA==29){ + bitNPlusOne = expA&0x1; + expA>>=1; //taken care of by the pack algo + } + if (frac64A>0){ + fracA=0; + bitsMore =1; + } + } + + uA.ui = packToP32UI(regime, expA, fracA); + if (bitNPlusOne) + uA.ui += (uA.ui&1) | bitsMore; + + } + if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; + + return uA.p; + +} + + + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertQuire8ToPosit8.c b/source/luametatex/source/libraries/softposit/source/c_convertQuire8ToPosit8.c new file mode 100644 index 000000000..bd537320f --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertQuire8ToPosit8.c @@ -0,0 +1,123 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + + +#include <stdint.h> +#include <math.h> + +#include "platform.h" +#include "internals.h" + + +posit8_t q8_to_p8(quire8_t qA){ + + union ui32_q8 uZ; + union ui8_p8 uA; + uint_fast8_t regA, fracA = 0, shift=0, regime; + uint_fast32_t frac32A; + bool sign=0, regSA=0, bitNPlusOne=0, bitsMore=0; + + if (isQ8Zero(qA)){ + uA.ui=0; + return uA.p; + } + //handle NaR + else if (isNaRQ8(qA)){ + uA.ui=0x80; + return uA.p; + } + + uZ.q = qA; + + sign = uZ.ui>>31; + + if(sign){ + uZ.ui = -uZ.ui & 0xFFFFFFFF; + } + + int noLZ =0; + + uint_fast32_t tmp = uZ.ui; + + while (!(tmp>>31)){//==0 + noLZ++; + tmp<<=1; + } + frac32A = tmp; + + //default dot is between bit 19 and 20, extreme left bit is bit 0. Last right bit is bit 31. + //Scale = k + int kA=(19-noLZ); + + if(kA<0){ + regA = (-kA & 0xFF); + regSA = 0; + regime = 0x40>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7F-(0x7F>>regA); + } + + if(regA>6){ + //max or min pos. exp and frac does not matter. + (regSA) ? (uA.ui= 0x7F): (uA.ui=0x1); + } + else{ + //remove hidden bit + frac32A&=0x7FFFFFFF; + shift = regA+25; // 1 sign bit and 1 r terminating bit , 16+7+2 + fracA = frac32A>>shift; + + bitNPlusOne = (frac32A>>(shift-1))&0x1 ; + + uA.ui = packToP8UI(regime, fracA); + + if (bitNPlusOne){ + ( (frac32A <<(33-shift)) & 0xFFFFFFFF ) ? 
(bitsMore=1) : (bitsMore=0); + uA.ui += (uA.ui&1) | bitsMore; + } + } + + if (sign) uA.ui = -uA.ui & 0xFF; + + return uA.p; + + +} + + + diff --git a/source/luametatex/source/libraries/softposit/source/c_convertQuireX2ToPositX2.c b/source/luametatex/source/libraries/softposit/source/c_convertQuireX2ToPositX2.c new file mode 100644 index 000000000..f8a5aaa57 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_convertQuireX2ToPositX2.c @@ -0,0 +1,191 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <math.h> + +#include "platform.h" +#include "internals.h" + + + +posit_2_t qX2_to_pX2(quire_2_t qA, int x){ + + union ui512_qX2 uZ; + union ui32_pX2 uA; + uint_fast32_t fracA = 0, shift=0, regime; + uint_fast64_t frac64A; + bool sign, regSA=0, bitNPlusOne=0, bitsMore=0; + int_fast32_t regA, expA = 0; + int i; + + if (isQX2Zero(qA)){ + uA.ui=0; + return uA.p; + } + //handle NaR + else if (isNaRQX2(qA)){ + uA.ui=0x80000000; + return uA.p; + } + + uZ.q = qA; + + sign = uZ.ui[0]>>63; + + if(sign){ + for (i=7; i>=0; i--){ + if (uZ.ui[i]>0){ + uZ.ui[i] = - uZ.ui[i]; + i--; + while(i>=0){ + uZ.ui[i] = ~uZ.ui[i]; + i--; + } + break; + } + } + } + //minpos and maxpos + + int noLZ =0; + + for (i=0; i<8; i++){ + if (uZ.ui[i]==0){ + noLZ+=64; + } + else{ + uint_fast64_t tmp = uZ.ui[i]; + int noLZtmp = 0; + + while (!(tmp>>63)){ + noLZtmp++; + tmp= (tmp<<1); + } + + noLZ+=noLZtmp; + frac64A = tmp; + if (i!=7 && noLZtmp!=0){ + frac64A+= ( uZ.ui[i+1]>>(64-noLZtmp) ); + if( uZ.ui[i+1] & (((uint64_t)0x1<<(64-noLZtmp))-1) ) + bitsMore=1; + i++; + } + i++; + while(i<8){ + if (uZ.ui[i]>0){ + bitsMore = 1;; + break; + } + i++; + } + break; + } + } + //default dot is between bit 271 and 272, extreme left bit is bit 0. Last right bit is bit 511. 
+ //Equations derived from quire32_mult last_pos = 271 - (kA<<2) - expA and first_pos = last_pos - frac_len + int kA=(271-noLZ) >> 2; + expA = 271 - noLZ - (kA<<2) ; + + + if(kA<0){ + //regA = (-kA & 0xFFFF); + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + + if(regA>(x-2)){ + //max or min pos. exp and frac does not matter. + uA.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + + //remove hidden bit + frac64A&=0x7FFFFFFFFFFFFFFF; + + shift = regA+35; //2 es bit, 1 sign bit and 1 r terminating bit , 31+4 + + fracA = ((uint32_t) (frac64A>>shift)); + + //regime length is smaller than length of posit + if (regA<x){ + if (regA<=(x-4)){ + + bitNPlusOne = (frac64A>>(shift+31-x)) & 0x1; + if ((frac64A<<(33-shift+x)) !=0) bitsMore=1; + } + else { + if (regA==(x-2)){ + bitNPlusOne = expA&0x2; + bitsMore = (expA&0x1); + expA = 0; + } + else if (regA==(x-3)){ + bitNPlusOne = expA&0x1; + //expA>>=1; //taken care of by the pack algo + expA &=0x2; + } + if (frac64A>0){ + fracA=0; + bitsMore =1; + } + } + } + else{ + regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); + expA=0; + fracA=0; + } + + expA <<= (28-regA); + uA.ui = packToP32UI(regime, expA, fracA) & ((int32_t)0x80000000>>(x-1)); + + if (bitNPlusOne) + uA.ui += (((uA.ui>>(32-x)) &0x1) | (uint32_t)bitsMore )<< (32-x); + } + + if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; + + return uA.p; + +} + + + diff --git a/source/luametatex/source/libraries/softposit/source/c_int.c b/source/luametatex/source/libraries/softposit/source/c_int.c new file mode 100644 index 000000000..16b6c756b --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/c_int.c @@ -0,0 +1,155 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). 
+ +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +int_fast64_t p8_int( posit8_t pA ) { + + union ui8_p8 uA; + int_fast64_t iZ; + uint_fast8_t scale = 0, uiA; + bool sign; + + uA.p = pA; + uiA = uA.ui; + //NaR + if (uiA==0x80) return 0x8000000000000000LL; + + sign = uiA>>7; + if (sign) uiA = -uiA & 0xFF; + + if (uiA < 0x40) return 0; + else if (uiA < 0x60) iZ = 1; + else { + uiA -= 0x40; + while (0x20 & uiA) { + scale ++; + uiA = (uiA - 0x20) << 1; + } + uiA <<= 1; + iZ = ((uint64_t)uiA | 0x40) >> (6 - scale); + } + + if (sign) iZ = -iZ; + return iZ; + +} + +int_fast64_t p16_int( posit16_t pA ){ + union ui16_p16 uA; + int_fast64_t iZ; + uint_fast16_t scale = 0, uiA; + bool sign; + + uA.p = pA; + uiA = uA.ui; + + // NaR + if (uiA==0x8000) return 0x8000000000000000LL; + + sign = uiA>>15; + if (sign) uiA = -uiA & 0xFFFF; + + if (uiA < 0x4000) return 0; + else if (uiA < 0x5000) iZ = 1; + else if (uiA < 0x5800) iZ = 2; + else{ + uiA -= 0x4000; + while (0x2000 & uiA) { + scale += 2; + uiA = (uiA - 0x2000) << 1; + } + uiA <<= 1; + if (0x2000 & uiA) scale++; + iZ = ((uint64_t)uiA | 0x2000) >> (13 - scale); + + } + if (sign) iZ = -iZ; + return iZ; + +} + + +int64_t p32_int( posit32_t pA ){ + union ui32_p32 uA; + int_fast64_t iZ; + uint_fast32_t scale = 0, uiA; + bool sign; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000) return 0x8000000000000000; + + sign = uiA>>31; + if (sign) uiA = -uiA & 0xFFFFFFFF; + + if (uiA < 0x40000000) return 0; + else if (uiA < 0x48000000) iZ = 1; + else if (uiA < 0x4C000000) iZ = 2; + else if(uiA>0x7FFFAFFF) iZ= 0x7FFFFFFFFFFFFFFF; + else{ + uiA -= 0x40000000; + while (0x20000000 & uiA) { + scale += 4; + uiA = (uiA - 0x20000000) << 1; + } + uiA <<= 1; // Skip over termination bit, which is 0. + if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. 
+ if (0x10000000 & uiA) scale++; + iZ = ((uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) + + iZ = (scale<62) ? ((uiA | 0x10000000ULL)&0x1FFFFFFFULL) >> (28-scale): + ((uiA | 0x10000000ULL)&0x1FFFFFFFULL) << (scale-28); + + } + + if (sign) iZ = -iZ ; + return iZ; +} + +int64_t pX2_int( posit_2_t pA ){ + posit32_t p32 = {.v = pA.v}; + return p32_int(p32); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/i32_to_p16.c b/source/luametatex/source/libraries/softposit/source/i32_to_p16.c new file mode 100644 index 000000000..c7ec6a24b --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/i32_to_p16.c @@ -0,0 +1,90 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit16_t i32_to_p16( int32_t iA ){ + int_fast8_t k, log2 = 25; + union ui16_p16 uZ; + uint_fast16_t uiA; + uint_fast32_t expA, mask = 0x02000000, fracA; + bool sign; + + + if (iA < -134217728){ //-2147483648 to -134217729 rounds to P32 value -268435456 + uZ.ui = 0x8001; //-maxpos + return uZ.p; + } + + sign = iA>>31; + if(sign){ + iA = -iA &0xFFFFFFFF; + } + + if( iA > 134217728 ) { //134217729 to 2147483647 rounds to P32 value 268435456 + uiA = 0x7FFF; //maxpos + } + else if ( iA > 0x02FFFFFF ){ + uiA = 0x7FFE; + } + else if ( iA < 2 ){ + uiA = (iA << 14); + } + else { + fracA = iA; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + k = log2 >> 1; + expA = (log2 & 0x1) << (12 - k); + fracA = (fracA ^ mask); + + uiA = (0x7FFF ^ (0x3FFF >> k)) | expA | ( fracA >> (k + 13)); + mask = 0x1000 << k; //bitNPlusOne + if (mask & fracA) { + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + } + } + (sign) ? 
(uZ.ui = -uiA &0xFFFF) : (uZ.ui = uiA); + return uZ.p; +} diff --git a/source/luametatex/source/libraries/softposit/source/i32_to_p32.c b/source/luametatex/source/libraries/softposit/source/i32_to_p32.c new file mode 100644 index 000000000..b822d9e39 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/i32_to_p32.c @@ -0,0 +1,87 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + + +posit32_t i32_to_p32( int32_t iA ) { + int_fast8_t k, log2 = 31;//length of bit (e.g. 4294966271) in int (32 but because we have only 32 bits, so one bit off to accommodate that fact) + union ui32_p32 uZ; + uint_fast32_t uiA; + uint_fast32_t expA, mask = 0x80000000, fracA; + bool sign; + + if (iA < -2147483135){ //-2147483648 to -2147483136 rounds to P32 value -2147483648 + uZ.ui = 0x80500000; + return uZ.p; + } + + sign = iA>>31; + if(sign) iA = -iA &0xFFFFFFFF; + + if ( iA >2147483135)//2147483136 to 2147483647 rounds to P32 value (2147483648)=> 0x7FB00000 + uiA = 0x7FB00000; + else if ( iA < 0x2 ) + uiA = (iA << 30); + else { + fracA = iA; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + k = (log2 >> 2); + expA = (log2 & 0x3) << (27 - k); + fracA = (fracA ^ mask); + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | expA | fracA>>(k+4); + + mask = 0x8 << k; //bitNPlusOne + + if (mask & fracA) + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + + } + (sign) ? 
(uZ.ui = -uiA &0xFFFFFFFF) : (uZ.ui = uiA); + return uZ.p; +} + + diff --git a/source/luametatex/source/libraries/softposit/source/i32_to_p8.c b/source/luametatex/source/libraries/softposit/source/i32_to_p8.c new file mode 100644 index 000000000..698f4b9bd --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/i32_to_p8.c @@ -0,0 +1,88 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit8_t i32_to_p8( int32_t iA ){ + int_fast8_t k, log2 = 6;//length of bit + union ui8_p8 uZ; + uint_fast8_t uiA; + uint_fast32_t mask = 0x40, fracA; + bool sign; + + if (iA < -48){ //-48 to -MAX_INT rounds to P32 value -268435456 + uZ.ui = 0x81; //-maxpos + return uZ.p; + } + sign = iA>>31; + if(sign){ + iA = -iA &0xFFFFFFFF; + } + if ( iA > 48 ) { + uiA = 0x7F; + } + else if ( iA < 2 ){ + uiA = (iA << 6); + } + else { + fracA = iA; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + + k = log2; + + fracA = (fracA ^ mask); + + uiA = (0x7F ^ (0x3F >> k)) | ( fracA >> (k+1) ) ; + + mask = 0x1 << k; //bitNPlusOne + if (mask & fracA) { + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + } + } + (sign) ? (uZ.ui = -uiA &0xFF) : (uZ.ui = uiA); + return uZ.p; +} diff --git a/source/luametatex/source/libraries/softposit/source/i32_to_pX1.c b/source/luametatex/source/libraries/softposit/source/i32_to_pX1.c new file mode 100644 index 000000000..5baa6e847 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/i32_to_pX1.c @@ -0,0 +1,115 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. 
This C source file was based on SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/


#include "platform.h"
#include "internals.h"


/*
 * Convert a 32-bit signed integer to an x-bit posit held in the 32-bit word
 * of a posit_1_t.
 *
 * The magnitude of a is encoded into uiA and, for negative a, the two's
 * complement of uiA is stored.  uiA is 0x80000000 when a == 0x80000000 after
 * the sign step or when x is outside 2..32.  For x == 2 only 0 or 0x40000000
 * is produced.  Otherwise the value is normalised so its leading one sits at
 * bit 31 and one of five branches, selected by comparing k with x, builds and
 * rounds the pattern.
 *
 * NOTE(review): several expressions below look inconsistent with each other
 * (see the inline notes); this function should be checked against a
 * reference before it is relied on.
 */
posit_1_t i32_to_pX1( int32_t a, int x ) {
	int_fast8_t k, log2 = 31;//length of bit (e.g. 2147418111) in int (32 but because we have only 32 bits, so one bit off to accommodate that fact)
	union ui32_pX1 uZ;
	uint_fast32_t uiA=0;
	uint_fast32_t expA, mask = 0x80000000, fracA;
	bool sign;

	sign = a>>31;
	// NOTE(review): unlike the fixed-width converters there is no early return
	// before this negation, so a == INT32_MIN is negated as a signed value here.
	if(sign) a = -a &0xFFFFFFFF;

	//NaR
	if (a == 0x80000000 || x<2 || x>32)
		uiA = 0x80000000;
	else if (x==2){
		if (a>0) uiA=0x40000000;
	}
	else if ( a > 2147418111){
		uiA = 0x7FFF9FFF; // 2147483648
		//if (x<12)  uiA&=((int32_t)0x80000000>>(x-1));
	}
	else if ( a < 0x2 )
		uiA = (a << 30);
	else {
		fracA = a;
		// shift left until the leading one reaches bit 31; log2 ends up as its original position
		while ( !(fracA & mask) ) {
			log2--;
			fracA <<= 1;
		}
		k = (log2 >> 1);
		// NOTE(review): expA is already shifted by (28-k) here, yet the branches
		// below test it with &0x2 / &0x1 and shift it again by (27-k) - confirm
		// which form is intended.
		expA = (log2 & 0x1) << (28-k);
		fracA = (fracA ^ mask);   // drop the leading one

		if(k>=(x-2)){//maxpos
			uiA= 0x7FFFFFFF & ((int32_t)0x80000000>>(x-1));
		}
		else if (k==(x-3)){//bitNPlusOne-> first exp bit //bitLast is zero
			uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k));
			if( (expA & 0x2) && ((expA&0x1) | fracA) ) //bitNPlusOne //bitsMore
				uiA |= ((uint32_t)0x80000000>>(x-1));
		}
		else if (k==(x-4)){
			uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | ((expA &0x2)<< (27 - k));
			if(expA&0x1){
				if( (((uint32_t)0x80000000>>(x-1)) & uiA)| fracA)
					uiA += ((uint32_t)0x80000000>>(x-1));
			}
		}
		else if (k==(x-5)){
			uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k));
			// NOTE(review): k == x-5 in this branch, so (k -x) is -5 and the
			// shift count is negative - verify the intended mask.
			mask = 0x8 << (k -x);
			if (mask & fracA){ //bitNPlusOne
				if (((mask - 1) & fracA) | (expA&0x1)) {
					uiA+= ((uint32_t)0x80000000>>(x-1));
				}
			}
		}
		else{
			// keep only the top x bits of the packed pattern
			uiA = ((0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)) | fracA>>(k+4)) & ((int32_t)0x80000000>>(x-1));;
			// NOTE(review): k < x-5 in this branch, so (k-x) is negative here
			// as well - verify the intended mask.
			mask = 0x8 << (k-x); //bitNPlusOne
			if (mask & fracA)
				if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA+= ((uint32_t)0x80000000>>(x-1));
		}

	}
	// negative inputs store the two's complement of the encoded magnitude
	(sign) ? (uZ.ui = -uiA &0xFFFFFFFF) : (uZ.ui = uiA);
	return uZ.p;
}


diff --git a/source/luametatex/source/libraries/softposit/source/i32_to_pX2.c b/source/luametatex/source/libraries/softposit/source/i32_to_pX2.c
new file mode 100644
index 000000000..ac31957fb
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/i32_to_pX2.c
@@ -0,0 +1,121 @@

/*============================================================================

This C source file is part of the SoftPosit Posit Arithmetic Package
by S. H. Leong (Cerlane).

Copyright 2017 2018 A*STAR. All rights reserved.

This C source file was based on SoftFloat IEEE Floating-Point Arithmetic
Package, Release 3d, by John R. Hauser.

Copyright 2011, 2012, 2013, 2014 The Regents of the University of California.
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + + +posit_2_t i32_to_pX2( int32_t iA, int x ) { + int_fast8_t k, log2 = 31;//length of bit (e.g. 4294966271) in int (32 but because we have only 32 bits, so one bit off to accomdate that fact) + union ui32_pX2 uZ; + uint_fast32_t uiA=0; + uint_fast32_t expA, mask = 0x80000000, fracA; + bool sign; + + if (iA < -2147483135){ + uZ.ui = 0x80500000; + return uZ.p; + } + + sign = iA>>31; + if(sign) iA = -iA &0xFFFFFFFF; + + //NaR + if (x<2 || x>32) + uiA = 0x80000000; + else if (x==2){ + if (iA>0) uiA=0x40000000; + } + else if ( iA > 2147483135){//2147483136 to 2147483647 rounds to P32 value (2147483648)=> 0x7FB00000 + uiA = 0x7FB00000; // 2147483648 + if (x<10) uiA&=((int32_t)0x80000000>>(x-1)); + else if (x<12) uiA = 0x7FF00000&((int32_t)0x80000000>>(x-1)); + } + else if ( iA < 0x2 ) + uiA = (iA << 30); + else { + fracA = iA; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + k = (log2 >> 2); + expA = (log2 & 0x3); + fracA = (fracA ^ mask); + + if(k>=(x-2)){//maxpos + uiA= 0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)); + } + else if (k==(x-3)){//bitNPlusOne-> first exp bit //bitLast is zero + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)); + if( (expA & 0x2) && ((expA&0x1) | fracA) ) //bitNPlusOne //bitsMore + uiA |= ((uint32_t)0x80000000>>(x-1)); + } + else if (k==(x-4)){ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | ((expA &0x2)<< (27 - 
k)); + if(expA&0x1){ + if( (((uint32_t)0x80000000>>(x-1)) & uiA)| fracA) + uiA += ((uint32_t)0x80000000>>(x-1)); + } + } + else if (k==(x-5)){ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)); + mask = 0x8 << (k -x); + if (mask & fracA){ //bitNPlusOne + if (((mask - 1) & fracA) | (expA&0x1)) { + uiA+= ((uint32_t)0x80000000>>(x-1)); + } + } + } + else{ + uiA = ((0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)) | fracA>>(k+4)) & ((int32_t)0x80000000>>(x-1));; + mask = 0x8 << (k-x); //bitNPlusOne + if (mask & fracA) + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA+= ((uint32_t)0x80000000>>(x-1)); + } + + } + (sign) ? (uZ.ui = -uiA &0xFFFFFFFF) : (uZ.ui = uiA); + return uZ.p; +} + + diff --git a/source/luametatex/source/libraries/softposit/source/i64_to_p16.c b/source/luametatex/source/libraries/softposit/source/i64_to_p16.c new file mode 100644 index 000000000..43e61abe7 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/i64_to_p16.c @@ -0,0 +1,90 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit16_t i64_to_p16( int64_t iA ) { + int_fast8_t k, log2 = 25; + union ui16_p16 uZ; + uint_fast16_t uiA; + uint_fast64_t expA, mask = 0x0000000002000000, fracA; + bool sign; + + if (iA < -134217728){ //-9223372036854775808 to -134217729 rounds to P32 value -268435456 + uZ.ui = 0x8001; //-maxpos + return uZ.p; + } + + sign = iA>>63; + if (sign) iA = -iA; + + if( iA > 134217728 ) { //134217729 to 9223372036854775807 rounds to P32 value 268435456 + uiA = 0x7FFF; //maxpos + } + else if ( iA > 0x0000000008000000 ) { + uiA = 0x7FFF; + } + else if ( iA > 0x0000000002FFFFFF ){ + uiA = 0x7FFE; + } + else if ( iA < 2 ){ + uiA = (iA << 14); + } + else { + fracA = iA; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + k = log2 >> 1; + expA = (log2 & 0x1) << (12 - k); + fracA = fracA ^ mask; + uiA = (0x7FFF ^ (0x3FFF >> k)) | expA | (fracA >> (k + 13)); + mask = 0x1000 << k; + if (mask & fracA) { + if ( ((mask - 1) 
& fracA) | ((mask << 1) & fracA) ) uiA++; + } + } + (sign) ? (uZ.ui = -uiA) : (uZ.ui = uiA); + return uZ.p; + +} diff --git a/source/luametatex/source/libraries/softposit/source/i64_to_p32.c b/source/luametatex/source/libraries/softposit/source/i64_to_p32.c new file mode 100644 index 000000000..5453860fc --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/i64_to_p32.c @@ -0,0 +1,89 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +posit32_t i64_to_p32( int64_t iA ) { + int_fast8_t k, log2 = 63;//length of bit (e.g. 9222809086901354496) in int (64 but because we have only 64 bits, so one bit off to accomdate that fact) + union ui32_p32 uZ; + uint_fast64_t uiA; + uint_fast64_t mask = 0x8000000000000000, fracA; + uint_fast32_t expA; + bool sign; + + if (iA < -9222809086901354495){//-9222809086901354496 to -9223372036854775808 will be P32 value -9223372036854775808 + uZ.ui = 0x80005000; + return uZ.p; + } + sign = iA>>63; + if(sign) iA = -iA; + + if ( iA >9222809086901354495)//9222809086901354495 bcos 9222809086901354496 to 9223372036854775807 will be P32 value 9223372036854775808 + uiA = 0x7FFFB000; // P32: 9223372036854775808 + else if ( iA < 0x2 ) + uiA = (iA << 30); + else { + fracA = iA; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + + k = (log2 >> 2); + + expA = (log2 & 0x3) << (27 - k); + fracA = (fracA ^ mask); + + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | expA | fracA>>(k+36); + + mask = 0x800000000 << k; //bitNPlusOne + + if (mask & fracA) { + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + } + } + (sign) ? 
(uZ.ui = -uiA) : (uZ.ui = uiA); + return uZ.p; +} + + diff --git a/source/luametatex/source/libraries/softposit/source/i64_to_p8.c b/source/luametatex/source/libraries/softposit/source/i64_to_p8.c new file mode 100644 index 000000000..646d7e4be --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/i64_to_p8.c @@ -0,0 +1,90 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit8_t i64_to_p8( int64_t iA ){ + int_fast8_t k, log2 = 6;//length of bit + union ui8_p8 uZ; + uint_fast8_t uiA; + uint_fast64_t mask = 0x40, fracA; + bool sign; + + if (iA < -48){ //-48 to -MAX_INT rounds to P32 value -268435456 + uZ.ui = 0x81; //-maxpos + return uZ.p; + } + + sign = iA>>63; + if(sign){ + iA = -iA; + } + + if ( iA > 48 ) { + uiA = 0x7F; + } + else if ( iA < 2 ){ + uiA = (iA << 6); + } + else { + fracA = iA; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + + k = log2; + + fracA = (fracA ^ mask); + + uiA = (0x7F ^ (0x3F >> k)) | ( fracA >> (k+1) ) ; + + mask = 0x1 << k; //bitNPlusOne + if (mask & fracA) { + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + } + } + (sign) ? (uZ.ui = -uiA &0xFF) : (uZ.ui = uiA); + return uZ.p; +} diff --git a/source/luametatex/source/libraries/softposit/source/i64_to_pX2.c b/source/luametatex/source/libraries/softposit/source/i64_to_pX2.c new file mode 100644 index 000000000..ba2c09ed6 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/i64_to_pX2.c @@ -0,0 +1,119 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +posit_2_t i64_to_pX2( int64_t a, int x ) { + int_fast8_t k, log2 = 63;//length of bit (e.g. 
18445618173802707967) in int (64 but because we have only 64 bits, so one bit off to accomdate that fact) + union ui32_pX2 uZ; + uint_fast64_t uiA=0; + uint_fast64_t mask = 0x8000000000000000, frac64A; + uint_fast32_t expA; + bool sign; + + sign = a>>63; + if(sign) a = -a; + //NaR + if (a == 0x8000000000000000 || x<2 || x>32 ) + uiA = 0x80000000; + else if (x==2){ + if (a>0) uiA=0x40000000; + } + else if ( a > 0x7FFDFFFFFFFFFFFF){//9222809086901354495 + uiA = 0x7FFFB000; // P32: 9223372036854775808 + if (x<18) uiA&=((int32_t)0x80000000>>(x-1)); + } + else if ( a < 0x2 ) + uiA = (a << 30); + else { + frac64A = a; + while ( !(frac64A & mask) ) { + log2--; + frac64A <<= 1; + } + + k = (log2 >> 2); + + expA = (log2 & 0x3); + frac64A = (frac64A ^ mask); + + if(k>=(x-2)){//maxpos + uiA= 0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)); + } + else if (k==(x-3)){//bitNPlusOne-> first exp bit //bitLast is zero + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)); + if( (expA & 0x2) && ((expA&0x1) | frac64A) ) //bitNPlusOne //bitsMore + uiA |= ((uint32_t)0x80000000>>(x-1)); + } + else if (k==(x-4)){ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | ((expA &0x2)<< (27 - k)); + if(expA&0x1){ + if( (((uint32_t)0x80000000>>(x-1)) & uiA)|| frac64A) + uiA += ((uint32_t)0x80000000>>(x-1)); + } + + } + else if (k==(x-5)){ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)); + mask = (uint64_t)0x800000000 << (k + 32-x); + if (mask & frac64A){ //bitNPlusOne + if (((mask - 1) & frac64A) | (expA&0x1)) { + uiA+= ((uint32_t)0x80000000>>(x-1)); + } + } + } + else{ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)) | ((frac64A>>(k+36)) & ((int32_t)0x80000000>>(x-1))); + mask = (uint64_t)0x800000000 << (k + 32-x); //bitNPlusOne position + if (mask & frac64A) { + if (((mask - 1) & frac64A) | ((mask << 1) & frac64A)) { + uiA+= ((uint32_t)0x80000000>>(x-1)); + } + } + } + } + (sign) ? 
(uZ.ui = -uiA) : (uZ.ui = uiA); + return uZ.p; +} + + diff --git a/source/luametatex/source/libraries/softposit/source/include/internals.h b/source/luametatex/source/libraries/softposit/source/include/internals.h new file mode 100644 index 000000000..c2eb1f5bf --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/include/internals.h @@ -0,0 +1,178 @@ + +/*============================================================================ + +This C header file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C header file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
/* Internal declarations shared by the SoftPosit source files: bit-field
   helper macros and prototypes of the magnitude add/sub and fused
   multiply-add helpers for each posit width. */
#ifndef internals_h
#define internals_h 1

#ifdef __cplusplus
extern "C"{
#endif

#include "primitives.h"
#include "softposit.h"
#include "softposit_types.h"

#include <stdio.h>

#ifdef SOFTPOSIT_QUAD
#include <quadmath.h>
#endif



/* Flag values for the mulAdd helpers.
   NOTE(review): presumably passed as their last argument to request
   negation of the addend (subC) or of the product (subProd) - confirm
   against the softposit_mulAddP* definitions. */
enum {
    softposit_mulAdd_subC    = 1,
    softposit_mulAdd_subProd = 2
};


/*----------------------------------------------------------------------------
| 8-bit posit helpers.
| signP8UI    : bit 7 of the low byte of `a', as a bool.
| signregP8UI : bit 6 of the low byte of `a', as a bool.
| packToP8UI  : sum of `regime' and `fracA', each truncated to 8 bits.
*----------------------------------------------------------------------------*/
#define signP8UI( a ) ((bool) ((uint8_t) (a)>>7))
#define signregP8UI( a ) ((bool) (((uint8_t) (a)>>6) & 0x1))
#define packToP8UI( regime, fracA) ((uint8_t) regime + ((uint8_t)(fracA)) )


posit8_t softposit_addMagsP8( uint_fast8_t, uint_fast8_t );
posit8_t softposit_subMagsP8( uint_fast8_t, uint_fast8_t );
posit8_t softposit_mulAddP8( uint_fast8_t, uint_fast8_t, uint_fast8_t, uint_fast8_t );


/*----------------------------------------------------------------------------
| 16-bit posit helpers.
| signP16UI    : bit 15 of the low 16 bits of `a', as a bool.
| signregP16UI : bit 14 of the low 16 bits of `a', as a bool.
| expP16UI     : bit (13 - regA) of `a'.
| packToP16UI  : `regime' + (`expA' << (13 - regA)) + `fracA', each term
|                truncated to 16 bits.
*----------------------------------------------------------------------------*/
#define signP16UI( a ) ( (bool) ( ( uint16_t ) (a)>>15 ) )
#define signregP16UI( a ) ( (bool) (((uint16_t) (a)>>14) & 0x1) )
#define expP16UI( a, regA ) ((int_fast8_t) ((a)>>(13-regA) & 0x0001))
#define packToP16UI( regime, regA, expA, fracA) ((uint16_t) regime + ((uint16_t) (expA)<< (13-regA)) + ((uint16_t)(fracA)) )

posit16_t softposit_addMagsP16( uint_fast16_t, uint_fast16_t );
posit16_t softposit_subMagsP16( uint_fast16_t, uint_fast16_t );
posit16_t softposit_mulAddP16( uint_fast16_t, uint_fast16_t, uint_fast16_t, uint_fast16_t );


/*----------------------------------------------------------------------------
| 32-bit posit helpers.
| signP32UI    : bit 31 of the low 32 bits of `a', as a bool.
| signregP32UI : bit 30 of the low 32 bits of `a', as a bool.
| packToP32UI  : sum of `regime', `expA' and `fracA', each truncated to
|                32 bits; unlike the 16-bit variant, `expA' is not shifted
|                here.
*----------------------------------------------------------------------------*/
#define signP32UI( a ) ((bool) ((uint32_t) (a)>>31))
#define signregP32UI( a ) ((bool) (((uint32_t) (a)>>30) & 0x1))
#define packToP32UI(regime, expA, fracA) ( (uint32_t) regime + (uint32_t) expA + ((uint32_t)(fracA)) )

posit32_t softposit_addMagsP32( uint_fast32_t, uint_fast32_t );
posit32_t softposit_subMagsP32( uint_fast32_t, uint_fast32_t );
posit32_t softposit_mulAddP32( uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast32_t );


/*----------------------------------------------------------------------------
| posit_2_t helpers.  The trailing `int' is presumably the posit width in
| bits - TODO confirm against the definitions.
*----------------------------------------------------------------------------*/

posit_2_t softposit_addMagsPX2( uint_fast32_t, uint_fast32_t, int );
posit_2_t softposit_subMagsPX2( uint_fast32_t, uint_fast32_t, int );
posit_2_t softposit_mulAddPX2( uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast32_t, int );

/*----------------------------------------------------------------------------
| posit_1_t helpers.  The trailing `int' is presumably the posit width in
| bits - TODO confirm against the definitions.
*----------------------------------------------------------------------------*/

posit_1_t softposit_addMagsPX1( uint_fast32_t, uint_fast32_t, int);
posit_1_t softposit_subMagsPX1( uint_fast32_t, uint_fast32_t, int);
posit_1_t softposit_mulAddPX1( uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast32_t, int );

/* Disabled 32-bit helper declarations, kept for reference; not compiled.
uint_fast16_t reglengthP32UI (uint32_t);
int_fast16_t regkP32UI(bool, uint_fast32_t);
#define expP32UI( a, regA ) ((int_fast16_t) ((a>>(28-regA)) & 0x2))
#define regP32UI( a, regLen ) ( ((( uint_fast32_t ) (a) & (0x7FFFFFFF)) >> (30-regLen))) )
#define isNaRP32UI( a ) ( ((a) ^ 0x80000000) == 0 )
#define useed32P 16;
//int_fast16_t expP32UI(uint32_t);
#define expP32sizeUI 2;
uint_fast32_t fracP32UI(uint_fast32_t, uint_fast16_t);*/



/* Disabled conversion and quire declarations, kept for reference; not compiled.
posit32_t convertDecToP32(posit32);
posit32_t convertfloatToP32(float);
posit32_t convertdoubleToP32(double );
//posit32_t convertQuadToP32(__float128);
//__float128 convertP32ToQuadDec(posit32_t);


//posit32_t c_roundPackToP32( bool, bool, int_fast16_t, int_fast16_t, uint_fast16_t, bool, bool );

//#define isNaNP32UI( a ) (((~(a) & 0x7F800000) == 0) && ((a) & 0x007FFFFF))


//posit32_t softposit_roundPackToP32( bool, int_fast16_t, uint_fast32_t );
//posit32_t softposit_normRoundPackToP32( bool, int_fast16_t, uint_fast32_t );

posit32_t softposit_addMagsP32( uint_fast32_t, uint_fast32_t );
posit32_t softposit_subMagsP32( uint_fast32_t, uint_fast32_t );
posit32_t softposit_mulAddP32(uint_fast32_t, uint_fast32_t, uint_fast32_t, uint_fast16_t);


//quire32_t quire32_add(quire32_t, quire32_t);
//quire32_t quire32_sub(quire32_t, quire32_t);
quire32_t quire32_mul(posit32_t, posit32_t);
quire32_t q32_fdp_add(quire32_t, posit32_t, posit32_t);
quire32_t q32_fdp_sub(quire32_t, posit32_t, posit32_t);
posit32_t convertQ32ToP32(quire32_t);
#define isNaRQ32( q ) ( q.v[0]==0x8000000000000000ULL && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0)
#define isQ32Zero(q) (q.v[0]==0 && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0)
quire32_t q32_TwosComplement(quire32_t);
#define q32_clr(q) ({\
	q.v[0]=0;\
	q.v[1]=0;\
	q.v[2]=0;\
	q.v[3]=0;\
	q.v[4]=0;\
	q.v[5]=0;\
	q.v[6]=0;\
	q.v[7]=0;\
	q;\
})
*/

#ifdef __cplusplus
}
#endif


#endif
/* Declarations of the constant lookup tables shared by the SoftPosit
   sources, plus the standard bool and fixed-width integer headers that the
   rest of the package relies on. */
#ifndef primitives_h
#define primitives_h 1

#include <stdbool.h>
#include <stdint.h>

/* Two 16-entry constant tables defined in another translation unit.
   NOTE(review): judging by the names these are coefficients for an
   approximate reciprocal square root - confirm against the definitions and
   their users. */
extern const uint_fast16_t softposit_approxRecipSqrt0[16];
extern const uint_fast16_t softposit_approxRecipSqrt1[16];

#endif
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +/*============================================================================ +| Note: If SoftPosit is modified from SoftFloat and is made available as a +| general library for programs to use, it is strongly recommended that a +| platform-specific version of this header, "softposit.h", be created that +| folds in "softposit_types.h" and that eliminates all dependencies on +| compile-time macros. +*============================================================================*/ + + +#ifndef softposit_h +#define softposit_h 1 + +#ifdef __cplusplus +extern "C"{ +#endif + +#include <stdbool.h> +#include <stdint.h> + +#ifdef SOFTPOSIT_QUAD +#include <quadmath.h> +#endif + +#include "softposit_types.h" + +#include <stdio.h> + + +#ifndef THREAD_LOCAL +#define THREAD_LOCAL +#endif + +#define castUI( a ) ( (a).v ) + +/*---------------------------------------------------------------------------- +| Integer-to-posit conversion routines. 
+*----------------------------------------------------------------------------*/ +posit8_t ui32_to_p8( uint32_t ); +posit16_t ui32_to_p16( uint32_t ); +posit32_t ui32_to_p32( uint32_t ); +//posit64_t ui32_to_p64( uint32_t ); + + +posit8_t ui64_to_p8( uint64_t ); +posit16_t ui64_to_p16( uint64_t ); +posit32_t ui64_to_p32( uint64_t ); +//posit64_t ui64_to_p64( uint64_t ); + +posit8_t i32_to_p8( int32_t ); +posit16_t i32_to_p16( int32_t ); +posit32_t i32_to_p32( int32_t ); +//posit64_t i32_to_p64( int32_t ); + +posit8_t i64_to_p8( int64_t ); +posit16_t i64_to_p16( int64_t ); +posit32_t i64_to_p32( int64_t ); +//posit64_t i64_to_p64( int64_t ); + + + +/*---------------------------------------------------------------------------- +| 8-bit posit operations. +*----------------------------------------------------------------------------*/ +#define isNaRP8UI( a ) ( ((a) ^ 0x80) == 0 ) + +uint_fast32_t p8_to_ui32( posit8_t ); +uint_fast64_t p8_to_ui64( posit8_t ); +int_fast32_t p8_to_i32( posit8_t); +int_fast64_t p8_to_i64( posit8_t); + +posit16_t p8_to_p16( posit8_t ); +posit32_t p8_to_p32( posit8_t ); +//posit64_t p8_to_p64( posit8_t ); + +posit_1_t p8_to_pX1( posit8_t, int ); +posit_2_t p8_to_pX2( posit8_t, int ); + +posit8_t p8_roundToInt( posit8_t ); +posit8_t p8_add( posit8_t, posit8_t ); +posit8_t p8_sub( posit8_t, posit8_t ); +posit8_t p8_mul( posit8_t, posit8_t ); +posit8_t p8_mulAdd( posit8_t, posit8_t, posit8_t ); +posit8_t p8_div( posit8_t, posit8_t ); +posit8_t p8_sqrt( posit8_t ); +bool p8_eq( posit8_t, posit8_t ); +bool p8_le( posit8_t, posit8_t ); +bool p8_lt( posit8_t, posit8_t ); + + +//Quire 8 +quire8_t q8_fdp_add(quire8_t, posit8_t, posit8_t); +quire8_t q8_fdp_sub(quire8_t, posit8_t, posit8_t); +posit8_t q8_to_p8(quire8_t); +#define isNaRQ8( q ) ( (q).v==0x80000000 ) +#define isQ8Zero(q) ( (q).v==0 ) + +int_fast64_t p8_int( posit8_t ); + +#define q8_clr(q) ({\ + (q).v=0;\ + q;\ +}) + +static inline quire8_t q8Clr(){ + quire8_t q; + q.v=0; 
+ return q; +} + +#define castQ8(a)({\ + union ui32_q8 uA;\ + uA.ui = (a);\ + uA.q;\ +}) + + +#define castP8(a)({\ + union ui8_p8 uA;\ + uA.ui = (a);\ + uA.p;\ +}) + + +#define negP8(a)({\ + union ui8_p8 uA;\ + uA.p = (a);\ + uA.ui = -uA.ui&0xFF;\ + uA.p; \ +}) + +#define absP8(a)({\ + union ui8_p8 uA;\ + uA.p = (a);\ + int mask = uA.ui >> 7;\ + uA.ui = ((uA.ui + mask) ^ mask)&0xFF;\ + uA.p; \ +}) + +//Helper +double convertP8ToDouble(posit8_t); +posit8_t convertDoubleToP8(double); + +/*---------------------------------------------------------------------------- +| 16-bit (half-precision) posit operations. +*----------------------------------------------------------------------------*/ +#define isNaRP16UI( a ) ( ((a) ^ 0x8000) == 0 ) + +uint_fast32_t p16_to_ui32( posit16_t ); +uint_fast64_t p16_to_ui64( posit16_t ); +int_fast32_t p16_to_i32( posit16_t); +int_fast64_t p16_to_i64( posit16_t ); +posit8_t p16_to_p8( posit16_t ); +posit32_t p16_to_p32( posit16_t ); +//posit64_t p16_to_p64( posit16_t ); + +posit_1_t p16_to_pX1( posit16_t, int ); +posit_2_t p16_to_pX2( posit16_t, int ); + +posit16_t p16_roundToInt( posit16_t); +posit16_t p16_add( posit16_t, posit16_t ); +posit16_t p16_sub( posit16_t, posit16_t ); +posit16_t p16_mul( posit16_t, posit16_t ); +posit16_t p16_mulAdd( posit16_t, posit16_t, posit16_t ); +posit16_t p16_div( posit16_t, posit16_t ); +posit16_t p16_sqrt( posit16_t ); +bool p16_eq( posit16_t, posit16_t ); +bool p16_le( posit16_t, posit16_t ); +bool p16_lt( posit16_t, posit16_t ); + + +#ifdef SOFTPOSIT_QUAD + __float128 convertP16ToQuadDec(posit16_t); + posit16_t convertQuadToP16(__float128); +#endif + +//Quire 16 +quire16_t q16_fdp_add(quire16_t, posit16_t, posit16_t); +quire16_t q16_fdp_sub(quire16_t, posit16_t, posit16_t); +posit16_t convertQ16ToP16(quire16_t); +posit16_t q16_to_p16(quire16_t); +#define isNaRQ16( q ) ( (q).v[0]==0x8000000000000000ULL && (q).v[1]==0 ) +#define isQ16Zero(q) (q.v[0]==0 && q.v[1]==0) +quire16_t 
q16_TwosComplement(quire16_t); + + +int_fast64_t p16_int( posit16_t); + +void printBinary(uint64_t*, int); +void printBinaryPX(uint32_t*, int); +void printHex(uint64_t); +void printHex64(uint64_t); +void printHexPX(uint32_t, int); + +#define q16_clr(q) ({\ + (q).v[0]=0;\ + (q).v[1]=0;\ + q;\ +}) + +static inline quire16_t q16Clr(){ + quire16_t q; + q.v[0]=0; + q.v[1]=0; + return q; +} + +#define castQ16(l, r)({\ + union ui128_q16 uA;\ + uA.ui[0] = l; \ + uA.ui[1] = r; \ + uA.q;\ +}) + + +#define castP16(a)({\ + union ui16_p16 uA;\ + uA.ui = (a);\ + uA.p;\ +}) + + + +#define negP16(a)({\ + union ui16_p16 uA;\ + uA.p = (a);\ + uA.ui = -uA.ui&0xFFFF;\ + uA.p; \ +}) + +#define absP16(a)({\ + union ui16_p16 uA;\ + uA.p = (a);\ + int mask = uA.ui >> 15;\ + uA.ui = ((uA.ui + mask) ^ mask)&0xFFFF;\ + uA.p; \ +}) + +//Helper + +double convertP16ToDouble(posit16_t); +posit16_t convertFloatToP16(float); +posit16_t convertDoubleToP16(double); + +/*---------------------------------------------------------------------------- +| 32-bit (single-precision) posit operations. 
+*----------------------------------------------------------------------------*/ +uint_fast32_t p32_to_ui32( posit32_t ); +uint_fast64_t p32_to_ui64( posit32_t); +int_fast32_t p32_to_i32( posit32_t ); +int_fast64_t p32_to_i64( posit32_t ); + +posit8_t p32_to_p8( posit32_t ); +posit16_t p32_to_p16( posit32_t ); +//posit64_t p32_to_p64( posit32_t ); + + +posit32_t p32_roundToInt( posit32_t ); +posit32_t p32_add( posit32_t, posit32_t ); +posit32_t p32_sub( posit32_t, posit32_t ); +posit32_t p32_mul( posit32_t, posit32_t ); +posit32_t p32_mulAdd( posit32_t, posit32_t, posit32_t ); +posit32_t p32_div( posit32_t, posit32_t ); +posit32_t p32_sqrt( posit32_t ); +bool p32_eq( posit32_t, posit32_t ); +bool p32_le( posit32_t, posit32_t ); +bool p32_lt( posit32_t, posit32_t ); + +posit_1_t p32_to_pX1( posit32_t, int); +posit_2_t p32_to_pX2( posit32_t, int ); + +#define isNaRP32UI( a ) ( ((a) ^ 0x80000000) == 0 ) + +int64_t p32_int( posit32_t); + +#ifdef SOFTPOSIT_QUAD + __float128 convertP32ToQuad(posit32_t); + posit32_t convertQuadToP32(__float128); +#endif + + +quire32_t q32_fdp_add(quire32_t, posit32_t, posit32_t); +quire32_t q32_fdp_sub(quire32_t, posit32_t, posit32_t); +posit32_t q32_to_p32(quire32_t); +#define isNaRQ32( q ) ( q.v[0]==0x8000000000000000ULL && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) +#define isQ32Zero(q) (q.v[0]==0 && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) +quire32_t q32_TwosComplement(quire32_t); + +#define q32_clr(q) ({\ + q.v[0]=0;\ + q.v[1]=0;\ + q.v[2]=0;\ + q.v[3]=0;\ + q.v[4]=0;\ + q.v[5]=0;\ + q.v[6]=0;\ + q.v[7]=0;\ + q;\ +}) + +static inline quire32_t q32Clr(){ + quire32_t q; + q.v[0]=0; + q.v[1]=0; + q.v[2]=0; + q.v[3]=0; + q.v[4]=0; + q.v[5]=0; + q.v[6]=0; + q.v[7]=0; + return q; +} + +#define castQ32(l0, l1, l2, l3, l4, l5, l6, l7)({\ + union ui512_q32 uA;\ + uA.ui[0] = l0; \ + uA.ui[1] = l1; \ + uA.ui[2] = l2; \ + uA.ui[3] = l3; \ + uA.ui[4] = 
l4; \ + uA.ui[5] = l5; \ + uA.ui[6] = l6; \ + uA.ui[7] = l7; \ + uA.q;\ +}) + + +#define castP32(a)({\ + posit32_t pA = {.v = (a)};\ + pA; \ +}) + + + +#define negP32(a)({\ + union ui32_p32 uA;\ + uA.p = (a);\ + uA.ui = -uA.ui&0xFFFFFFFF;\ + uA.p; \ +}) + +#define absP32(a)({\ + union ui32_p32 uA;\ + uA.p = (a);\ + int mask = uA.ui >> 31; \ + uA.ui = ((uA.ui + mask) ^ mask)&0xFFFFFFFF; \ + uA.p; \ +}) + +//Helper + +double convertP32ToDouble(posit32_t); +posit32_t convertFloatToP32(float); +posit32_t convertDoubleToP32(double); + + +/*---------------------------------------------------------------------------- +| Dynamic 2 to 32-bit Posits for es = 2 +*----------------------------------------------------------------------------*/ + +posit_2_t pX2_add( posit_2_t, posit_2_t, int); +posit_2_t pX2_sub( posit_2_t, posit_2_t, int); +posit_2_t pX2_mul( posit_2_t, posit_2_t, int); +posit_2_t pX2_div( posit_2_t, posit_2_t, int); +posit_2_t pX2_mulAdd( posit_2_t, posit_2_t, posit_2_t, int); +posit_2_t pX2_roundToInt( posit_2_t, int ); +posit_2_t ui32_to_pX2( uint32_t, int ); +posit_2_t ui64_to_pX2( uint64_t, int ); +posit_2_t i32_to_pX2( int32_t, int ); +posit_2_t i64_to_pX2( int64_t, int ); +posit_2_t pX2_sqrt( posit_2_t, int ); + +uint_fast32_t pX2_to_ui32( posit_2_t ); +uint_fast64_t pX2_to_ui64( posit_2_t ); +int_fast32_t pX2_to_i32( posit_2_t ); +int_fast64_t pX2_to_i64( posit_2_t ); +int64_t pX2_int( posit_2_t ); + +bool pX2_eq( posit_2_t, posit_2_t); +bool pX2_le( posit_2_t, posit_2_t); +bool pX2_lt( posit_2_t, posit_2_t); + +posit8_t pX2_to_p8( posit_2_t ); +posit16_t pX2_to_p16( posit_2_t ); +posit_2_t pX2_to_pX2( posit_2_t, int); +posit_1_t pX2_to_pX1( posit_2_t, int); +static inline posit32_t pX2_to_p32(posit_2_t pA){ + posit32_t p32 = {.v = pA.v}; + return p32; +} + +#define isNaRPX2UI( a ) ( ((a) ^ 0x80000000) == 0 ) + +//Helper +posit_2_t convertDoubleToPX2(double, int); + +double convertPX2ToDouble(posit_2_t); + +#ifdef SOFTPOSIT_QUAD + __float128 
convertPX2ToQuad(posit_2_t); + posit_2_t convertQuadToPX2(__float128, int); +#endif + + +quire_2_t qX2_fdp_add( quire_2_t q, posit_2_t pA, posit_2_t ); +quire_2_t qX2_fdp_sub( quire_2_t q, posit_2_t pA, posit_2_t ); +posit_2_t qX2_to_pX2(quire_2_t, int); +#define isNaRQX2( q ) ( q.v[0]==0x8000000000000000ULL && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) +#define isQX2Zero(q) (q.v[0]==0 && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) +quire_2_t qX2_TwosComplement(quire_2_t); + +#define qX2_clr(q) ({\ + q.v[0]=0;\ + q.v[1]=0;\ + q.v[2]=0;\ + q.v[3]=0;\ + q.v[4]=0;\ + q.v[5]=0;\ + q.v[6]=0;\ + q.v[7]=0;\ + q;\ +}) + +static inline quire_2_t qX2Clr(){ + quire_2_t q; + q.v[0]=0; + q.v[1]=0; + q.v[2]=0; + q.v[3]=0; + q.v[4]=0; + q.v[5]=0; + q.v[6]=0; + q.v[7]=0; + return q; +} + +#define castQX2(l0, l1, l2, l3, l4, l5, l6, l7)({\ + union ui512_qX2 uA;\ + uA.ui[0] = l0; \ + uA.ui[1] = l1; \ + uA.ui[2] = l2; \ + uA.ui[3] = l3; \ + uA.ui[4] = l4; \ + uA.ui[5] = l5; \ + uA.ui[6] = l6; \ + uA.ui[7] = l7; \ + uA.q;\ +}) + + +#define castPX2(a)({\ + posit_2_t pA = {.v = (a)};\ + pA; \ +}) + + + +#define negPX2(a)({\ + union ui32_pX2 uA;\ + uA.p = (a);\ + uA.ui = -uA.ui&0xFFFFFFFF;\ + uA.p; \ +}) + +#define absPX2(a)({\ + union ui32_pX2 uA;\ + uA.p = (a);\ + int mask = uA.ui >> 31; \ + uA.ui = ((uA.ui + mask) ^ mask)&0xFFFFFFFF; \ + uA.p; \ +}) + +/*---------------------------------------------------------------------------- +| Dynamic 2 to 32-bit Posits for es = 1 +*----------------------------------------------------------------------------*/ + +posit_1_t pX1_add( posit_1_t, posit_1_t, int); +posit_1_t pX1_sub( posit_1_t, posit_1_t, int); +posit_1_t pX1_mul( posit_1_t, posit_1_t, int); +posit_1_t pX1_div( posit_1_t, posit_1_t, int); +posit_1_t pX1_mulAdd( posit_1_t, posit_1_t, posit_1_t, int); +posit_1_t pX1_roundToInt( posit_1_t, int ); +posit_1_t ui32_to_pX1( uint32_t, int ); 
+posit_1_t ui64_to_pX1( uint64_t, int ); +posit_1_t i32_to_pX1( int32_t, int ); +posit_1_t i64_to_pX1( int64_t, int ); +posit_1_t pX1_sqrt( posit_1_t, int ); + +uint_fast32_t pX1_to_ui32( posit_1_t ); +uint_fast64_t pX1_to_ui64( posit_1_t ); +int_fast32_t pX1_to_i32( posit_1_t ); +int_fast64_t pX1_to_i64( posit_1_t ); +int64_t pX1_int( posit_1_t ); + +bool pX1_eq( posit_1_t, posit_1_t); +bool pX1_le( posit_1_t, posit_1_t); +bool pX1_lt( posit_1_t, posit_1_t); + +posit8_t pX1_to_p8( posit_1_t ); +posit16_t pX1_to_p16( posit_1_t ); +posit32_t pX1_to_p32( posit_1_t ); +posit_1_t pX1_to_pX1( posit_1_t, int); +posit_2_t pX1_to_pX2( posit_1_t, int); + + +#define isNaRpX1UI( a ) ( ((a) ^ 0x80000000) == 0 ) + +//Helper +posit_1_t convertDoubleToPX1(double, int); +double convertPX1ToDouble(posit_1_t); + +#ifdef SOFTPOSIT_QUAD + __float128 convertPX1ToQuad(posit_1_t); + posit_1_t convertQuadToPX1(__float128, int); +#endif + + +quire_1_t qX1_fdp_add( quire_1_t q, posit_1_t pA, posit_1_t ); +quire_1_t qX1_fdp_sub( quire_1_t q, posit_1_t pA, posit_1_t ); +posit_1_t qX1_to_pX1(quire_1_t, int); +#define isNaRqX1( q ) ( q.v[0]==0x8000000000000000ULL && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) +#define isqX1Zero(q) (q.v[0]==0 && q.v[1]==0 && q.v[2]==0 && q.v[3]==0 && q.v[4]==0 && q.v[5]==0 && q.v[6]==0 && q.v[7]==0) +quire_1_t qX1_TwosComplement(quire_1_t); + +#define qX1_clr(q) ({\ + q.v[0]=0;\ + q.v[1]=0;\ + q.v[2]=0;\ + q.v[3]=0;\ + q.v[4]=0;\ + q.v[5]=0;\ + q.v[6]=0;\ + q.v[7]=0;\ + q;\ +}) + +static inline quire_1_t qX1Clr(){ + quire_1_t q; + q.v[0]=0; + q.v[1]=0; + q.v[2]=0; + q.v[3]=0; + q.v[4]=0; + q.v[5]=0; + q.v[6]=0; + q.v[7]=0; + return q; +} + +#define castqX1(l0, l1, l2, l3, l4, l5, l6, l7)({\ + union ui512_qX1 uA;\ + uA.ui[0] = l0; \ + uA.ui[1] = l1; \ + uA.ui[2] = l2; \ + uA.ui[3] = l3; \ + uA.ui[4] = l4; \ + uA.ui[5] = l5; \ + uA.ui[6] = l6; \ + uA.ui[7] = l7; \ + uA.q;\ +}) + + +#define castpX1(a)({\ + posit_1_t pA = 
{.v = (a)};\ + pA; \ +}) + + + +#define negpX1(a)({\ + union ui32_pX1 uA;\ + uA.p = (a);\ + uA.ui = -uA.ui&0xFFFFFFFF;\ + uA.p; \ +}) + +#define absPX1(a)({\ + union ui32_pX1 uA;\ + uA.p = (a);\ + int mask = uA.ui >> 31; \ + uA.ui = ((uA.ui + mask) ^ mask)&0xFFFFFFFF;\ + uA.p; \ +}) +/*---------------------------------------------------------------------------- +| 64-bit (double-precision) floating-point operations. +*----------------------------------------------------------------------------*/ +/*uint_fast32_t p64_to_ui32( posit64_t, uint_fast16_t, bool ); +uint_fast64_t p64_to_ui64( posit64_t, uint_fast16_t, bool ); +int_fast32_t p64_to_i32( posit64_t, uint_fast16_t, bool ); +int_fast64_t p64_to_i64( posit64_t, uint_fast16_t, bool ); + +posit8_t p64_to_p8( posit64_t ); +posit16_t p64_to_p16( posit64_t ); +posit32_t p64_to_p32( posit64_t ); + +posit64_t p64_roundToInt( posit64_t, uint_fast16_t, bool ); +posit64_t p64_add( posit64_t, posit64_t ); +posit64_t p64_sub( posit64_t, posit64_t ); +posit64_t p64_mul( posit64_t, posit64_t ); +posit64_t p64_mulAdd( posit64_t, posit64_t, posit64_t ); +posit64_t p64_div( posit64_t, posit64_t ); +posit64_t p64_rem( posit64_t, posit64_t ); +posit64_t p64_sqrt( posit64_t ); +bool p64_eq( posit64_t, posit64_t ); +bool p64_le( posit64_t, posit64_t );*/ + +#ifdef __cplusplus +} +#endif + +#endif + diff --git a/source/luametatex/source/libraries/softposit/source/include/softposit_cpp.h b/source/luametatex/source/libraries/softposit/source/include/softposit_cpp.h new file mode 100644 index 000000000..bafdbe188 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/include/softposit_cpp.h @@ -0,0 +1,1928 @@ +/* +Author: S.H. 
Leong (Cerlane) + +Copyright (c) 2018 Next Generation Arithmetic + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+*/ + +#ifndef INCLUDE_SOFTPOSIT_CPP_H_ +#define INCLUDE_SOFTPOSIT_CPP_H_ + +#include <iostream> +#include "softposit.h" +#include "math.h" +//#include "positMath.h" + +#ifdef __cplusplus + +struct posit8{ + uint8_t value; + + posit8(double x=0) : value(castUI(convertDoubleToP8(x))) { + } + + //Equal + posit8& operator=(const double a) { + value = castUI(convertDoubleToP8(a)); + return *this; + } + posit8& operator=(const int a) { + value = castUI(i32_to_p8(a)); + return *this; + } + + //Add + posit8 operator+(const posit8 &a) const{ + posit8 ans; + ans.value = castUI(p8_add(castP8(value), castP8(a.value))); + return ans; + } + + //Add equal + posit8& operator+=(const posit8 &a) { + value = castUI(p8_add(castP8(value), castP8(a.value))); + return *this; + } + + //Subtract + posit8 operator-(const posit8 &a) const{ + posit8 ans; + ans.value = castUI(p8_sub(castP8(value), castP8(a.value))); + return ans; + } + + //Subtract equal + posit8& operator-=(const posit8 &a) { + value = castUI(p8_sub(castP8(value), castP8(a.value))); + return *this; + } + + //Multiply + posit8 operator*(const posit8 &a) const{ + posit8 ans; + ans.value = castUI(p8_mul(castP8(value), castP8(a.value))); + return ans; + } + + //Multiply equal + posit8& operator*=(const posit8 &a) { + value = castUI(p8_mul(castP8(value), castP8(a.value))); + return *this; + } + + + //Divide + posit8 operator/(const posit8 &a) const{ + posit8 ans; + ans.value = castUI(p8_div(castP8(value), castP8(a.value))); + return ans; + } + + //Divide equal + posit8& operator/=(const posit8 &a) { + value = castUI(p8_div(castP8(value), castP8(a.value))); + return *this; + } + + //less than + bool operator<(const posit8 &a) const{ + return p8_lt(castP8(value), castP8(a.value)); + } + + //less than equal + bool operator<=(const posit8 &a) const{ + return p8_le(castP8(value), castP8(a.value)); + } + + //equal + bool operator==(const posit8 &a) const{ + return p8_eq(castP8(value), castP8(a.value)); + } + + + //Not equalCPP + bool 
operator!=(const posit8 &a) const{ + return !p8_eq(castP8(value), castP8(a.value)); + } + + //greater than + bool operator>(const posit8 &a) const{ + return p8_lt(castP8(a.value), castP8(value)); + } + + //greater than equal + bool operator>=(const posit8 &a) const{ + return p8_le(castP8(a.value), castP8(value)); + } + + //plus plus + posit8& operator++() { + value = castUI(p8_add(castP8(value), castP8(0x40))); + return *this; + } + + //minus minus + posit8& operator--() { + value = castUI(p8_sub(castP8(value), castP8(0x40))); + return *this; + } + + //Binary operators + + posit8 operator>>(const int &x) { + posit8 ans; + ans.value = value>>x; + return ans; + } + + posit8& operator>>=(const int &x) { + value = value>>x; + return *this; + } + + posit8 operator<<(const int &x) { + posit8 ans; + ans.value = (value<<x)&0xFF; + return ans; + } + + posit8& operator<<=(const int &x) { + value = (value<<x)&0xFF; + return *this; + } + + + //Negate + posit8 operator-() const{ + posit8 ans; + ans.value = -value; + return ans; + } + + //NOT + posit8 operator~() { + posit8 ans; + ans.value = ~value; + return ans; + } + + //AND (bitwise on the raw encoding; returns the combined copy, *this is not modified) + posit8 operator&(const posit8 &a) const{ + posit8 ans; + ans.value = (value & a.value); + return ans; + } + + //AND equal + posit8& operator&=(const posit8 &a) { + value = (value & a.value); + return *this; + } + + //OR + posit8 operator|(const posit8 &a) const{ + posit8 ans; + ans.value = (value | a.value); + return ans; + } + + + //OR equal + posit8& operator|=(const posit8 &a) { + value = (value | a.value); + return *this; + } + + //XOR + posit8 operator^(const posit8 &a) const{ + posit8 ans; + ans.value = (value ^ a.value); + return ans; + } + + //XOR equal + posit8& operator^=(const posit8 &a) { + value = (value ^ a.value); + return *this; + } + + //Logical Operator + //! 
+ bool operator!()const{ + return !value; + } + + //&& + bool operator&&(const posit8 &a) const{ + return (value && a.value); + } + + //|| + bool operator||(const posit8 &a) const{ + return (value || a.value); + } + + bool isNaR(){ + return isNaRP8UI(value); + } + + double toDouble()const{ + return convertP8ToDouble(castP8(value)); + } + + long long int toInt()const{ + return p8_int(castP8(value)); + } + + long long int toRInt()const{ + return p8_to_i64(castP8(value)); + } + posit8& sqrt(){ + value = castUI( p8_sqrt(castP8(value)) ); + return *this; + } + posit8& rint(){ + value = castUI( p8_roundToInt(castP8(value)) ); + return *this; + } + posit8 fma(posit8 a, posit8 b){ // + (a*b) + posit8 ans; + ans.value = castUI(p8_mulAdd(castP8(a.value), castP8(b.value), castP8(value))); + return ans; + } + posit8& toNaR(){ + value = 0x80; + return *this; + } +}; + + +struct posit16{ + uint16_t value; + posit16(double x=0) : value(castUI(convertDoubleToP16(x))) { + } + + //Equal + posit16& operator=(const double a) { + value = castUI(convertDoubleToP16(a)); + return *this; + } + posit16& operator=(const int a) { + value = castUI(i32_to_p16(a)); + return *this; + } + + //Add + posit16 operator+(const posit16 &a) const{ + posit16 ans; + ans.value = castUI(p16_add(castP16(value), castP16(a.value))); + return ans; + } + + //Add equal + posit16& operator+=(const posit16 &a) { + value = castUI(p16_add(castP16(value), castP16(a.value))); + return *this; + } + + //Subtract + posit16 operator-(const posit16 &a) const{ + posit16 ans; + ans.value = castUI(p16_sub(castP16(value), castP16(a.value))); + return ans; + } + + //Subtract equal + posit16& operator-=(const posit16 &a) { + value = castUI(p16_sub(castP16(value), castP16(a.value))); + return *this; + } + + //Multiply + posit16 operator*(const posit16 &a) const{ + posit16 ans; + ans.value = castUI(p16_mul(castP16(value), castP16(a.value))); + return ans; + } + + //Multiply equal + posit16& operator*=(const posit16 &a) { + value = 
castUI(p16_mul(castP16(value), castP16(a.value))); + return *this; + } + + + //Divide + posit16 operator/(const posit16 &a) const{ + posit16 ans; + ans.value = castUI(p16_div(castP16(value), castP16(a.value))); + return ans; + } + + //Divide equal + posit16& operator/=(const posit16 &a) { + value = castUI(p16_div(castP16(value), castP16(a.value))); + return *this; + } + + //less than + bool operator<(const posit16 &a) const{ + return p16_lt(castP16(value), castP16(a.value)); + } + + //less than equal + bool operator<=(const posit16 &a) const{ + return p16_le(castP16(value), castP16(a.value)); + } + + //equal + bool operator==(const posit16 &a) const{ + return p16_eq(castP16(value), castP16(a.value)); + } + + + //Not equal + bool operator!=(const posit16 &a) const{ + return !p16_eq(castP16(value), castP16(a.value)); + } + + //greater than + bool operator>(const posit16 &a) const{ + return p16_lt(castP16(a.value), castP16(value)); + } + + //greater than equal + bool operator>=(const posit16 &a) const{ + return p16_le(castP16(a.value), castP16(value)); + } + + //plus plus + posit16& operator++() { + value = castUI(p16_add(castP16(value), castP16(0x4000))); + return *this; + } + + //minus minus + posit16& operator--() { + value = castUI(p16_sub(castP16(value), castP16(0x4000))); + return *this; + } + + //Binary operators + + posit16 operator>>(const int &x) { + posit16 ans; + ans.value = value>>x; + return ans; + } + + posit16& operator>>=(const int &x) { + value = value>>x; + return *this; + } + + posit16 operator<<(const int &x) { + posit16 ans; + ans.value = (value<<x)&0xFFFF; + return ans; + } + + posit16& operator<<=(const int &x) { + value = (value<<x)&0xFFFF; + return *this; + } + + //Negate + posit16 operator-() const{ + posit16 ans; + ans.value = -value; + return ans; + } + + //Binary NOT + posit16 operator~() { + posit16 ans; + ans.value = ~value; + return ans; + } + + //AND + posit16 operator&(const posit16 &a) const{ + posit16 ans; + ans.value = (value & 
a.value); + return ans; + } + + //AND equal + posit16& operator&=(const posit16 &a) { + value = (value & a.value); + return *this; + } + + //OR + posit16 operator|(const posit16 &a) const{ + posit16 ans; + ans.value = (value | a.value); + return ans; + } + + + //OR equal + posit16& operator|=(const posit16 &a) { + value = (value | a.value); + return *this; + } + + //XOR + posit16 operator^(const posit16 &a) const{ + posit16 ans; + ans.value = (value ^ a.value); + return ans; + } + + //XOR equal + posit16& operator^=(const posit16 &a) { + value = (value ^ a.value); + return *this; + } + + //Logical operator + //! + bool operator!()const{ + return !value; + } + + //&& + bool operator&&(const posit16 &a) const{ + return (value && a.value); + } + + //|| + bool operator||(const posit16 &a) const{ + return (value || a.value); + } + + bool isNaR(){ + return isNaRP16UI(value); + } + + double toDouble()const{ + return convertP16ToDouble(castP16(value)); + } + + long long int toInt()const{ + return p16_int(castP16(value)); + } + + long long int toRInt()const{ + return p16_to_i64(castP16(value)); + } + posit16& sqrt(){ + value = castUI( p16_sqrt(castP16(value)) ); + return *this; + } + posit16& rint(){ + value = castUI( p16_roundToInt(castP16(value)) ); + return *this; + } + posit16 fma(posit16 a, posit16 b){ // + (a*b) + posit16 ans; + ans.value = castUI(p16_mulAdd(castP16(a.value), castP16(b.value), castP16(value))); + return ans; + } + posit16& toNaR(){ + value = 0x8000; + return *this; + } + + +}; + +struct posit32{ + uint32_t value; + posit32(double x=0) : value(castUI(convertDoubleToP32(x))) { + } + + //Equal + posit32& operator=(const double a) { + value = castUI(convertDoubleToP32(a)); + return *this; + } + posit32& operator=(const int a) { + value = castUI(i32_to_p32(a)); + return *this; + } + + //Add + posit32 operator+(const posit32 &a) const{ + posit32 ans; + ans.value = castUI(p32_add(castP32(value), castP32(a.value))); + return ans; + } + + //Add equal + 
posit32& operator+=(const posit32 &a) { + value = castUI(p32_add(castP32(value), castP32(a.value))); + return *this; + } + + //Subtract + posit32 operator-(const posit32 &a) const{ + posit32 ans; + ans.value = castUI(p32_sub(castP32(value), castP32(a.value))); + return ans; + } + + //Subtract equal + posit32& operator-=(const posit32 &a) { + value = castUI(p32_sub(castP32(value), castP32(a.value))); + return *this; + } + + //Multiply + posit32 operator*(const posit32 &a) const{ + posit32 ans; + ans.value = castUI(p32_mul(castP32(value), castP32(a.value))); + return ans; + } + + //Multiply equal + posit32& operator*=(const posit32 &a) { + value = castUI(p32_mul(castP32(value), castP32(a.value))); + return *this; + } + + + //Divide + posit32 operator/(const posit32 &a) const{ + posit32 ans; + ans.value = castUI(p32_div(castP32(value), castP32(a.value))); + return ans; + } + + //Divide equal + posit32& operator/=(const posit32 &a) { + value = castUI(p32_div(castP32(value), castP32(a.value))); + return *this; + } + + //less than + bool operator<(const posit32 &a) const{ + return p32_lt(castP32(value), castP32(a.value)); + } + + //less than equal + bool operator<=(const posit32 &a) const{ + return p32_le(castP32(value), castP32(a.value)); + } + + //equal + bool operator==(const posit32 &a) const{ + return p32_eq(castP32(value), castP32(a.value)); + } + + + //Not equalCPP + bool operator!=(const posit32 &a) const{ + return !p32_eq(castP32(value), castP32(a.value)); + } + + //greater than + bool operator>(const posit32 &a) const{ + return p32_lt(castP32(a.value), castP32(value)); + } + + //greater than equal + bool operator>=(const posit32 &a) const{ + return p32_le(castP32(a.value), castP32(value)); + } + + //plus plus + posit32& operator++() { + value = castUI(p32_add(castP32(value), castP32(0x40000000))); + return *this; + } + + //minus minus + posit32& operator--() { + value = castUI(p32_sub(castP32(value), castP32(0x40000000))); + return *this; + } + + //Binary 
operators + + posit32 operator>>(const int &x) { + posit32 ans; + ans.value = value>>x; + return ans; + } + + posit32& operator>>=(const int &x) { + value = value>>x; + return *this; + } + + posit32 operator<<(const int &x) { + posit32 ans; + ans.value = (value<<x)&0xFFFFFFFF; + return ans; + } + + posit32& operator<<=(const int &x) { + value = (value<<x)&0xFFFFFFFF; + return *this; + } + + + //Negate + posit32 operator-() const{ + posit32 ans; + ans.value = -value; + return ans; + } + + //NOT + posit32 operator~() { + posit32 ans; + ans.value = ~value; + return ans; + } + + //AND (bitwise on the raw encoding; returns the combined copy, *this is not modified) + posit32 operator&(const posit32 &a) const{ + posit32 ans; + ans.value = (value & a.value); + return ans; + } + + //AND equal + posit32& operator&=(const posit32 &a) { + value = (value & a.value); + return *this; + } + + //OR + posit32 operator|(const posit32 &a) const{ + posit32 ans; + ans.value = (value | a.value); + return ans; + } + + + //OR equal + posit32& operator|=(const posit32 &a) { + value = (value | a.value); + return *this; + } + + //XOR + posit32 operator^(const posit32 &a) const{ + posit32 ans; + ans.value = (value ^ a.value); + return ans; + } + + //XOR equal + posit32& operator^=(const posit32 &a) { + value = (value ^ a.value); + return *this; + } + + //Logical Operator + //! 
+ bool operator!()const{ + return !value; + } + + //&& + bool operator&&(const posit32 &a) const{ + return (value && a.value); + } + + //|| + bool operator||(const posit32 &a) const{ + return (value || a.value); + } + + bool isNaR(){ + return isNaRP32UI(value); + } + + double toDouble()const{ + return convertP32ToDouble(castP32(value)); + } + + long long int toInt()const{ + return p32_int(castP32(value)); + } + + long long int toRInt()const{ + return p32_to_i64(castP32(value)); + } + posit32& sqrt(){ + value = castUI( p32_sqrt(castP32(value)) ); + return *this; + } + posit32& rint(){ + value = castUI( p32_roundToInt(castP32(value)) ); + return *this; + } + posit32 fma(posit32 a, posit32 b){ // + (a*b) + posit32 ans; + ans.value = castUI(p32_mulAdd(castP32(a.value), castP32(b.value), castP32(value))); + return ans; + } + + posit32& toNaR(){ + value = 0x80000000; + return *this; + } + + +}; + +struct posit_2{ + uint32_t value; + int x; + posit_2(double v=0, int x=32) : value(castUI(convertDoubleToPX2(v, x))), x(x) { + } + + //Equal + posit_2& operator=(const double a) { + value = castUI(convertDoubleToPX2(a, x)); + return *this; + } + posit_2& operator=(const int a) { + value = castUI(i32_to_pX2(a, x)); + return *this; + } + + //Add + posit_2 operator+(const posit_2 &a) const{ + posit_2 ans; + ans.value = castUI(pX2_add(castPX2(value), castPX2(a.value), x)); + ans.x = x; + return ans; + } + + //Add equal + posit_2& operator+=(const posit_2 &a) { + value = castUI(pX2_add(castPX2(value), castPX2(a.value), x)); + return *this; + } + + //Subtract + posit_2 operator-(const posit_2 &a) const{ + posit_2 ans; + ans.value = castUI(pX2_sub(castPX2(value), castPX2(a.value), x)); + ans.x = x; + return ans; + } + + //Subtract equal + posit_2& operator-=(const posit_2 &a) { + value = castUI(pX2_sub(castPX2(value), castPX2(a.value), x)); + return *this; + } + + //Multiply + posit_2 operator*(const posit_2 &a) const{ + posit_2 ans; + ans.value = castUI(pX2_mul(castPX2(value), 
castPX2(a.value), x)); + ans.x = x; + return ans; + } + + //Multiply equal + posit_2& operator*=(const posit_2 &a) { + value = castUI(pX2_mul(castPX2(value), castPX2(a.value), x)); + return *this; + } + + + //Divide + posit_2 operator/(const posit_2 &a) const{ + posit_2 ans; + ans.value = castUI(pX2_div(castPX2(value), castPX2(a.value), x)); + ans.x = x; + return ans; + } + + //Divide equal + posit_2& operator/=(const posit_2 &a) { + value = castUI(pX2_div(castPX2(value), castPX2(a.value), x)); + return *this; + } + + //less than + bool operator<(const posit_2 &a) const{ + return pX2_lt(castPX2(value), castPX2(a.value)); + } + + //less than equal + bool operator<=(const posit_2 &a) const{ + return pX2_le(castPX2(value), castPX2(a.value)); + } + + //equal + bool operator==(const posit_2 &a) const{ + return pX2_eq(castPX2(value), castPX2(a.value)); + } + + + //Not equalCPP + bool operator!=(const posit_2 &a) const{ + return !pX2_eq(castPX2(value), castPX2(a.value)); + } + + //greater than + bool operator>(const posit_2 &a) const{ + return pX2_lt(castPX2(a.value), castPX2(value)); + } + + //greater than equal + bool operator>=(const posit_2 &a) const{ + return pX2_le(castPX2(a.value), castPX2(value)); + } + + //plus plus + posit_2& operator++() { + value = castUI(pX2_add(castPX2(value), castPX2(0x40000000), x)); + return *this; + } + + //minus minus + posit_2& operator--() { + value = castUI(pX2_sub(castPX2(value), castPX2(0x40000000), x)); + return *this; + } + + //Binary operators + + posit_2 operator>>(const int &x) { + posit_2 ans; + ans.value = (value>>x) & ((int32_t)0x80000000>>(x-1)); + ans.x = x; + return ans; + } + + posit_2& operator>>=(const int &x) { + value = (value>>x) & ((int32_t)0x80000000>>(x-1)); + return *this; + } + + posit_2 operator<<(const int &x) { + posit_2 ans; + ans.value = (value<<x)&0xFFFFFFFF; + ans.x = x; + return ans; + } + + posit_2& operator<<=(const int &x) { + value = (value<<x)&0xFFFFFFFF; + return *this; + } + + + //Negate + 
posit_2 operator-() const{ + posit_2 ans; + ans.value = -value; + ans.x = x; + return ans; + } + + //NOT + posit_2 operator~() { + posit_2 ans; + ans.value = ~value; + ans.x = x; + return ans; + } + + //AND + posit_2 operator&(const posit_2 &a) const{ + posit_2 ans; + ans.value = (value & a.value); + return *this; + } + + //AND equal + posit_2& operator&=(const posit_2 &a) { + value = (value & a.value); + return *this; + } + + //OR + posit_2 operator|(const posit_2 &a) const{ + posit_2 ans; + ans.value = (value | a.value); + return ans; + } + + + //OR equal + posit_2& operator|=(const posit_2 &a) { + value = (value | a.value); + return *this; + } + + //XOR + posit_2 operator^(const posit_2 &a) const{ + posit_2 ans; + ans.value = (value ^ a.value); + return ans; + } + + //XOR equal + posit_2& operator^=(const posit_2 &a) { + value = (value ^ a.value); + return *this; + } + + //Logical Operator + //! + bool operator!()const{ + return !value; + } + + //&& + bool operator&&(const posit_2 &a) const{ + return (value && a.value); + } + + //|| + bool operator||(const posit_2 &a) const{ + return (value || a.value); + } + + bool isNaR(){ + return isNaRPX2UI(value); + } + + double toDouble()const{ + return convertPX2ToDouble(castPX2(value)); + } + + long long int toInt()const{ + return pX2_int(castPX2(value)); + } + + long long int toRInt()const{ + return pX2_to_i64(castPX2(value)); + } + posit_2& sqrt(){ + value = castUI( pX2_sqrt(castPX2(value), x) ); + return *this; + } + posit_2& rint(){ + value = castUI( pX2_roundToInt(castPX2(value), x) ); + return *this; + } + posit_2 fma(posit_2 a, posit_2 b){ // + (a*b) + posit_2 ans; + ans.value = castUI(pX2_mulAdd(castPX2(a.value), castPX2(b.value), castPX2(value), x)); + ans.x = x; + return ans; + } + + posit_2 toPositX2(int x){ + posit_2 ans; + ans.value = pX2_to_pX2(castPX2(value), x).v; + ans.x = x; + return ans; + } + posit_2& toNaR(){ + value = 0x80000000; + return *this; + } + + +}; + +struct quire8{ + uint32_t value; + + 
quire8 (uint32_t value=0) : value(value){ + } + + quire8& clr(){ + value = 0; + return *this; + } + + bool isNaR(){ + return isNaRQ8(castQ8(value)); + } + + quire8& qma(posit8 a, posit8 b){ // q += a*b + quire8_t q = q8_fdp_add(castQ8(value), castP8(a.value), castP8(b.value)); + value = q.v; + return *this; + } + quire8& qms(posit16 a, posit16 b){ // q -= a*b + quire8_t q = q8_fdp_sub(castQ8(value), castP8(a.value), castP8(b.value)); + value = q.v; + return *this; + } + posit8 toPosit(){ + posit8 a; + a.value = castUI(q8_to_p8(castQ8(value))); + return a; + } + +}; +struct quire16{ + uint64_t lvalue; + uint64_t rvalue; + + quire16 (uint64_t lvalue=0, uint64_t rvalue=0) : lvalue(lvalue), rvalue(rvalue){ + } + + quire16& clr(){ + lvalue = 0; + rvalue = 0; + return *this; + } + + bool isNaR(){ + return isNaRQ16(castQ16(lvalue, rvalue)); + } + + quire16& qma(posit16 a, posit16 b){ // q += a*b + quire16_t q = q16_fdp_add(castQ16(lvalue, rvalue), castP16(a.value), castP16(b.value)); + lvalue = q.v[0]; + rvalue = q.v[1]; + return *this; + } + quire16& qms(posit16 a, posit16 b){ // q -= a*b + quire16_t q = q16_fdp_sub(castQ16(lvalue, rvalue), castP16(a.value), castP16(b.value)); + lvalue = q.v[0]; + rvalue = q.v[1]; + return *this; + } + posit16 toPosit(){ + posit16 a; + a.value = castUI(q16_to_p16(castQ16(lvalue, rvalue))); + return a; + } + +}; + +struct quire32{ + uint64_t v0; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t v4; + uint64_t v5; + uint64_t v6; + uint64_t v7; + + quire32 (uint64_t v0=0, uint64_t v1=0, uint64_t v2=0, uint64_t v3=0, uint64_t v4=0, uint64_t v5=0, uint64_t v6=0, uint64_t v7=0) : + v0(v0), v1(v1), v2(v2), v3(v3), v4(v4), v5(v5), v6(v6), v7(v7){ + } + + quire32& clr(){ + v0 = 0; + v1 = 0; + v2 = 0; + v3 = 0; + v4 = 0; + v5 = 0; + v6 = 0; + v7 = 0; + return *this; + } + + bool isNaR(){ + return isNaRQ32(castQ32(v0, v1, v2, v3, v4, v5, v6, v7)); + } + + quire32& qma(posit32 a, posit32 b){ // q += a*b + quire32_t q = q32_fdp_add(castQ32(v0, 
v1, v2, v3, v4, v5, v6, v7), + castP32(a.value), castP32(b.value)); + v0 = q.v[0]; + v1 = q.v[1]; + v2 = q.v[2]; + v3 = q.v[3]; + v4 = q.v[4]; + v5 = q.v[5]; + v6 = q.v[6]; + v7 = q.v[7]; + return *this; + } + quire32& qms(posit32 a, posit32 b){ // q -= a*b + quire32_t q = q32_fdp_sub(castQ32(v0, v1, v2, v3, v4, v5, v6, v7), castP32(a.value), castP32(b.value)); + v0 = q.v[0]; + v1 = q.v[1]; + v2 = q.v[2]; + v3 = q.v[3]; + v4 = q.v[4]; + v5 = q.v[5]; + v6 = q.v[6]; + v7 = q.v[7]; + return *this; + } + posit32 toPosit(){ + posit32 a; + a.value = castUI(q32_to_p32(castQ32(v0, v1, v2, v3, v4, v5, v6, v7))); + return a; + } + +}; + +struct quire_2{ + uint64_t v0; + uint64_t v1; + uint64_t v2; + uint64_t v3; + uint64_t v4; + uint64_t v5; + uint64_t v6; + uint64_t v7; + int x; + + quire_2 (uint64_t v0=0, uint64_t v1=0, uint64_t v2=0, uint64_t v3=0, uint64_t v4=0, uint64_t v5=0, uint64_t v6=0, uint64_t v7=0, int x=32) : + v0(v0), v1(v1), v2(v2), v3(v3), v4(v4), v5(v5), v6(v6), v7(v7), x(x){ + } + + quire_2& clr(){ + v0 = 0; + v1 = 0; + v2 = 0; + v3 = 0; + v4 = 0; + v5 = 0; + v6 = 0; + v7 = 0; + return *this; + } + + bool isNaR(){ + return isNaRQX2(castQX2(v0, v1, v2, v3, v4, v5, v6, v7)); + } + + quire_2& qma(posit_2 a, posit_2 b){ // q += a*b + quire_2_t q = qX2_fdp_add(castQX2(v0, v1, v2, v3, v4, v5, v6, v7), + castPX2(a.value), castPX2(b.value)); + v0 = q.v[0]; + v1 = q.v[1]; + v2 = q.v[2]; + v3 = q.v[3]; + v4 = q.v[4]; + v5 = q.v[5]; + v6 = q.v[6]; + v7 = q.v[7]; + return *this; + } + quire_2& qms(posit_2 a, posit_2 b){ // q -= a*b + quire_2_t q = qX2_fdp_sub(castQX2(v0, v1, v2, v3, v4, v5, v6, v7), castPX2(a.value), castPX2(b.value)); + v0 = q.v[0]; + v1 = q.v[1]; + v2 = q.v[2]; + v3 = q.v[3]; + v4 = q.v[4]; + v5 = q.v[5]; + v6 = q.v[6]; + v7 = q.v[7]; + return *this; + } + posit_2 toPosit(){ + posit_2 a; + a.value = castUI(qX2_to_pX2(castQX2(v0, v1, v2, v3, v4, v5, v6, v7), x)); + a.x = x; + return a; + } + +}; + +inline posit8 operator+(int a, posit8 b){ + b.value = 
castUI(p8_add(i32_to_p8(a), castP8(b.value))); + return b; +} +inline posit16 operator+(int a, posit16 b){ + b.value = castUI(p16_add(i32_to_p16(a), castP16(b.value))); + return b; +} +inline posit32 operator+(int a, posit32 b){ + b.value = castUI(p32_add(i32_to_p32(a), castP32(b.value))); + return b; +} +inline posit32 operator+(long long int a, posit32 b){ + b.value = castUI(p32_add(i64_to_p32(a), castP32(b.value))); + return b; +} +inline posit_2 operator+(int a, posit_2 b){ + b.value = castUI(pX2_add(i32_to_pX2(a, b.x), castPX2(b.value), b.x)); + return b; +} +inline posit_2 operator+(long long int a, posit_2 b){ + b.value = castUI(pX2_add(i64_to_pX2(a, b.x), castPX2(b.value), b.x)); + return b; +} + +inline posit8 operator+(double a, posit8 b){ + b.value = castUI(p8_add(convertDoubleToP8(a), castP8(b.value))); + return b; +} +inline posit16 operator+(double a, posit16 b){ + b.value = castUI(p16_add(convertDoubleToP16(a), castP16(b.value))); + return b; +} +inline posit32 operator+(double a, posit32 b){ + b.value = castUI(p32_add(convertDoubleToP32(a), castP32(b.value))); + return b; +} +inline posit_2 operator+(double a, posit_2 b){ + b.value = castUI(pX2_add(convertDoubleToPX2(a, b.x), castPX2(b.value), b.x)); + return b; +} + + +inline posit8 operator-(int a, posit8 b){ + b.value = castUI(p8_sub(i32_to_p8(a), castP8(b.value))); + return b; +} +inline posit16 operator-(int a, posit16 b){ + b.value = castUI(p16_sub(i32_to_p16(a), castP16(b.value))); + return b; +} +inline posit32 operator-(int a, posit32 b){ + b.value = castUI(p32_sub(i32_to_p32(a), castP32(b.value))); + return b; +} +inline posit32 operator-(long long int a, posit32 b){ + b.value = castUI(p32_sub(i64_to_p32(a), castP32(b.value))); + return b; +} +inline posit_2 operator-(int a, posit_2 b){ + b.value = castUI(pX2_sub(i32_to_pX2(a, b.x), castPX2(b.value), b.x)); + return b; +} +inline posit_2 operator-(long long int a, posit_2 b){ + b.value = castUI(pX2_sub(i64_to_pX2(a, b.x), castPX2(b.value), 
b.x)); + return b; +} + + +inline posit8 operator-(double a, posit8 b){ + b.value = castUI(p8_sub(convertDoubleToP8(a), castP8(b.value))); + return b; +} +inline posit16 operator-(double a, posit16 b){ + b.value = castUI(p16_sub(convertDoubleToP16(a), castP16(b.value))); + return b; +} +inline posit32 operator-(double a, posit32 b){ + b.value = castUI(p32_sub(convertDoubleToP32(a), castP32(b.value))); + return b; +} +inline posit_2 operator-(double a, posit_2 b){ + b.value = castUI(pX2_sub(convertDoubleToPX2(a, b.x), castPX2(b.value), b.x)); + return b; +} + + + +inline posit8 operator/(int a, posit8 b){ + b.value = castUI(p8_div(i32_to_p8(a), castP8(b.value))); + return b; +} +inline posit16 operator/(int a, posit16 b){ + b.value = castUI(p16_div(i32_to_p16(a), castP16(b.value))); + return b; +} +inline posit32 operator/(int a, posit32 b){ + b.value = castUI(p32_div(i32_to_p32(a), castP32(b.value))); + return b; +} +inline posit32 operator/(long long int a, posit32 b){ + b.value = castUI(p32_div(i64_to_p32(a), castP32(b.value))); + return b; +} +inline posit_2 operator/(int a, posit_2 b){ + b.value = castUI(pX2_div(i32_to_pX2(a, b.x), castPX2(b.value), b.x)); + return b; +} +inline posit_2 operator/(long long int a, posit_2 b){ + b.value = castUI(pX2_div(i64_to_pX2(a, b.x), castPX2(b.value), b.x)); + return b; +} + + + +inline posit8 operator/(double a, posit8 b){ + b.value = castUI(p8_div(convertDoubleToP8(a), castP8(b.value))); + return b; +} +inline posit16 operator/(double a, posit16 b){ + b.value = castUI(p16_div(convertDoubleToP16(a), castP16(b.value))); + return b; +} +inline posit32 operator/(double a, posit32 b){ + b.value = castUI(p32_div(convertDoubleToP32(a), castP32(b.value))); + return b; +} +inline posit_2 operator/(double a, posit_2 b){ + b.value = castUI(pX2_div(convertDoubleToPX2(a, b.x), castPX2(b.value), b.x)); + return b; +} + + + +inline posit8 operator*(int a, posit8 b){ + b.value = castUI(p8_mul(i32_to_p8(a), castP8(b.value))); + return b; 
+} +inline posit16 operator*(int a, posit16 b){ + posit16 ans; + ans.value = castUI(p16_mul(i32_to_p16(a), castP16(b.value))); + return ans; +} +inline posit32 operator*(int a, posit32 b){ + b.value = castUI(p32_mul(i32_to_p32(a), castP32(b.value))); + return b; +} +inline posit32 operator*(long long int a, posit32 b){ + b.value = castUI(p32_mul(i64_to_p32(a), castP32(b.value))); + return b; +} +inline posit_2 operator*(int a, posit_2 b){ + b.value = castUI(pX2_mul(i32_to_pX2(a, b.x), castPX2(b.value), b.x)); + return b; +} +inline posit_2 operator*(long long int a, posit_2 b){ + b.value = castUI(pX2_mul(i64_to_pX2(a, b.x), castPX2(b.value), b.x)); + return b; +} + + +inline posit8 operator*(double a, posit8 b){ + b.value = castUI(p8_mul(convertDoubleToP8(a), castP8(b.value))); + return b; +} +inline posit16 operator*(double a, posit16 b){ + posit16 ans; + ans.value = castUI(p16_mul(convertDoubleToP16(a), castP16(b.value))); + return ans; +} +inline posit32 operator*(double a, posit32 b){ + b.value = castUI(p32_mul(convertDoubleToP32(a), castP32(b.value))); + return b; +} +inline posit_2 operator*(double a, posit_2 b){ + b.value = castUI(pX2_mul(convertDoubleToPX2(a, b.x), castPX2(b.value), b.x)); + return b; +} + + + +//fused-multiply-add +inline posit8 fma(posit8 a, posit8 b, posit8 c){ // (a*b) + c + posit8 ans; + ans.value = castUI(p8_mulAdd(castP8(a.value), castP8(b.value), castP8(c.value))); + return ans; +} +inline posit16 fma(posit16 a, posit16 b, posit16 c){ // (a*b) + c + posit16 ans; + ans.value = castUI(p16_mulAdd(castP16(a.value), castP16(b.value), castP16(c.value))); + return ans; +} +inline posit32 fma(posit32 a, posit32 b, posit32 c){ // (a*b) + c + posit32 ans; + ans.value = castUI(p32_mulAdd(castP32(a.value), castP32(b.value), castP32(c.value))); + return ans; +} +inline posit_2 fma(posit_2 a, posit_2 b, posit_2 c){ // (a*b) + c + posit_2 ans; + ans.value = castUI(pX2_mulAdd(castPX2(a.value), castPX2(b.value), castPX2(c.value), c.x)); + ans.x = 
c.x; + return ans; +} + + +//Round to nearest integer +inline posit8 rint(posit8 a){ + posit8 ans; + ans.value = castUI( p8_roundToInt(castP8(a.value)) ); + return ans; +} +inline posit16 rint(posit16 a){ + posit16 ans; + ans.value = castUI( p16_roundToInt(castP16(a.value)) ); + return ans; +} +inline posit32 rint(posit32 a){ + posit32 ans; + ans.value = castUI( p32_roundToInt(castP32(a.value)) ); + return ans; +} +inline posit_2 rint(posit_2 a){ + posit_2 ans; + ans.value = castUI( pX2_roundToInt(castPX2(a.value), a.x) ); + ans.x = a.x; + return ans; +} + +//Square root +inline posit8 sqrt(posit8 a){ + posit8 ans; + ans.value = castUI( p8_sqrt(castP8(a.value)) ); + return ans; +} +inline posit16 sqrt(posit16 a){ + posit16 ans; + ans.value = castUI( p16_sqrt(castP16(a.value)) ); + return ans; +} +inline posit32 sqrt(posit32 a){ + posit32 ans; + ans.value = castUI( p32_sqrt(castP32(a.value)) ); + return ans; +} +inline posit_2 sqrt(posit_2 a){ + posit_2 ans; + ans.value = castUI( pX2_sqrt(castPX2(a.value), a.x) ); + ans.x = a.x; + return ans; +} + + + +// Convert to integer + +inline uint32_t uint32 (posit8 a){ + return p8_to_ui32(castP8(a.value)); +} +inline uint32_t uint32 (posit16 a){ + return p16_to_ui32(castP16(a.value)); +} +inline uint32_t uint32 (posit32 a){ + return p32_to_ui32(castP32(a.value)); +} +inline uint32_t uint32 (posit_2 a){ + return pX2_to_ui32(castPX2(a.value)); +} + + + +inline int32_t int32(posit8 a){ + return p8_to_i32(castP8(a.value)); +} +inline int32_t int32(posit16 a){ + return p16_to_i32(castP16(a.value)); +} +inline int32_t int32 (posit32 a){ + return p32_to_i32(castP32(a.value)); +} +inline int32_t int32 (posit_2 a){ + return pX2_to_i32(castPX2(a.value)); +} + + + +inline uint64_t uint64(posit8 a){ + return p8_to_ui64(castP8(a.value)); +} +inline uint64_t uint64(posit16 a){ + return p16_to_ui64(castP16(a.value)); +} +inline uint64_t uint64 (posit32 a){ + return p32_to_ui64(castP32(a.value)); +} +inline uint64_t uint64 (posit_2 a){ + 
return pX2_to_ui64(castPX2(a.value)); +} + + + +inline int64_t int64(posit8 a){ + return p8_to_i64(castP8(a.value)); +} +inline int64_t int64(posit16 a){ + return p16_to_i64(castP16(a.value)); +} +inline int64_t int64 (posit32 a){ + return p32_to_i64(castP32(a.value)); +} +inline int64_t int64 (posit_2 a){ + return pX2_to_i64(castPX2(a.value)); +} + + +//Convert To Posit +inline posit8 p8(posit16 a){ + posit8 b; + b.value = castUI(p16_to_p8(castP16(a.value))); + return b; +} +inline posit8 p8(posit32 a){ + posit8 b; + b.value = castUI(p32_to_p8(castP32(a.value))); + return b; +} +inline posit8 p8(posit_2 a){ + posit8 b; + b.value = castUI(pX2_to_p8(castPX2(a.value))); + return b; +} + + +inline posit16 p16(posit8 a){ + posit16 b; + b.value = castUI(p8_to_p16(castP8(a.value))); + return b; +} +inline posit16 p16(posit32 a){ + posit16 b; + b.value = castUI(p32_to_p16(castP32(a.value))); + return b; +} +inline posit16 p16(posit_2 a){ + posit16 b; + b.value = castUI(pX2_to_p16(castPX2(a.value))); + return b; +} + + +inline posit32 p32(posit8 a){ + posit32 b; + b.value = castUI(p8_to_p32(castP8(a.value))); + return b; +} +inline posit32 p32(posit16 a){ + posit32 b; + b.value = castUI(p16_to_p32(castP16(a.value))); + return b; +} +inline posit32 p32(posit_2 a){ + posit32 b; + b.value = castUI(pX2_to_p32(castPX2(a.value))); + return b; +} + + +inline posit_2 pX2(posit8 a, int x){ + posit_2 b; + b.value = castUI(p8_to_pX2(castP8(a.value), x)); + b.x = x; + return b; +} +inline posit_2 pX2(posit16 a, int x){ + posit_2 b; + b.value = castUI(p16_to_pX2(castP16(a.value), x)); + b.x = x; + return b; +} +inline posit_2 pX2(posit32 a, int x){ + posit_2 b; + b.value = castUI(p32_to_pX2(castP32(a.value), x)); + b.x = x; + return b; +} +inline posit_2 pX2(posit_2 a, int x){ + posit_2 b; + b.value = castUI(pX2_to_pX2(castPX2(a.value), x)); + b.x = x; + return b; +} + + + +inline posit8 p8(uint32_t a){ + posit8 b; + b.value = castUI(ui32_to_p8(a)); + return b; +} +inline posit16 
p16(uint32_t a){ + posit16 b; + b.value = castUI(ui32_to_p16(a)); + return b; +} +inline posit32 p32(uint32_t a){ + posit32 b; + b.value = castUI(ui32_to_p32(a)); + return b; +} +inline posit_2 pX2(uint32_t a, int x){ + posit_2 b; + b.value = castUI(ui32_to_pX2(a, x)); + b.x = x; + return b; +} + + +inline posit8 p8(int32_t a){ + posit8 b; + b.value = castUI(i32_to_p8(a)); + return b; +} +inline posit16 p16(int32_t a){ + posit16 b; + b.value = castUI(i32_to_p16(a)); + return b; +} +inline posit32 p32(int32_t a){ + posit32 b; + b.value = castUI(i32_to_p32(a)); + return b; +} +inline posit_2 pX2(int32_t a, int x){ + posit_2 b; + b.value = castUI(i32_to_pX2(a, x)); + b.x = x; + return b; +} + + + +inline posit8 p8(uint64_t a){ + posit8 b; + b.value = castUI(ui64_to_p8(a)); + return b; +} +inline posit16 p16(uint64_t a){ + posit16 b; + b.value = castUI(ui64_to_p16(a)); + return b; +} +inline posit32 p32(uint64_t a){ + posit32 b; + b.value = castUI(ui64_to_p32(a)); + return b; +} +inline posit_2 pX2(uint64_t a, int x){ + posit_2 b; + b.value = castUI(ui64_to_pX2(a, x)); + b.x = x; + return b; +} + + +inline posit8 p8(int64_t a){ + posit8 b; + b.value = castUI(i64_to_p8(a)); + return b; +} +inline posit16 p16(int64_t a){ + posit16 b; + b.value = castUI(i64_to_p16(a)); + return b; +} +inline posit32 p32(int64_t a){ + posit32 b; + b.value = castUI(i64_to_p32(a)); + return b; +} +inline posit_2 p32(int64_t a, int x){ + posit_2 b; + b.value = castUI(i64_to_pX2(a, x)); + b.x = x; + return b; +} + + +inline posit8 p8(double a){ + posit8 b; + b.value = castUI(convertDoubleToP8(a)); + return b; +} +inline posit16 p16(double a){ + posit16 b; + b.value = castUI(convertDoubleToP16(a)); + return b; +} +inline posit32 p32(double a){ + posit32 b; + b.value = castUI(convertDoubleToP32(a)); + return b; +} +inline posit_2 pX2(double a, int x){ + posit_2 b; + b.value = castUI(convertDoubleToPX2(a, x)); + b.x = x; + return b; +} + + + +inline posit8 p8(quire8 a){ + posit8 b; + b.value = 
castUI(q8_to_p8(castQ8(a.value))); + return b; +} +inline posit16 p16(quire16 a){ + posit16 b; + b.value = castUI(q16_to_p16(castQ16(a.lvalue, a.rvalue))); + return b; +} +inline posit32 p32(quire32 a){ + posit32 b; + b.value = castUI(q32_to_p32(castQ32(a.v0, a.v1, a.v2, a.v3, a.v4, a.v5, a.v6, a.v7))); + return b; +} +inline posit_2 pX2(quire_2 a){ + posit_2 b; + b.value = castUI(qX2_to_pX2(castQX2(a.v0, a.v1, a.v2, a.v3, a.v4, a.v5, a.v6, a.v7), a.x)); + b.x = a.x; + return b; +} +inline posit_2 pX2(quire_2 a, int x){ + posit_2 b; + b.value = castUI(qX2_to_pX2(castQX2(a.v0, a.v1, a.v2, a.v3, a.v4, a.v5, a.v6, a.v7), x)); + b.x = x; + return b; +} + + +//cout helper functions + +inline std::ostream& operator<<(std::ostream& os, const posit8& p) { + os << p.toDouble(); + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const posit16& p) { + os << p.toDouble(); + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const posit32& p) { + os << p.toDouble(); + return os; +} + +inline std::ostream& operator<<(std::ostream& os, const posit_2& p) { + os << p.toDouble(); + return os; +} + +//Math lib + +/*inline posit8 abs(posit8 a){ + a.value = castUI(p8_abs(castP8(a.value))); + return a; +} + +inline posit16 abs(posit16 a){ + a.value = castUI(p16_abs(castP16(a.value))); + return a; +} + + +inline posit32 abs(posit32 a){ + a.value = castUI(p32_abs(castP32(a.value))); + return a; +} + + +inline posit8 ceil(posit8 a){ + a.value = castUI(p8_ceil(castP8(a.value))); + return a; +} + +inline posit16 ceil(posit16 a){ + a.value = castUI(p16_ceil(castP16(a.value))); + return a; +} + +inline posit32 ceil(posit32 a){ + a.value = castUI(p32_ceil(castP32(a.value))); + return a; +} + + +inline posit8 floor(posit8 a){ + a.value = castUI(p8_floor(castP8(a.value))); + return a; +} +inline posit16 floor(posit16 a){ + a.value = castUI(p16_floor(castP16(a.value))); + return a; +} +inline posit32 floor(posit32 a){ + a.value = 
castUI(p32_floor(castP32(a.value))); + return a; +} + + +inline posit8 exp(posit8 a){ + a.value = castUI(p8_exp(castP8(a.value))); + return a; +} +inline posit16 exp(posit16 a){ + a.value = castUI(p16_exp(castP16(a.value))); + return a; +} +inline posit32 exp(posit32 a){ + a.value = castUI(convertDoubleToP32(exp(convertP32ToDouble(castP32(a.value))))); + return a; +} + + + +inline posit8 pow(posit8 a, posit8 b){ + a.value = castUI(convertDoubleToP8(pow(convertP8ToDouble(castP8(a.value)), convertP8ToDouble(castP8(b.value))))); + return a; +} +inline posit16 pow(posit16 a, posit16 b){ + a.value = castUI(convertDoubleToP16(pow(convertP16ToDouble(castP16(a.value)), convertP16ToDouble(castP16(b.value))))); + return a; +} +inline posit32 pow(posit32 a, posit32 b){ + a.value = castUI(convertDoubleToP32(pow(convertP32ToDouble(castP32(a.value)), convertP32ToDouble(castP32(b.value))))); + return a; +} + + +inline posit8 log(posit8 a){ + a.value = castUI(convertDoubleToP8(log(convertP8ToDouble(castP8(a.value))))); + return a; +} +inline posit16 log(posit16 a){ + a.value = castUI(convertDoubleToP16(log(convertP16ToDouble(castP16(a.value))))); + return a; +} +inline posit32 log(posit32 a){ + a.value = castUI(convertDoubleToP32(log(convertP32ToDouble(castP32(a.value))))); + return a; +} + + +inline posit8 log2(posit8 a){ + a.value = castUI(convertDoubleToP8(log2(convertP8ToDouble(castP8(a.value))))); + return a; +} +inline posit16 log2(posit16 a){ + a.value = castUI(convertDoubleToP16(log2(convertP16ToDouble(castP16(a.value))))); + return a; +} +inline posit32 log2(posit32 a){ + a.value = castUI(convertDoubleToP32(log2(convertP32ToDouble(castP32(a.value))))); + return a; +} + + +inline posit8 cos(posit8 a){ + a.value = castUI(convertDoubleToP8(cos(convertP8ToDouble(castP8(a.value))))); + return a; +} +inline posit16 cos(posit16 a){ + a.value = castUI(convertDoubleToP16(cos(convertP16ToDouble(castP16(a.value))))); + return a; +} +inline posit32 cos(posit32 a){ + a.value = 
castUI(convertDoubleToP32(cos(convertP32ToDouble(castP32(a.value))))); + return a; +} + + +inline posit8 sin(posit8 a){ + a.value = castUI(convertDoubleToP8(sin(convertP8ToDouble(castP8(a.value))))); + return a; +} +inline posit16 sin(posit16 a){ + a.value = castUI(convertDoubleToP16(sin(convertP16ToDouble(castP16(a.value))))); + return a; +} +inline posit32 sin(posit32 a){ + a.value = castUI(convertDoubleToP32(sin(convertP32ToDouble(castP32(a.value))))); + return a; +} + + +inline posit8 acos(posit8 a){ + a.value = castUI(convertDoubleToP8(acos(convertP8ToDouble(castP8(a.value))))); + return a; +} +inline posit16 acos(posit16 a){ + a.value = castUI(convertDoubleToP16(acos(convertP16ToDouble(castP16(a.value))))); + return a; +} +inline posit32 acos(posit32 a){ + a.value = castUI(convertDoubleToP32(acos(convertP32ToDouble(castP32(a.value))))); + return a; +}*/ + + +#endif //CPLUSPLUS + +#endif /* INCLUDE_SOFTPOSIT_CPP_H_ */ diff --git a/source/luametatex/source/libraries/softposit/source/include/softposit_types.h b/source/luametatex/source/libraries/softposit/source/include/softposit_types.h new file mode 100644 index 000000000..fe862ca36 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/include/softposit_types.h @@ -0,0 +1,139 @@ + +/*============================================================================ + +This C header file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C header file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#ifndef softposit_types_h +#define softposit_types_h 1 + +#include <stdint.h> + +/*---------------------------------------------------------------------------- +| Types used to pass 16-bit, 32-bit, 64-bit, and 128-bit floating-point +| arguments and results to/from functions. These types must be exactly +| 16 bits, 32 bits, 64 bits, and 128 bits in size, respectively. Where a +| platform has "native" support for IEEE-Standard floating-point formats, +| the types below may, if desired, be defined as aliases for the native types +| (typically 'float' and 'double', and possibly 'long double'). 
+*----------------------------------------------------------------------------*/ + +#ifdef SOFTPOSIT_EXACT + typedef struct { uint8_t v; bool exact; } posit8_t; + typedef struct { uint_fast16_t v; bool exact; } posit16_t; + typedef struct { uint32_t v; bool exact; } posit32_t; + typedef struct { uint64_t v; bool exact; } posit64_t; + typedef struct { uint64_t v[2]; bool exact; } posit128_t; + + typedef struct { uint64_t v[2]; bool exact; } quire16_t; +#else + typedef struct { uint8_t v; } posit8_t; + typedef struct { uint16_t v; } posit16_t; + typedef struct { uint32_t v; } posit32_t; + typedef struct { uint64_t v; } posit64_t; + typedef struct { uint64_t v[2]; } posit128_t; + + typedef struct { uint32_t v; } quire8_t; + typedef struct { uint64_t v[2]; } quire16_t; + typedef struct { uint64_t v[8]; } quire32_t; + + typedef struct { uint32_t v; } posit_2_t; + typedef struct { uint32_t v; } posit_1_t; + typedef struct { uint32_t v; } posit_0_t; + + typedef struct { uint64_t v[8]; } quire_2_t; + typedef struct { uint64_t v[8]; } quire_1_t; + typedef struct { uint64_t v[8]; } quire_0_t; + +#endif + + +#ifdef SOFTPOSIT_EXACT + typedef struct { uint8_t v; bool exact; } uint8e_t; + typedef struct { uint16_t v; bool exact; } uint16e_t; + typedef struct { uint32_t v; bool exact; } uint32e_t; + typedef struct { uint64_t v; bool exact; } uint64e_t; + typedef struct { uint64_t v[2]; bool exact; } uint128e_t; + + union ui8_p8 { uint8e_t ui; posit8_t p; }; + union ui16_p16 { uint16e_t ui; posit16_t p; }; + union ui32_p32 { uint32e_t ui; posit32_t p; }; + union ui64_p64 { uint64e_t ui; posit64_t p; }; + + union ui128_q16 { uint64_t ui[2]; quire16_t q; }; +#else + union ui8_p8 { uint8_t ui; posit8_t p; }; + union ui16_p16 { uint16_t ui; posit16_t p; }; + union ui32_p32 { uint32_t ui; posit32_t p; }; + union ui64_p64 { uint64_t ui; posit64_t p; }; + union ui128_p128c {uint64_t ui[2]; posit128_t p;}; //c to differentiate from original implementation + + union ui32_pX2 { uint32_t ui; 
posit_2_t p; }; + union ui32_pX1 { uint32_t ui; posit_1_t p; }; + union ui32_pX0 { uint32_t ui; posit_1_t p; }; + + union ui64_double { uint64_t ui; double d; }; + + union ui32_q8 { + uint32_t ui; // =0; // patched by HH because the compilers don't like this + quire8_t q; + }; + union ui128_q16 { + uint64_t ui[2]; // ={0,0}; // idem + quire16_t q; + }; + + union ui512_q32 { + uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idme + quire32_t q; + }; + + union ui512_qX2 { + uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idem + quire_2_t q; + }; + + union ui512_qX1 { + uint64_t ui[8]; // ={0,0,0,0, 0,0,0,0}; // idem + quire_1_t q; + }; +#endif + + +#endif + diff --git a/source/luametatex/source/libraries/softposit/source/p16_add.c b/source/luametatex/source/libraries/softposit/source/p16_add.c new file mode 100644 index 000000000..07b12bca3 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_add.c @@ -0,0 +1,75 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit16_t p16_add( posit16_t a, posit16_t b ){ + union ui16_p16 uA, uB; + uint_fast16_t uiA, uiB; + union ui16_p16 uZ; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + + + //Zero or infinity + if (uiA==0 || uiB==0){ // Not required but put here for speed + uZ.ui = uiA | uiB; + return uZ.p; + } + else if ( uiA==0x8000 || uiB==0x8000 ){ + uZ.ui = 0x8000; + return uZ.p; + } + + //different signs + if ((uiA^uiB)>>15) + return softposit_subMagsP16(uiA, uiB); + else + return softposit_addMagsP16(uiA, uiB); + + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_div.c b/source/luametatex/source/libraries/softposit/source/p16_div.c new file mode 100644 index 000000000..2786234ba --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_div.c @@ -0,0 +1,186 @@ + +/*============================================================================ + +This C source file is part of 
the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdlib.h> + +#include "platform.h" +#include "internals.h" + +posit16_t p16_div( posit16_t pA, posit16_t pB ) { + union ui16_p16 uA, uB, uZ; + uint_fast16_t uiA, uiB, fracA, fracB, regA, regime, tmp; + bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast8_t expA, kA=0; + uint_fast32_t frac32A, frac32Z, rem; + div_t divresult; + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + + //Zero or infinity + if ( uiA==0x8000 || uiB==0x8000 || uiB==0){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x8000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x8000; +#endif + return uZ.p; + } + else if (uiA==0){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0; + if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) + uZ.ui.exact = 1; + else + uZ.ui.exact = 0; +#else + uZ.ui = 0; +#endif + return uZ.p; + } + + signA = signP16UI( uiA ); + signB = signP16UI( uiB ); + signZ = signA ^ signB; + if(signA) uiA = (-uiA & 0xFFFF); + if(signB) uiB = (-uiB & 0xFFFF); + regSA = signregP16UI(uiA); + regSB = signregP16UI(uiB); + + tmp = (uiA<<2) & 0xFFFF; + if (regSA){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expA = tmp>>14; + fracA = (0x4000 | tmp); + frac32A = fracA<<14; + + tmp = (uiB<<2) & 0xFFFF; + if (regSB){ + while (tmp>>15){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + fracB = (0x4000 | tmp); + } + else{ + kA++; + while (!(tmp>>15)){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + fracB = (0x4000 | (0x7FFF & tmp)); + } + expA -= tmp>>14; + + divresult = div (frac32A,fracB); + frac32Z = divresult.quot; + rem = divresult.rem; + + if (expA<0){ + expA=1; + kA--; + } + if (frac32Z!=0){ + rcarry = frac32Z >> 14; // this is the hidden bit (14th bit) , extreme right bit is bit 0 + if (!rcarry){ + if (expA==0) kA --; + expA^=1; + frac32Z<<=1; + } + } + if(kA<0){ + regA 
= (-kA & 0xFFFF); + regSA = 0; + regime = 0x4000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFF - (0x7FFF>>regA); + } + + if(regA>14){ + //max or min pos. exp and frac does not matter. + (regSA) ? (uZ.ui= 0x7FFF): (uZ.ui=0x1); + } + else{ + //remove carry and rcarry bits and shift to correct position + frac32Z &= 0x3FFF; + fracA = (uint_fast16_t)frac32Z >> (regA+1); + + if (regA!=14) bitNPlusOne = (frac32Z >> regA) & 0x1; + else if (fracA>0){ + fracA=0; + bitsMore =1; + } + if (regA==14 && expA) bitNPlusOne = 1; + + //sign is always zero + uZ.ui = packToP16UI(regime, regA, expA, fracA); + + if (bitNPlusOne){ + ( ((1<<regA)-1) & frac32Z ) ? (bitsMore=1) : (bitsMore=0); + if (rem) bitsMore =1; + //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + if (signZ) uZ.ui = -uZ.ui & 0xFFFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_eq.c b/source/luametatex/source/libraries/softposit/source/p16_eq.c new file mode 100644 index 000000000..53dd01d93 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_eq.c @@ -0,0 +1,58 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +bool p16_eq( posit16_t pA, posit16_t pB ){ + + union ui16_p16 uA, uB; + int16_t uiA, uiB; + + uA.p = pA; + uiA = (int16_t) uA.ui; + uB.p = pB; + uiB = (int16_t)uB.ui; + + if (uiA==uiB) + return true; + else + return false; +} diff --git a/source/luametatex/source/libraries/softposit/source/p16_le.c b/source/luametatex/source/libraries/softposit/source/p16_le.c new file mode 100644 index 000000000..4e3e2edea --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_le.c @@ -0,0 +1,58 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). 
+ +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +bool p16_le( posit16_t pA, posit16_t pB ) { + union ui16_p16 uA, uB; + int16_t uiA, uiB; + + uA.p = pA; + uiA = (int16_t) uA.ui; + uB.p = pB; + uiB = (int16_t)uB.ui; + + if (uiA<=uiB) + return true; + else + return false; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_lt.c b/source/luametatex/source/libraries/softposit/source/p16_lt.c new file mode 100644 index 000000000..5b47098d2 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_lt.c @@ -0,0 +1,60 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +bool p16_lt( posit16_t pA, posit16_t pB ) +{ + union ui16_p16 uA, uB; + int16_t uiA, uiB; + + uA.p = pA; + uiA = (int16_t) uA.ui; + uB.p = pB; + uiB = (int16_t)uB.ui; + + if (uiA<uiB) + return true; + else + return false; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_mul.c b/source/luametatex/source/libraries/softposit/source/p16_mul.c new file mode 100644 index 000000000..bf9c8b967 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_mul.c @@ -0,0 +1,171 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + + +posit16_t p16_mul( posit16_t pA, posit16_t pB ){ + + union ui16_p16 uA, uB, uZ; + uint_fast16_t uiA, uiB; + uint_fast16_t regA, fracA, regime, tmp; + bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast8_t expA; + int_fast8_t kA=0; + uint_fast32_t frac32Z; + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + + + //NaR or Zero + if ( uiA==0x8000 || uiB==0x8000 ){ + uZ.ui = 0x8000; + return uZ.p; + } + else if (uiA==0 || uiB==0){ + uZ.ui = 0; + return uZ.p; + } + + signA = signP16UI( uiA ); + signB = signP16UI( uiB ); + signZ = signA ^ signB; + + if(signA) uiA = (-uiA & 0xFFFF); + if(signB) uiB = (-uiB & 0xFFFF); + + regSA = signregP16UI(uiA); + regSB = signregP16UI(uiB); + + tmp = (uiA<<2) & 0xFFFF; + if (regSA){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expA = tmp>>14; + fracA = (0x4000 | tmp); + + tmp = (uiB<<2) & 0xFFFF; + if (regSB){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA--; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expA += tmp>>14; + frac32Z = (uint_fast32_t) fracA * (0x4000 | tmp); + + if (expA>1){ + kA++; + expA ^=0x2; + } + + rcarry = frac32Z>>29;//3rd bit of frac32Z + if (rcarry){ + if (expA) kA ++; + expA^=1; + frac32Z>>=1; + } + + if(kA<0){ + regA = (-kA & 0xFFFF); + regSA = 0; + regime = 0x4000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFF - (0x7FFF>>regA); + } + + if(regA>14){ + //max or min pos. exp and frac does not matter. + (regSA) ? 
(uZ.ui= 0x7FFF): (uZ.ui=0x1); + } + else{ + //remove carry and rcarry bits and shift to correct position + frac32Z = (frac32Z&0xFFFFFFF) >> (regA-1); + fracA = (uint_fast16_t) (frac32Z>>16); + + if (regA!=14) bitNPlusOne |= (0x8000 & frac32Z) ; + else if (fracA>0){ + fracA=0; + bitsMore =1; + } + if (regA==14 && expA) bitNPlusOne = 1; + + //sign is always zero + uZ.ui = packToP16UI(regime, regA, expA, fracA); + //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + if (bitNPlusOne){ + if (0x7FFF & frac32Z) bitsMore=1; + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + + if (signZ) uZ.ui = -uZ.ui & 0xFFFF; + return uZ.p; +} + + + diff --git a/source/luametatex/source/libraries/softposit/source/p16_mulAdd.c b/source/luametatex/source/libraries/softposit/source/p16_mulAdd.c new file mode 100644 index 000000000..03548f7e6 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_mulAdd.c @@ -0,0 +1,63 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit16_t p16_mulAdd( posit16_t a, posit16_t b, posit16_t c ) +{ + union ui16_p16 uA; + uint_fast16_t uiA; + union ui16_p16 uB; + uint_fast16_t uiB; + union ui16_p16 uC; + uint_fast16_t uiC; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + uC.p = c; + uiC = uC.ui; + return softposit_mulAddP16( uiA, uiB, uiC, 0 ); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_roundToInt.c b/source/luametatex/source/libraries/softposit/source/p16_roundToInt.c new file mode 100644 index 000000000..383fdb131 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_roundToInt.c @@ -0,0 +1,98 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017, 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + + +posit16_t p16_roundToInt( posit16_t pA ) { + + union ui16_p16 uA; + uint_fast16_t mask = 0x2000, scale=0, tmp=0, uiA; + bool bitLast, bitNPlusOne, sign; + + uA.p = pA; + uiA = uA.ui; // Copy of the input. + sign = (uiA > 0x8000); + + // sign is True if pA > NaR. + if (sign) uiA = -uiA & 0xFFFF; // A is now |A|. + if (uiA <= 0x3000) { // 0 <= |pA| <= 1/2 rounds to zero. + uA.ui = 0; + return uA.p; + } + else if (uiA < 0x4800) { // 1/2 < x < 3/2 rounds to 1. + uA.ui = 0x4000; + } + else if (uiA <= 0x5400) { // 3/2 <= x <= 5/2 rounds to 2. + uA.ui = 0x5000; + } + else if (uiA >= 0x7C00) { // If |A| is 256 or greater, leave it unchanged. + return uA.p; // This also takes care of the NaR case, 0x8000. + } + else { // 34% of the cases, we have to decode the posit. + while (mask & uiA) { // Increment scale by 2 for each regime sign bit. + scale += 2; // Regime sign bit is always 1 in this range. + mask >>= 1; // Move the mask right, to the next bit. + } + mask >>= 1; // Skip over termination bit. + if (mask & uiA) scale++; // If exponent is 1, increment the scale. + + mask >>= scale; // Point to the last bit of the integer part. + bitLast = (uiA & mask); // Extract the bit, without shifting it. + + mask >>= 1; + tmp = (uiA & mask); + bitNPlusOne = tmp; // "True" if nonzero. + uiA ^= tmp; // Erase the bit, if it was set. + tmp = uiA & (mask - 1); // tmp has any remaining bits. + uiA ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) uiA += (mask << 1); + } + uA.ui = uiA; + } + if (sign) uA.ui = -uA.ui & 0xFFFF; // Apply the sign of Z. 
+ return uA.p; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_sqrt.c b/source/luametatex/source/libraries/softposit/source/p16_sqrt.c new file mode 100644 index 000000000..24d33f9c0 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_sqrt.c @@ -0,0 +1,143 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
// Lookup tables used for the initial 1/sqrt estimate; defined elsewhere.
// Indexed below by a 4-bit value built from the exponent and the top
// fraction bits.
extern const uint_fast16_t softposit_approxRecipSqrt0[];
extern const uint_fast16_t softposit_approxRecipSqrt1[];

/*
 * Square root of a posit16 value.
 *
 * Any input with the sign bit set (which includes the 0x8000 pattern)
 * returns NaR (0x8000); zero returns zero. Otherwise the input is decoded
 * into a power-of-two scale and a significand, a reciprocal square root is
 * estimated from the tables above and refined once, and the product with
 * the significand is corrected, rounded and repacked.
 */
posit16_t p16_sqrt( posit16_t pA ) {

    union ui16_p16 uA;
    uint_fast16_t expA, fracA, index, r0, shift, sigma0, uiA, uiZ;
    uint_fast32_t eSqrR0, fracZ, negRem, recipSqrt, shiftedFracZ;
    int_fast16_t kZ;
    bool bitNPlusOne;

    uA.p = pA;
    uiA = uA.ui;

    // If sign bit is set, return NaR.
    if (uiA>>15) {
        uA.ui = 0x8000;
        return uA.p;
    }
    // If the argument is zero, return zero.
    if (uiA==0) {
        uA.ui = 0;
        return uA.p;
    }
    // Compute the square root. Here, kZ is the net power-of-2 scaling of the result.
    // Decode the regime and exponent bit; scale the input to be in the range 1 to 4:
    if (uiA >> 14) {
        // Regime starts with 1: count the run of ones, starting from -1.
        kZ = -1;
        while (uiA & 0x4000) {
            kZ++;
            uiA= (uiA<<1) & 0xFFFF;
        }
    }
    else {
        // Regime starts with 0: count the run of zeros downwards.
        kZ = 0;
        while (!(uiA & 0x4000)) {
            kZ--;
            uiA= (uiA<<1) & 0xFFFF;
        }

    }
    // Keep the 14 bits below the regime terminator; bit 13 is the exponent bit.
    uiA &= 0x3fff;
    // expA holds the inverse of the exponent bit (1 when the bit is clear).
    expA = 1 - (uiA >> 13);
    // Force bit 13 to 1 and drop the lowest bit to form the working significand.
    fracA = (uiA | 0x2000) >> 1;

    // Use table look-up of first four bits for piecewise linear approx. of 1/sqrt:
    index = ((fracA >> 8) & 0xE) + expA;

    // Table 0 gives the base value, table 1 the slope applied to the low 9 bits.
    r0 = softposit_approxRecipSqrt0[index]
        - (((uint_fast32_t) softposit_approxRecipSqrt1[index]
            * (fracA & 0x1FF)) >> 13);
    // Use Newton-Raphson refinement to get more accuracy for 1/sqrt:
    eSqrR0 = ((uint_fast32_t) r0 * r0) >> 1;

    if (expA) eSqrR0 >>= 1;
    // 16-bit one's complement of the shifted product, computed in 64 bits.
    // (An earlier form, ~(uint_fast16_t) ((eSqrR0 * fracA) >> 18), was left
    // commented out here in the original.)
    sigma0 = 0xFFFF ^ (0xFFFF & (((uint64_t)eSqrR0 * (uint64_t)fracA) >> 18));
    recipSqrt = ((uint_fast32_t) r0 << 2) + (((uint_fast32_t) r0 * sigma0) >> 23);

    // We need 17 bits of accuracy for posit16 square root approximation.
    // Multiplying 16 bits and 18 bits needs 64-bit scratch before the right shift:
    fracZ = (((uint_fast64_t) fracA) * recipSqrt) >> 13;

    // Figure out the regime and the resulting right shift of the fraction:
    if (kZ < 0) {
        shift = (-1 - kZ) >> 1;
        uiZ = 0x2000 >> shift;
    }
    else {
        shift = kZ >> 1;
        uiZ = 0x7fff - (0x7FFF >> (shift + 1));
    }
    // Set the exponent bit in the answer, if it is nonzero:
    if (kZ & 1) uiZ |= (0x1000 >> shift);

    // Right-shift fraction bits, accounting for 1 <= a < 2 versus 2 <= a < 4:
    fracZ = fracZ >> (expA + shift);

    // Trick for eliminating off-by-one cases that only uses one multiply:
    fracZ++;
    // Only when the low three bits are all zero is the estimate re-checked,
    // by squaring it and inspecting the low 18 bits of the square.
    if (!(fracZ & 7)) {
        shiftedFracZ = fracZ >> 1;
        negRem = (shiftedFracZ * shiftedFracZ) & 0x3FFFF;
        if (negRem & 0x20000) {
            fracZ |= 1;
        } else {
            if (negRem) fracZ--;
        }
    }
    // Strip off the hidden bit and round-to-nearest using last 4 bits.
    fracZ -= (0x10000 >> shift);
    bitNPlusOne = (fracZ >> 3) & 1;
    if (bitNPlusOne) {
        // Round up when the kept LSB is odd or any lower bit is set.
        if (((fracZ >> 4) & 1) | (fracZ & 7)) fracZ += 0x10;
    }
    // Assemble the result and return it.
    uA.ui = uiZ | (fracZ >> 4);
    return uA.p;

}
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + + +posit16_t p16_sub( posit16_t a, posit16_t b ){ + + union ui16_p16 uA, uB; + uint_fast16_t uiA, uiB; + union ui16_p16 uZ; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + + //infinity + if ( uiA==0x8000 || uiB==0x8000 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x8000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x8000; +#endif + return uZ.p; + } + //Zero + else if ( uiA==0 || uiB==0 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = (uiA | -uiB); + uZ.ui.exact = 0; +#else + uZ.ui = (uiA | -uiB); +#endif + return uZ.p; + } + + //different signs + if ((uiA^uiB)>>15) + return softposit_addMagsP16(uiA, (-uiB & 0xFFFF)); + else + return softposit_subMagsP16(uiA, (-uiB & 0xFFFF)); + + + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_to_i32.c b/source/luametatex/source/libraries/softposit/source/p16_to_i32.c new file mode 100644 index 000000000..d28a44c6a --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_to_i32.c @@ -0,0 +1,97 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017, 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" +
// p16_to_i32: converts a 16-bit posit to a signed integer, rounding to the
// nearest integer with ties going to the even integer.
//
// The NaR bit pattern (0x8000) converts to 0. Negative inputs are converted
// through their magnitude and the sign is re-applied at the end.
int_fast32_t p16_to_i32( posit16_t pA ){
    union ui16_p16 uA;
    int_fast32_t mask, iZ, tmp;
    uint_fast16_t scale = 0, uiA;
    bool bitLast, bitNPlusOne, sign;

    uA.p = pA;
    uiA = uA.ui;                          // Copy of the input.
    //NaR
    if (uiA==0x8000) return 0;

    sign = (uiA > 0x8000);                // sign is True if pA > NaR.
    if (sign) uiA = -uiA & 0xFFFF;        // A is now |A|.

    // The three smallest results are decided directly on the bit pattern.
    if (uiA <= 0x3000) {                  // 0 <= |pA| <= 1/2 rounds to zero.
        return 0;
    }
    else if (uiA < 0x4800) {              // 1/2 < x < 3/2 rounds to 1.
        iZ = 1;
    }
    else if (uiA <= 0x5400) {             // 3/2 <= x <= 5/2 rounds to 2.
        iZ = 2;
    }
    else {                                // Decode the posit, left-justifying as we go.
        uiA -= 0x4000;                    // Strip off first regime bit (which is a 1).
        while (0x2000 & uiA) {            // Increment scale by 2 for each regime sign bit.
            scale += 2;                   // Regime sign bit is always 1 in this range.
            uiA = (uiA - 0x2000) << 1;    // Remove the bit; line up the next regime bit.
        }
        uiA <<= 1;                        // Skip over termination bit, which is 0.
        if (0x2000 & uiA) scale++;        // If exponent is 1, increment the scale.
        iZ = ((uint32_t)uiA | 0x2000) << 17;  // Left-justify fraction in 32-bit result (one left bit padding)
        mask = 0x40000000 >> scale;       // Point to the last bit of the integer part.

        // The order of the next statements matters: each bit is sampled
        // first and cleared from iZ afterwards.
        bitLast = (iZ & mask);            // Extract the bit, without shifting it.
        mask >>= 1;
        tmp = (iZ & mask);
        bitNPlusOne = tmp;                // "True" if nonzero.
        iZ ^= tmp;                        // Erase the bit, if it was set.
        tmp = iZ & (mask - 1);            // tmp has any remaining bits. // This is bitsMore
        iZ ^= tmp;                        // Erase those bits, if any were set.

        if (bitNPlusOne) {                // logic for round to nearest, tie to even
            if (bitLast | tmp) iZ += (mask << 1);
        }

        iZ = (uint32_t)iZ >> (30 - scale);  // Right-justify the integer.
    }

    if (sign) iZ = -iZ;                   // Apply the sign of the input.
    return iZ;
}
+ diff --git a/source/luametatex/source/libraries/softposit/source/p16_to_i64.c b/source/luametatex/source/libraries/softposit/source/p16_to_i64.c new file mode 100644 index 000000000..d06859443 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_to_i64.c @@ -0,0 +1,98 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" +
// p16_to_i64: converts a 16-bit posit to a signed 64-bit-capable integer.
//
// The NaR bit pattern (0x8000) converts to 0. Negative inputs are converted
// through their magnitude and negated at the end. In the general branch a
// set first discarded bit rounds the result up only when the last kept bit
// or any lower bit is also set.
int_fast64_t p16_to_i64( posit16_t pA ){
    union ui16_p16 uA;
    int_fast64_t mask, tmp, iZ;
    uint_fast16_t scale = 0, uiA;
    bool sign, bitLast, bitNPlusOne;

    uA.p = pA;
    uiA = uA.ui;

    // NaR
    if (uiA==0x8000) return 0;

    sign = uiA>>15;                       // top bit of the 16-bit pattern
    if (sign) uiA = -uiA & 0xFFFF;        // two's-complement negate, kept to 16 bits

    // Small magnitudes are decided directly on the bit pattern.
    if (uiA <= 0x3000)
        return 0;
    else if (uiA < 0x4800)
        iZ = 1;
    else if (uiA <= 0x5400)
        iZ = 2;
    else{

        uiA -= 0x4000;                    // drop the first regime bit

        // Each further regime 1-bit adds 2 to the scale and is shifted out.
        while (0x2000 & uiA) {
            scale += 2;
            uiA = (uiA - 0x2000) << 1;
        }
        uiA <<= 1;                        // step over the bit that ended the run
        if (0x2000 & uiA) scale++;        // this bit contributes 1 to the scale
        iZ = ((uint64_t)uiA | 0x2000) << 49;   // force bit 13 on, move it to bit 62

        mask = 0x4000000000000000 >> scale;    // last bit that survives the final shift

        // Sample each rounding bit first, then clear it from iZ; the
        // statement order below must be kept.
        bitLast = (iZ & mask);
        mask >>= 1;
        tmp = (iZ & mask);
        bitNPlusOne = tmp;                // first bit that will be discarded
        iZ ^= tmp;
        tmp = iZ & (mask - 1); // bitsMore
        iZ ^= tmp;

        // Round up when the first discarded bit is set and either the last
        // kept bit or any lower bit is set.
        if (bitNPlusOne)
            if (bitLast | tmp) iZ += (mask << 1);

        iZ = (uint64_t)iZ >> (62 - scale);     // right-justify the integer

    }
    if (sign) iZ = -iZ;
    return iZ;

}
+ diff --git a/source/luametatex/source/libraries/softposit/source/p16_to_p32.c b/source/luametatex/source/libraries/softposit/source/p16_to_p32.c new file mode 100644 index 000000000..3d03cc617 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_to_p32.c @@ -0,0 +1,109 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ +#include "platform.h" +#include "internals.h" + +posit32_t p16_to_p32( posit16_t pA ) { + + union ui16_p16 uA; + union ui32_p32 uZ; + uint_fast16_t uiA, tmp; + uint_fast32_t exp_frac32A=0, regime; + bool sign, regSA; + int_fast8_t kA=0, regA; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x8000 || uiA==0 ){ + uZ.ui = (uint32_t)uiA<<16; + return uZ.p; + } + + sign = signP16UI( uiA ); + + if (sign) uiA = -uiA & 0xFFFF; + regSA = signregP16UI(uiA); + + tmp = (uiA<<2) & 0xFFFF; + if (regSA){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + exp_frac32A = (uint32_t) tmp<<16; + + + if(kA<0){ + regA = -kA; + //if (regA&0x1) exp_frac32A |= 0x80000000; + exp_frac32A |= ((uint32_t)(regA&0x1)<<31); + regA = (regA+1)>>1; + if (regA==0) regA=1; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + exp_frac32A |= ((uint32_t)(kA&0x1)<<31); + (kA==0) ? (regA=1) : (regA = (kA+2)>>1); + + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + + } + + exp_frac32A >>=(regA+2); //2 because of sign and regime terminating bit + + uZ.ui = regime + exp_frac32A; + + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_to_p8.c b/source/luametatex/source/libraries/softposit/source/p16_to_p8.c new file mode 100644 index 000000000..74c198b99 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_to_p8.c @@ -0,0 +1,120 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. 
+ +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ +#include "platform.h" +#include "internals.h" + +posit8_t p16_to_p8( posit16_t pA ) { + + union ui16_p16 uA; + union ui8_p8 uZ; + uint_fast16_t uiA, tmp, regime; + uint_fast16_t exp_frac16A=0; + bool sign, regSA, bitsMore=0; + int_fast8_t kA=0, regA; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x8000 || uiA==0 ){ + uZ.ui = (uiA>>8) &0xFF; + return uZ.p; + } + + sign = signP16UI( uiA ); + + if (sign) uiA = -uiA & 0xFFFF; + regSA = signregP16UI(uiA); + + tmp = (uiA<<2) & 0xFFFF; + if (regSA){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + + if (kA<-3 || kA>=3){ + (kA<0) ? (uZ.ui=0x1):(uZ.ui= 0x7F); + } + else{ + //2nd bit exp + exp_frac16A = tmp; + if(kA<0){ + regA = ((-kA)<<1) - (exp_frac16A>>14); + if (regA==0) regA=1; + regSA = 0; + regime = 0x40>>regA; + } + else{ + + (kA==0)?(regA=1 + (exp_frac16A>>14)): (regA = ((kA+1)<<1) + (exp_frac16A>>14) -1); + regSA=1; + regime = 0x7F - (0x7F>>regA); + } + if (regA>5){ + uZ.ui = regime; + } + else{ + //int shift = regA+8; + //exp_frac16A= ((exp_frac16A)&0x3FFF) >> shift; //first 2 bits already empty (for sign and regime terminating bit) + uZ.ui = regime + ( ((exp_frac16A)&0x3FFF)>>(regA+8) ); + + } + + } + + if ( exp_frac16A & (0x80<<regA) ){ + bitsMore = exp_frac16A & (0xFFFF>>(9-regA)); + uZ.ui += (uZ.ui&1) | bitsMore; + + } + if (sign) uZ.ui = -uZ.ui & 0xFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_to_pX1.c b/source/luametatex/source/libraries/softposit/source/p16_to_pX1.c new file mode 100644 index 000000000..5a6f2a375 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_to_pX1.c @@ -0,0 +1,92 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. 
Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ +#include "platform.h" +#include "internals.h" + +posit_1_t p16_to_pX1( posit16_t pA, int x ) { + + union ui16_p16 uA; + union ui32_pX1 uZ; + uint_fast16_t tmp; + uint_fast32_t uiA, exp_frac32A=0, regime; + bool sign, regSA; + int_fast8_t kA=0, regA; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = ((uint32_t) uA.ui<<16) &0xFFFFFFFF; + + if (uiA==0x80000000 || uiA==0 ){ + uZ.ui = uiA; + return uZ.p; + } + + sign = signP32UI( uiA ); + if (sign) uiA = -uiA & 0xFFFFFFFF; + + if (x==2){ + uZ.ui=(uiA>0)?(0x40000000):(0); + } + else if (x==32 || (((uint32_t)0xFFFFFFFF>>x) & uiA)==0 ){ + uZ.ui = uiA; + } + else { + + int shift = 32-x; + if( (uiA>>shift)!=(0x7FFFFFFF>>shift) ){ + if( ((uint32_t)0x80000000>>x) & uiA){ + if ( ( ((uint32_t)0x80000000>>(x-1)) & uiA) || (((uint32_t)0x7FFFFFFF>>x) & uiA) ) + uiA += (0x1<<shift); + } + } + uZ.ui = uiA & ((int32_t)0x80000000>>(x-1)); + if (uZ.ui==0) uZ.ui = 0x1<<shift; + + } + + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_to_pX2.c b/source/luametatex/source/libraries/softposit/source/p16_to_pX2.c new file mode 100644 index 000000000..c8696ff12 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_to_pX2.c @@ -0,0 +1,137 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ +#include "platform.h" +#include "internals.h" + +posit_2_t p16_to_pX2( posit16_t pA, int x ) { + + union ui16_p16 uA; + union ui32_pX2 uZ; + uint_fast16_t uiA, tmp; + uint_fast32_t exp_frac32A=0, regime; + bool sign, regSA; + int_fast8_t kA=0, regA; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x8000 || uiA==0 ){ + uZ.ui = (uint32_t)uiA<<16; + return uZ.p; + } + + + sign = signP16UI( uiA ); + if (sign) uiA = -uiA & 0xFFFF; + + if(x==2){ + uZ.ui=(uiA>0)?(0x40000000):(0); + } + else{ + regSA = signregP16UI(uiA); + + tmp = (uiA<<2) & 0xFFFF; + if (regSA){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + exp_frac32A = (uint32_t) tmp<<16; + + if(kA<0){ + regA = -kA; + //if (regA&0x1) exp_frac32A |= 0x80000000; + exp_frac32A |= ((uint32_t)(regA&0x1)<<31); + regA = (regA+1)>>1; + if (regA==0) regA=1; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + exp_frac32A |= ((uint32_t)(kA&0x1)<<31); + (kA==0) ? (regA=1) : (regA = (kA+2)>>1); + + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + + if(regA>(x-2)){ + //max or min pos. exp and frac does not matter. + uZ.ui=(regSA) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + + exp_frac32A >>=(regA+2); //2 because of sign and regime terminating bit + + uZ.ui = regime + exp_frac32A; + + int shift = 32-x; + if( (uZ.ui>>shift)!=(0x7FFFFFFF>>shift) ){ + if( ((uint32_t)0x80000000>>x) & uZ.ui){ + if ( ( ((uint32_t)0x80000000>>(x-1)) & uZ.ui) || (((uint32_t)0x7FFFFFFF>>x) & uZ.ui) ) + uZ.ui += (0x1<<shift); + } + } + + uZ.ui &=((int32_t)0x80000000>>(x-1)); + if (uZ.ui==0) uZ.ui = 0x1<<shift; + } + } + + + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p16_to_ui32.c b/source/luametatex/source/libraries/softposit/source/p16_to_ui32.c new file mode 100644 index 000000000..e69139548 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_to_ui32.c @@ -0,0 +1,95 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" +
// p16_to_ui32: converts a 16-bit posit to an unsigned integer, rounding to
// the nearest integer with ties going to the even integer.
//
// Every bit pattern >= 0x8000 (NaR and all negative values) converts to 0.
uint_fast32_t p16_to_ui32( posit16_t pA ) {

    union ui16_p16 uA;
    uint_fast32_t mask, iZ, tmp;
    uint_fast16_t scale = 0, uiA;
    bool bitLast, bitNPlusOne;

    uA.p = pA;
    uiA = uA.ui;                          // Copy of the input.
    //NaR
    //if (uiA==0x8000) return 0;
    if (uiA>=0x8000) return 0;            //negative

    // The three smallest results are decided directly on the bit pattern.
    if (uiA <= 0x3000) {                  // 0 <= |pA| <= 1/2 rounds to zero.
        return 0;
    }
    else if (uiA < 0x4800) {              // 1/2 < x < 3/2 rounds to 1.
        iZ = 1;
    }
    else if (uiA <= 0x5400) {             // 3/2 <= x <= 5/2 rounds to 2.
        iZ = 2;
    }
    else {                                // Decode the posit, left-justifying as we go.
        uiA -= 0x4000;                    // Strip off first regime bit (which is a 1).
        while (0x2000 & uiA) {            // Increment scale by 2 for each regime sign bit.
            scale += 2;                   // Regime sign bit is always 1 in this range.
            uiA = (uiA - 0x2000) << 1;    // Remove the bit; line up the next regime bit.
        }
        uiA <<= 1;                        // Skip over termination bit, which is 0.
        if (0x2000 & uiA) scale++;        // If exponent is 1, increment the scale.
        iZ = ((uint32_t)uiA | 0x2000) << 17;  // Left-justify fraction in 32-bit result (one left bit padding)
        mask = 0x40000000 >> scale;       // Point to the last bit of the integer part.

        // The order of the next statements matters: each bit is sampled
        // first and cleared from iZ afterwards.
        bitLast = (iZ & mask);            // Extract the bit, without shifting it.
        mask >>= 1;
        tmp = (iZ & mask);
        bitNPlusOne = tmp;                // "True" if nonzero.
        iZ ^= tmp;                        // Erase the bit, if it was set.
        tmp = iZ & (mask - 1);            // tmp has any remaining bits. // This is bitsMore
        iZ ^= tmp;                        // Erase those bits, if any were set.

        if (bitNPlusOne) {                // logic for round to nearest, tie to even
            if (bitLast | tmp) iZ += (mask << 1);
        }
        iZ = (uint32_t)iZ >> (30 - scale);  // Right-justify the integer.
    }

    return iZ;

}
+ diff --git a/source/luametatex/source/libraries/softposit/source/p16_to_ui64.c b/source/luametatex/source/libraries/softposit/source/p16_to_ui64.c new file mode 100644 index 000000000..021a38bd8 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p16_to_ui64.c @@ -0,0 +1,96 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2.
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" +
// p16_to_ui64: converts a 16-bit posit to an unsigned 64-bit-capable integer.
//
// Every bit pattern >= 0x8000 (NaR and all negative values) converts to 0.
// In the general branch a set first discarded bit rounds the result up only
// when the last kept bit or any lower bit is also set.
uint_fast64_t p16_to_ui64( posit16_t pA ) {
    union ui16_p16 uA;
    uint_fast64_t mask, iZ, tmp;
    uint_fast16_t scale = 0, uiA;
    bool bitLast, bitNPlusOne;

    uA.p = pA;
    uiA = uA.ui;
    //NaR
    //if (uiA==0x8000) return 0;
    //negative
    if (uiA>=0x8000) return 0;

    // Small values are decided directly on the bit pattern.
    if (uiA <= 0x3000) {
        return 0;
    }
    else if (uiA < 0x4800) {
        iZ = 1;
    }
    else if (uiA <= 0x5400) {
        iZ = 2;
    }
    else {
        uiA -= 0x4000;                    // drop the first regime bit
        // Each further regime 1-bit adds 2 to the scale and is shifted out.
        while (0x2000 & uiA) {
            scale += 2;
            uiA = (uiA - 0x2000) << 1;
        }
        uiA <<= 1;                        // step over the bit that ended the run
        if (0x2000 & uiA) scale++;        // this bit contributes 1 to the scale
        iZ = ((uint64_t)uiA | 0x2000) << 49;   // force bit 13 on, move it to bit 62

        mask = 0x4000000000000000 >> scale;    // last bit that survives the final shift

        // Sample each rounding bit first, then clear it from iZ; the
        // statement order below must be kept.
        bitLast = (iZ & mask);
        mask >>= 1;
        tmp = (iZ & mask);
        bitNPlusOne = tmp;                // first bit that will be discarded
        iZ ^= tmp;
        tmp = iZ & (mask - 1); // bitsMore
        iZ ^= tmp;

        // Round up when the first discarded bit is set and either the last
        // kept bit or any lower bit is set.
        if (bitNPlusOne)
            if (bitLast | tmp) iZ += (mask << 1);

        iZ = (uint64_t)iZ >> (62 - scale);     // right-justify the integer

    }
    return iZ;

}
+ diff --git a/source/luametatex/source/libraries/softposit/source/p32_add.c b/source/luametatex/source/libraries/softposit/source/p32_add.c new file mode 100644 index 000000000..61b1537c0 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_add.c @@ -0,0 +1,86 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1.
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit32_t p32_add( posit32_t a, posit32_t b ){ + union ui32_p32 uA, uB, uZ; + uint_fast32_t uiA, uiB; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + + //Zero or infinity + if (uiA==0 || uiB==0){ // Not required but put here for speed +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = uiA | uiB; + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#else + uZ.ui = uiA | uiB; +#endif + return uZ.p; + } + else if ( uiA==0x80000000 || uiB==0x80000000 ){ + //printf("in infinity\n"); +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80000000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80000000; +#endif + return uZ.p; + } + + //different signs + if ((uiA^uiB)>>31) + return softposit_subMagsP32(uiA, uiB); + else + return softposit_addMagsP32(uiA, uiB); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_div.c b/source/luametatex/source/libraries/softposit/source/p32_div.c new file mode 100644 index 000000000..a163a1412 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_div.c @@ -0,0 +1,201 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdlib.h> + +#include "platform.h" +#include "internals.h" + +posit32_t p32_div( posit32_t pA, posit32_t pB ) +{ + union ui32_p32 uA, uB, uZ; + uint_fast32_t uiA, uiB, fracA, fracB, regA, regime, regB, tmp; + bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast8_t kA=0; + int_fast32_t expA; + uint_fast64_t frac64A, frac64Z, rem; + lldiv_t divresult; + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + + //Zero or infinity + if ( uiA==0x80000000 || uiB==0x80000000 || uiB==0){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80000000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80000000; +#endif + return uZ.p; + } + else if (uiA==0){ +#ifdef SOFTPOSIT_EXACT + + uZ.ui.v = 0; + if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) + uZ.ui.exact = 1; + else + uZ.ui.exact = 0; +#else + uZ.ui = 0; +#endif + return uZ.p; + } + + signA = signP32UI( uiA ); + signB = signP32UI( uiB ); + signZ = signA ^ signB; + if(signA) uiA = (-uiA & 0xFFFFFFFF); + if(signB) uiB = (-uiB & 0xFFFFFFFF); + regSA = signregP32UI(uiA); + regSB = signregP32UI(uiB); + + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA = tmp>>29; //to get 2 bits + fracA = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; + frac64A = (uint64_t) fracA << 30; + + tmp = (uiB<<2)&0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA++; + while (!(tmp>>31)){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA -= tmp>>29; + fracB = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; + + divresult = lldiv (frac64A,(uint_fast64_t)fracB); + frac64Z = divresult.quot; + rem = divresult.rem; + + if (expA<0){ + expA+=4; + kA--; + } + if (frac64Z!=0){ + rcarry = frac64Z >> 30; // this is the hidden 
bit (14th bit) , extreme right bit is bit 0 + if (!rcarry){ + if (expA==0){ + kA--; + expA=3; + } + else + expA--; + frac64Z<<=1; + } + } + + if(kA<0){ + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + if(regA>30){ + //max or min pos. exp and frac does not matter. + (regSA) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); + } + else{ + //remove carry and rcarry bits and shift to correct position + frac64Z &= 0x3FFFFFFF; + + fracA = (uint_fast32_t)frac64Z >> (regA+2); + + if (regA<=28){ + bitNPlusOne = (frac64Z >> (regA +1)) & 0x1; + expA<<= (28-regA); + if (bitNPlusOne) ( ((1<<(regA+1))-1) & frac64Z ) ? (bitsMore=1) : (bitsMore=0); + } + else { + if (regA==30){ + bitNPlusOne = expA&0x2; + bitsMore = (expA&0x1); + expA = 0; + } + else if (regA==29){ + bitNPlusOne = expA&0x1; + expA>>=1; //taken care of by the pack algo + } + if (frac64Z>0){ + fracA=0; + bitsMore =1; + } + + } + if (rem) bitsMore =1; + + uZ.ui = packToP32UI(regime, expA, fracA); + if (bitNPlusOne) uZ.ui += (uZ.ui&1) | bitsMore; + } + + if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_eq.c b/source/luametatex/source/libraries/softposit/source/p32_eq.c new file mode 100644 index 000000000..3091227ef --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_eq.c @@ -0,0 +1,60 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +bool p32_eq( posit32_t a, posit32_t b ) { + union ui32_p32 uA, uB; + int32_t uiA, uiB; + + uA.p = a; + uiA = (int32_t) uA.ui; + uB.p = b; + uiB = (int32_t)uB.ui; + + if(uiA==uiB) + return true; + else + return false; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_le.c b/source/luametatex/source/libraries/softposit/source/p32_le.c new file mode 100644 index 000000000..add7edb77 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_le.c @@ -0,0 +1,59 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +bool p32_le( posit32_t a, posit32_t b ) { + union ui32_p32 uA, uB; + int32_t uiA, uiB; + + uA.p = a; + uiA = (int32_t) uA.ui; + uB.p = b; + uiB = (int32_t)uB.ui; + + if(uiA<=uiB) + return true; + else + return false; + +} diff --git a/source/luametatex/source/libraries/softposit/source/p32_lt.c b/source/luametatex/source/libraries/softposit/source/p32_lt.c new file mode 100644 index 000000000..a6dfe6437 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_lt.c @@ -0,0 +1,59 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +bool p32_lt( posit32_t a, posit32_t b ) { + union ui32_p32 uA, uB; + int32_t uiA, uiB; + + uA.p = a; + uiA = (int32_t) uA.ui; + uB.p = b; + uiB = (int32_t)uB.ui; + + if(uiA<uiB) + return true; + else + return false; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_mul.c b/source/luametatex/source/libraries/softposit/source/p32_mul.c new file mode 100644 index 000000000..460d81aee --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_mul.c @@ -0,0 +1,206 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + + +posit32_t p32_mul( posit32_t pA, posit32_t pB ){ + + + union ui32_p32 uA, uB, uZ; + uint_fast32_t uiA, uiB; + uint_fast32_t regA, fracA, regime, tmp; + bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast32_t expA; + int_fast8_t kA=0; + uint_fast64_t frac64Z; + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + //NaR or Zero + if ( uiA==0x80000000 || uiB==0x80000000 ){ + +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80000000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80000000; +#endif + return uZ.p; + } + else if (uiA==0 || uiB==0){ +#ifdef SOFTPOSIT_EXACT + + uZ.ui.v = 0; + if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) + uZ.ui.exact = 1; + else + uZ.ui.exact = 0; +#else + uZ.ui = 0; +#endif + return uZ.p; + } + + signA = signP32UI( uiA ); + signB = signP32UI( uiB ); + signZ = signA ^ signB; + + if(signA) uiA = (-uiA & 0xFFFFFFFF); + if(signB) uiB = (-uiB & 0xFFFFFFFF); + + regSA = signregP32UI(uiA); + regSB = signregP32UI(uiB); + + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ 
+ + while (tmp>>31){ + + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA = tmp>>29; //to get 2 bits + fracA = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; + + tmp = (uiB<<2)&0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA--; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA += tmp>>29; + frac64Z = (uint_fast64_t) fracA * (((tmp<<1) | 0x40000000) & 0x7FFFFFFF); + + if (expA>3){ + kA++; + expA&=0x3; // -=4 + } + + rcarry = frac64Z>>61;//3rd bit of frac64Z + if (rcarry){ + expA++; + if (expA>3){ + kA ++; + expA&=0x3; + } + frac64Z>>=1; + } + + if(kA<0){ + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + + + if(regA>30){ + //max or min pos. exp and frac does not matter. + (regSA) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); + } + else{ + //remove carry and rcarry bits and shift to correct position (2 bits exp, so + 1 than 16 bits) + frac64Z = (frac64Z&0xFFFFFFFFFFFFFFF) >> regA; + fracA = (uint_fast32_t) (frac64Z>>32); + if (regA<=28){ + bitNPlusOne |= (0x80000000 & frac64Z); + expA<<= (28-regA); + } + else { + if (regA==30){ + bitNPlusOne = expA&0x2; + bitsMore = (expA&0x1); + expA = 0; + } + else if (regA==29){ + bitNPlusOne = expA&0x1; + expA>>=1; //taken care of by the pack algo + } + if (fracA>0){ + fracA=0; + bitsMore =1; + } + + } + //sign is always zero + uZ.ui = packToP32UI(regime, expA, fracA); + //n+1 frac bit is 1. 
Need to check if another bit is 1 too if not round to even + if (bitNPlusOne){ + if (0x7FFFFFFF & frac64Z) bitsMore=1; + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + + if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_mulAdd.c b/source/luametatex/source/libraries/softposit/source/p32_mulAdd.c new file mode 100644 index 000000000..1cee8a093 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_mulAdd.c @@ -0,0 +1,56 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit32_t p32_mulAdd( posit32_t a, posit32_t b, posit32_t c ) { + + union ui32_p32 uA; + uint_fast32_t uiA; + union ui32_p32 uB; + uint_fast32_t uiB; + union ui32_p32 uC; + uint_fast32_t uiC; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + uC.p = c; + uiC = uC.ui; + return softposit_mulAddP32( uiA, uiB, uiC, 0 ); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_roundToInt.c b/source/luametatex/source/libraries/softposit/source/p32_roundToInt.c new file mode 100644 index 000000000..f2e01194e --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_roundToInt.c @@ -0,0 +1,105 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit32_t p32_roundToInt( posit32_t pA ){ + union ui32_p32 uA; + uint_fast32_t mask = 0x20000000, scale=0, tmp=0, uiA, uiZ; + bool bitLast, bitNPlusOne, sign; + + uA.p = pA; + uiA = uA.ui; + sign = uiA>>31; + + // sign is True if pA > NaR. + if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. + if (uiA <= 0x38000000) { // 0 <= |pA| <= 1/2 rounds to zero. + uA.ui = 0; + return uA.p; + } + else if (uiA < 0x44000000) { // 1/2 < x < 3/2 rounds to 1. + uA.ui = 0x40000000; + } + else if (uiA <= 0x4A000000) { // 3/2 <= x <= 5/2 rounds to 2. 
+ uA.ui = 0x48000000; + } + else if (uiA >= 0x7E800000) { // If |A| is 0x7E800000 (posit is pure integer value), leave it unchanged. + return uA.p; // This also takes care of the NaR case, 0x80000000. + } + else { // 34% of the cases, we have to decode the posit. + + while (mask & uiA) { + scale += 4; + mask >>= 1; + } + mask >>= 1; + + //Exponential (2 bits) + if (mask & uiA) scale+=2; + mask >>= 1; + if (mask & uiA) scale++; + mask >>= scale; + + //the rest of the bits + bitLast = (uiA & mask); + mask >>= 1; + tmp = (uiA & mask); + bitNPlusOne = tmp; + uiA ^= tmp; // Erase the bit, if it was set. + tmp = uiA & (mask - 1); // this is actually bitsMore + + uiA ^= tmp; + + if (bitNPlusOne) { + if (bitLast | tmp) uiA += (mask << 1); + } + uA.ui = uiA; + + + } + if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; + return uA.p; + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_sqrt.c b/source/luametatex/source/libraries/softposit/source/p32_sqrt.c new file mode 100644 index 000000000..e02ec5fd1 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_sqrt.c @@ -0,0 +1,137 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
// Lookup tables for the initial reciprocal-square-root estimate; defined in
// another translation unit.
extern const uint_fast16_t softposit_approxRecipSqrt0[];
extern const uint_fast16_t softposit_approxRecipSqrt1[];

/*
 * Square root of a 32-bit posit.
 *
 * Returns 0x80000000 when the top bit of the input pattern is set (negative
 * input or the 0x80000000 pattern itself), and returns the input unchanged
 * when it is zero. Otherwise the input is decoded, an estimate of
 * 1/sqrt(frac) is taken from the tables above and refined, multiplied by the
 * fraction to obtain the root, and the result is rounded and re-encoded.
 *
 * The statement order in the refinement section is significant: every shift
 * amount is tied to the fixed-point scaling of the value produced just
 * before it.
 */
posit32_t p32_sqrt( posit32_t pA ) {

    union ui32_p32 uA;
    uint_fast32_t index, r0, shift, fracA, expZ, expA;
    uint_fast32_t mask, uiA, uiZ;
    uint_fast64_t eSqrR0, fracZ, negRem, recipSqrt, shiftedFracZ, sigma0, sqrSigma0;
    int_fast32_t eps, shiftZ;

    uA.p = pA;
    uiA = uA.ui;

    // If NaR or a negative number, return NaR.
    if (uiA & 0x80000000) {
        uA.ui = 0x80000000;
        return uA.p;
    }
    // If the argument is zero, return zero.
    else if (!uiA) {
        return uA.p;
    }
    // Compute the square root; shiftZ is the power-of-2 scaling of the result.
    // Decode regime and exponent; scale the input to be in the range 1 to 4:
    if (uiA & 0x40000000) {
        // Regime of ones: each regime bit after the first adds 2 to shiftZ.
        shiftZ = -2;
        while (uiA & 0x40000000) {
            shiftZ += 2;
            uiA = (uiA << 1) & 0xFFFFFFFF;
        }
    } else {
        // Regime of zeros: each regime bit subtracts 2 from shiftZ.
        shiftZ = 0;
        while (!(uiA & 0x40000000)) {
            shiftZ -= 2;
            uiA = (uiA << 1) & 0xFFFFFFFF;
        }
    }

    uiA &= 0x3FFFFFFF;                  // drop the regime terminator bit
    expA = (uiA >> 28);                 // 2 exponent bits
    shiftZ += (expA >> 1);              // high exponent bit goes into the result scale
    expA = (0x1 ^ (expA & 0x1));        // expA is now the INVERTED low exponent bit
    uiA &= 0x0FFFFFFF;
    fracA = (uiA | 0x10000000);         // fraction with the hidden bit at bit 28

    // Use table look-up of first 4 bits for piecewise linear approx. of 1/sqrt:
    index = ((fracA >> 24) & 0xE) + expA;
    eps = ((fracA >> 9) & 0xFFFF);
    r0 = softposit_approxRecipSqrt0[index]
         - (((uint_fast32_t) softposit_approxRecipSqrt1[index] * eps) >> 20);

    // Use Newton-Raphson refinement to get 33 bits of accuracy for 1/sqrt:
    eSqrR0 = (uint_fast64_t) r0 * r0;
    if (!expA) eSqrR0 <<= 1;
    sigma0 = 0xFFFFFFFF & (0xFFFFFFFF ^ ((eSqrR0 * (uint64_t)fracA) >> 20));
    recipSqrt = ((uint_fast64_t) r0 << 20) + (((uint_fast64_t) r0 * sigma0) >> 21);

    sqrSigma0 = ((sigma0 * sigma0) >> 35);
    recipSqrt += ( (( recipSqrt + (recipSqrt >> 2) - ((uint_fast64_t)r0 << 19) ) * sqrSigma0) >> 46 );


    // sqrt(x) = x * (1/sqrt(x)), in fixed point.
    fracZ = (((uint_fast64_t) fracA) * recipSqrt) >> 31;
    if (expA) fracZ = (fracZ >> 1);

    // Find the exponent of Z and encode the regime bits.
    expZ = shiftZ & 0x3;
    if (shiftZ < 0) {
        shift = (-1 - shiftZ) >> 2;
        uiZ = 0x20000000 >> shift;
    } else {
        shift = shiftZ >> 2;
        uiZ = 0x7FFFFFFF - (0x3FFFFFFF >> shift);
    }

    // Trick for eliminating off-by-one cases that only uses one multiply:
    fracZ++;
    if (!(fracZ & 0xF)) {
        shiftedFracZ = fracZ >> 1;
        negRem = (shiftedFracZ * shiftedFracZ) & 0x1FFFFFFFF;
        if (negRem & 0x100000000) {
            fracZ |= 1;
        } else {
            if (negRem) fracZ--;
        }
    }
    // Strip off the hidden bit and round-to-nearest using last shift+5 bits.
    fracZ &= 0xFFFFFFFF;
    mask = (1 << (4 + shift));
    if (mask & fracZ) {
        // Round up when any lower bit is set, or on a tie when the kept LSB is odd.
        if ( ((mask - 1) & fracZ) | ((mask << 1) & fracZ) ) fracZ += (mask << 1);
    }
    // Assemble the result and return it.
    uA.ui = uiZ | (expZ << (27 - shift)) | (fracZ >> (5 + shift));
    return uA.p;
}
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" +posit32_t p32_sub( posit32_t a, posit32_t b ) { + + + union ui32_p32 uA, uB, uZ; + uint_fast32_t uiA, uiB; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + + //infinity + if ( uiA==0x80000000 || uiB==0x80000000 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80000000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80000000; +#endif + return uZ.p; + } + //Zero + else if ( uiA==0 || uiB==0 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = (uiA | -uiB); + uZ.ui.exact = 0; +#else + uZ.ui = (uiA | -uiB); +#endif + return uZ.p; + } + + //different signs + if ((uiA^uiB)>>31) + return softposit_addMagsP32(uiA, (-uiB & 0xFFFFFFFF)); + else + return softposit_subMagsP32(uiA, (-uiB & 0xFFFFFFFF)); + + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_to_i32.c b/source/luametatex/source/libraries/softposit/source/p32_to_i32.c new file mode 100644 index 000000000..c6306eecc --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_to_i32.c @@ -0,0 +1,102 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +int_fast32_t pX2_to_i32( posit_2_t pA ){ + posit32_t p32 = {.v = pA.v}; + return p32_to_i32(p32); +} +int_fast32_t p32_to_i32( posit32_t pA ){ + + union ui32_p32 uA; + uint_fast64_t iZ64, mask, tmp; + int_fast32_t iZ; + uint_fast32_t scale = 0, uiA; + bool bitLast, bitNPlusOne, bitsMore, sign; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000) return 0; + + sign = uiA>>31; + if (sign) uiA = -uiA & 0xFFFFFFFF; + + if (uiA <= 0x38000000) return 0; // 0 <= |pA| <= 1/2 rounds to zero. + else if (uiA < 0x44000000) iZ = 1; // 1/2 < x < 3/2 rounds to 1. + else if (uiA <= 0x4A000000) iZ = 2; // 3/2 <= x <= 5/2 rounds to 2. // For speed. Can be commented out + //overflow so return max integer value + else if(uiA>0x7FAFFFFF) return (sign) ? (-2147483648) : (2147483647); //return INT_MAX + else{ + uiA -= 0x40000000; + while (0x20000000 & uiA) { + scale += 4; + uiA = (uiA - 0x20000000) << 1; + } + uiA <<= 1; // Skip over termination bit, which is 0. + if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. + if (0x10000000 & uiA) scale++; + iZ64 = (((uint64_t)uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) + mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ64 & mask); // Extract the bit, without shifting it. + mask >>= 1; + tmp = (iZ64 & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ64 ^= tmp; // Erase the bit, if it was set. + tmp = iZ64 & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ64 ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ64 += (mask << 1); + } + + iZ = (uint64_t)iZ64 >> (62 - scale); // Right-justify the integer. 
+ } + + if (sign){ + iZ = (-iZ & 0xFFFFFFFF); + } + return iZ; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_to_i64.c b/source/luametatex/source/libraries/softposit/source/p32_to_i64.c new file mode 100644 index 000000000..3fa9b81fd --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_to_i64.c @@ -0,0 +1,107 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +int_fast64_t pX2_to_i64( posit_2_t pA ){ + posit32_t p32 = {.v = pA.v}; + return p32_to_i64(p32); +} + +int_fast64_t p32_to_i64( posit32_t pA ){ + + union ui32_p32 uA; + uint_fast64_t mask, tmp; + int_fast64_t iZ; + uint_fast32_t scale = 0, uiA; + bool bitLast, bitNPlusOne, bitsMore, sign; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000) return 0; + + sign = uiA>>31; + if (sign) uiA = -uiA & 0xFFFFFFFF; + + if (uiA <= 0x38000000) return 0; // 0 <= |pA| <= 1/2 rounds to zero. + else if (uiA < 0x44000000) iZ = 1; // 1/2 < x < 3/2 rounds to 1. + else if (uiA <= 0x4A000000) iZ = 2; // 3/2 <= x <= 5/2 rounds to 2. + //overflow so return max integer value + else if(uiA>0x7FFFAFFF) return (sign) ? (-9223372036854775808) : (0x7FFFFFFFFFFFFFFF); + else{ + uiA -= 0x40000000; + while (0x20000000 & uiA) { + scale += 4; + uiA = (uiA - 0x20000000) << 1; + } + uiA <<= 1; // Skip over termination bit, which is 0. + if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. + if (0x10000000 & uiA) scale++; + iZ = ((uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) + + if(scale<62){ + + mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ & mask); // Extract the bit, without shifting it. 
+ mask >>= 1; + tmp = (iZ & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ ^= tmp; // Erase the bit, if it was set. + tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ += (mask << 1); + } + iZ = ((uint64_t)iZ) >> (62 - scale); // Right-justify the integer. + } + else if (scale>62) + iZ = (uint64_t)iZ << (scale-62); + + } + + if (sign) iZ = -iZ ; + return iZ; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_to_p16.c b/source/luametatex/source/libraries/softposit/source/p32_to_p16.c new file mode 100644 index 000000000..f547aaef9 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_to_p16.c @@ -0,0 +1,122 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit16_t pX2_to_p16( posit_2_t pA ){ + posit32_t p32 = {.v = pA.v}; + return p32_to_p16(p32); +} + +posit16_t p32_to_p16( posit32_t pA ){ + + union ui32_p32 uA; + union ui16_p16 uZ; + uint_fast32_t uiA, tmp=0, exp_frac32A; + uint_fast16_t regime, exp_frac=0; + bool sign, regSA, bitsMore=0, bitNPlusOne=0; + int_fast16_t kA=0, regA; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000 || uiA==0 ){ + uZ.ui = (uint16_t)(uiA>>16); + return uZ.p; + } + + sign = signP32UI( uiA ); + if (sign) uiA = -uiA & 0xFFFFFFFF; + + if (uiA>0x7F600000) uZ.ui = 0x7FFF; + else if (uiA<0x00A00000) uZ.ui = 0x1; + else{ + regSA = signregP32UI(uiA); + + //regime + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + //exp and frac + exp_frac32A = tmp<<1; +printBinary(&exp_frac32A, 32); +printf("kA: %d\n", kA); + if(kA<0){ + regA = (-kA)<<1; + if (exp_frac32A&0x80000000) regA--; + exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF; + regSA = 0; + regime = 0x4000>>regA; + } 
+ else{ + regA = (kA<<1)+1; + if (exp_frac32A&0x80000000) regA++; + exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF; + regSA=1; + regime = 0x7FFF - (0x7FFF>>regA); + } + if ((exp_frac32A>>(17+regA)) & 0x1) bitNPlusOne = 1; + if (regA<14) exp_frac = (uint16_t) (exp_frac32A>>(18+regA)); + + uZ.ui = regime + exp_frac; + if (bitNPlusOne){ + if ((exp_frac32A<<(15-regA)) & 0xFFFFFFFF) bitsMore=1; + uZ.ui += (bitNPlusOne & (uZ.ui&1)) | ( bitNPlusOne & bitsMore); + } + } + + + + + if (sign) uZ.ui = (-uZ.ui & 0xFFFF); + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_to_p8.c b/source/luametatex/source/libraries/softposit/source/p32_to_p8.c new file mode 100644 index 000000000..270545cf2 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_to_p8.c @@ -0,0 +1,123 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit8_t pX2_to_p8( posit_2_t pA ){ + posit32_t p32 = {.v = pA.v}; + return p32_to_p8(p32); +} + +posit8_t p32_to_p8( posit32_t pA ){ + + union ui32_p32 uA; + union ui8_p8 uZ; + uint_fast32_t uiA, tmp=0, regime; + uint_fast32_t exp_frac32A=0; + bool sign, regSA, bitsMore=0, bitNPlusOne=0; + int_fast8_t kA=0, regA; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000 || uiA==0 ){ + uZ.ui = (uint8_t)(uiA>>24) &0xFF; + return uZ.p; + } + + + sign = signP32UI( uiA ); + if (sign) uiA = -uiA & 0xFFFFFFFF; + + if (uiA>0x66000000) uZ.ui = 0x7F; + else if (uiA<0x1A000000) uZ.ui = 0x1; + else{ + regSA = signregP32UI(uiA); + //regime + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + + //2nd and 3rd bit exp + exp_frac32A = tmp; + + if(kA<0){ + regA = ((-kA)<<2) - (exp_frac32A>>29); + + if (regA==0) regA=1; + regSA = 0; + regime = (regA>6) ? 
(0x1) : (0x40>>regA); + + } + else{ + (kA==0)?(regA=1 + (exp_frac32A>>29)): (regA = (kA<<2) + (exp_frac32A>>29) +1); + regSA=1; + regime = 0x7F - (0x7F>>regA); + } + exp_frac32A = (exp_frac32A<<3) &0xFFFFFFFF; + if (regA>5){ + uZ.ui = regime; + } + else{ + //exp_frac32A= ((exp_frac32A)&0x3F) >> shift; //first 2 bits already empty (for sign and regime terminating bit) + uZ.ui = regime | ( exp_frac32A>>(regA+26) ); + } + if ( exp_frac32A & (0x2000000<<regA) ){ + bitsMore = exp_frac32A & (0xFFFFFFFF>>(7-regA)); + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + + + if (sign) uZ.ui = -uZ.ui & 0xFF; + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_to_pX1.c b/source/luametatex/source/libraries/softposit/source/p32_to_pX1.c new file mode 100644 index 000000000..ec74b6f56 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_to_pX1.c @@ -0,0 +1,131 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/

#include "platform.h"
#include "internals.h"


/*
 * p32_to_pX1 -- convert a 32-bit posit to a posit_1_t of x bits, stored
 * left-aligned in a 32-bit word.
 *
 * x outside 2..32 yields 0x80000000. The encodings 0 and 0x80000000 are
 * passed through unchanged. For x == 2 any non-zero magnitude becomes
 * 0x40000000. Otherwise the regime run and the first exponent bit of the
 * source are folded into a new regime length (regA), the remaining bits are
 * shifted into place, masked to the top x bits and rounded to nearest,
 * ties to even. The sign is re-applied at the end by two's-complement
 * negation.
 */
posit_1_t p32_to_pX1( posit32_t pA, int x ){

    union ui32_p32 uA;
    union ui32_pX1 uZ;
    uint_fast32_t uiA, tmp, regime;
    uint_fast32_t exp_frac32A=0;
    bool sign, regSA, bitNPlusOne, bitsMore;
    int_fast8_t kA=0, regA;

    // unsupported width
    if (x<2 || x>32){
        uZ.ui = 0x80000000;
        return uZ.p;
    }

    uA.p = pA;
    uiA = uA.ui;

    // 0 and 0x80000000 have the same encoding at every width
    if (uiA==0x80000000 || uiA==0 ){
        uZ.ui = uiA;
        return uZ.p;
    }

    sign = signP32UI( uiA );
    if (sign) uiA = -uiA & 0xFFFFFFFF;      // continue with the magnitude

    if (x==2){
        uZ.ui=(uiA>0)?(0x40000000):(0);
    }
    else {
        regSA = signregP32UI(uiA);

        // count the regime run, starting just below the first regime bit
        tmp = (uiA<<2)&0xFFFFFFFF;
        if (regSA){
            while (tmp>>31){
                kA++;
                tmp= (tmp<<1) & 0xFFFFFFFF;
            }
        }
        else{
            kA=-1;
            while (!(tmp>>31)){
                kA--;
                tmp= (tmp<<1) & 0xFFFFFFFF;
            }
            tmp&=0x7FFFFFFF;
        }
        //exp and frac
        exp_frac32A = tmp<<1;
//printf("kA: %d\n", kA);
//printBinary(&exp_frac32A, 32);
        // The top bit of exp_frac32A (first exponent bit of the source) is
        // absorbed into the regime length; it is then shifted out.
        // NOTE(review): regA is not range-checked before the shifts by regA
        // below -- confirm it stays below the width of int for every input.
        if(kA<0){
            regA = (-kA)<<1;
            if (exp_frac32A&0x80000000) regA--;
            exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF;
            regSA = 0;
            regime = 0x40000000>>regA;
        }
        else{
            regA = (kA<<1)+1;
            if (exp_frac32A&0x80000000) regA++;
            exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF;
            regSA=1;
            regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA);
        }

        if(regA>(x-2)){
            //max or min pos. exp and frac does not matter.
            // NOTE(review): the mask is built by right-shifting a negative
            // int32_t, which relies on an arithmetic shift.
            uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x));
        }
        else{

            // first bit that falls off the x-bit result, and whether anything lies below it
            bitNPlusOne = (exp_frac32A >>(regA+33-x))&0x1;
            bitsMore = exp_frac32A&(0x7FFFFFFF>>(x-regA-2));

            // align exponent/fraction below the regime, then keep only the top x bits
            if (regA<30) exp_frac32A >>=(2+regA);
            else exp_frac32A=0;
            uZ.ui = regime + (exp_frac32A & ((int32_t)0x80000000>>(x-1)) );

            // never return zero for a non-zero input: use the lowest bit of the x-bit field
            if (uZ.ui==0) uZ.ui = 0x1<<(32-x);
            else if (bitNPlusOne){
                // round to nearest, tie to even, adding one unit in the last kept place
                uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ;
            }
        }

    }
    if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF);
    return uZ.p;
}

diff --git a/source/luametatex/source/libraries/softposit/source/p32_to_pX2.c b/source/luametatex/source/libraries/softposit/source/p32_to_pX2.c new file mode 100644 index 000000000..bce55adee --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_to_pX2.c @@ -0,0 +1,92 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_2_t pX2_to_pX2( posit_2_t pA, int x ){ + posit32_t p32 = {.v = pA.v}; + return p32_to_pX2(p32, x); +} +posit_2_t p32_to_pX2( posit32_t pA, int x ){ + + union ui32_p32 uA; + union ui32_pX2 uZ; + uint_fast32_t uiA; + bool sign; + + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000 || uiA==0 ){ + uZ.ui = uiA; + return uZ.p; + } + + sign = signP32UI( uiA ); + if (sign) uiA = -uiA & 0xFFFFFFFF; + + if (x==2){ + uZ.ui=(uiA>0)?(0x40000000):(0); + } + else if (x==32 || (((uint32_t)0xFFFFFFFF>>x) & uiA)==0 ){ + uZ.ui = uiA; + } + else { + + int shift = 32-x; + if( (uiA>>shift)!=(0x7FFFFFFF>>shift) ){ + if( ((uint32_t)0x80000000>>x) & uiA){ + if ( ( ((uint32_t)0x80000000>>(x-1)) & uiA) || (((uint32_t)0x7FFFFFFF>>x) & uiA) ) + uiA += (0x1<<shift); + } + } + uZ.ui = uiA & ((int32_t)0x80000000>>(x-1)); + if (uZ.ui==0) uZ.ui = 0x1<<shift; + + } + if (sign) 
uZ.ui = (-uZ.ui & 0xFFFFFFFF); + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_to_ui32.c b/source/luametatex/source/libraries/softposit/source/p32_to_ui32.c new file mode 100644 index 000000000..d5c885a12 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_to_ui32.c @@ -0,0 +1,108 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +uint_fast32_t pX2_to_ui32( posit_2_t pA ) { + posit32_t p32 = {.v = pA.v}; + return p32_to_ui32(p32); +} + +uint_fast32_t p32_to_ui32( posit32_t pA ) { + + union ui32_p32 uA; + uint_fast64_t iZ64, mask, tmp; + uint_fast32_t iZ, scale = 0, uiA; + bool bitLast, bitNPlusOne; + + uA.p = pA; + uiA = uA.ui; + + //NaR + //if (uiA==0x80000000) return 0; + //negative + if (uiA>=0x80000000) return 0; + + if (uiA <= 0x38000000) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x44000000) { // 1/2 < x < 3/2 rounds to 1. + return 1; + } + else if (uiA <= 0x4A000000) { // 3/2 <= x <= 5/2 rounds to 2. + return 2; + } + //overflow so return max integer value + else if(uiA>0x7FBFFFFF){ + return 0xFFFFFFFF; + } + else { + uiA -= 0x40000000; + while (0x20000000 & uiA) { + scale += 4; + uiA = (uiA - 0x20000000) << 1; + } + uiA <<= 1; // Skip over termination bit, which is 0. + if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. + if (0x10000000 & uiA) scale++; + iZ64 = (((uint64_t)uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) + + mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ64 & mask); // Extract the bit, without shifting it. 
+ mask >>= 1; + tmp = (iZ64 & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ64 ^= tmp; // Erase the bit, if it was set. + tmp = iZ64 & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ64 ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ64 += (mask << 1); + } + iZ = (uint64_t)iZ64 >> (62 - scale); // Right-justify the integer. + } + + return iZ; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p32_to_ui64.c b/source/luametatex/source/libraries/softposit/source/p32_to_ui64.c new file mode 100644 index 000000000..70c367a71 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p32_to_ui64.c @@ -0,0 +1,117 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +uint_fast64_t pX2_to_ui64( posit_2_t pA ) { + posit32_t p32 = {.v = pA.v}; + return p32_to_ui64(p32); + +} + +uint_fast64_t p32_to_ui64( posit32_t pA ) { + + union ui32_p32 uA; + uint_fast64_t mask, iZ, tmp; + uint_fast32_t scale = 0, uiA; + bool bitLast, bitNPlusOne, bitsMore; + + uA.p = pA; + uiA = uA.ui; + + //NaR + //if (uiA==0x80000000) return 0; + //negative + if (uiA>=0x80000000) return 0; + + if (uiA <= 0x38000000) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x44000000) { // 1/2 < x < 3/2 rounds to 1. + return 1; + } + else if (uiA <= 0x4A000000) { // 3/2 <= x <= 5/2 rounds to 2. + return 2; + } + else if (uiA>0x7FFFBFFF){ + return 0xFFFFFFFFFFFFFFFFULL; + } + else { + + uiA -= 0x40000000; + while (0x20000000 & uiA) { + scale += 4; + uiA = (uiA - 0x20000000) << 1; + } + uiA <<= 1; // Skip over termination bit, which is 0. + if (0x20000000 & uiA) scale+=2; // If first exponent bit is 1, increment the scale. 
+ if (0x10000000 & uiA) scale++; + iZ = (((uint64_t)uiA | 0x10000000ULL)&0x1FFFFFFFULL) << 34; // Left-justify fraction in 32-bit result (one left bit padding) + + if(scale<62){ + + mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ & mask); // Extract the bit, without shifting it. + mask >>= 1; + tmp = (iZ & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ ^= tmp; // Erase the bit, if it was set. + tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ += (mask << 1); + } + iZ = (uint64_t)iZ >> (62 - scale); // Right-justify the integer. + } + else if (scale>62){ + iZ = (uint64_t)iZ << (scale-62); + } + + } + return iZ; +} + + + diff --git a/source/luametatex/source/libraries/softposit/source/p8_add.c b/source/luametatex/source/libraries/softposit/source/p8_add.c new file mode 100644 index 000000000..cc654031f --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_add.c @@ -0,0 +1,81 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit8_t p8_add( posit8_t a, posit8_t b ) +{ + union ui8_p8 uA, uB; + uint_fast8_t uiA, uiB; + union ui8_p8 uZ; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + + //Zero or infinity + if (uiA==0 || uiB==0){ // Not required but put here for speed +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = uiA | uiB; + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#else + uZ.ui = uiA | uiB; +#endif + return uZ.p; + } + else if ( uiA==0x80 || uiB==0x80 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80; +#endif + return uZ.p; + } + + //different signs + if ((uiA^uiB)>>7) + return softposit_subMagsP8(uiA, uiB); + else + return softposit_addMagsP8(uiA, uiB); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_div.c b/source/luametatex/source/libraries/softposit/source/p8_div.c new file mode 100644 
index 000000000..1e3e5d791 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_div.c @@ -0,0 +1,167 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdlib.h> + +#include "platform.h" +#include "internals.h" + +posit8_t p8_div( posit8_t pA, posit8_t pB ) { + union ui8_p8 uA, uB, uZ; + uint_fast8_t uiA, uiB, fracA, fracB, regA, regime, tmp; + bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast8_t kA=0; + uint_fast16_t frac16A, frac16Z, rem; + div_t divresult; + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + + //Zero or infinity + if ( uiA==0x80 || uiB==0x80 || uiB==0){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80; +#endif + return uZ.p; + } + else if (uiA==0){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0; + if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) + uZ.ui.exact = 1; + else + uZ.ui.exact = 0; +#else + uZ.ui = 0; +#endif + return uZ.p; + } + + signA = signP8UI( uiA ); + signB = signP8UI( uiB ); + signZ = signA ^ signB; + if(signA) uiA = (-uiA & 0xFF); + if(signB) uiB = (-uiB & 0xFF); + regSA = signregP8UI(uiA); + regSB = signregP8UI(uiB); + + tmp = (uiA<<2) & 0xFF; + if (regSA){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA=-1; + while (!(tmp>>7)){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + fracA = (0x80 | tmp); + frac16A = fracA<<7; //hidden bit 2nd bit + + tmp = (uiB<<2) & 0xFF; + if (regSB){ + while (tmp>>7){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + fracB = (0x80 | tmp); + } + else{ + kA++; + while (!(tmp>>7)){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + fracB = (0x80 | (0x7F & tmp)); + } + + divresult = div (frac16A,fracB); + frac16Z = divresult.quot; + rem = divresult.rem; + + if (frac16Z!=0){ + rcarry = frac16Z >> 7; // this is the hidden bit (7th bit) , extreme right bit is bit 0 + if (!rcarry){ + kA --; + frac16Z<<=1; + } + } + + if(kA<0){ + regA = (-kA & 0xFF); + regSA = 0; + regime = 0x40>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7F-(0x7F>>regA); + } + 
if(regA>6){ + //max or min pos. exp and frac does not matter. + (regSA) ? (uZ.ui= 0x7F): (uZ.ui=0x1); + } + else{ + //remove carry and rcarry bits and shift to correct position + frac16Z &=0x7F; + fracA = (uint_fast16_t)frac16Z >> (regA+1); + + bitNPlusOne = (0x1 & (frac16Z>> regA)) ; + uZ.ui = packToP8UI(regime, fracA); + + //uZ.ui = (uint16_t) (regime) + ((uint16_t) (expA)<< (13-regA)) + ((uint16_t)(fracA)); + if (bitNPlusOne){ + (((1<<regA)-1) & frac16Z) ? (bitsMore=1) : (bitsMore=0); + if (rem) bitsMore =1; + //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + if (signZ) uZ.ui = -uZ.ui & 0xFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_eq.c b/source/luametatex/source/libraries/softposit/source/p8_eq.c new file mode 100644 index 000000000..08df82be9 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_eq.c @@ -0,0 +1,52 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +bool p8_eq( posit8_t pA, posit8_t pB ){ + + union ui8_p8 uA, uB; + int8_t uiA, uiB; + + uA.p = pA; + uiA = (int8_t) uA.ui; + uB.p = pB; + uiB = (int8_t)uB.ui; + + if (uiA==uiB) + return true; + else + return false; +} diff --git a/source/luametatex/source/libraries/softposit/source/p8_le.c b/source/luametatex/source/libraries/softposit/source/p8_le.c new file mode 100644 index 000000000..0234792f1 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_le.c @@ -0,0 +1,52 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +bool p8_le( posit8_t pA, posit8_t pB ) { + union ui8_p8 uA, uB; + int8_t uiA, uiB; + + uA.p = pA; + uiA = (int8_t) uA.ui; + uB.p = pB; + uiB = (int8_t)uB.ui; + + if (uiA<=uiB) + return true; + else + return false; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_lt.c b/source/luametatex/source/libraries/softposit/source/p8_lt.c new file mode 100644 index 000000000..c10d903de --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_lt.c @@ -0,0 +1,53 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. 
All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +bool p8_lt( posit8_t pA, posit8_t pB ) { + union ui8_p8 uA, uB; + int8_t uiA, uiB; + + uA.p = pA; + uiA = (int8_t) uA.ui; + uB.p = pB; + uiB = (int8_t)uB.ui; + + if (uiA<uiB) + return true; + else + return false; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_mul.c b/source/luametatex/source/libraries/softposit/source/p8_mul.c new file mode 100644 index 000000000..d565c7a75 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_mul.c @@ -0,0 +1,167 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit8_t p8_mul( posit8_t pA, posit8_t pB ){ + + union ui8_p8 uA, uB, uZ; + uint_fast8_t uiA, uiB; + uint_fast8_t regA, fracA, regime, tmp; + bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast8_t kA=0; + uint_fast16_t frac16Z; + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + //NaR or Zero + if ( uiA==0x80 || uiB==0x80 ){ + +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80; +#endif + return uZ.p; + } + else if (uiA==0 || uiB==0){ +#ifdef SOFTPOSIT_EXACT + + uZ.ui.v = 0; + if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) + uZ.ui.exact = 1; + else + uZ.ui.exact = 0; +#else + uZ.ui = 0; +#endif + return uZ.p; + } + + signA = signP8UI( uiA ); + signB = signP8UI( uiB ); + signZ = signA ^ signB; + + if(signA) uiA = (-uiA & 0xFF); + if(signB) uiB = (-uiB & 0xFF); + + regSA = signregP8UI(uiA); + regSB = signregP8UI(uiB); + + tmp = (uiA<<2) & 0xFF; + if (regSA){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA=-1; + while (!(tmp>>7)){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + fracA = (0x80 | tmp); + + tmp = (uiB<<2) & 0xFF; + if (regSB){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA--; + while (!(tmp>>7)){ + kA--; 
+ tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + frac16Z = (uint_fast16_t) fracA * (0x80 | tmp); + + rcarry = frac16Z>>15;//1st bit of frac32Z + if (rcarry){ + kA++; + frac16Z>>=1; + } + + if(kA<0){ + regA = (-kA & 0xFF); + regSA = 0; + regime = 0x40>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7F-(0x7F>>regA); + } + + + + if(regA>6){ + //max or min pos. exp and frac does not matter. + (regSA) ? (uZ.ui= 0x7F): (uZ.ui=0x1); + } + else{ + //remove carry and rcarry bits and shift to correct position + frac16Z = (frac16Z&0x3FFF) >> regA; + fracA = (uint_fast8_t) (frac16Z>>8); + bitNPlusOne = (0x80 & frac16Z) ; + uZ.ui = packToP8UI(regime, fracA); + + //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + if (bitNPlusOne){ + if (0x7F & frac16Z) bitsMore=1; + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + + if (signZ) uZ.ui = -uZ.ui & 0xFF; + return uZ.p; +} + + + diff --git a/source/luametatex/source/libraries/softposit/source/p8_mulAdd.c b/source/luametatex/source/libraries/softposit/source/p8_mulAdd.c new file mode 100644 index 000000000..e48a335e8 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_mulAdd.c @@ -0,0 +1,56 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit8_t p8_mulAdd( posit8_t a, posit8_t b, posit8_t c ) +{ + union ui8_p8 uA; + uint_fast8_t uiA; + union ui8_p8 uB; + uint_fast8_t uiB; + union ui8_p8 uC; + uint_fast8_t uiC; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + uC.p = c; + uiC = uC.ui; + return softposit_mulAddP8( uiA, uiB, uiC, 0 ); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_roundToInt.c b/source/luametatex/source/libraries/softposit/source/p8_roundToInt.c new file mode 100644 index 000000000..65e1be228 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_roundToInt.c @@ -0,0 +1,95 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017, 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit8_t p8_roundToInt( posit8_t pA ) { + + union ui8_p8 uA; + uint_fast8_t mask = 0x20, scale=0, tmp=0, uiA; + bool bitLast, bitNPlusOne, sign; + + uA.p = pA; + uiA = uA.ui; + sign = (uiA > 0x80); + + // sign is True if pA > NaR. + if (sign) uiA = -uiA & 0xFF; + if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. + uA.ui = 0; + return uA.p; + } + else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. + uA.ui = 0x40; + } + else if (uiA <= 0x64) { // 3/2 <= x <= 5/2 rounds to 2. + uA.ui = 0x60; + } + else if (uiA >= 0x78) { // If |A| is 8 or greater, leave it unchanged. + return uA.p; // This also takes care of the NaR case, 0x80. + } + else { + while (mask & uiA) { + scale += 1; + mask >>= 1; + } + + mask >>= scale; + bitLast = (uiA & mask); + + mask >>= 1; + tmp = (uiA & mask); + bitNPlusOne = tmp; + uiA ^= tmp; + tmp = uiA & (mask - 1); //bitsMore + uiA ^= tmp; + + if (bitNPlusOne) { + if (bitLast | tmp) uiA += (mask << 1); + } + uA.ui = uiA; + } + if (sign) uA.ui = -uA.ui & 0xFF; + return uA.p; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_sqrt.c b/source/luametatex/source/libraries/softposit/source/p8_sqrt.c new file mode 100644 index 000000000..32a289b8d --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_sqrt.c @@ -0,0 +1,63 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit8_t p8_sqrt( posit8_t pA ) { + union ui8_p8 uA; + uint_fast8_t uiA; + + static const uint8_t p8Sqrt [] = + {0, 8, 11, 14, 16, 18, 20, 21, 23, 24, 25, 27, 28, 29, 30, 31, 32, + 33, 34, 35, 36, 37, 38, 38, 39, 40, 41, 42, 42, 43, 44, 45, 45, 46, + 47, 47, 48, 49, 49, 50, 51, 51, 52, 52, 53, 54, 54, 55, 55, 56, 57, + 57, 58, 58, 59, 59, 60, 60, 61, 61, 62, 62, 63, 63, 64, 64, 65, 65, + 66, 66, 67, 67, 68, 68, 69, 69, 70, 70, 70, 71, 71, 72, 72, 72, 73, + 73, 74, 74, 74, 75, 75, 75, 76, 76, 77, 77, 77, 79, 80, 81, 83, 84, + 85, 86, 87, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 100, + 101, 102, 103, 105, 108, 110, 112, 114, 115, 120}; + uA.p = pA; + uiA = uA.ui; + + if (uiA>=0x80){ + uA.ui = 0x80; + return uA.p; + } + uA.ui = p8Sqrt[uiA]; + + return uA.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_sub.c b/source/luametatex/source/libraries/softposit/source/p8_sub.c new file mode 100644 index 000000000..31c05d874 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_sub.c @@ -0,0 +1,88 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +posit8_t p8_sub( posit8_t a, posit8_t b ){ + + union ui8_p8 uA, uB; + uint_fast8_t uiA, uiB; + union ui8_p8 uZ; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + + + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + + //infinity + if ( uiA==0x80 || uiB==0x80 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80; +#endif + return uZ.p; + } + //Zero + else if ( uiA==0 || uiB==0 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = (uiA | -uiB); + uZ.ui.exact = 0; +#else + uZ.ui = (uiA | -uiB); +#endif + return uZ.p; + } + + //different signs + if (signP8UI(uiA^uiB)) + return softposit_addMagsP8(uiA, (-uiB & 0xFF)); + else + return softposit_subMagsP8(uiA, (-uiB & 0xFF)); + + + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_to_i32.c b/source/luametatex/source/libraries/softposit/source/p8_to_i32.c new file mode 100644 index 
000000000..f4aaebbac --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_to_i32.c @@ -0,0 +1,96 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +int_fast32_t p8_to_i32( posit8_t pA ){ + union ui8_p8 uA; + int_fast32_t mask, iZ, tmp; + uint_fast8_t scale = 0, uiA; + bool bitLast, bitNPlusOne, sign; + + uA.p = pA; + uiA = uA.ui; // Copy of the input. + //NaR + if (uiA==0x80) return 0; + + sign = (uiA > 0x80); // sign is True if pA > NaR. + + if (sign) uiA = -uiA & 0xFF; // A is now |A|. + + if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. + iZ = 1; + } + else { // Decode the posit, left-justifying as we go. + uiA -= 0x40; // Strip off first regime bit (which is a 1). + while (0x20 & uiA) { // Increment scale one for each regime sign bit. + scale ++; // Regime sign bit is always 1 in this range. + uiA = (uiA - 0x20) << 1; // Remove the bit; line up the next regime bit. + } + uiA <<= 1; // Skip over termination bit, which is 0. + + iZ = ((uint32_t)uiA | 0x40) << 24; // Left-justify fraction in 32-bit result (one left bit padding) + + mask = 0x40000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ & mask); // Extract the bit, without shifting it. + mask >>= 1; + tmp = (iZ & mask); + + bitNPlusOne = tmp; // "True" if nonzero. + iZ ^= tmp; // Erase the bit, if it was set. + tmp = iZ & (mask - 1); // tmp has any remaining bits. 
// This is bitsMore + iZ ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ += (mask << 1); + } + iZ = (uint32_t)iZ >> (30 - scale); // Right-justify the integer. + } + + if (sign) iZ = -iZ; // Apply the sign of the input. + return iZ; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_to_i64.c b/source/luametatex/source/libraries/softposit/source/p8_to_i64.c new file mode 100644 index 000000000..70d85f95d --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_to_i64.c @@ -0,0 +1,97 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +int_fast64_t p8_to_i64( posit8_t pA ) { + + union ui8_p8 uA; + int_fast64_t mask, iZ, tmp; + uint_fast8_t scale = 0, uiA; + bool bitLast, bitNPlusOne, sign; + + uA.p = pA; + uiA = uA.ui; // Copy of the input. + //NaR + if (uiA==0x80) return 0; + + sign = uiA>>7; + if (sign) uiA = -uiA & 0xFF; + + if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. + iZ = 1; + } + else { // Decode the posit, left-justifying as we go. + + uiA -= 0x40; // Strip off first regime bit (which is a 1). + while (0x20 & uiA) { // Increment scale by 1 for each regime sign bit. + scale ++; // Regime sign bit is always 1 in this range. + uiA = (uiA - 0x20) << 1; // Remove the bit; line up the next regime bit. + } + uiA <<= 1; // Skip over termination bit, which is 0. + + iZ = ((uint64_t)uiA | 0x40) << 55; // Left-justify fraction in 32-bit result (one left bit padding) + + mask = 0x2000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ & mask); // Extract the bit, without shifting it. 
+ mask >>= 1; + tmp = (iZ & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ ^= tmp; // Erase the bit, if it was set. + tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ += (mask<<1) ; + } + iZ = (uint64_t)iZ >> (61 - scale); // Right-justify the integer. + } + + if (sign) iZ = -iZ; // Apply the sign of the input. + return iZ; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_to_p16.c b/source/luametatex/source/libraries/softposit/source/p8_to_p16.c new file mode 100644 index 000000000..56773e7dd --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_to_p16.c @@ -0,0 +1,107 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ +#include "platform.h" +#include "internals.h" + +posit16_t p8_to_p16( posit8_t pA ) { + + union ui8_p8 uA; + union ui16_p16 uZ; + uint_fast8_t uiA, tmp; + uint_fast16_t exp_frac16A=0, regime; + bool sign, regSA; + int_fast8_t kA=0, regA; + + uA.p = pA; + uiA = uA.ui; + + //NaR or zero + if (uiA==0x80 || uiA==0 ){ + uZ.ui = (uint16_t)uiA<<8; + return uZ.p; + } + + sign = signP8UI( uiA ); + + if (sign) uiA = -uiA & 0xFF; + regSA = signregP8UI(uiA); + + tmp = (uiA<<2) & 0xFF; + if (regSA){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA=-1; + while (!(tmp>>7)){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + exp_frac16A = tmp<<8; + + if(kA<0){ + regA = -kA; + if (regA&0x1) exp_frac16A |= 0x8000; + regA = (regA+1)>>1; + if (regA==0) regA=1; + regSA = 0; + regime = 0x4000>>regA; + } + else{ + if (kA&0x1) exp_frac16A |= 0x8000; + regA = (kA+2)>>1; + if (regA==0) regA=1; + regSA=1; + regime = 0x7FFF - (0x7FFF>>regA); + } + + exp_frac16A >>=(regA+2); //2 because of sign and regime terminating bit + + uZ.ui = regime + exp_frac16A; + + if (sign) uZ.ui = -uZ.ui & 0xFFFF; + + return uZ.p; +} + diff --git 
a/source/luametatex/source/libraries/softposit/source/p8_to_p32.c b/source/luametatex/source/libraries/softposit/source/p8_to_p32.c new file mode 100644 index 000000000..adee50e9e --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_to_p32.c @@ -0,0 +1,111 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ +#include "platform.h" +#include "internals.h" + +posit32_t p8_to_p32( posit8_t pA ) { + + + union ui8_p8 uA; + union ui32_p32 uZ; + uint_fast8_t uiA, tmp; + uint_fast32_t exp_frac32A=0, regime; + bool sign, regSA; + int_fast8_t kA=0, regA; + + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80 || uiA==0 ){ + uZ.ui = (uint32_t)uiA<<24; + return uZ.p; + } + + sign = signP8UI( uiA ); + + if (sign) uiA = -uiA & 0xFF; + regSA = signregP8UI(uiA); + + tmp = (uiA<<2) & 0xFF; + if (regSA){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA=-1; + while (!(tmp>>7)){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + exp_frac32A = tmp<<22; + + if(kA<0){ + regA = -kA; + // Place exponent bits + exp_frac32A |= ( ((regA&0x1)| ((regA+1)&0x2))<<29 ); + + regA = (regA+3)>>2; + if (regA==0) regA=1; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + exp_frac32A |= ( (kA&0x3) << 29 ); + + regA = (kA+4)>>2; + if (regA==0) regA=1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + + exp_frac32A =((uint_fast32_t)exp_frac32A) >> (regA+1); //2 because of sign and regime terminating bit + + uZ.ui = regime + exp_frac32A; + + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_to_pX1.c b/source/luametatex/source/libraries/softposit/source/p8_to_pX1.c new file mode 100644 index 000000000..84ef367bc 
--- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_to_pX1.c @@ -0,0 +1,130 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ +#include "platform.h" +#include "internals.h" + +posit_1_t p8_to_pX1( posit8_t pA, int x ) { + + union ui8_p8 uA; + union ui32_pX1 uZ; + uint_fast8_t uiA, tmp; + uint_fast32_t exp_frac32A=0, regime; + bool sign, regSA; + int_fast8_t kA=0, regA; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80 || uiA==0 ){ + uZ.ui = (uint32_t)uiA<<24; + return uZ.p; + } + + sign = signP8UI( uiA ); + if (sign) uiA = -uiA & 0xFF; + + if(x==2){ + uZ.ui=(uiA>0)?(0x40000000):(0); + } + else{ + regSA = signregP8UI(uiA); + + tmp = (uiA<<2) & 0xFF; + if (regSA){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA=-1; + while (!(tmp>>7)){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + exp_frac32A = tmp<<24; + + if(kA<0){ + regA = -kA; + // Place exponent bits + if (regA&0x1) exp_frac32A |= 0x80000000; + + regA = (regA+1)>>1; + if (regA==0) regA=1; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + if (kA&0x1) exp_frac32A |= 0x80000000; + + regA = (kA+2)>>1; + if (regA==0) regA=1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + exp_frac32A >>=(regA+2); //2 because of sign and regime terminating bit + + uZ.ui = regime + exp_frac32A; + + int shift = 32-x; + + if( (uZ.ui>>shift)!=(0x7FFFFFFF>>shift) ){ + if( ((uint32_t)0x80000000>>x) & uZ.ui){ + if ( ( ((uint32_t)0x80000000>>(x-1)) & uZ.ui) || 
(((uint32_t)0x7FFFFFFF>>x) & uZ.ui) ) + uZ.ui += (0x1<<shift); + } + } + + uZ.ui &=((int32_t)0x80000000>>(x-1)); + if (uZ.ui==0) uZ.ui = 0x1<<shift; + } + + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_to_pX2.c b/source/luametatex/source/libraries/softposit/source/p8_to_pX2.c new file mode 100644 index 000000000..777235e8a --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_to_pX2.c @@ -0,0 +1,128 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ +#include "platform.h" +#include "internals.h" + +posit_2_t p8_to_pX2( posit8_t pA, int x ) { + + union ui8_p8 uA; + union ui32_pX2 uZ; + uint_fast8_t uiA, tmp; + uint_fast32_t exp_frac32A=0, regime; + bool sign, regSA; + int_fast8_t kA=0, regA; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80 || uiA==0 ){ + uZ.ui = (uint32_t)uiA<<24; + return uZ.p; + } + + sign = signP8UI( uiA ); + if (sign) uiA = -uiA & 0xFF; + if(x==2){ + uZ.ui=(uiA>0)?(0x40000000):(0); + } + else{ + regSA = signregP8UI(uiA); + + tmp = (uiA<<2) & 0xFF; + if (regSA){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA=-1; + while (!(tmp>>7)){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + exp_frac32A = tmp<<22; + + if(kA<0){ + regA = -kA; + // Place exponent bits + exp_frac32A |= ( ((regA&0x1)| ((regA+1)&0x2))<<29 ); + + regA = (regA+3)>>2; + if (regA==0) regA=1; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + exp_frac32A |= ( (kA&0x3) << 29 ); + + regA = (kA+4)>>2; + if (regA==0) regA=1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + + exp_frac32A =((uint_fast32_t)exp_frac32A) >> (regA+1); //2 because of sign and regime terminating bit + + uZ.ui = regime + exp_frac32A; + + int shift = 32-x; + if( (uZ.ui>>shift)!=(0x7FFFFFFF>>shift) ){ + if( ((uint32_t)0x80000000>>x) & uZ.ui){ + if ( ( 
((uint32_t)0x80000000>>(x-1)) & uZ.ui) || (((uint32_t)0x7FFFFFFF>>x) & uZ.ui) ) + uZ.ui += (0x1<<shift); + } + } + uZ.ui &=((int32_t)0x80000000>>(x-1)); + if (uZ.ui==0) uZ.ui = 0x1<<shift; + } + + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_to_ui32.c b/source/luametatex/source/libraries/softposit/source/p8_to_ui32.c new file mode 100644 index 000000000..88a302dda --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_to_ui32.c @@ -0,0 +1,93 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +uint_fast32_t p8_to_ui32( posit8_t pA ) { + + union ui8_p8 uA; + uint_fast32_t mask, iZ, tmp; + uint_fast8_t scale = 0, uiA; + bool bitLast, bitNPlusOne; + + uA.p = pA; + uiA = uA.ui; // Copy of the input. + //NaR + //if (uiA==0x80) return 0; + if (uiA>=0x80) return 0; //negative + + if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. + iZ = 1; + } + else { // Decode the posit, left-justifying as we go. + uiA -= 0x40; // Strip off first regime bit (which is a 1). + while (0x20 & uiA) { // Increment scale by 1 for each regime sign bit. + scale ++; // Regime sign bit is always 1 in this range. + uiA = (uiA - 0x20) << 1; // Remove the bit; line up the next regime bit. + } + uiA <<= 1; // Skip over termination bit, which is 0. + + iZ = ((uint32_t)uiA | 0x40) << 24; // Left-justify fraction in 32-bit result (one left bit padding) + + mask = 0x40000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ & mask); // Extract the bit, without shifting it. + mask >>= 1; + tmp = (iZ & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ ^= tmp; // Erase the bit, if it was set. + tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ ^= tmp; // Erase those bits, if any were set. 
+ + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ += (mask<<1) ; + } + iZ = iZ >> (30 - scale); // Right-justify the integer. + } + + return iZ; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/p8_to_ui64.c b/source/luametatex/source/libraries/softposit/source/p8_to_ui64.c new file mode 100644 index 000000000..2a19fe95f --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/p8_to_ui64.c @@ -0,0 +1,94 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +uint_fast64_t p8_to_ui64( posit8_t pA ) { + + union ui8_p8 uA; + uint_fast64_t mask, iZ, tmp; + uint_fast8_t scale = 0, uiA; + bool bitLast, bitNPlusOne; + + uA.p = pA; + uiA = uA.ui; // Copy of the input. + //NaR + //if (uiA==0x80) return 0; + if (uiA>=0x80) return 0; //negative + + if (uiA <= 0x20) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x50) { // 1/2 < x < 3/2 rounds to 1. + iZ = 1; + } + else { // Decode the posit, left-justifying as we go. + + uiA -= 0x40; // Strip off first regime bit (which is a 1). + while (0x20 & uiA) { // Increment scale by 1 for each regime sign bit. + scale ++; // Regime sign bit is always 1 in this range. + uiA = (uiA - 0x20) << 1; // Remove the bit; line up the next regime bit. + } + uiA <<= 1; // Skip over termination bit, which is 0. + + iZ = ((uint64_t)uiA | 0x40) << 55; // Left-justify fraction in 32-bit result (one left bit padding) + + mask = 0x2000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ & mask); // Extract the bit, without shifting it. + mask >>= 1; + tmp = (iZ & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ ^= tmp; // Erase the bit, if it was set. + tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ ^= tmp; // Erase those bits, if any were set. 
+ + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ += (mask<<1) ; + } + iZ = (uint64_t)iZ >> (61 - scale); // Right-justify the integer. + } + + return iZ; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_add.c b/source/luametatex/source/libraries/softposit/source/pX1_add.c new file mode 100644 index 000000000..55569e05c --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_add.c @@ -0,0 +1,77 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_1_t pX1_add( posit_1_t a, posit_1_t b, int x ){ + union ui32_pX1 uA, uB, uZ; + uint_fast32_t uiA, uiB; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + + //Zero or infinity + if (uiA==0 || uiB==0){ // Not required but put here for speed + uZ.ui = uiA | uiB; + return uZ.p; + } + else if ( uiA==0x80000000 || uiB==0x80000000 ){ + uZ.ui = 0x80000000; + return uZ.p; + } + + + //different signs + if ((uiA^uiB)>>31) + return softposit_subMagsPX1(uiA, uiB, x); + else + return softposit_addMagsPX1(uiA, uiB, x); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_div.c b/source/luametatex/source/libraries/softposit/source/pX1_div.c new file mode 100644 index 000000000..cd68c9bd7 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_div.c @@ -0,0 +1,210 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
/*
 * Divide two x-bit posits (one exponent bit) held in 32-bit words: pA / pB.
 *
 * x outside 2..32 returns NaR (0x80000000). A NaR operand or a zero divisor
 * returns NaR; a zero dividend returns zero. Otherwise both operands are
 * decoded into regime (kA), exponent (expA) and fraction, the fractions are
 * divided with lldiv, and the quotient is re-encoded and rounded to x bits.
 */
posit_1_t pX1_div( posit_1_t pA, posit_1_t pB, int x ) {

    union ui32_pX1 uA, uB, uZ;
    int regA;
    uint_fast32_t uiA, uiB, fracA, fracB, regime, tmp;
    bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry;
    int_fast8_t kA=0;
    int_fast32_t expA;
    uint_fast64_t frac64A, frac64Z, rem;
    lldiv_t divresult;

    if (x<2 || x>32){
        uZ.ui = 0x80000000;
        return uZ.p;
    }

    uA.p = pA;
    uiA = uA.ui;
    uB.p = pB;
    uiB = uB.ui;

    //Zero or infinity
    if ( uiA==0x80000000 || uiB==0x80000000 || uiB==0){
#ifdef SOFTPOSIT_EXACT
        uZ.ui.v = 0x80000000;
        uZ.ui.exact = 0;
#else
        uZ.ui = 0x80000000;
#endif
        return uZ.p;
    }
    else if (uiA==0){
#ifdef SOFTPOSIT_EXACT

        // NOTE(review): uiA and uiB are integers here, so the member accesses
        // below look like they cannot compile when SOFTPOSIT_EXACT is
        // defined -- confirm against the headers.
        uZ.ui.v = 0;
        if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) )
            uZ.ui.exact = 1;
        else
            uZ.ui.exact = 0;
#else
        uZ.ui = 0;
#endif
        return uZ.p;
    }

    // Work on magnitudes; the result sign is the XOR of the operand signs.
    signA = signP32UI( uiA );
    signB = signP32UI( uiB );
    signZ = signA ^ signB;
    if(signA) uiA = (-uiA & 0xFFFFFFFF);
    if(signB) uiB = (-uiB & 0xFFFFFFFF);
    regSA = signregP32UI(uiA);
    regSB = signregP32UI(uiB);

    if (x==2){
        // kA stays 0 here, so the re-encoding below ends up with 0x40000000
        // again through its saturation branch.
        uZ.ui = 0x40000000;
    }
    else{
        // Decode the dividend: regime value into kA, exponent into expA.
        tmp = (uiA<<2)&0xFFFFFFFF;
        if (regSA){
            while (tmp>>31){
                kA++;
                tmp= (tmp<<1) & 0xFFFFFFFF;
            }
        }
        else{
            kA=-1;
            while (!(tmp>>31)){
                kA--;
                tmp= (tmp<<1) & 0xFFFFFFFF;
            }
            tmp&=0x7FFFFFFF;
        }
        expA = tmp>>30; // the single exponent bit
        fracA = (tmp | 0x40000000) & 0x7FFFFFFF;   // hidden bit at bit 30
        frac64A = (uint64_t) fracA << 30;          // pre-scale the dividend

        // Decode the divisor; its regime and exponent are subtracted.
        tmp = (uiB<<2)&0xFFFFFFFF;
        if (regSB){
            while (tmp>>31){
                kA--;
                tmp= (tmp<<1) & 0xFFFFFFFF;
            }
        }
        else{
            kA++;
            while (!(tmp>>31)){
                kA++;
                tmp= (tmp<<1) & 0xFFFFFFFF;
            }
            tmp&=0x7FFFFFFF;
        }
        expA -= tmp>>30;
        fracB = (tmp | 0x40000000) & 0x7FFFFFFF;

        divresult = lldiv (frac64A,(uint_fast64_t)fracB);
        frac64Z = divresult.quot;
        rem = divresult.rem;

        // Exponent underflow: borrow one step from the regime.
        if (expA<0){
            expA=1;
            kA--;
        }
        if (frac64Z!=0){
            rcarry = frac64Z >> 30; // nonzero when the quotient reaches bit 30 (the hidden bit position); bit 0 is the rightmost bit
            if (!rcarry){
                // Quotient is below 1.0: renormalise by one binary place.
                if (expA==0) kA --;
                expA^=1;
                frac64Z<<=1;
            }
        }
    }

    // Build the regime field of the result from kA.
    if(kA<0){
        regA = -kA;
        regSA = 0;
        regime = 0x40000000>>regA;
    }
    else{
        regA = kA+1;
        regSA=1;
        regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA);
    }

    if(regA>(x-2)){
        //max or min pos. exp and frac does not matter.
        uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x));
    }
    else{
        //remove carry and rcarry bits and shift to correct position
        frac64Z &= 0x3FFFFFFF;
        fracA = (uint_fast32_t)frac64Z >> (regA+1);

        //regime length is smaller than length of posit
        if (regA<x){
            if (regA!=(x-2)){
                // Rounding bit and sticky bits taken from the quotient.
                bitNPlusOne |= (((uint64_t)0x80000000>>(x-regA-1)) & frac64Z);
                bitsMore = ((0x7FFFFFFF>>(x-regA-1)) & frac64Z);
                fracA&=((int32_t)0x80000000>>(x-1));
            }
            else if (frac64Z>0) {
                fracA=0;
                bitsMore=1;
            }
            if(regA==(x-2) && expA){
                // No room for the exponent bit: it becomes the rounding bit.
                bitNPlusOne=1;
                expA=0;
            }
            if (rem) bitsMore =1;   // a division remainder also counts as sticky
        }
        else{
            regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x));
            expA=0;
            fracA=0;
        }

        // NOTE(review): regA can be 30 here when x==32, which makes the shift
        // count -1 (expA is 0 in that case, but a negative shift count is
        // still undefined in C) -- confirm.
        expA <<= (29-regA);
        uZ.ui = packToP32UI(regime, expA, fracA);
        // Round to nearest, tie to even, at the last kept bit.
        if (bitNPlusOne) uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ;

    }

    if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF;
    return uZ.p;
}
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +bool pX1_eq( posit_1_t a, posit_1_t b) { + union ui32_pX1 uA, uB; + int32_t uiA, uiB; + + uA.p = a; + uiA = (int32_t) uA.ui; + uB.p = b; + uiB = (int32_t)uB.ui; + + if(uiA==uiB) + return true; + else + return false; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_le.c b/source/luametatex/source/libraries/softposit/source/pX1_le.c new file mode 100644 index 000000000..3986a7503 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_le.c @@ -0,0 +1,59 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2021 NGA. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +bool pX1_le( posit_1_t a, posit_1_t b ) { + union ui32_pX1 uA, uB; + int32_t uiA, uiB; + + uA.p = a; + uiA = (int32_t) uA.ui; + uB.p = b; + uiB = (int32_t)uB.ui; + + if(uiA<=uiB) + return true; + else + return false; + +} diff --git a/source/luametatex/source/libraries/softposit/source/pX1_lt.c b/source/luametatex/source/libraries/softposit/source/pX1_lt.c new file mode 100644 index 000000000..565b449ac --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_lt.c @@ -0,0 +1,59 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2021 NGA. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +bool pX1_lt( posit_1_t a, posit_1_t b ) { + union ui32_pX1 uA, uB; + int32_t uiA, uiB; + + uA.p = a; + uiA = (int32_t) uA.ui; + uB.p = b; + uiB = (int32_t)uB.ui; + + if(uiA<uiB) + return true; + else + return false; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_mul.c b/source/luametatex/source/libraries/softposit/source/pX1_mul.c new file mode 100644 index 000000000..fb5a71dd4 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_mul.c @@ -0,0 +1,213 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + + +posit_1_t pX1_mul( posit_1_t pA, posit_1_t pB, int x ){ + + union ui32_pX1 uA, uB, uZ; + uint_fast32_t uiA, uiB; + int regA; + uint_fast32_t fracA, regime, tmp; + bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast32_t expA; + int_fast8_t kA=0; + uint_fast64_t frac64Z; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + //NaR or Zero + if ( uiA==0x80000000 || uiB==0x80000000 ){ + +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80000000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80000000; +#endif + return uZ.p; + } + else if (uiA==0 || uiB==0){ +#ifdef SOFTPOSIT_EXACT + + uZ.ui.v = 0; + if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) + uZ.ui.exact = 1; + else + uZ.ui.exact = 0; +#else + uZ.ui = 0; +#endif + return uZ.p; + } + + signA = signP32UI( uiA ); + signB = signP32UI( uiB ); + signZ = signA ^ signB; + + if(signA) uiA = (-uiA & 0xFFFFFFFF); + if(signB) uiB = (-uiB & 0xFFFFFFFF); + + regSA = signregP32UI(uiA); 
+ regSB = signregP32UI(uiB); + + if (x==2){ + uZ.ui = (regSA®SB) ? (0x40000000) : (0x0); + } + else{ + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + + } + expA = tmp>>30; //to get 1 bits + fracA = (tmp | 0x40000000) & 0x7FFFFFFF; + + tmp = (uiB<<2)&0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA--; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + + } + expA += tmp>>30; + frac64Z = (uint_fast64_t) fracA * ((tmp | 0x40000000) & 0x7FFFFFFF); + if (expA>1){ + kA++; + expA^=0x2; + } + rcarry = frac64Z>>61;//3rd bit of frac64Z + if (rcarry){ + if (expA) kA ++; + expA^=1; + frac64Z>>=1; + } + } + + if(kA<0){ + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + + if(regA>(x-2)){ + //max or min pos. exp and frac does not matter. + uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + + //remove carry and rcarry bits and shift to correct position (2 bits exp, so + 1 than 16 bits) + frac64Z = (frac64Z &0xFFFFFFFFFFFFFFF)>> (regA-1); + fracA = (uint_fast32_t) (frac64Z>>32); + + //regime length is smaller than length of posit + if (regA<x){ + if (regA!=(x-2)){ + bitNPlusOne |= (((uint64_t)0x8000000000000000>>x) & frac64Z); + bitsMore = ((0x7FFFFFFFFFFFFFFF>>x) & frac64Z); + fracA&=((int32_t)0x80000000>>(x-1)); + } + else if (frac64Z>0){ + fracA=0; + bitsMore=1; + } + if(regA==(x-2) && expA){ + bitNPlusOne=1; + expA=0; + } + } + else{ + regime=(regSA) ? 
(regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); + expA=0; + fracA=0; + } + expA <<= (29-regA); + uZ.ui = packToP32UI(regime, expA, fracA); + + if (bitNPlusOne){ + uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; + } + + } + + if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_mulAdd.c b/source/luametatex/source/libraries/softposit/source/pX1_mulAdd.c new file mode 100644 index 000000000..196200f3f --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_mulAdd.c @@ -0,0 +1,56 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_1_t pX1_mulAdd( posit_1_t a, posit_1_t b, posit_1_t c, int x ) { + //a*b + c + union ui32_pX1 uA; + uint_fast32_t uiA; + union ui32_pX1 uB; + uint_fast32_t uiB; + union ui32_pX1 uC; + uint_fast32_t uiC; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + uC.p = c; + uiC = uC.ui; + return softposit_mulAddPX1( uiA, uiB, uiC, 0, x); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_roundToInt.c b/source/luametatex/source/libraries/softposit/source/pX1_roundToInt.c new file mode 100644 index 000000000..96fe32d0f --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_roundToInt.c @@ -0,0 +1,110 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_1_t pX1_roundToInt( posit_1_t pA, int x ){ + union ui32_pX1 uA; + uint_fast32_t mask = 0x20000000, scale=0, tmp=0, uiA, uiZ; + bool bitLast, bitNPlusOne, sign; + + uA.p = pA; + uiA = uA.ui; + sign = uiA>>31; + + // sign is True if pA > NaR. + if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. + if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. + uA.ui = 0; + return uA.p; + } + else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. + uA.ui = 0x40000000; + } + else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. 
+ uA.ui = 0x50000000; + } + else if (uiA >= 0x7FE80000) { // If |A| is 0x7FE800000 (4194304) (posit is pure integer value), leave it unchanged. + if (x>8) return uA.p; // This also takes care of the NaR case, 0x80000000. + else{ + bitNPlusOne=((uint32_t)0x80000000>>x) & uiA; + tmp = ((uint32_t)0x7FFFFFFF>>x)& uiA; //bitsMore + bitLast = ((uint32_t)0x80000000>>(x-1)) & uiA; + if (bitNPlusOne) + if (bitLast | tmp) uiA += bitLast; + uA.ui = uiA; + } + } + else { // 34% of the cases, we have to decode the posit. + + while (mask & uiA) { + scale += 2; + mask >>= 1; + } + mask >>= 1; + if (mask & uiA) scale++; + + mask >>= scale; + + //the rest of the bits + bitLast = (uiA & mask); + mask >>= 1; + tmp = (uiA & mask); + bitNPlusOne = tmp; + uiA ^= tmp; // Erase the bit, if it was set. + tmp = uiA & (mask - 1); // this is actually bitsMore + + uiA ^= tmp; + + if (bitNPlusOne) { + if (bitLast | tmp) uiA += (mask << 1); + } + uA.ui = uiA; + + + } + if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; + return uA.p; + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_sub.c b/source/luametatex/source/libraries/softposit/source/pX1_sub.c new file mode 100644 index 000000000..6a6c3ac95 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_sub.c @@ -0,0 +1,88 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_1_t pX1_sub( posit_1_t a, posit_1_t b, int x) { + union ui32_pX1 uA, uB, uZ; + uint_fast32_t uiA, uiB; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + + //infinity + if ( uiA==0x80000000 || uiB==0x80000000 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80000000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80000000; +#endif + return uZ.p; + } + //Zero + else if ( uiA==0 || uiB==0 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = (uiA | -uiB); + uZ.ui.exact = 0; +#else + uZ.ui = (uiA | -uiB); +#endif + return uZ.p; + } + + //different signs + if ((uiA^uiB)>>31) + return softposit_addMagsPX1(uiA, (-uiB & 0xFFFFFFFF), x); + else + return softposit_subMagsPX1(uiA, (-uiB & 0xFFFFFFFF), x); + + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_to_i32.c b/source/luametatex/source/libraries/softposit/source/pX1_to_i32.c new file mode 100644 index 000000000..58b5b7326 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_to_i32.c @@ -0,0 +1,105 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +int_fast32_t pX1_to_i32( posit_1_t pA ) { + + union ui32_pX1 uA; + uint_fast64_t iZ64, mask, tmp; + int_fast32_t iZ, scale = 0, uiA; + bool sign=0, bitLast, bitNPlusOne; + + uA.p = pA; + uiA = uA.ui; // Copy of the input. + //NaR + if (uiA==0x80000000) return 0; + + sign = (uiA > 0x80000000); // sign is True if pA > NaR. + + if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. + + if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. 
+ iZ = 1; + } + else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. + iZ = 2; + } + else if (uiA>0x7FFF9FFF){ //2147418112 + return (sign) ? (-2147483648) : (2147483647); + + } + else { // Decode the posit, left-justifying as we go. + uiA -= 0x40000000; // Strip off first regime bit (which is a 1). + while (0x20000000 & uiA) { // Increment scale by 2 for each regime sign bit. + scale += 2; // Regime sign bit is always 1 in this range. + uiA = (uiA - 0x20000000) << 1; // Remove the bit; line up the next regime bit. + } + + uiA <<= 1; // Skip over termination bit, which is 0. + if (0x20000000 & uiA) scale++; // If exponent is 1, increment the scale. + iZ64 = ((uint64_t)uiA | 0x20000000) << 33; // Left-justify fraction in 64-bit result (one left bit padding) + mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ64 & mask); // Extract the bit, without shifting it. + mask >>= 1; + tmp = (iZ64 & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ64 ^= tmp; // Erase the bit, if it was set. + tmp = iZ64 & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ64 ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ64 += (mask << 1); + } + + iZ = (uint64_t)iZ64 >> (62 - scale); // Right-justify the integer. + + } + if (sign) iZ = -iZ; + return iZ; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_to_i64.c b/source/luametatex/source/libraries/softposit/source/pX1_to_i64.c new file mode 100644 index 000000000..990e8ef65 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_to_i64.c @@ -0,0 +1,100 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +int_fast64_t pX1_to_i64( posit_1_t pA ) { + + union ui32_pX1 uA; + int_fast64_t mask, tmp; + uint_fast64_t iZ, scale = 0, uiA; + bool sign=0, bitLast, bitNPlusOne; + + uA.p = pA; + uiA = uA.ui; // Copy of the input. + //NaR + if (uiA==0x80000000) return 0; + + sign = (uiA > 0x80000000); // sign is True if pA > NaR. + if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. + + + if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. + iZ = 1; + } + else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. + iZ = 2; + } + else { // Decode the posit, left-justifying as we go. + uiA -= 0x40000000; // Strip off first regime bit (which is a 1). + while (0x20000000 & uiA) { // Increment scale by 2 for each regime sign bit. + scale += 2; // Regime sign bit is always 1 in this range. + uiA = (uiA - 0x20000000) << 1; // Remove the bit; line up the next regime bit. + } + uiA <<= 1; // Skip over termination bit, which is 0. + if (0x20000000 & uiA) scale++; // If exponent is 1, increment the scale. + iZ = ((uint64_t)uiA | 0x20000000) << 33; // Left-justify fraction in 64-bit result (one left bit padding) + mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ & mask); // Extract the bit, without shifting it. + mask >>= 1; + tmp = (iZ & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ ^= tmp; // Erase the bit, if it was set. + tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ += (mask << 1); + } + + iZ = (uint64_t)iZ >> (62 - scale); // Right-justify the integer. 
+ + } + if (sign) iZ = -iZ; + return iZ; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_to_p16.c b/source/luametatex/source/libraries/softposit/source/pX1_to_p16.c new file mode 100644 index 000000000..4fbf7df3a --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_to_p16.c @@ -0,0 +1,78 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + + +posit16_t pX1_to_p16( posit_1_t pA ){ + + union ui32_pX1 uA; + union ui16_p16 uZ; + uint_fast32_t uiA; + bool sign; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000 || uiA==0 ){ + uZ.ui = uiA>>16; + return uZ.p; + } + + sign = signP32UI( uiA ); + if (sign) uiA = -uiA & 0xFFFFFFFF; + + if ((uiA&0xFFFF)==0 ){ + uZ.ui = uiA>>16; + } + else { + if( (uiA>>16)!=0x7FFF ){ + if( (uint32_t)0x8000 & uiA){ + if ( ( ((uint32_t)0x10000) & uiA) || (((uint32_t)0x7FFF) & uiA) ) + uiA += 0x10000; + } + } + uZ.ui = uiA>>16; + if (uZ.ui==0) uZ.ui = 0x1; + + } + if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF); + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_to_p32.c b/source/luametatex/source/libraries/softposit/source/pX1_to_p32.c new file mode 100644 index 000000000..774249c6b --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_to_p32.c @@ -0,0 +1,113 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. 
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + + +posit32_t pX1_to_p32( posit_1_t pA ){ + + union ui32_pX1 uA; + union ui32_p32 uZ; + uint_fast32_t uiA, tmp, regime; + uint_fast32_t exp_frac32A=0; + bool sign, regSA, bitNPlusOne=0, bitsMore=0; + int_fast8_t kA=0, regA; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000 || uiA==0 ){ + uZ.ui = uiA; + return uZ.p; + } + + + sign = signP32UI( uiA ); + if (sign) uiA = -uiA & 0xFFFFFFFF; + regSA = signregP32UI(uiA); + + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + + + //2nd bit exp + exp_frac32A = tmp; + + if(kA<0){ + regA = -kA; + exp_frac32A |= ((uint32_t)(regA&0x1)<<31); + regA = (regA+1)>>1; + if (regA==0) regA=1; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + exp_frac32A |= ((uint32_t)(kA&0x1)<<31); + (kA==0) ? (regA=1) : (regA = (kA+2)>>1); + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + + bitNPlusOne = (exp_frac32A >>(regA+1))&0x1; + bitsMore = exp_frac32A&(0x7FFFFFFF>>(31-regA)); + + exp_frac32A >>=(regA+2); //2 because of sign and regime terminating bit + uZ.ui = regime + exp_frac32A; + + if (bitNPlusOne){ + uZ.ui += (uZ.ui&1) | bitsMore; + } + + if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF); + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_to_p8.c b/source/luametatex/source/libraries/softposit/source/pX1_to_p8.c new file mode 100644 index 000000000..a912cc7ce --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_to_p8.c @@ -0,0 +1,118 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + + +posit8_t pX1_to_p8( posit_1_t pA ){ + + union ui32_pX1 uA; + union ui8_p8 uZ; + uint_fast32_t uiA, tmp, regime; + uint_fast32_t exp_frac32A=0; + bool sign, regSA, bitsMore=0; + int_fast8_t kA=0, regA; + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000 || uiA==0 ){ + uZ.ui = (uiA>>24) & 0xFF; + return uZ.p; + } + + sign = signP32UI( uiA ); + if (sign) uiA = -uiA & 0xFFFFFFFF; + regSA = signregP32UI(uiA); + + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + + } + + if (kA<-3 || kA>=3){ + (kA<0) ? (uZ.ui=0x1):(uZ.ui= 0x7F); + } + else{ + //2nd bit exp + exp_frac32A = tmp; + if(kA<0){ + regA = ((-kA)<<1) - (exp_frac32A>>30); + if (regA==0) regA=1; + regSA = 0; + regime = 0x40>>regA; + } + else{ + + (kA==0)?(regA=1 + (exp_frac32A>>30)): (regA = ((kA+1)<<1) + (exp_frac32A>>30) -1); + regSA=1; + regime = 0x7F - (0x7F>>regA); + } + + if (regA>5){ + uZ.ui = regime; + } + else{ + uZ.ui = regime + ( ((exp_frac32A)&0x3FFFFFFF)>>(regA+24) ); + } + } + + if ( exp_frac32A & (0x800000<<regA)){ + bitsMore = exp_frac32A & ((0x800000<<regA)-1); + uZ.ui += (uZ.ui&1) | bitsMore; + + } + + if (sign) uZ.ui = -uZ.ui & 0xFF; + + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_to_pX1.c b/source/luametatex/source/libraries/softposit/source/pX1_to_pX1.c new file mode 100644 index 000000000..51cba8225 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_to_pX1.c @@ -0,0 +1,88 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_1_t pX1_to_pX1( posit_1_t pA, int x ){ + + union ui32_pX1 uA; + union ui32_pX1 uZ; + uint_fast32_t uiA; + bool sign; + + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000 || uiA==0 ){ + uZ.ui = uiA; + return uZ.p; + } + + sign = signP32UI( uiA ); + if (sign) uiA = -uiA & 0xFFFFFFFF; + + if (x==2){ + uZ.ui=(uiA>0)?(0x40000000):(0); + } + else if (x==32 || (((uint32_t)0xFFFFFFFF>>x) & uiA)==0 ){ + uZ.ui = uiA; + } + else { + + int shift = 32-x; + if( (uiA>>shift)!=(0x7FFFFFFF>>shift) ){ + if( ((uint32_t)0x80000000>>x) & uiA){ + if ( ( ((uint32_t)0x80000000>>(x-1)) & uiA) || (((uint32_t)0x7FFFFFFF>>x) & uiA) ) + uiA += (0x1<<shift); + } + } + uZ.ui = uiA & ((int32_t)0x80000000>>(x-1)); + if (uZ.ui==0) uZ.ui = 0x1<<shift; + + } + if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF); + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_to_pX2.c b/source/luametatex/source/libraries/softposit/source/pX1_to_pX2.c new file mode 100644 index 000000000..d608f499c --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_to_pX2.c @@ -0,0 +1,145 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_2_t pX1_to_pX2( posit_1_t pA, int x ){ + + union ui32_pX1 uA; + union ui32_pX2 uZ; + uint_fast32_t uiA, tmp, regime; + uint_fast32_t exp_frac32A=0; + bool sign, regSA, bitNPlusOne, bitsMore; + int_fast8_t kA=0, regA; + + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = uA.ui; + + if (uiA==0x80000000 || uiA==0 ){ + uZ.ui = uiA; + return uZ.p; + } + + sign = signP32UI( uiA ); + if (sign) uiA = -uiA & 0xFFFFFFFF; + regSA = signregP32UI(uiA); + + if (x==2){ + uZ.ui=(uiA>0)?(0x40000000):(0); + } + /* else if (x==32 || (((uint32_t)0xFFFFFFFF>>x) & uiA)==0 ){ + uZ.ui = uiA; + }*/ + else { + + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + + //2nd bit exp + exp_frac32A = tmp; + + if(kA<0){ + regA = -kA; + exp_frac32A |= ((uint32_t)(regA&0x1)<<31); + regA = (regA+1)>>1; + if (regA==0) regA=1; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + exp_frac32A |= ((uint32_t)(kA&0x1)<<31); + (kA==0) ? (regA=1) : (regA = (kA+2)>>1); + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + if(regA>(x-2)){ + //max or min pos. exp and frac does not matter. + uZ.ui=(regSA) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + +//printBinary(&exp_frac32A, 32); +//uint32_t temp = (0x7FFFFFFF>>(x-regA-2)); +//printBinary(&temp, 32); +//printBinary(®ime, 32); + bitNPlusOne = (exp_frac32A >>(regA+33-x))&0x1; + bitsMore = exp_frac32A&(0x7FFFFFFF>>(x-regA-2)); +//printf("bitNPlusOne: %d bitsMore: %d\n", bitNPlusOne, bitsMore); + exp_frac32A >>= (regA+2); //2 because of sign and regime terminating bit + uZ.ui = regime + (exp_frac32A & ((int32_t)0x80000000>>(x-1)) ); +//printBinary(&uZ.ui, 32); + //int shift = 32-x; + /*if( (uiA>>shift)!=(0x7FFFFFFF>>shift) ){ + if( ((uint32_t)0x80000000>>x) & uZ.ui){ + if ( ( ((uint32_t)0x80000000>>(x-1)) & uZ.ui) || (((uint32_t)0x7FFFFFFF>>x) & uZ.ui) ) + uZ.ui += (0x1<<shift); + } + } + + uZ.ui &=((int32_t)0x80000000>>(x-1));*/ + if (uZ.ui==0) uZ.ui = (uint32_t)0x1<<(32-x); + else if (bitNPlusOne){ + uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; + } + } + + } + if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF); + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_to_ui32.c b/source/luametatex/source/libraries/softposit/source/pX1_to_ui32.c new file mode 100644 index 000000000..428038701 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_to_ui32.c @@ -0,0 +1,100 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +uint_fast32_t pX1_to_ui32( posit_1_t pA ) { + + union ui32_pX1 uA; + uint_fast64_t iZ64, mask, tmp; + uint_fast32_t iZ, scale = 0, uiA; + bool bitLast, bitNPlusOne; + + uA.p = pA; + uiA = uA.ui; // Copy of the input. + //NaR + //if (uiA==0x80000000) return 0; + if (uiA>=0x80000000) return 0; //negative + + if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. + return 1; + } + else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. 
+ return 2; + } + else if (uiA>0x7FFFBFFF){ //4294836223 + return 4294967295; + + } + else { // Decode the posit, left-justifying as we go. + uiA -= 0x40000000; // Strip off first regime bit (which is a 1). + while (0x20000000 & uiA) { // Increment scale by 2 for each regime sign bit. + scale += 2; // Regime sign bit is always 1 in this range. + uiA = (uiA - 0x20000000) << 1; // Remove the bit; line up the next regime bit. + } + uiA <<= 1; // Skip over termination bit, which is 0. + if (0x20000000 & uiA) scale++; // If exponent is 1, increment the scale. + iZ64 = (uint64_t)(uiA | 0x20000000) << 33; // Left-justify fraction in 64-bit result (one left bit padding) + mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ64 & mask); // Extract the bit, without shifting it. + mask >>= 1; + tmp = (iZ64 & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ64 ^= tmp; // Erase the bit, if it was set. + tmp = iZ64 & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ64 ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ64 += (mask << 1); + } + iZ = (uint64_t)iZ64 >> (62 - scale); // Right-justify the integer. + + } + + return iZ; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX1_to_ui64.c b/source/luametatex/source/libraries/softposit/source/pX1_to_ui64.c new file mode 100644 index 000000000..5679c7ea2 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX1_to_ui64.c @@ -0,0 +1,97 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. 
+ +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +uint_fast64_t pX1_to_ui64( posit_1_t pA ) { + + union ui32_pX1 uA; + uint_fast64_t mask, tmp; + uint_fast64_t iZ, scale = 0, uiA; + bool bitLast, bitNPlusOne; + + uA.p = pA; + uiA = uA.ui; // Copy of the input. 
+ //NaR + //if (uiA==0x80000000) return 0; + if (uiA>=0x80000000) return 0; //negative + + if (uiA <= 0x30000000) { // 0 <= |pA| <= 1/2 rounds to zero. + return 0; + } + else if (uiA < 0x48000000) { // 1/2 < x < 3/2 rounds to 1. + return 1; + } + else if (uiA <= 0x54000000) { // 3/2 <= x <= 5/2 rounds to 2. + return 2; + } + else { // Decode the posit, left-justifying as we go. + uiA -= 0x40000000; // Strip off first regime bit (which is a 1). + while (0x20000000 & uiA) { // Increment scale by 2 for each regime sign bit. + scale += 2; // Regime sign bit is always 1 in this range. + uiA = (uiA - 0x20000000) << 1; // Remove the bit; line up the next regime bit. + } + uiA <<= 1; // Skip over termination bit, which is 0. + if (0x20000000 & uiA) scale++; // If exponent is 1, increment the scale. + iZ = (uiA | 0x20000000) << 33; // Left-justify fraction in 64-bit result (one left bit padding) + mask = 0x4000000000000000 >> scale; // Point to the last bit of the integer part. + + bitLast = (iZ & mask); // Extract the bit, without shifting it. + mask >>= 1; + tmp = (iZ & mask); + bitNPlusOne = tmp; // "True" if nonzero. + iZ ^= tmp; // Erase the bit, if it was set. + tmp = iZ & (mask - 1); // tmp has any remaining bits. // This is bitsMore + iZ ^= tmp; // Erase those bits, if any were set. + + if (bitNPlusOne) { // logic for round to nearest, tie to even + if (bitLast | tmp) iZ += (mask << 1); + } + + iZ = (uint64_t)iZ >> (62 - scale); // Right-justify the integer. + + } + + return iZ; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX2_add.c b/source/luametatex/source/libraries/softposit/source/pX2_add.c new file mode 100644 index 000000000..1aaed5d09 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_add.c @@ -0,0 +1,77 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). 
+ +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_2_t pX2_add( posit_2_t a, posit_2_t b, int x ){ + union ui32_pX2 uA, uB, uZ; + uint_fast32_t uiA, uiB; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + + //Zero or infinity + if (uiA==0 || uiB==0){ // Not required but put here for speed + uZ.ui = uiA | uiB; + return uZ.p; + } + else if ( uiA==0x80000000 || uiB==0x80000000 ){ + uZ.ui = 0x80000000; + return uZ.p; + } + + + //different signs + if ((uiA^uiB)>>31) + return softposit_subMagsPX2(uiA, uiB, x); + else + return softposit_addMagsPX2(uiA, uiB, x); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX2_div.c b/source/luametatex/source/libraries/softposit/source/pX2_div.c new file mode 100644 index 000000000..e6b666463 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_div.c @@ -0,0 +1,218 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdlib.h> + +#include "platform.h" +#include "internals.h" + +posit_2_t pX2_div( posit_2_t pA, posit_2_t pB, int x ) { + union ui32_pX2 uA, uB, uZ; + int regA, regB; + uint_fast32_t uiA, uiB, fracA, fracB, regime, tmp; + bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast8_t kA=0; + int_fast32_t expA; + uint_fast64_t frac64A, frac64Z, rem; + lldiv_t divresult; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + + //Zero or infinity + if ( uiA==0x80000000 || uiB==0x80000000 || uiB==0){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80000000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80000000; +#endif + return uZ.p; + } + else if (uiA==0){ +#ifdef SOFTPOSIT_EXACT + + uZ.ui.v = 0; + if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) + uZ.ui.exact = 1; + else + uZ.ui.exact = 0; +#else + uZ.ui = 0; +#endif + return uZ.p; + } + + signA = signP32UI( uiA ); + 
signB = signP32UI( uiB ); + signZ = signA ^ signB; + if(signA) uiA = (-uiA & 0xFFFFFFFF); + if(signB) uiB = (-uiB & 0xFFFFFFFF); + regSA = signregP32UI(uiA); + regSB = signregP32UI(uiB); + + if (x==2){ + uZ.ui = 0x40000000; + } + else{ + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA = tmp>>29; //to get 2 bits + fracA = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; + frac64A = (uint64_t) fracA << 30; + + tmp = (uiB<<2)&0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA++; + while (!(tmp>>31)){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA -= tmp>>29; + fracB = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; + + divresult = lldiv (frac64A,(uint_fast64_t)fracB); + frac64Z = divresult.quot; + rem = divresult.rem; + + if (expA<0){ + expA+=4; + kA--; + } + if (frac64Z!=0){ + rcarry = frac64Z >> 30; // this is the hidden bit (14th bit) , extreme right bit is bit 0 + if (!rcarry){ + if (expA==0){ + kA--; + expA=3; + } + else + expA--; + frac64Z<<=1; + } + } + } + + if(kA<0){ + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + if(regA>(x-2)){ + //max or min pos. exp and frac does not matter. + uZ.ui=(regSA) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + //remove carry and rcarry bits and shift to correct position + frac64Z &= 0x3FFFFFFF; + fracA = (uint_fast32_t)frac64Z >> (regA+2); + //regime length is smaller than length of posit + if (regA<x){ + if (regA<=(x-4)){ + bitNPlusOne=((uint32_t)0x80000000>>(x-regA-2))& frac64Z; + bitsMore = ((0x7FFFFFFF>>(x-regA-2)) & frac64Z); + fracA&=((int32_t)0x80000000>>(x-1)); + } + else { + if (regA==(x-2)){ + bitNPlusOne = expA&0x2; + bitsMore = (expA&0x1); + expA = 0; + } + else if (regA==(x-3)){ + bitNPlusOne = expA&0x1; + expA &=0x2; + } + if (frac64Z>0){ + fracA=0; + bitsMore =1; + } + + } + if (rem) bitsMore =1; + } + else{ + regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); + expA=0; + fracA=0; + } + + expA <<= (28-regA); + uZ.ui = packToP32UI(regime, expA, fracA); + if (bitNPlusOne) uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; + } + + if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX2_eq.c b/source/luametatex/source/libraries/softposit/source/pX2_eq.c new file mode 100644 index 000000000..733e3a35d --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_eq.c @@ -0,0 +1,60 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +bool pX2_eq( posit_2_t a, posit_2_t b) { + union ui32_pX2 uA, uB; + int32_t uiA, uiB; + + uA.p = a; + uiA = (int32_t) uA.ui; + uB.p = b; + uiB = (int32_t)uB.ui; + + if(uiA==uiB) + return true; + else + return false; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX2_le.c b/source/luametatex/source/libraries/softposit/source/pX2_le.c new file mode 100644 index 000000000..a60e8ad57 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_le.c @@ -0,0 +1,59 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + +bool pX2_le( posit_2_t a, posit_2_t b ) { + union ui32_pX2 uA, uB; + int32_t uiA, uiB; + + uA.p = a; + uiA = (int32_t) uA.ui; + uB.p = b; + uiB = (int32_t)uB.ui; + + if(uiA<=uiB) + return true; + else + return false; + +} diff --git a/source/luametatex/source/libraries/softposit/source/pX2_lt.c b/source/luametatex/source/libraries/softposit/source/pX2_lt.c new file mode 100644 index 000000000..1c1ff44e7 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_lt.c @@ -0,0 +1,59 @@ + +/*============================================================================ +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014 The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +bool pX2_lt( posit_2_t a, posit_2_t b ) { + union ui32_pX2 uA, uB; + int32_t uiA, uiB; + + uA.p = a; + uiA = (int32_t) uA.ui; + uB.p = b; + uiB = (int32_t)uB.ui; + + if(uiA<uiB) + return true; + else + return false; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX2_mul.c b/source/luametatex/source/libraries/softposit/source/pX2_mul.c new file mode 100644 index 000000000..e76d568d6 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_mul.c @@ -0,0 +1,228 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + + +posit_2_t pX2_mul( posit_2_t pA, posit_2_t pB, int x ){ + + union ui32_pX2 uA, uB, uZ; + uint_fast32_t uiA, uiB; + int regA; + uint_fast32_t fracA, regime, tmp; + bool signA, signB, signZ, regSA, regSB, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast32_t expA; + int_fast8_t kA=0; + uint_fast64_t frac64Z; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + //NaR or Zero + if ( uiA==0x80000000 || uiB==0x80000000 ){ + +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80000000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80000000; +#endif + return uZ.p; + } + else if (uiA==0 || uiB==0){ +#ifdef SOFTPOSIT_EXACT + + uZ.ui.v = 0; + if ( (uiA==0 && uiA.ui.exact) || (uiB==0 && uiB.ui.exact) ) + uZ.ui.exact = 1; + else + uZ.ui.exact = 0; +#else + uZ.ui = 0; +#endif + return uZ.p; + } + + signA = signP32UI( uiA ); + signB = signP32UI( uiB ); + signZ = signA ^ signB; + + if(signA) uiA = (-uiA & 0xFFFFFFFF); + if(signB) uiB = (-uiB & 0xFFFFFFFF); + + regSA = signregP32UI(uiA); 
+ regSB = signregP32UI(uiB); + + if (x==2){ + uZ.ui = (regSA®SB) ? (0x40000000) : (0x0); + } + else{ + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + //tmpX--; + } + tmp&=0x7FFFFFFF; + + } + expA = tmp>>29; //to get 2 bits + fracA = ((tmp<<1) | 0x40000000) & 0x7FFFFFFF; + + tmp = (uiB<<2)&0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA--; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + + } + expA += tmp>>29; + frac64Z = (uint_fast64_t) fracA * (((tmp<<1) | 0x40000000) & 0x7FFFFFFF); + if (expA>3){ + kA++; + expA&=0x3; // -=4 + } + + rcarry = frac64Z>>61;//3rd bit of frac64Z + if (rcarry){ + expA++; + if (expA>3){ + kA ++; + expA&=0x3; + } + frac64Z>>=1; + } + } + + if(kA<0){ + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + + if(regA>(x-2)){ + //max or min pos. exp and frac does not matter. + uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + //remove carry and rcarry bits and shift to correct position (2 bits exp, so + 1 than 16 bits) + frac64Z = (frac64Z&0xFFFFFFFFFFFFFFF) >> regA; + fracA = (uint_fast32_t) (frac64Z>>32); + + //regime length is smaller than length of posit + if (regA<x){ + if (regA<=(x-4)){ + bitNPlusOne |= (((uint64_t)0x8000000000000000>>x) & frac64Z); + bitsMore = ((0x7FFFFFFFFFFFFFFF>>x) & frac64Z); + fracA&=((int32_t)0x80000000>>(x-1)); + } + else { + if (regA==(x-2)){ + bitNPlusOne = expA&0x2; + bitsMore = (expA&0x1); + expA = 0; + } + else if (regA==(x-3)){ + bitNPlusOne = expA&0x1; + //expA>>=1; //taken care of by the pack algo + expA &=0x2; + } + + if (frac64Z>0){ + fracA=0; + bitsMore =1; + } + } + } + else{ + regime=(regSA) ? 
(regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); + expA=0; + fracA=0; + } + + expA <<= (28-regA); + uZ.ui = packToP32UI(regime, expA, fracA); + + if (bitNPlusOne){ + uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; + } + + } + + if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX2_mulAdd.c b/source/luametatex/source/libraries/softposit/source/pX2_mulAdd.c new file mode 100644 index 000000000..6090bc7e3 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_mulAdd.c @@ -0,0 +1,56 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_2_t pX2_mulAdd( posit_2_t a, posit_2_t b, posit_2_t c, int x ) { + //a*b + c + union ui32_pX2 uA; + uint_fast32_t uiA; + union ui32_pX2 uB; + uint_fast32_t uiB; + union ui32_pX2 uC; + uint_fast32_t uiC; + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + uC.p = c; + uiC = uC.ui; + return softposit_mulAddPX2( uiA, uiB, uiC, 0, x); + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX2_roundToInt.c b/source/luametatex/source/libraries/softposit/source/pX2_roundToInt.c new file mode 100644 index 000000000..aaa747344 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_roundToInt.c @@ -0,0 +1,113 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_2_t pX2_roundToInt( posit_2_t pA, int x ){ + union ui32_pX2 uA; + uint_fast32_t mask = 0x20000000, scale=0, tmp=0, uiA, uiZ; + bool bitLast, bitNPlusOne, sign; + + uA.p = pA; + uiA = uA.ui; + sign = uiA>>31; + + // sign is True if pA > NaR. + if (sign) uiA = -uiA & 0xFFFFFFFF; // A is now |A|. + if (uiA <= 0x38000000) { // 0 <= |pA| <= 1/2 rounds to zero. + uA.ui = 0; + return uA.p; + } + else if (uiA < 0x44000000) { // 1/2 < x < 3/2 rounds to 1. + uA.ui = 0x40000000; + } + else if (uiA <= 0x4A000000) { // 3/2 <= x <= 5/2 rounds to 2. + uA.ui = (x>4) ? 
( 0x48000000 ) : (0x40000000); + } + else if (uiA >= 0x7E800000) { // If |A| is 0x7E800000 (4194304) (posit is pure integer value), leave it unchanged. + if (x>8) return uA.p; // This also takes care of the NaR case, 0x80000000. + else{ + bitNPlusOne=((uint32_t)0x80000000>>x) & uiA; + tmp = ((uint32_t)0x7FFFFFFF>>x)& uiA; //bitsMore + bitLast = ((uint32_t)0x80000000>>(x-1)) & uiA; + if (bitNPlusOne) + if (bitLast | tmp) uiA += bitLast; + uA.ui = uiA; + } + } + else { // 34% of the cases, we have to decode the posit. + + while (mask & uiA) { + scale += 4; + mask >>= 1; + } + mask >>= 1; + + //Exponential (2 bits) + if (mask & uiA) scale+=2; + mask >>= 1; + if (mask & uiA) scale++; + mask >>= scale; + + //the rest of the bits + bitLast = (uiA & mask); + mask >>= 1; + tmp = (uiA & mask); + bitNPlusOne = tmp; + uiA ^= tmp; // Erase the bit, if it was set. + tmp = uiA & (mask - 1); // this is actually bitsMore + + uiA ^= tmp; + + if (bitNPlusOne) { + if (bitLast | tmp) uiA += (mask << 1); + } + uA.ui = uiA; + + + } + if (sign) uA.ui = -uA.ui & 0xFFFFFFFF; + return uA.p; + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX2_sqrt.c b/source/luametatex/source/libraries/softposit/source/pX2_sqrt.c new file mode 100644 index 000000000..f001806c6 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_sqrt.c @@ -0,0 +1,154 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +extern const uint_fast16_t softposit_approxRecipSqrt0[]; +extern const uint_fast16_t softposit_approxRecipSqrt1[]; + + + +posit_2_t pX2_sqrt( posit_2_t pA, int x ) { + union ui32_pX2 uA; + uint_fast32_t index, r0, shift, fracA, expZ, expA; + uint_fast32_t mask, uiA, uiZ; + uint_fast64_t eSqrR0, frac64Z, negRem, recipSqrt, shiftedFracZ, sigma0, sqrSigma0; + int_fast32_t eps, shiftZ; + + if (x<2 || x>32){ + uA.ui = 0x80000000; + return uA.p; + } + + uA.p = pA; + uiA = uA.ui; + + // If NaR or a negative number, return NaR. + if (uiA & 0x80000000) { + uA.ui = 0x80000000; + return uA.p; + } + // If the argument is zero, return zero. 
+ else if (!uiA) { + return uA.p; + } + // Compute the square root; shiftZ is the power-of-2 scaling of the result. + // Decode regime and exponent; scale the input to be in the range 1 to 4: + if (uiA & 0x40000000) { + shiftZ = -2; + while (uiA & 0x40000000) { + shiftZ += 2; + uiA = (uiA << 1) & 0xFFFFFFFF; + } + } else { + shiftZ = 0; + while (!(uiA & 0x40000000)) { + shiftZ -= 2; + uiA = (uiA << 1) & 0xFFFFFFFF; + } + } + + uiA &= 0x3FFFFFFF; + expA = (uiA >> 28); + shiftZ += (expA >> 1); + expA = (0x1 ^ (expA & 0x1)); + uiA &= 0x0FFFFFFF; + fracA = (uiA | 0x10000000); + + // Use table look-up of first 4 bits for piecewise linear approx. of 1/sqrt: + index = ((fracA >> 24) & 0xE) + expA; + eps = ((fracA >> 9) & 0xFFFF); + r0 = softposit_approxRecipSqrt0[index] + - (((uint_fast32_t) softposit_approxRecipSqrt1[index] * eps) >> 20); + + // Use Newton-Raphson refinement to get 33 bits of accuracy for 1/sqrt: + eSqrR0 = (uint_fast64_t) r0 * r0; + if (!expA) eSqrR0 <<= 1; + sigma0 = 0xFFFFFFFF & (0xFFFFFFFF ^ ((eSqrR0 * (uint64_t)fracA) >> 20)); + recipSqrt = ((uint_fast64_t) r0 << 20) + (((uint_fast64_t) r0 * sigma0) >> 21); + + sqrSigma0 = ((sigma0 * sigma0) >> 35); + recipSqrt += ( (( recipSqrt + (recipSqrt >> 2) - ((uint_fast64_t)r0 << 19) ) * sqrSigma0) >> 46 ); + + + frac64Z = (((uint_fast64_t) fracA) * recipSqrt) >> 31; + if (expA) frac64Z = (frac64Z >> 1); + + // Find the exponent of Z and encode the regime bits. + expZ = shiftZ & 0x3; + if (shiftZ < 0) { + shift = (-1 - shiftZ) >> 2; + uiZ = 0x20000000 >> shift; + } else { + shift = shiftZ >> 2; + uiZ = 0x7FFFFFFF - (0x3FFFFFFF >> shift); + } + + // Trick for eliminating off-by-one cases that only uses one multiply: + frac64Z++; + if (!(frac64Z & 0xF)) { + shiftedFracZ = frac64Z >> 1; + negRem = (shiftedFracZ * shiftedFracZ) & 0x1FFFFFFFF; + if (negRem & 0x100000000) { + frac64Z |= 1; + } else { + if (negRem) frac64Z--; + } + } + // Strip off the hidden bit and round-to-nearest using last shift+5 bits. 
+ frac64Z &= 0xFFFFFFFF; + mask = (1 << (36 + shift - x)); + if (mask & frac64Z) { + if ( ((mask - 1) & frac64Z) | ((mask << 1) & frac64Z) ) + frac64Z+=(mask << 1) ; + // Assemble the result and return it. + uA.ui = uiZ | (expZ << (27 - shift)) | (frac64Z >> (5 + shift)); + } + else{ + // Assemble the result and return it. + uA.ui = uiZ | (expZ << (27 - shift)) | (frac64Z >> (5 + shift)); + //Check if rounding bits in regime or exp and clean off unwanted bits + if( ((uint32_t)0x80000000>>x) & uA.ui){ + if ( ( ((uint32_t)0x80000000>>(x-1)) & uA.ui) || (((uint32_t)0x7FFFFFFF>>x) & uA.ui) ) + uA.ui = (uA.ui & ((int32_t)0x80000000>>(x-1))) + ((uint32_t)0x80000000>>(x-1)); + } + } + + + uA.ui &=((int32_t)0x80000000>>(x-1)); + return uA.p; +} diff --git a/source/luametatex/source/libraries/softposit/source/pX2_sub.c b/source/luametatex/source/libraries/softposit/source/pX2_sub.c new file mode 100644 index 000000000..49246ac6d --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_sub.c @@ -0,0 +1,87 @@ + +/*============================================================================ + +This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" +posit_2_t pX2_sub( posit_2_t a, posit_2_t b, int x) { + union ui32_pX2 uA, uB, uZ; + uint_fast32_t uiA, uiB; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + uA.p = a; + uiA = uA.ui; + uB.p = b; + uiB = uB.ui; + +#ifdef SOFTPOSIT_EXACT + uZ.ui.exact = (uiA.ui.exact & uiB.ui.exact); +#endif + + //infinity + if ( uiA==0x80000000 || uiB==0x80000000 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = 0x80000000; + uZ.ui.exact = 0; +#else + uZ.ui = 0x80000000; +#endif + return uZ.p; + } + //Zero + else if ( uiA==0 || uiB==0 ){ +#ifdef SOFTPOSIT_EXACT + uZ.ui.v = (uiA | -uiB); + uZ.ui.exact = 0; +#else + uZ.ui = (uiA | -uiB); +#endif + return uZ.p; + } + + //different signs + if ((uiA^uiB)>>31) + return softposit_addMagsPX2(uiA, (-uiB & 0xFFFFFFFF), x); + else + return softposit_subMagsPX2(uiA, (-uiB & 0xFFFFFFFF), x); + + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/pX2_to_pX1.c 
b/source/luametatex/source/libraries/softposit/source/pX2_to_pX1.c new file mode 100644 index 000000000..35b161947 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/pX2_to_pX1.c @@ -0,0 +1,128 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include "platform.h"
#include "internals.h"

/*
 * pX2_to_pX1 -- re-encode a posit_2_t bit pattern as a posit_1_t that is
 * x bits wide (left-aligned in a 32-bit word).
 *
 *   pA : source value.
 *   x  : target width; values outside 2..32 yield the pattern 0x80000000.
 *
 * 0 and 0x80000000 are passed through unchanged.  For every other input the
 * sign is stripped, the regime run length kA is counted, and the first bit
 * after the regime is folded into the new regime length regA (the regime
 * grows to 2*|kA| adjusted by that bit).  The remaining bits are then
 * truncated to x bits with a round step driven by bitNPlusOne / bitsMore.
 * NOTE(review): the folding of the leading exponent bit into the regime
 * presumably reflects es=2 -> es=1; confirm against the type definitions.
 */
posit_1_t pX2_to_pX1( posit_2_t pA, int x ){

    union ui32_pX2 uA;
    union ui32_pX1 uZ;
    uint_fast32_t uiA, tmp, regime;
    uint_fast32_t exp_frac32A=0;
    bool sign, regSA, bitNPlusOne, bitsMore;
    int_fast8_t kA=0, regA;


    if (x<2 || x>32){
        uZ.ui = 0x80000000;
        return uZ.p;
    }

    uA.p = pA;
    uiA = uA.ui;

    // 0 and 0x80000000 map to themselves
    if (uiA==0x80000000 || uiA==0 ){
        uZ.ui = uiA;
        return uZ.p;
    }

    // work on the magnitude; the sign is re-applied at the very end
    sign = signP32UI( uiA );
    if (sign) uiA = -uiA & 0xFFFFFFFF;


    regSA = signregP32UI(uiA);
    if (x==2){
        // only one pattern besides 0 / 0x80000000 is produced for x==2
        uZ.ui=(uiA>0)?(0x40000000):(0);
    }
    else {
        //regime
        // drop sign bit and first regime bit, then count the run
        tmp = (uiA<<2)&0xFFFFFFFF;
        if (regSA){
            while (tmp>>31){
                kA++;
                tmp= (tmp<<1) & 0xFFFFFFFF;
            }
        }
        else{
            kA=-1;
            while (!(tmp>>31)){
                kA--;
                tmp= (tmp<<1) & 0xFFFFFFFF;
            }
            // clear the regime terminator bit
            tmp&=0x7FFFFFFF;
        }
        //exp and frac
        // after this shift bit 31 holds the first bit that followed the regime
        exp_frac32A = tmp<<1;
        if(kA<0){
            // new run length: 2*|kA|, one shorter if the leading bit is set
            regA = (-kA)<<1;
            if (exp_frac32A&0x80000000) regA--;
            exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF;
            regSA = 0;
            regime = 0x40000000>>regA;
        }
        else{
            // new run length: 2*kA+1, one longer if the leading bit is set
            regA = (kA<<1)+1;
            if (exp_frac32A&0x80000000) regA++;
            exp_frac32A = (exp_frac32A<<1) &0xFFFFFFFF;
            regSA=1;
            regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA);
        }
        if(regA>(x-2)){
            //max or min pos. exp and frac does not matter.
            // NOTE(review): relies on >> of a negative int32_t being an
            // arithmetic shift (implementation-defined in C).
            uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x));
        }
        else{
            // first discarded bit and "any lower bit set" flag for rounding
            bitNPlusOne = (exp_frac32A >>(regA+33-x))&0x1;
            bitsMore = exp_frac32A&(0x7FFFFFFF>>(x-regA-2));

            // move the remaining bits below sign + regime
            if (regA<30) exp_frac32A >>=(2+regA);
            else exp_frac32A=0;
            // keep only the top x bits
            uZ.ui = regime + (exp_frac32A & ((int32_t)0x80000000>>(x-1)) );

            // never produce zero from a non-zero input
            if (uZ.ui==0) uZ.ui = 0x1<<(32-x);
            else if (bitNPlusOne){
                // add one unit in the last kept place when the kept LSB is 1
                // or any lower bit was set
                uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ;
            }
        }
    }

    if (sign) uZ.ui = (-uZ.ui & 0xFFFFFFFF);
    return uZ.p;
}

diff --git a/source/luametatex/source/libraries/softposit/source/quire16_fdp_add.c b/source/luametatex/source/libraries/softposit/source/quire16_fdp_add.c
new file mode 100644
index 000000000..959849ef7
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/quire16_fdp_add.c
@@ -0,0 +1,185 @@
/*============================================================================

This C source file is part of the SoftPosit Posit Arithmetic Package
by S. H. Leong (Cerlane).

Copyright 2017, 2018 A*STAR. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <inttypes.h> + +#include "platform.h" +#include "internals.h" + +quire16_t q16_fdp_add( quire16_t q, posit16_t pA, posit16_t pB ){ + + union ui16_p16 uA, uB; + union ui128_q16 uZ, uZ1, uZ2; + uint_fast16_t uiA, uiB; + uint_fast16_t fracA, tmp; + bool signA, signB, signZ2, regSA, regSB, rcarry; + int_fast8_t expA; + int_fast16_t kA=0, shiftRight=0; + uint_fast32_t frac32Z; + //For add + bool rcarryb, b1, b2, rcarryZ;//, rcarrySignZ; + + uZ1.q = q; + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + + //NaR + if (isNaRQ16(q) || isNaRP16UI(uA.ui) || isNaRP16UI(uB.ui)){ + uZ2.ui[0]=0x8000000000000000ULL; + uZ2.ui[1] = 0; + return uZ2.q; + } + else if (uiA==0 || uiB==0) + return q; + + //max pos (sign plus and minus) + signA = signP16UI( uiA ); + signB = signP16UI( uiB ); + signZ2 = signA ^ signB; + + if(signA) uiA = (-uiA & 0xFFFF); + if(signB) uiB = (-uiB & 0xFFFF); + + regSA = signregP16UI(uiA); + regSB = signregP16UI(uiB); + + tmp = (uiA<<2) & 0xFFFF; + if (regSA){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expA = tmp>>14; + fracA = (0x4000 | tmp); + + tmp = (uiB<<2) & 0xFFFF; + if (regSB){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA--; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + 
expA += tmp>>14; + frac32Z = (uint_fast32_t) fracA * (0x4000 | tmp); + + if (expA>1){ + kA++; + expA ^=0x2; + } + rcarry = frac32Z>>29;//3rd bit (position 2) of frac32Z, hidden bit is 4th bit (position 3) + if (rcarry){ + if (expA) kA ++; + expA^=1; + frac32Z>>=1; + } + + //default dot is between bit 71 and 72, extreme left bit is bit 0. Last right bit is bit 127. + //Scale = 2^es * k + e => 2k + e + int firstPos = 71 - (kA<<1) - expA; + + //No worries about hidden bit moving before position 4 because fraction is right aligned so + //there are 16 spare bits + if (firstPos>63){ //This means entire fraction is in right 64 bits + uZ2.ui[0] = 0; + shiftRight = firstPos-99;//99 = 63+ 4+ 32 + + uZ2.ui[1] = (shiftRight<0) ? ((uint64_t)frac32Z << -shiftRight) : ((uint64_t) frac32Z >> shiftRight); + + } + else{//frac32Z can be in both left64 and right64 + shiftRight = firstPos - 35;// -35= -3-32 + if (shiftRight<0) + uZ2.ui[0] = ((uint64_t)frac32Z) << -shiftRight; + else{ + uZ2.ui[0] = (uint64_t)frac32Z >> shiftRight; + uZ2.ui[1] = (uint64_t) frac32Z << (64 - shiftRight); + } + + } + + if (signZ2){ + if (uZ2.ui[1]>0){ + uZ2.ui[1] = - uZ2.ui[1]; + uZ2.ui[0] = ~uZ2.ui[0]; + } + else{ + uZ2.ui[0] = -uZ2.ui[0]; + } + } + + //Addition + b1 = uZ1.ui[1]&0x1; + b2 = uZ2.ui[1]&0x1; + rcarryb = b1 & b2; + uZ.ui[1] = (uZ1.ui[1]>>1) + (uZ2.ui[1]>>1) + rcarryb; + + rcarryZ = uZ.ui[1]>>63; + + uZ.ui[1] = (uZ.ui[1]<<1 | (b1^b2) ); + + b1 = uZ1.ui[0]&0x1; + b2 = uZ2.ui[0]&0x1; + rcarryb = b1 & b2 ; + int_fast8_t rcarryb3 = b1 + b2 + rcarryZ; + + uZ.ui[0] = (uZ1.ui[0]>>1) + (uZ2.ui[0]>>1) + ((rcarryb3>>1)& 0x1); + //rcarrySignZ = uZ.ui[0]>>63; + + uZ.ui[0] = (uZ.ui[0]<<1 | (rcarryb3 & 0x1) ); + + //Exception handling for NaR + if (isNaRQ16(uZ.q)) uZ.q.v[0] = 0; + + return uZ.q; +} diff --git a/source/luametatex/source/libraries/softposit/source/quire16_fdp_sub.c b/source/luametatex/source/libraries/softposit/source/quire16_fdp_sub.c new file mode 100644 index 000000000..caa034963 --- 
/dev/null +++ b/source/luametatex/source/libraries/softposit/source/quire16_fdp_sub.c @@ -0,0 +1,191 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <inttypes.h> + +#include "platform.h" +#include "internals.h" + +quire16_t q16_fdp_sub( quire16_t q, posit16_t pA, posit16_t pB ){ + + union ui16_p16 uA, uB; + union ui128_q16 uZ, uZ1, uZ2; + uint_fast16_t uiA, uiB; + uint_fast16_t fracA, tmp; + bool signA, signB, signZ2, regSA, regSB, rcarry; + int_fast8_t expA; + int_fast16_t kA=0, shiftRight; + uint_fast32_t frac32Z; + //For add + bool rcarryb, b1, b2, rcarryZ;//, rcarrySignZ; + + uZ1.q = q; + + uA.p = pA; + uiA = uA.ui; + uB.p = pB; + uiB = uB.ui; + + //NaR + if (isNaRQ16(q) || isNaRP16UI(uA.ui) || isNaRP16UI(uB.ui)){ + uZ2.ui[0]=0x8000000000000000ULL; + uZ2.ui[1] = 0; + return uZ2.q; + } + else if (uiA==0 || uiB==0) + return q; + + + //max pos (sign plus and minus) + signA = signP16UI( uiA ); + signB = signP16UI( uiB ); + signZ2 = signA ^ signB; + + if(signA) uiA = (-uiA & 0xFFFF); + if(signB) uiB = (-uiB & 0xFFFF); + + regSA = signregP16UI(uiA); + regSB = signregP16UI(uiB); + + tmp = (uiA<<2) & 0xFFFF; + if (regSA){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expA = tmp>>14; + fracA = (0x4000 | tmp); + + tmp = (uiB<<2) & 0xFFFF; + if (regSB){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA--; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expA += tmp>>14; + frac32Z = (uint_fast32_t) fracA * (0x4000 | tmp); + + if (expA>1){ + kA++; + expA ^=0x2; + } + + rcarry = frac32Z>>29;//3rd bit (position 2) of frac32Z, hidden bit is 4th bit (position 3) + if (rcarry){ + if (expA) kA ++; + expA^=1; + frac32Z>>=1; + } + + //default dot is between bit 71 and 72, extreme left bit is bit 0. Last right bit is bit 127. 
+ //Scale = 2^es * k + e => 2k + e + int firstPos = 71 - (kA<<1) - expA; + + //No worries about hidden bit moving before position 4 because fraction is right aligned so + //there are 16 spare bits + if (firstPos>63){ //This means entire fraction is in right 64 bits + uZ2.ui[0] = 0; + shiftRight = firstPos-99;//99 = 63+ 4+ 32 + if (shiftRight<0)//shiftLeft + uZ2.ui[1] = ((uint64_t)frac32Z) << -shiftRight; + else + uZ2.ui[1] = (uint64_t) frac32Z >> shiftRight; + } + else{//frac32Z can be in both left64 and right64 + shiftRight = firstPos - 35;// -35= -3-32 + if (shiftRight<0) + uZ2.ui[0] = ((uint64_t)frac32Z) << -shiftRight; + else{ + uZ2.ui[0] = (uint64_t)frac32Z >> shiftRight; + uZ2.ui[1] = (uint64_t) frac32Z << (64 - shiftRight); + } + + } + + //This is the only difference from ADD (signZ2) and (!signZ2) + if (!signZ2){ + if (uZ2.ui[1]>0){ + uZ2.ui[1] = - uZ2.ui[1]; + uZ2.ui[0] = ~uZ2.ui[0]; + } + else{ + uZ2.ui[0] = -uZ2.ui[0]; + } + } + + //Subtraction + b1 = uZ1.ui[1]&0x1; + b2 = uZ2.ui[1]&0x1; + rcarryb = b1 & b2; + uZ.ui[1] = (uZ1.ui[1]>>1) + (uZ2.ui[1]>>1) + rcarryb; + + rcarryZ = uZ.ui[1]>>63; + + uZ.ui[1] = (uZ.ui[1]<<1 | (b1^b2) ); + + + b1 = uZ1.ui[0]&0x1; + b2 = uZ2.ui[0]&0x1; + rcarryb = b1 & b2 ; + int_fast8_t rcarryb3 = b1 + b2 + rcarryZ; + + uZ.ui[0] = (uZ1.ui[0]>>1) + (uZ2.ui[0]>>1) + ((rcarryb3>>1)& 0x1); + //rcarrySignZ = uZ.ui[0]>>63; + + + uZ.ui[0] = (uZ.ui[0]<<1 | (rcarryb3 & 0x1) ); + + //Exception handling + if (isNaRQ16(uZ.q)) uZ.q.v[0] = 0; + + return uZ.q; +} diff --git a/source/luametatex/source/libraries/softposit/source/quire32_fdp_add.c b/source/luametatex/source/libraries/softposit/source/quire32_fdp_add.c new file mode 100644 index 000000000..8adac09fb --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/quire32_fdp_add.c @@ -0,0 +1,202 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. 
Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/

#include <inttypes.h>
#include <string.h>

#include "platform.h"
#include "internals.h"

/*
 * qX2_fdp_add -- wrapper around q32_fdp_add for the quire_2_t / posit_2_t
 * types: copies the eight 64-bit words of q and the raw posit values into
 * the 32-bit unions, runs q32_fdp_add, and copies the eight words back.
 */
quire_2_t qX2_fdp_add( quire_2_t q, posit_2_t pA, posit_2_t pB ){
    union ui512_q32 uQZ;
    union ui32_p32 uA, uB;
    memcpy(uQZ.ui, q.v, 8*sizeof(uint64_t));
    uA.ui = pA.v;
    uB.ui = pB.v;
    uQZ.q = q32_fdp_add(uQZ.q, uA.p, uB.p);
    memcpy(q.v, uQZ.ui, 8*sizeof(uint64_t));
    return q;
}

/*
 * q32_fdp_add -- fused dot-product step: returns q + (pA * pB).
 *
 *   q      : accumulator made of eight 64-bit words, ui[0] most significant.
 *   pA, pB : 32-bit posit factors.
 *
 * NaR in any input returns a quire whose first word is 0x8000000000000000
 * and whose other words are zero.  A zero factor returns q unchanged.
 * Otherwise the 64-bit significand product is placed into a zeroed
 * temporary at the bit position given by the combined scale, negated when
 * the factor signs differ, and added to q word by word from ui[7] upwards.
 */
quire32_t q32_fdp_add( quire32_t q, posit32_t pA, posit32_t pB ){

    union ui32_p32 uA, uB;
    union ui512_q32 uZ, uZ1, uZ2;
    uint_fast32_t uiA, uiB;
    uint_fast32_t fracA, tmp;
    bool signA, signB, signZ2, regSA, regSB, rcarry;
    int_fast32_t expA;
    int_fast16_t kA=0, shiftRight=0;
    uint_fast64_t frac64Z;
    //For add
    bool rcarryb, b1, b2, rcarryZ=0;

    uZ1.q = q;

    uA.p = pA;
    uiA = uA.ui;
    uB.p = pB;
    uiB = uB.ui;

    uZ2.q = q32Clr(uZ2.q); //set it to zero
    //NaR
    if (isNaRQ32(q) || isNaRP32UI(uA.ui) || isNaRP32UI(uB.ui)){
        //only the top bit of the first word is set; the rest was cleared above
        uZ2.ui[0]=0x8000000000000000ULL;
        return uZ2.q;
    }
    else if (uiA==0 || uiB==0)
        return q;


    //Product sign; both factors are reduced to their magnitudes
    signA = signP32UI( uiA );
    signB = signP32UI( uiB );
    signZ2 = signA ^ signB;

    if(signA) uiA = (-uiA & 0xFFFFFFFF);
    if(signB) uiB = (-uiB & 0xFFFFFFFF);

    regSA = signregP32UI(uiA);
    regSB = signregP32UI(uiB);

    //Decode A: regime run -> kA, then two exponent bits and the fraction
    tmp = (uiA<<2) & 0xFFFFFFFF;
    if (regSA){
        while (tmp>>31){
            kA++;
            tmp= (tmp<<1) & 0xFFFFFFFF;
        }
    }
    else{
        kA=-1;
        while (!(tmp>>31)){
            kA--;
            tmp= (tmp<<1) & 0xFFFFFFFF;
        }
        tmp&=0x7FFFFFFF;
    }
    expA = tmp>>29; //to get 2 bits
    //hidden bit placed at bit 31
    fracA = ((tmp<<2) | 0x80000000) & 0xFFFFFFFF;


    //Decode B, accumulating its regime and exponent onto A's
    tmp = (uiB<<2) & 0xFFFFFFFF;
    if (regSB){
        while (tmp>>31){
            kA++;
            tmp= (tmp<<1) & 0xFFFFFFFF;
        }
    }
    else{
        kA--;
        while (!(tmp>>31)){
            kA--;
            tmp= (tmp<<1) & 0xFFFFFFFF;
        }
        tmp&=0x7FFFFFFF;
    }
    expA += tmp>>29;
    frac64Z = (uint_fast64_t) fracA * (((tmp<<2) | 0x80000000) & 0xFFFFFFFF);

    //Exponent overflow carries into the regime
    if (expA>3){
        kA++;
        expA&=0x3; // -=4
    }
    //Will align frac64Z such that hidden bit is the first bit on the left.
    rcarry = frac64Z>>63;//1st bit of frac64Z
    if (rcarry){
        expA++;
        if (expA>3){
            kA ++;
            expA&=0x3;
        }
    }
    else
        frac64Z<<=1;

    //default dot is between bit 271 and 272, extreme left bit is bit 0. Last right bit is bit 511.
    //Minpos is 120 position to the right of binary point (dot)
    //Scale = 2^es * k + e => 4k + e
    //NOTE(review): kA may be negative here; left-shifting a negative value
    //is undefined in ISO C -- consider kA*4.
    int firstPos = 271 - (kA<<2) - expA;

    //Moving in chunk of 64. If it is in first chunk, a part might be in the chunk right to it. Simply have to handle that.
    int i;
    for (i=0; i<8; i++){
        if (firstPos<(i+1)*64){
            //Need to check how much of the fraction is in the next 64 bits
            shiftRight = firstPos - (i*64);
            uZ2.ui[i] = frac64Z >> shiftRight;

            //the shiftRight!=0 guard avoids a shift by 64
            if (i!=7 && shiftRight!=0) uZ2.ui[i+1] = frac64Z << (64 - shiftRight);
            break;
        }
    }

    //Two's complement of the temporary for a negative product: negate the
    //lowest non-zero word, invert every word above it
    if (signZ2){
        for (i=7; i>=0; i--){
            if (uZ2.ui[i]>0){
                uZ2.ui[i] = - uZ2.ui[i];
                i--;
                while(i>=0){
                    uZ2.ui[i] = ~uZ2.ui[i];
                    i--;
                }
                break;
            }
        }
    }

    //Addition
    //Each word is added as (a>>1)+(b>>1)+carry-in so that bit 63 of the
    //partial sum is the carry out; the dropped low bit is re-attached after.
    for (i=7; i>=0; i--){
        b1 = uZ1.ui[i] & 0x1;
        b2 = uZ2.ui[i] & 0x1;
        if (i==7){
            rcarryb = b1 & b2;
            uZ.ui[i] = (uZ1.ui[i]>>1) + (uZ2.ui[i]>>1) + rcarryb;
            rcarryZ = uZ.ui[i]>>63;
            uZ.ui[i] = (uZ.ui[i]<<1 | (b1^b2) );
        }
        else{
            int_fast8_t rcarryb3 = b1 + b2 + rcarryZ;
            uZ.ui[i] = (uZ1.ui[i]>>1) + (uZ2.ui[i]>>1) + (rcarryb3>>1);
            rcarryZ = uZ.ui[i]>>63;
            uZ.ui[i] = (uZ.ui[i]<<1 | (rcarryb3 & 0x1) );
        }

    }

    //Exception handling
    //a sum that lands on the NaR pattern has its first word cleared
    if (isNaRQ32(uZ.q) ) uZ.q.v[0]=0;

    return uZ.q;
}

diff --git a/source/luametatex/source/libraries/softposit/source/quire32_fdp_sub.c b/source/luametatex/source/libraries/softposit/source/quire32_fdp_sub.c
new file mode 100644
index 000000000..1a132eefd
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/quire32_fdp_sub.c
@@ -0,0 +1,205 @@
+/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/

#include <inttypes.h>
#include <string.h>

#include "platform.h"
#include "internals.h"

//c-(a*b)

/*
 * qX2_fdp_sub -- wrapper around q32_fdp_sub for the quire_2_t / posit_2_t
 * types: copies the eight 64-bit words of q and the raw posit values into
 * the 32-bit unions, runs q32_fdp_sub, and copies the eight words back.
 */
quire_2_t qX2_fdp_sub( quire_2_t q, posit_2_t pA, posit_2_t pB ){
    union ui512_q32 uQZ;
    union ui32_p32 uA, uB;
    memcpy(uQZ.ui, q.v, 8*sizeof(uint64_t));
    uA.ui = pA.v;
    uB.ui = pB.v;
    uQZ.q = q32_fdp_sub(uQZ.q, uA.p, uB.p);
    memcpy(q.v, uQZ.ui, 8*sizeof(uint64_t));
    return q;
}

/*
 * q32_fdp_sub -- fused dot-product step: returns q - (pA * pB).
 *
 *   q      : accumulator made of eight 64-bit words, ui[0] most significant.
 *   pA, pB : 32-bit posit factors.
 *
 * Same procedure as q32_fdp_add, except that the positioned product is
 * negated when the factor signs are EQUAL, so the final word-wise addition
 * performs a subtraction.  NaR in any input returns a quire whose first
 * word is 0x8000000000000000; a zero factor returns q unchanged.
 */
quire32_t q32_fdp_sub( quire32_t q, posit32_t pA, posit32_t pB ){

    union ui32_p32 uA, uB;
    union ui512_q32 uZ, uZ1, uZ2;
    uint_fast32_t uiA, uiB;
    uint_fast32_t fracA, tmp;
    bool signA, signB, signZ2, regSA, regSB, rcarry;
    int_fast32_t expA;
    int_fast16_t kA=0, shiftRight=0;
    uint_fast64_t frac64Z;
    //For sub
    bool rcarryb, b1, b2, rcarryZ;

    uZ1.q = q;

    uA.p = pA;
    uiA = uA.ui;
    uB.p = pB;
    uiB = uB.ui;

    uZ2.q = q32Clr(uZ2.q); //set it to zero
    //NaR
    if (isNaRQ32(q) || isNaRP32UI(uA.ui) || isNaRP32UI(uB.ui)){
        //only the top bit of the first word is set; the rest was cleared above
        uZ2.ui[0]=0x8000000000000000ULL;
        return uZ2.q;
    }
    else if (uiA==0 || uiB==0)
        return q;

    //Product sign; both factors are reduced to their magnitudes
    signA = signP32UI( uiA );
    signB = signP32UI( uiB );
    signZ2 = signA ^ signB;

    if(signA) uiA = (-uiA & 0xFFFFFFFF);
    if(signB) uiB = (-uiB & 0xFFFFFFFF);

    regSA = signregP32UI(uiA);
    regSB = signregP32UI(uiB);

    //Decode A: regime run -> kA, then two exponent bits and the fraction
    tmp = (uiA<<2) & 0xFFFFFFFF;
    if (regSA){
        while (tmp>>31){
            kA++;
            tmp= (tmp<<1) & 0xFFFFFFFF;
        }
    }
    else{
        kA=-1;
        while (!(tmp>>31)){
            kA--;
            tmp= (tmp<<1) & 0xFFFFFFFF;
        }
        tmp&=0x7FFFFFFF;
    }
    expA = tmp>>29; //to get 2 bits
    //hidden bit placed at bit 31
    fracA = ((tmp<<2) | 0x80000000) & 0xFFFFFFFF;

    //Decode B, accumulating its regime and exponent onto A's
    tmp = (uiB<<2) & 0xFFFFFFFF;
    if (regSB){
        while (tmp>>31){
            kA++;
            tmp= (tmp<<1) & 0xFFFFFFFF;
        }
    }
    else{
        kA--;
        while (!(tmp>>31)){
            kA--;
            tmp= (tmp<<1) & 0xFFFFFFFF;
        }
        tmp&=0x7FFFFFFF;
    }
    expA += tmp>>29;
    frac64Z = (uint_fast64_t) fracA * (((tmp<<2) | 0x80000000) & 0xFFFFFFFF);

    //Exponent overflow carries into the regime
    if (expA>3){
        kA++;
        expA&=0x3; // -=4
    }
    //Will align frac64Z such that hidden bit is the first bit on the left.
    rcarry = frac64Z>>63;//1st bit of frac64Z
    if (rcarry){
        expA++;
        if (expA>3){
            kA ++;
            expA&=0x3;
        }
        //frac64Z>>=1;
    }
    else
        frac64Z<<=1;

    //default dot is between bit 271 and 272, extreme left bit is bit 0. Last right bit is bit 511.
    //Minpos is 120 position to the right of binary point (dot)
    //Scale = 2^es * k + e => 4k + e
    //NOTE(review): kA may be negative here; left-shifting a negative value
    //is undefined in ISO C -- consider kA*4.
    int firstPos = 271 - (kA<<2) - expA;

    //Moving in chunk of 64. If it is in first chunk, a part might be in the chunk right to it. Simply have to handle that.
    int i;
    for (i=0; i<8; i++){
        if (firstPos<(i+1)*64){
            //Need to check how much of the fraction is in the next 64 bits
            shiftRight = firstPos - (i*64);
            uZ2.ui[i] = frac64Z >> shiftRight;
            //the shiftRight!=0 guard avoids a shift by 64
            if (i!=7 && shiftRight!=0) uZ2.ui[i+1] = frac64Z << (64 - shiftRight);
            break;
        }
    }


    //This is the only difference from ADD (signZ2) and (!signZ2)
    //Two's complement: negate the lowest non-zero word, invert every word above it
    if (!signZ2){
        for (i=7; i>=0; i--){
            if (uZ2.ui[i]>0){
                uZ2.ui[i] = - uZ2.ui[i];
                i--;
                while(i>=0){
                    uZ2.ui[i] = ~uZ2.ui[i];
                    i--;
                }
                break;
            }
        }

    }

    //Subtraction (performed as an addition of the negated product)
    //Each word is added as (a>>1)+(b>>1)+carry-in so that bit 63 of the
    //partial sum is the carry out; the dropped low bit is re-attached after.
    for (i=7; i>=0; i--){
        b1 = uZ1.ui[i] & 0x1;
        b2 = uZ2.ui[i] & 0x1;
        if (i==7){
            rcarryb = b1 & b2;
            uZ.ui[i] = (uZ1.ui[i]>>1) + (uZ2.ui[i]>>1) + rcarryb;
            rcarryZ = uZ.ui[i]>>63;
            uZ.ui[i] = (uZ.ui[i]<<1 | (b1^b2) );
        }
        else{
            int_fast8_t rcarryb3 = b1 + b2 + rcarryZ;
            uZ.ui[i] = (uZ1.ui[i]>>1) + (uZ2.ui[i]>>1) + (rcarryb3>>1);
            rcarryZ = uZ.ui[i]>>63;
            uZ.ui[i] = (uZ.ui[i]<<1 | (rcarryb3 & 0x1) );
        }

    }

    //Exception handling
    //a result that lands on the NaR pattern has its first word cleared
    if (isNaRQ32(uZ.q) ) uZ.q.v[0]=0;

    return uZ.q;
}

diff --git a/source/luametatex/source/libraries/softposit/source/quire8_fdp_add.c b/source/luametatex/source/libraries/softposit/source/quire8_fdp_add.c
new file mode 100644
index 000000000..8d8497352
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/quire8_fdp_add.c
@@ -0,0
+1,137 @@ +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
=============================================================================*/


#include <inttypes.h>

#include "platform.h"
#include "internals.h"

/*
 * q8_fdp_add -- fused dot-product step: returns q + (pA * pB).
 *
 *   q      : 32-bit accumulator.
 *   pA, pB : 8-bit posit factors.
 *
 * NaR in any input returns the pattern 0x80000000; a zero factor returns q
 * unchanged.  Otherwise the significand product is shifted into place
 * according to the summed regime value kA, negated when the factor signs
 * differ, and added to q with ordinary 32-bit arithmetic.
 */
quire8_t q8_fdp_add( quire8_t q, posit8_t pA, posit8_t pB ){
    union ui8_p8 uA, uB;
    union ui32_q8 uqZ, uqZ1, uqZ2;
    uint_fast8_t uiA, uiB;
    uint_fast8_t fracA, tmp;
    bool signA, signB, signZ2, regSA, regSB, rcarry;
    int_fast8_t kA=0, shiftRight=0;
    uint_fast32_t frac32Z;


    uqZ1.q = q;

    uA.p = pA;
    uiA = uA.ui;
    uB.p = pB;
    uiB = uB.ui;

    //NaR
    if (isNaRQ8(q) || isNaRP8UI(uA.ui) || isNaRP8UI(uB.ui)){
        uqZ2.ui=0x80000000;
        return uqZ2.q;
    }
    else if (uiA==0 || uiB==0)
        return q;


    //Product sign; both factors are reduced to their magnitudes
    signA = signP8UI( uiA );
    signB = signP8UI( uiB );
    signZ2 = signA ^ signB;

    if(signA) uiA = (-uiA & 0xFF);
    if(signB) uiB = (-uiB & 0xFF);

    regSA = signregP8UI(uiA);
    regSB = signregP8UI(uiB);

    //Decode A: regime run -> kA, remaining bits are the fraction
    tmp = (uiA<<2) & 0xFF;
    if (regSA){
        while (tmp>>7){
            kA++;
            tmp= (tmp<<1) & 0xFF;
        }
    }
    else{
        kA=-1;
        while (!(tmp>>7)){
            kA--;
            tmp= (tmp<<1) & 0xFF;
        }
        tmp&=0x7F;
    }
    //hidden bit at 0x80
    fracA = (0x80 | tmp);

    //Decode B, accumulating its regime onto A's
    tmp = (uiB<<2) & 0xFF;
    if (regSB){
        while (tmp>>7){
            kA++;
            tmp= (tmp<<1) & 0xFF;
        }
    }
    else{
        kA--;
        while (!(tmp>>7)){
            kA--;
            tmp= (tmp<<1) & 0xFF;
        }
        tmp&=0x7F;
    }
    //16-bit product of the two significands, moved to the top of 32 bits
    frac32Z = (uint_fast32_t)( fracA * (0x80 | tmp) ) <<16;

    //bit 31 set: the significand product reached 2.0, so bump the regime and
    //shift back so the hidden bit sits at bit 30
    rcarry = frac32Z>>31;
    if (rcarry){
        kA ++;
        frac32Z>>=1;
    }



    //default dot is between bit 19 and 20, extreme left bit is bit 0. Last right bit is bit 31.
    //Scale = 2^es * k + e => 2k + e // firstPost = 19-kA, shift = firstPos -1 (because frac32Z start from 2nd bit)
    //int firstPos = 19 - kA;
    shiftRight = 18-kA;

    uqZ2.ui = frac32Z>> shiftRight;


    //negative product: two's complement within 32 bits
    if (signZ2) uqZ2.ui = -uqZ2.ui & 0xFFFFFFFF;


    //Addition
    uqZ.ui = uqZ2.ui + uqZ1.ui;
    //Exception handling
    //a sum that lands on the NaR pattern is replaced by zero
    if (isNaRQ8(uqZ.q) ) uqZ.q.v = 0;

    return uqZ.q;
}

diff --git a/source/luametatex/source/libraries/softposit/source/quire8_fdp_sub.c b/source/luametatex/source/libraries/softposit/source/quire8_fdp_sub.c
new file mode 100644
index 000000000..f1de20cb7
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/quire8_fdp_sub.c
@@ -0,0 +1,136 @@
/*============================================================================

This C source file is part of the SoftPosit Posit Arithmetic Package
by S. H. Leong (Cerlane).

Copyright 2017, 2018 A*STAR. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

=============================================================================*/

#include <inttypes.h>

#include "platform.h"
#include "internals.h"


//q - (pA*pB)
/*
 * q8_fdp_sub -- fused dot-product step on the 32-bit accumulator q.
 *
 * Same procedure as q8_fdp_add, except that the positioned product is
 * negated when the factor signs are EQUAL, so the final addition performs
 * a subtraction.  NaR in any input returns 0x80000000; a zero factor
 * returns q unchanged.
 */
quire8_t q8_fdp_sub( quire8_t q, posit8_t pA, posit8_t pB ){

    union ui8_p8 uA, uB;
    union ui32_q8 uqZ, uqZ1, uqZ2;
    uint_fast8_t uiA, uiB;
    uint_fast8_t fracA, tmp;
    bool signA, signB, signZ2, regSA, regSB, rcarry;
    int_fast8_t kA=0, shiftRight=0;
    uint_fast32_t frac32Z;

    uqZ1.q = q;

    uA.p = pA;
    uiA = uA.ui;
    uB.p = pB;
    uiB = uB.ui;

    //NaR
    if (isNaRQ8(q) || isNaRP8UI(uA.ui) || isNaRP8UI(uB.ui)){
        uqZ2.ui=0x80000000;
        return uqZ2.q;
    }
    else if (uiA==0 || uiB==0)
        return q;


    //Product sign; both factors are reduced to their magnitudes
    signA = signP8UI( uiA );
    signB = signP8UI( uiB );
    signZ2 = signA ^ signB;

    if(signA) uiA = (-uiA & 0xFF);
    if(signB) uiB = (-uiB & 0xFF);

    regSA = signregP8UI(uiA);
    regSB = signregP8UI(uiB);

    //Decode A: regime run -> kA, remaining bits are the fraction
    tmp = (uiA<<2) & 0xFF;
    if (regSA){
        while (tmp>>7){
            kA++;
            tmp= (tmp<<1) & 0xFF;
        }
    }
    else{
        kA=-1;
        while (!(tmp>>7)){
            kA--;
            tmp= (tmp<<1) & 0xFF;
        }
        tmp&=0x7F;
    }
    //hidden bit at 0x80
    fracA = (0x80 | tmp);

    //Decode B, accumulating its regime onto A's
    tmp = (uiB<<2) & 0xFF;
    if (regSB){
        while (tmp>>7){
            kA++;
            tmp= (tmp<<1) & 0xFF;
        }
    }
    else{
        kA--;
        while (!(tmp>>7)){
            kA--;
            tmp= (tmp<<1) & 0xFF;
        }
        tmp&=0x7F;
    }
    //16-bit product of the two significands, moved to the top of 32 bits
    frac32Z = (uint_fast32_t)( fracA * (0x80 | tmp) ) <<16;

    //bit 31 set: the significand product reached 2.0, so bump the regime and
    //shift back so the hidden bit sits at bit 30
    rcarry = frac32Z>>31;
    if (rcarry){
        kA ++;
        frac32Z>>=1;
    }

    //default dot is between bit 19 and 20, extreme left bit is bit 0. Last right bit is bit 31.
    //Scale = 2^es * k + e => 2k + e // firstPost = 19-kA, shift = firstPos -1 (because frac32Z start from 2nd bit)
    //int firstPos = 19 - kA;
    shiftRight = 18-kA;

    uqZ2.ui = frac32Z>> shiftRight;

    //This is the only difference from ADD (signZ2) and (!signZ2)
    if (!signZ2) uqZ2.ui = -uqZ2.ui & 0xFFFFFFFF;

    //Addition
    uqZ.ui = uqZ2.ui + uqZ1.ui;

    //Exception handling
    //a result that lands on the NaR pattern is replaced by zero
    if (isNaRQ8(uqZ.q) ) uqZ.q.v=0;

    return uqZ.q;
}

diff --git a/source/luametatex/source/libraries/softposit/source/quire_helper.c b/source/luametatex/source/libraries/softposit/source/quire_helper.c
new file mode 100644
index 000000000..110e40433
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/quire_helper.c
@@ -0,0 +1,194 @@
/*============================================================================

This C source file is part of the SoftPosit Posit Arithmetic Package
by S. H. Leong (Cerlane).

Copyright 2017 A*STAR. All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

 1. Redistributions of source code must retain the above copyright notice,
    this list of conditions, and the following disclaimer.

 2. Redistributions in binary form must reproduce the above copyright notice,
    this list of conditions, and the following disclaimer in the documentation
    and/or other materials provided with the distribution.

 3. Neither the name of the University nor the names of its contributors may
    be used to endorse or promote products derived from this software without
    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include "platform.h"
+#include "internals.h"
+
+
+/*
+ * Print the low `size` bits of *s, most significant bit first, with a space
+ * before every group of 8 bits and a trailing newline.
+ * NOTE(review): size is expected to be in 1..64; larger values would shift
+ * by more than the word width.
+ */
+void printBinary(uint64_t * s, int size) {
+    uint64_t number = *s;
+    int bitSize = size -1;
+    for(int i = 0; i < size; ++i) {
+        if(i%8 == 0)
+            putchar(' ');
+        /* emit the digit directly: no uint64_t passed to a %llu conversion */
+        putchar('0' + (int)((number >> (bitSize-i))&1));
+    }
+    printf("\n");
+
+}
+
+/*
+ * Print all 512 bits of a quire32, v[0] first, 64 bits per output line,
+ * a space before every 8 bits and a '.' before bit index 272.
+ */
+void printBinaryQuire32(quire32_t * s){
+    int size = 512;
+    int dotPos = 272;
+    int bitSize = 63;
+
+    for(int i = 0; i < size; ++i) {
+        if (i!=0 && i%64==0)
+            printf("\n");
+        if(i%8 == 0)
+            putchar(' ');
+        if (i==dotPos)
+            putchar('.');
+        /* Bit position is taken inside the current 64-bit word. The previous
+         * code shifted by (63 - i), which is negative (undefined behaviour)
+         * for every word after the first. */
+        putchar('0' + (int)((s->v[i/64] >> (bitSize - i%64))&1));
+    }
+    printf("\n");
+}
+
+/*
+ * Print all 128 bits of a quire16, v[0] first, 64 bits per output line,
+ * a space before every 8 bits and a '.' before bit index 72.
+ */
+void printBinaryQuire16(quire16_t * s){
+    int size = 128;
+    int dotPos = 72;
+    int bitSize = 63;
+
+    for(int i = 0; i < size; ++i) {
+        if (i!=0 && i%64==0)
+            printf("\n");
+        if(i%8 == 0)
+            putchar(' ');
+        if (i==dotPos)
+            putchar('.');
+        /* same per-word bit index fix as in printBinaryQuire32 */
+        putchar('0' + (int)((s->v[i/64] >> (bitSize - i%64))&1));
+    }
+    printf("\n");
+}
+
+/*
+ * Print the 32 bits of a quire8, most significant first, a space before
+ * every 8 bits and a '.' before bit index 20.
+ */
+void printBinaryQuire8(quire8_t * s){
+    int size = 32;
+    uint32_t number = s->v;
+    int dotPos = 20;
+
+    int bitSize = size -1;
+    for(int i = 0; i < size; ++i) {
+        if(i%8 == 0)
+            putchar(' ');
+        if (i==dotPos)
+            putchar('.');
+        putchar('0' + (int)((number >> (bitSize-i))&1));
+    }
+    printf("\n");
+}
+
+/*
+ * Print the top `size` bits of the 32-bit word *s, most significant first,
+ * with a space before every 8 bits.
+ * NOTE(review): size is expected to be in 1..32; size == 0 would shift by 32.
+ */
+void printBinaryPX(uint32_t * s, int size) {
+    uint32_t number = *s;
+    number >>= (32-size);
+    int bitSize = size -1;
+    for(int i = 0; i < size; ++i){
+        if(i%8 == 0)
+            putchar(' ');
+        putchar('0' + (int)((number >> (bitSize-i))&1));
+    }
+    printf("\n");
+
+}
+
+/* Print s as 16 zero-padded lowercase hex digits followed by a newline. */
+void printHex64(uint64_t s) {
+    /* cast so the argument type matches %llx on every ABI */
+    printf("%016llx\n", (unsigned long long)s);
+
+}
+
+/* Print s as 0x-prefixed lowercase hex followed by a newline. */
+void printHex(uint64_t s) {
+    printf("0x%llx\n", (unsigned long long)s);
+
+}
+
+/* Print the top `size` bits of s as 0x-prefixed hex (size expected in 1..32). */
+void printHexPX(uint32_t s, int size) {
+    s>>=(32-size);
+    printf("0x%x\n", (unsigned int)s);
+
+}
+
+/*
+ * Two's-complement negate a 128-bit quire16 (v[0] high word, v[1] low word).
+ * Zero and NaR are returned unchanged.
+ */
+quire16_t q16_TwosComplement(quire16_t q){
+    if (!isQ16Zero(q) && !isNaRQ16(q)){
+        if (q.v[1]==0){
+            /* low word is zero: the +1 carries straight into the high word */
+            q.v[0] = -q.v[0];
+        }
+        else{
+            q.v[1] = - q.v[1];
+            q.v[0] = ~q.v[0];
+        }
+    }
+    return q;
+
+}
+
+/*
+ * Two's-complement negate a quire32 held in v[0..7], v[7] being the least
+ * significant word. Zero and NaR are returned unchanged.
+ *
+ * The lowest non-zero word is negated and every more significant word is
+ * inverted. The loop runs down to index 0: the previous `while(i)` stopped
+ * at 1, so v[0] was never touched and a quire whose only non-zero word was
+ * v[0] came back unchanged.
+ */
+quire32_t q32_TwosComplement(quire32_t q){
+    if (!isQ32Zero(q) && !isNaRQ32(q)){
+        bool found = false;
+        for (int i = 7; i >= 0; --i){
+            if (found){
+                q.v[i] = ~q.v[i];
+            }
+            else if (q.v[i]!=0){
+                q.v[i] = -q.v[i];
+                found = true;
+            }
+        }
+    }
+    return q;
+
+}
+
+/*
+ * Two's-complement negate a quire_2_t held in v[0..7]; same algorithm and
+ * same index-0 fix as q32_TwosComplement.
+ */
+quire_2_t qX2_TwosComplement(quire_2_t q){
+    if (!isQX2Zero(q) && !isNaRQX2(q)){
+        bool found = false;
+        for (int i = 7; i >= 0; --i){
+            if (found){
+                q.v[i] = ~q.v[i];
+            }
+            else if (q.v[i]!=0){
+                q.v[i] = -q.v[i];
+                found = true;
+            }
+        }
+    }
+    return q;
+
+}
+
+
diff --git a/source/luametatex/source/libraries/softposit/source/s_addMagsP16.c b/source/luametatex/source/libraries/softposit/source/s_addMagsP16.c
new file mode 100644
index 000000000..a90b4bb65
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/s_addMagsP16.c
@@ -0,0 +1,172 @@
+
+/*============================================================================
+
+This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane).
+
+Copyright 2017, 2018 A*STAR. All rights reserved.
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+=============================================================================*/
+
+
+#include "platform.h"
+#include "internals.h"
+
+#include "stdlib.h"
+#include <math.h>
+
+/*
+ * softposit_addMagsP16: add two 16-bit posit bit patterns and return the
+ * rounded posit16 sum.
+ *
+ * Only uiA's sign bit is inspected; when it is set BOTH operands are negated
+ * before the addition and the result is negated again at the end, so callers
+ * are presumably expected to pass operands of equal sign (TODO confirm).
+ * The result saturates at 0x7FFF / 0x0001 when the regime fills the word and
+ * is otherwise rounded to nearest, ties to even.
+ * With SOFTPOSIT_EXACT defined an extra isExact parameter is accepted; it is
+ * not read in this function.
+ */
+#ifdef SOFTPOSIT_EXACT
+posit16_t softposit_addMagsP16( uint_fast16_t uiA, uint_fast16_t uiB, bool isExact){
+#else
+posit16_t softposit_addMagsP16( uint_fast16_t uiA, uint_fast16_t uiB ){
+#endif
+
+    uint_fast16_t regA, uiX, uiY;
+    uint_fast32_t frac32A, frac32B;
+    uint_fast16_t fracA=0, regime, tmp;
+    bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0;
+    int_fast8_t kA=0, expA;
+    int_fast16_t shiftRight;
+    union ui16_p16 uZ;
+
+    sign = signP16UI( uiA ); //sign is always positive.. actually don't have to do this.
+    if (sign){
+        uiA = -uiA & 0xFFFF;
+        uiB = -uiB & 0xFFFF;
+    }
+
+    // order the operands so that uiA holds the larger pattern
+    if ((int_fast16_t)uiA < (int_fast16_t)uiB){
+        uiX = uiA;
+        uiY = uiB;
+        uiA = uiY;
+        uiB = uiX;
+    }
+    regSA = signregP16UI( uiA );
+    regSB = signregP16UI( uiB );
+
+    // Decode A: drop the two top bits and count the rest of the regime run
+    // into kA.
+    tmp = (uiA<<2) & 0xFFFF;
+    if (regSA){
+        while (tmp>>15){
+            kA++;
+            tmp= (tmp<<1) & 0xFFFF;
+        }
+    }
+    else{
+        kA=-1;
+        while (!(tmp>>15)){
+            kA--;
+            tmp= (tmp<<1) & 0xFFFF;
+        }
+        tmp&=0x7FFF;
+    }
+    // bit 14 of tmp is the single exponent bit; it is then overwritten by the
+    // hidden bit when the significand is built
+    expA = tmp>>14;
+    frac32A = (0x4000 | tmp) << 16;
+    shiftRight = kA;
+
+    // Decode B; shiftRight accumulates kA - kB while B's regime is scanned.
+    tmp = (uiB<<2) & 0xFFFF;
+    if (regSB){
+        while (tmp>>15){
+            shiftRight--;
+            tmp= (tmp<<1) & 0xFFFF;
+        }
+        frac32B = (0x4000 | tmp) <<16;
+    }
+    else{
+        shiftRight++;
+        while (!(tmp>>15)){
+            shiftRight++;
+            tmp= (tmp<<1) & 0xFFFF;
+        }
+        tmp&=0x7FFF;
+        frac32B = ( (0x4000 | tmp) <<16 ) & 0x7FFFFFFF;
+    }
+
+    //This is 2kZ + expZ; (where kZ=kA-kB and expZ=expA-expB)
+    shiftRight = (shiftRight<<1) + expA - (tmp>>14);
+
+    if (shiftRight==0){
+        // equal scales: the sum of the two significands always carries
+        frac32A += frac32B;
+        //rcarry is one
+        if (expA) kA ++;
+        expA^=1;
+        frac32A>>=1;
+    }
+    else{
+        //Manage CLANG (LLVM) compiler when shifting right more than number of bits
+        (shiftRight>31) ? (frac32B=0): (frac32B >>= shiftRight); //frac32B >>= shiftRight
+
+        frac32A += frac32B;
+        rcarry = 0x80000000 & frac32A; //first left bit
+        if(rcarry){
+            // carry out: increment the scale (exponent bit, then regime)
+            if (expA) kA ++;
+            expA^=1;
+            frac32A>>=1;
+        }
+    }
+    // Re-encode the regime: regA is the run length, regime its bit pattern.
+    if(kA<0){
+        regA = (-kA & 0xFFFF);
+        regSA = 0;
+        regime = 0x4000>>regA;
+    }
+    else{
+        regA = kA+1;
+        regSA=1;
+        regime = 0x7FFF - (0x7FFF>>regA);
+    }
+    if(regA>14){
+        //max or min pos. exp and frac does not matter.
+        (regSA) ? (uZ.ui= 0x7FFF): (uZ.ui=0x1);
+    }
+    else{
+        //remove hidden bits
+        frac32A = (frac32A & 0x3FFFFFFF) >>(regA + 1) ;
+        fracA = frac32A>>16;
+        // bitNPlusOne: first bit that does not fit; bitsMore: any set bit below it
+        if (regA!=14) bitNPlusOne = (frac32A>>15) & 0x1;
+        else if (frac32A>0){
+            fracA=0;
+            bitsMore =1;
+        }
+        if (regA==14 && expA) bitNPlusOne = 1;
+        uZ.ui = packToP16UI(regime, regA, expA, fracA);
+        if (bitNPlusOne){
+            if ( frac32A&0x7FFF ) bitsMore=1;
+            //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even
+            uZ.ui += (uZ.ui&1) | bitsMore;
+        }
+    }
+
+    if (sign) uZ.ui = -uZ.ui & 0xFFFF;
+    return uZ.p;
+}
+
diff --git a/source/luametatex/source/libraries/softposit/source/s_addMagsP32.c b/source/luametatex/source/libraries/softposit/source/s_addMagsP32.c
new file mode 100644
index 000000000..47ecc1b3a
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/s_addMagsP32.c
@@ -0,0 +1,173 @@
+
+/*============================================================================
+
+This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane).
+
+Copyright 2017, 2018 A*STAR. All rights reserved.
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1.
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+=============================================================================*/
+
+#include "platform.h"
+#include "internals.h"
+
+/*
+ * softposit_addMagsP32: add two 32-bit posit bit patterns (2 exponent bits)
+ * and return the rounded posit32 sum.
+ *
+ * Only uiA's sign bit is inspected; when it is set BOTH operands are negated
+ * before the addition and the result is negated again at the end, so callers
+ * are presumably expected to pass operands of equal sign (TODO confirm).
+ * The result saturates at 0x7FFFFFFF / 0x00000001 when the regime fills the
+ * word and is otherwise rounded to nearest, ties to even.
+ */
+posit32_t softposit_addMagsP32( uint_fast32_t uiA, uint_fast32_t uiB ) {
+    uint_fast16_t regA, regB;
+    uint_fast64_t frac64A=0, frac64B=0;
+    uint_fast32_t fracA=0, regime, tmp;
+    bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0;
+    int_fast8_t kA=0;
+    int_fast32_t expA;
+    int_fast16_t shiftRight;
+    union ui32_p32 uZ;
+
+    sign = signP32UI( uiA );
+    if (sign){
+        uiA = -uiA & 0xFFFFFFFF;
+        uiB = -uiB & 0xFFFFFFFF;
+    }
+
+    // order the operands so that uiA holds the larger pattern (XOR swap)
+    if ((int_fast32_t)uiA < (int_fast32_t)uiB){
+        uiA ^= uiB;
+        uiB ^= uiA;
+        uiA ^= uiB;
+    }
+    regSA = signregP32UI( uiA );
+    regSB = signregP32UI( uiB );
+
+    // Decode A: drop the two top bits and count the rest of the regime run
+    // into kA.
+    tmp = (uiA<<2)&0xFFFFFFFF;
+    if (regSA){
+        while (tmp>>31){
+            kA++;
+            tmp= (tmp<<1) & 0xFFFFFFFF;
+        }
+    }
+    else{
+        kA=-1;
+        while (!(tmp>>31)){
+            kA--;
+            tmp= (tmp<<1) & 0xFFFFFFFF;
+        }
+        tmp&=0x7FFFFFFF;
+    }
+
+    expA = tmp>>29; //to get 2 bits
+    // shift the exponent bits out and put the hidden bit at bit 30
+    frac64A = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32;
+    shiftRight = kA;
+
+    // Decode B; shiftRight accumulates kA - kB while B's regime is scanned.
+    tmp = (uiB<<2) & 0xFFFFFFFF;
+    if (regSB){
+        while (tmp>>31){
+            shiftRight--;
+            tmp= (tmp<<1) & 0xFFFFFFFF;
+        }
+    }
+    else{
+        shiftRight++;
+        while (!(tmp>>31)){
+            shiftRight++;
+            tmp= (tmp<<1) & 0xFFFFFFFF;
+        }
+        tmp&=0x7FFFFFFF;
+    }
+    frac64B = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32;
+    //This is 4kZ + expZ; (where kZ=kA-kB and expZ=expA-expB)
+    shiftRight = (shiftRight<<2) + expA - (tmp>>29);
+
+    //Manage CLANG (LLVM) compiler when shifting right more than number of bits
+    (shiftRight>63) ? (frac64B=0): (frac64B >>= shiftRight); //frac64B >>= shiftRight
+
+    frac64A += frac64B;
+
+    rcarry = 0x8000000000000000 & frac64A; //first left bit
+    if (rcarry){
+        // carry out: increment the exponent, spilling into the regime past 3
+        expA++;
+        if (expA>3){
+            kA ++;
+            expA&=0x3;
+        }
+        frac64A>>=1;
+    }
+    // Re-encode the regime: regA is the run length, regime its bit pattern.
+    if(kA<0){
+        regA = -kA;
+        regSA = 0;
+        regime = 0x40000000>>regA;
+    }
+    else{
+        regA = kA+1;
+        regSA=1;
+        regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA);
+    }
+
+    if(regA>30){
+        //max or min pos. exp and frac does not matter.
+        (regSA) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1);
+    }
+    else{
+        //remove hidden bits
+        frac64A = (frac64A & 0x3FFFFFFFFFFFFFFF) >>(regA + 2) ; // 2 bits exp
+
+        fracA = frac64A>>32;
+
+        if (regA<=28){
+            // regime leaves room for both exponent bits
+            bitNPlusOne |= (0x80000000 & frac64A) ;
+            expA <<= (28-regA);
+        }
+        else {
+            // exponent is partly or wholly cut off: its lost bits drive rounding
+            if (regA==30){
+                bitNPlusOne = expA&0x2;
+                bitsMore = (expA&0x1);
+                expA = 0;
+            }
+            else if (regA==29){
+                bitNPlusOne = expA&0x1;
+                expA>>=1;
+            }
+            if (fracA>0){
+                fracA=0;
+                bitsMore =1;
+            }
+        }
+
+        uZ.ui = packToP32UI(regime, expA, fracA);
+        //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even
+        if (bitNPlusOne){
+            if (0x7FFFFFFF & frac64A) bitsMore=1;
+            uZ.ui += (uZ.ui&1) | bitsMore;
+        }
+    }
+    if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF;
+    return uZ.p;
+}
+
diff --git a/source/luametatex/source/libraries/softposit/source/s_addMagsP8.c b/source/luametatex/source/libraries/softposit/source/s_addMagsP8.c
new file mode 100644
index 000000000..8e0a74641
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/s_addMagsP8.c
@@ -0,0 +1,145 @@
+
+/*============================================================================
+
+This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane).
+
+Copyright 2017, 2018 A*STAR. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3.
Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+
+#include "platform.h"
+#include "internals.h"
+
+/*
+ * softposit_addMagsP8: add two 8-bit posit bit patterns (no exponent field is
+ * decoded here) and return the rounded posit8 sum.
+ *
+ * Only uiA's sign bit is inspected; when it is set BOTH operands are negated
+ * before the addition and the result is negated again at the end, so callers
+ * are presumably expected to pass operands of equal sign (TODO confirm).
+ * The result saturates at 0x7F / 0x01 when the regime fills the word and is
+ * otherwise rounded to nearest, ties to even.
+ * With SOFTPOSIT_EXACT defined an extra isExact parameter is accepted; it is
+ * not read in this function.
+ */
+#ifdef SOFTPOSIT_EXACT
+posit8_t softposit_addMagsP8( uint_fast8_t uiA, uint_fast8_t uiB, bool isExact){
+#else
+posit8_t softposit_addMagsP8( uint_fast8_t uiA, uint_fast8_t uiB ){
+#endif
+
+    uint_fast8_t regA;
+    uint_fast16_t frac16A, frac16B;
+    uint_fast8_t fracA=0, regime, tmp;
+    bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0;
+    int_fast8_t kA=0;
+    int_fast16_t shiftRight;
+    union ui8_p8 uZ;
+
+    sign = signP8UI( uiA ); //sign is always positive.. actually don't have to do this.
+    if (sign){
+        uiA = -uiA & 0xFF;
+        uiB = -uiB & 0xFF;
+    }
+
+    // order the operands so that uiA holds the larger pattern (XOR swap)
+    if ((int_fast8_t)uiA < (int_fast8_t)uiB){
+        uiA ^= uiB;
+        uiB ^= uiA;
+        uiA ^= uiB;
+    }
+    regSA = signregP8UI( uiA );
+    regSB = signregP8UI( uiB );
+
+    // Decode A: drop the two top bits and count the rest of the regime run
+    // into kA.
+    tmp = (uiA<<2) & 0xFF;
+    if (regSA){
+        while (tmp>>7){
+            kA++;
+            tmp= (tmp<<1) & 0xFF;
+        }
+    }
+    else{
+        kA=-1;
+        while (!(tmp>>7)){
+            kA--;
+            tmp= (tmp<<1) & 0xFF;
+        }
+        tmp&=0x7F;
+    }
+    // significand of A with the hidden bit set, widened to 16 bits
+    frac16A = (0x80 | tmp) << 7;
+    shiftRight = kA;
+
+    // Decode B; shiftRight accumulates kA - kB while B's regime is scanned.
+    tmp = (uiB<<2) & 0xFF;
+    if (regSB){
+        while (tmp>>7){
+            shiftRight--;
+            tmp= (tmp<<1) & 0xFF;
+        }
+    }
+    else{
+        shiftRight++;
+        while (!(tmp>>7)){
+            shiftRight++;
+            tmp= (tmp<<1) & 0xFF;
+        }
+        tmp&=0x7F;
+    }
+    frac16B = (0x80 | tmp) <<7 ;
+
+    //Manage CLANG (LLVM) compiler when shifting right more than number of bits
+    (shiftRight>7) ? (frac16B=0): (frac16B >>= shiftRight); //frac16B >>= shiftRight
+
+    frac16A += frac16B;
+
+    rcarry = 0x8000 & frac16A; //first left bit
+    if (rcarry){
+        // carry out: the scale goes up by one regime step
+        kA++;
+        frac16A>>=1;
+    }
+
+    // Re-encode the regime: regA is the run length, regime its bit pattern.
+    if(kA<0){
+        regA = (-kA & 0xFF);
+        regSA = 0;
+        regime = 0x40>>regA;
+    }
+    else{
+        regA = kA+1;
+        regSA=1;
+        regime = 0x7F-(0x7F>>regA);
+    }
+
+    if(regA>6){
+        //max or min pos. exp and frac does not matter.
+        (regSA) ? (uZ.ui= 0x7F): (uZ.ui=0x1);
+    }
+    else{
+        // remove the hidden bit and make room for the regime
+        frac16A = (frac16A&0x3FFF) >> regA;
+        fracA = (uint_fast8_t) (frac16A>>8);
+        bitNPlusOne = (0x80 & frac16A) ;
+        uZ.ui = packToP8UI(regime, fracA);
+
+        //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even
+        if (bitNPlusOne){
+            if (0x7F & frac16A) bitsMore=1;
+            uZ.ui += (uZ.ui&1) | bitsMore;
+        }
+    }
+    if (sign) uZ.ui = -uZ.ui & 0xFF;
+    return uZ.p;
+}
+
+
diff --git a/source/luametatex/source/libraries/softposit/source/s_addMagsPX1.c b/source/luametatex/source/libraries/softposit/source/s_addMagsPX1.c
new file mode 100644
index 000000000..608ca948c
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/s_addMagsPX1.c
@@ -0,0 +1,190 @@
+
+/*============================================================================
+
+This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane).
+
+Copyright 2017, 2018 A*STAR. All rights reserved.
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED.
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include "platform.h"
+#include "internals.h"
+
+/*
+ * softposit_addMagsPX1: add two x-bit posits with one exponent bit, stored
+ * left-aligned in 32-bit words, and return the rounded sum (also
+ * left-aligned: results are masked and rounded at bit 32-x).
+ *
+ * Only uiA's sign bit is inspected; when it is set BOTH operands are negated
+ * before the addition and the result is negated again at the end, so callers
+ * are presumably expected to pass operands of equal sign (TODO confirm).
+ * x == 2 is handled as a special case; NOTE(review): the valid range of x is
+ * not checked here.
+ */
+posit_1_t softposit_addMagsPX1( uint_fast32_t uiA, uint_fast32_t uiB, int x ) {
+    int regA;
+    uint_fast64_t frac64A=0, frac64B=0;
+    uint_fast32_t fracA=0, regime, tmp;
+    bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0;
+    int_fast8_t kA=0;
+    int_fast32_t expA=0;
+    int_fast16_t shiftRight;
+    union ui32_pX1 uZ;
+
+
+    sign = signP32UI( uiA );
+    if (sign){
+        uiA = -uiA & 0xFFFFFFFF;
+        uiB = -uiB & 0xFFFFFFFF;
+    }
+
+    // order the operands so that uiA holds the larger pattern (XOR swap)
+    if ((int_fast32_t)uiA < (int_fast32_t)uiB){
+        uiA ^= uiB;
+        uiB ^= uiA;
+        uiA ^= uiB;
+    }
+    regSA = signregP32UI( uiA );
+    regSB = signregP32UI( uiB );
+
+    if (x==2){
+        // 2-bit posit: the result is 0x40000000 if either regime bit is set, else 0
+        uZ.ui = (regSA|regSB) ? (0x40000000) : (0x0);
+    }
+    else{
+        // Decode A: drop the two top bits and count the rest of the regime
+        // run into kA.
+        tmp = (uiA<<2)&0xFFFFFFFF;
+
+        if (regSA){
+            while (tmp>>31){
+                kA++;
+                tmp= (tmp<<1) & 0xFFFFFFFF;
+            }
+        }
+        else{
+            kA=-1;
+            while (!(tmp>>31)){
+                kA--;
+                tmp= (tmp<<1) & 0xFFFFFFFF;
+            }
+            tmp&=0x7FFFFFFF;
+        }
+
+        expA = tmp>>30; //to get 1 bits
+        // the exponent bit position is overwritten by the hidden bit
+        frac64A = ((0x40000000ULL | tmp) & 0x7FFFFFFFULL) <<32;
+        shiftRight = kA;
+
+        // Decode B; shiftRight accumulates kA - kB while B's regime is scanned.
+        tmp = (uiB<<2) & 0xFFFFFFFF;
+        if (regSB){
+            while (tmp>>31){
+                shiftRight--;
+                tmp= (tmp<<1) & 0xFFFFFFFF;
+            }
+        }
+        else{
+            shiftRight++;
+            while (!(tmp>>31)){
+                shiftRight++;
+                tmp= (tmp<<1) & 0xFFFFFFFF;
+            }
+            tmp&=0x7FFFFFFF;
+        }
+
+        frac64B = ((0x40000000ULL | tmp) & 0x7FFFFFFFULL) <<32;
+        //This is 2kZ + expZ; (where kZ=kA-kB and expZ=expA-expB)
+        shiftRight = (shiftRight<<1) + expA - (tmp>>30);
+
+        if (shiftRight==0){
+            // equal scales: the sum of the two significands always carries
+            frac64A += frac64B;
+            //rcarry is one
+            if (expA) kA ++;
+            expA^=1;
+            frac64A>>=1;
+        }
+        else{
+            //Manage CLANG (LLVM) compiler when shifting right more than number of bits
+            (shiftRight>63) ? (frac64B=0): (frac64B >>= shiftRight); //frac64B >>= shiftRight
+
+            frac64A += frac64B;
+
+            rcarry = 0x8000000000000000 & frac64A; //first left bit
+            if (rcarry){
+                // carry out: increment the scale (exponent bit, then regime)
+                if (expA) kA ++;
+                expA^=1;;
+                frac64A>>=1;
+            }
+        }
+
+        // Re-encode the regime: regA is the run length, regime its bit pattern.
+        if(kA<0){
+            regA = -kA;
+            regSA = 0;
+            regime = 0x40000000>>regA;
+        }
+        else{
+            regA = kA+1;
+            regSA=1;
+            regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA);
+        }
+
+        if(regA>(x-2)){
+            //max or min pos. exp and frac does not matter.
+            uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x));
+        }
+        else{
+            //remove hidden bits
+            frac64A = (frac64A & 0x3FFFFFFFFFFFFFFF) >>(regA + 1) ; // 1 bit exp
+            fracA = frac64A>>32;
+
+            //regime length is smaller than length of posit
+            if (regA<x){
+                if (regA!=(x-2))
+                    bitNPlusOne |= (((uint64_t)0x8000000000000000>>x) & frac64A);
+                else if (frac64A>0){
+                    fracA=0;
+                    bitsMore =1;
+                }
+                if (regA==(x-2) && expA){
+                    // the exponent bit itself falls off the end and drives rounding
+                    bitNPlusOne = 1;
+                    expA=0;
+                }
+            }
+            else{
+                regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x));
+                expA=0;
+                fracA=0;
+            }
+            // keep only the top x bits of the fraction word
+            fracA &=((int32_t)0x80000000>>(x-1));
+
+            expA <<= (29-regA);
+            uZ.ui = packToP32UI(regime, expA, fracA);
+
+            //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even
+            if (bitNPlusOne){
+                if ((0x7FFFFFFFFFFFFFFF>>x) & frac64A) bitsMore=1;
+                // the rounding increment is applied at the last kept bit (32-x)
+                uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ;
+            }
+        }
+    }
+
+    if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF;
+    return uZ.p;
+}
+
diff --git a/source/luametatex/source/libraries/softposit/source/s_addMagsPX2.c b/source/luametatex/source/libraries/softposit/source/s_addMagsPX2.c
new file mode 100644
index 000000000..d4515b8c6
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/s_addMagsPX2.c
@@ -0,0 +1,198 @@
+
+/*============================================================================
+
+This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane).
+
+Copyright 2017, 2018 A*STAR. All rights reserved.
+
+This C source file is part of the SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY
+EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE
+DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+=============================================================================*/
+
+#include "platform.h"
+#include "internals.h"
+
+/*
+ * softposit_addMagsPX2: add two x-bit posits with two exponent bits, stored
+ * left-aligned in 32-bit words, and return the rounded sum (also
+ * left-aligned: results are masked and rounded at bit 32-x).
+ *
+ * Only uiA's sign bit is inspected; when it is set BOTH operands are negated
+ * before the addition and the result is negated again at the end, so callers
+ * are presumably expected to pass operands of equal sign (TODO confirm).
+ * x == 2 is handled as a special case; NOTE(review): the valid range of x is
+ * not checked here.
+ */
+posit_2_t softposit_addMagsPX2( uint_fast32_t uiA, uint_fast32_t uiB, int x ) {
+    int regA;
+    uint_fast64_t frac64A=0, frac64B=0;
+    uint_fast32_t fracA=0, regime, tmp;
+    bool sign, regSA, regSB, rcarry=0, bitNPlusOne=0, bitsMore=0;
+    int_fast8_t kA=0;
+    int_fast32_t expA=0;
+    int_fast16_t shiftRight;
+    union ui32_pX2 uZ;
+
+
+    sign = signP32UI( uiA );
+    if (sign){
+        uiA = -uiA & 0xFFFFFFFF;
+        uiB = -uiB & 0xFFFFFFFF;
+    }
+
+    // order the operands so that uiA holds the larger pattern (XOR swap)
+    if ((int_fast32_t)uiA < (int_fast32_t)uiB){
+        uiA ^= uiB;
+        uiB ^= uiA;
+        uiA ^= uiB;
+    }
+    regSA = signregP32UI( uiA );
+    regSB = signregP32UI( uiB );
+
+    if (x==2){
+        // 2-bit posit: the result is 0x40000000 if either regime bit is set, else 0
+        uZ.ui = (regSA|regSB) ? (0x40000000) : (0x0);
+    }
+    else{
+        //int tmpX = x-2;
+        // Decode A: drop the two top bits and count the rest of the regime
+        // run into kA.
+        tmp = (uiA<<2)&0xFFFFFFFF;
+
+        if (regSA){
+            while (tmp>>31){
+                kA++;
+                tmp= (tmp<<1) & 0xFFFFFFFF;
+
+            }
+        }
+        else{
+            kA=-1;
+            while (!(tmp>>31)){
+                kA--;
+                tmp= (tmp<<1) & 0xFFFFFFFF;
+
+            }
+            tmp&=0x7FFFFFFF;
+        }
+
+        expA = tmp>>29; //to get 2 bits
+        // shift the exponent bits out and put the hidden bit at bit 30
+        frac64A = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32;
+        shiftRight = kA;
+
+        // Decode B; shiftRight accumulates kA - kB while B's regime is scanned.
+        tmp = (uiB<<2) & 0xFFFFFFFF;
+        if (regSB){
+            while (tmp>>31){
+                shiftRight--;
+                tmp= (tmp<<1) & 0xFFFFFFFF;
+            }
+        }
+        else{
+            shiftRight++;
+            while (!(tmp>>31)){
+                shiftRight++;
+                tmp= (tmp<<1) & 0xFFFFFFFF;
+            }
+            tmp&=0x7FFFFFFF;
+        }
+        frac64B = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32;
+        //This is 4kZ + expZ; (where kZ=kA-kB and expZ=expA-expB)
+        shiftRight = (shiftRight<<2) + expA - (tmp>>29);
+
+        //Manage CLANG (LLVM) compiler when shifting right more than number of bits
+        (shiftRight>63) ? (frac64B=0): (frac64B >>= shiftRight); //frac64B >>= shiftRight
+
+        frac64A += frac64B;
+
+        rcarry = 0x8000000000000000 & frac64A; //first left bit
+        if (rcarry){
+            // carry out: increment the exponent, spilling into the regime past 3
+            expA++;
+            if (expA>3){
+                kA ++;
+                expA&=0x3;
+            }
+            frac64A>>=1;
+        }
+
+
+        // Re-encode the regime: regA is the run length, regime its bit pattern.
+        if(kA<0){
+            regA = -kA;
+            regSA = 0;
+            regime = 0x40000000>>regA;
+        }
+        else{
+            regA = kA+1;
+            regSA=1;
+            regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA);
+        }
+
+        if(regA>(x-2)){
+            //max or min pos. exp and frac does not matter.
+            uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x));
+        }
+        else{
+            //remove hidden bits
+            frac64A = (frac64A & 0x3FFFFFFFFFFFFFFF) >>(regA + 2) ; // 2 bits exp
+            fracA = frac64A>>32;
+
+            //regime length is smaller than length of posit
+            if (regA<x){
+                if (regA<=(x-4)){
+                    // regime leaves room for both exponent bits
+                    bitNPlusOne |= (((uint64_t)0x80000000<<(32-x))& frac64A) ;
+                    //expA <<= (28-regA);
+                }
+                else {
+                    // exponent is partly or wholly cut off: its lost bits drive rounding
+                    if (regA==(x-2)){
+                        bitNPlusOne = expA&0x2;
+                        bitsMore = (expA&0x1);
+                        expA = 0;
+                    }
+                    else if (regA==(x-3)){
+                        bitNPlusOne = expA&0x1;
+                        //expA>>=1;
+                        expA &=0x2;
+                    }
+                    if (fracA>0){
+                        fracA=0;
+                        bitsMore =1;
+                    }
+                }
+            }
+            else{
+                regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x));
+                expA=0;
+                fracA=0;
+            }
+            // keep only the top x bits of the fraction word
+            fracA &=((int32_t)0x80000000>>(x-1));
+
+            expA <<= (28-regA);
+            uZ.ui = packToP32UI(regime, expA, fracA);
+
+
+            //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even
+            if (bitNPlusOne){
+                if (((uint64_t)0xFFFFFFFFFFFFFFFF>>(x+1)) & frac64A) bitsMore=1;
+                // the rounding increment is applied at the last kept bit (32-x)
+                uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ;
+            }
+        }
+    }
+
+    if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF;
+    return uZ.p;
+}
+
diff --git a/source/luametatex/source/libraries/softposit/source/s_approxRecipSqrt_1Ks.c b/source/luametatex/source/libraries/softposit/source/s_approxRecipSqrt_1Ks.c
new file mode 100644
index 000000000..f85522321
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/s_approxRecipSqrt_1Ks.c
@@ -0,0 +1,52 @@
+/*============================================================================
+
+This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane) and John Gustafson.
+
+Copyright 2017, 2018 A*STAR. All rights reserved.
+
+This C source file was based on SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of
+California. All rights reserved.
+ +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+=============================================================================*/
+
+#include <stdint.h>
+#include "platform.h"
+
+/*
+ * Two 16-entry constant lookup tables.
+ * NOTE(review): judging by the names they feed an approximate reciprocal
+ * square root; their consumers and the index encoding are not visible in
+ * this file, so confirm against the callers before changing any value.
+ */
+const uint_fast16_t softposit_approxRecipSqrt0[16] = {
+    0xb4c9, 0xffab, 0xaa7d, 0xf11c, 0xa1c5, 0xe4c7, 0x9a43, 0xda29,
+    0x93b5, 0xd0e5, 0x8ded, 0xc8b7, 0x88c6, 0xc16d, 0x8424, 0xbae1
+};
+const uint_fast16_t softposit_approxRecipSqrt1[16] = {
+    0xa5a5, 0xea42, 0x8c21, 0xc62d, 0x788f, 0xaa7f, 0x6928, 0x94b6,
+    0x5cc7, 0x8335, 0x52a6, 0x74e2, 0x4a3e, 0x68fe, 0x432b, 0x5efd
+};
+
diff --git a/source/luametatex/source/libraries/softposit/source/s_mulAddP16.c b/source/luametatex/source/libraries/softposit/source/s_mulAddP16.c
new file mode 100644
index 000000000..9b5cf6020
--- /dev/null
+++ b/source/luametatex/source/libraries/softposit/source/s_mulAddP16.c
@@ -0,0 +1,275 @@
+
+/*============================================================================
+
+This C source file is part of the SoftPosit Posit Arithmetic Package
+by S. H. Leong (Cerlane).
+
+Copyright 2017, 2018 A*STAR. All rights reserved.
+
+This C source file was based on SoftFloat IEEE Floating-Point Arithmetic
+Package, Release 3d, by John R. Hauser.
+
+Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the
+University of California. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+
+ 1. Redistributions of source code must retain the above copyright notice,
+    this list of conditions, and the following disclaimer.
+
+ 2. Redistributions in binary form must reproduce the above copyright notice,
+    this list of conditions, and the following disclaimer in the documentation
+    and/or other materials provided with the distribution.
+
+ 3. Neither the name of the University nor the names of its contributors may
+    be used to endorse or promote products derived from this software without
+    specific prior written permission.
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +//softposit_mulAdd_subC => (uiA*uiB)-uiC +//softposit_mulAdd_subProd => uiC - (uiA*uiB) +//Default is always op==0 +posit16_t softposit_mulAddP16( uint_fast16_t uiA, uint_fast16_t uiB, uint_fast16_t uiC, uint_fast16_t op ){ + + + union ui16_p16 uZ; + uint_fast16_t regZ, fracA, fracZ, regime, tmp; + bool signA, signB, signC, signZ, regSA, regSB, regSC, regSZ, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast8_t expA, expC, expZ; + int_fast16_t kA=0, kC=0, kZ=0, shiftRight; + uint_fast32_t frac32C=0, frac32Z=0; + + //NaR + if ( uiA==0x8000 || uiB==0x8000 || uiC==0x8000 ){ + uZ.ui = 0x8000; + return uZ.p; + } + else if (uiA==0 || uiB==0){ + if (op == softposit_mulAdd_subC) + uZ.ui = -uiC; + else + uZ.ui = uiC; + return uZ.p; + } + + signA = signP16UI( uiA ); + signB = signP16UI( uiB ); + signC = signP16UI( uiC );//^ (op == softposit_mulAdd_subC); + signZ = signA ^ signB;// ^ (op == softposit_mulAdd_subProd); + + if(signA) uiA = (-uiA & 0xFFFF); + if(signB) uiB = (-uiB & 0xFFFF); + if(signC) uiC = (-uiC & 0xFFFF); + + regSA = signregP16UI(uiA); + regSB = signregP16UI(uiB); + regSC = signregP16UI(uiC); + 
+ tmp = (uiA<<2) & 0xFFFF; + if (regSA){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expA = tmp>>14; + fracA = (0x8000 | (tmp<<1)); //use first bit here for hidden bit to get more bits + + tmp = (uiB<<2) & 0xFFFF; + if (regSB){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA--; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expA += tmp>>14; + frac32Z = (uint_fast32_t) fracA * (0x8000 | (tmp <<1)); // first bit hidden bit + + if (expA>1){ + kA++; + expA ^=0x2; + } + + rcarry = frac32Z>>31;//1st bit of frac32Z + if (rcarry){ + if (expA) kA ++; + expA^=1; + frac32Z>>=1; + } + + //Add + if (uiC!=0){ + tmp = (uiC<<2) & 0xFFFF; + if (regSC){ + while (tmp>>15){ + kC++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kC=-1; + while (!(tmp>>15)){ + kC--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expC = tmp>>14; + frac32C = (0x4000 | tmp) << 16; + shiftRight = ((kA-kC)<<1) + (expA-expC); //actually this is the scale + + if (shiftRight<0){ // |uiC| > |Prod Z| + if (shiftRight<=-31){ + bitsMore = 1; + frac32Z = 0; + } + else if (((frac32Z<<(32+shiftRight))&0xFFFFFFFF)!=0) bitsMore = 1; + if (signZ==signC) + frac32Z = frac32C + (frac32Z>>-shiftRight); + else {//different signs + frac32Z = frac32C - (frac32Z>>-shiftRight) ; + signZ=signC; + if (bitsMore) frac32Z-=1; + } + kZ = kC; + expZ = expC; + + } + else if (shiftRight>0){// |uiC| < |Prod| + //if (frac32C&((1<<shiftRight)-1)) bitsMore = 1; + if(shiftRight>=31){ + bitsMore = 1; + frac32C = 0; + } + else if (((frac32C<<(32-shiftRight))&0xFFFFFFFF)!=0) bitsMore = 1; + if (signZ==signC) + frac32Z = frac32Z + (frac32C>>shiftRight); + else{ + frac32Z = frac32Z - (frac32C>>shiftRight); + if (bitsMore) frac32Z-=1; + } + kZ = kA; + expZ = expA; + + } + else{ + if(frac32C==frac32Z && signZ!=signC ){ //check if same number + uZ.ui = 0; + return uZ.p; + } + 
else{ + if (signZ==signC) + frac32Z += frac32C; + else{ + if (frac32Z<frac32C){ + frac32Z = frac32C - frac32Z; + signZ = signC; + } + else{ + frac32Z -= frac32C; + } + } + } + kZ = kA;// actually can be kC too, no diff + expZ = expA; //same here + } + + rcarry = 0x80000000 & frac32Z; //first left bit + if(rcarry){ + if (expZ) kZ ++; + expZ^=1; + if (frac32Z&0x1) bitsMore = 1; + frac32Z=(frac32Z>>1)&0x7FFFFFFF; + } + else { + //for subtract cases + if (frac32Z!=0){ + while((frac32Z>>29)==0){ + kZ--; + frac32Z<<=2; + } + } + bool ecarry = (0x40000000 & frac32Z)>>30; + + if(!ecarry){ + if (expZ==0) kZ--; + expZ^=1; + frac32Z<<=1; + } + } + } + else{ + kZ = kA; + expZ=expA; + } + + if(kZ<0){ + regZ = (-kZ & 0xFFFF); + regSZ = 0; + regime = 0x4000>>regZ; + } + else{ + regZ = kZ+1; + regSZ=1; + regime = 0x7FFF - (0x7FFF>>regZ); + } + + if(regZ>14){ + //max or min pos. exp and frac does not matter. + (regSZ) ? (uZ.ui= 0x7FFF): (uZ.ui=0x1); + } + else{ + //remove hidden bits + frac32Z &= 0x3FFFFFFF; + fracZ = frac32Z >> (regZ + 17); + + if (regZ!=14) bitNPlusOne = (frac32Z>>regZ) & 0x10000; + else if (frac32Z>0){ + fracZ=0; + bitsMore =1; + } + if (regZ==14 && expZ) bitNPlusOne = 1; + uZ.ui = packToP16UI(regime, regZ, expZ, fracZ); + if (bitNPlusOne){ + if ( (frac32Z<<(16-regZ)) &0xFFFFFFFF ) bitsMore =1; + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + + if (signZ) uZ.ui = -uZ.ui & 0xFFFF; + return uZ.p; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/s_mulAddP32.c b/source/luametatex/source/libraries/softposit/source/s_mulAddP32.c new file mode 100644 index 000000000..c1dcb6a27 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/s_mulAddP32.c @@ -0,0 +1,296 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + + +posit32_t + softposit_mulAddP32( + uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC, uint_fast32_t op ){ + + union ui32_p32 uZ; + uint_fast32_t regA, regZ, fracA, fracZ, regime, tmp; + bool signA, signB, signC, signZ, regSA, regSB, regSC, regSZ, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast32_t expA, expC, expZ; + int_fast16_t kA=0, kC=0, kZ=0, shiftRight; + uint_fast64_t frac64C, frac64Z; + + //NaR + if ( uiA==0x80000000 || uiB==0x80000000 || uiC==0x80000000 ){ + uZ.ui = 0x80000000; + return uZ.p; + } + else if (uiA==0 || uiB==0){ + if (op == softposit_mulAdd_subC) + uZ.ui = -uiC; + else + uZ.ui = uiC; + return uZ.p; + } + + signA = signP32UI( uiA ); + signB = signP32UI( uiB ); + signC = signP32UI( uiC );//^ (op == softposit_mulAdd_subC); + signZ = signA ^ signB;// ^ (op == softposit_mulAdd_subProd); + + if(signA) uiA = (-uiA & 0xFFFFFFFF); + if(signB) uiB = (-uiB & 0xFFFFFFFF); + if(signC) uiC = (-uiC & 0xFFFFFFFF); + + regSA = signregP32UI(uiA); + regSB = signregP32UI(uiB); + regSC = signregP32UI(uiC); + + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA = tmp>>29; //to get 2 bits + fracA = ((tmp<<2) | 0x80000000) & 0xFFFFFFFF; + + tmp = (uiB<<2)&0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA--; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA += tmp>>29; + frac64Z = (uint_fast64_t) fracA * (((tmp<<2) | 0x80000000) & 0xFFFFFFFF); + + if (expA>3){ + kA++; + expA&=0x3; // -=4 + } + + rcarry = frac64Z>>63;//1st bit of frac64Z + if (rcarry){ + expA++; + if (expA>3){ + kA ++; + expA&=0x3; + } + frac64Z>>=1; + } + + if (uiC!=0){ + tmp = (uiC<<2)&0xFFFFFFFF; + if 
(regSC){ + while (tmp>>31){ + kC++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kC=-1; + while (!(tmp>>31)){ + kC--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expC = tmp>>29; //to get 2 bits + frac64C = (((tmp<<1) | 0x40000000ULL) & 0x7FFFFFFFULL)<<32; + shiftRight = ((kA-kC)<<2) + (expA-expC); + + if (shiftRight<0){ // |uiC| > |Prod| + if (shiftRight<=-63){ + bitsMore = 1; + frac64Z = 0; + //set bitsMore to one? + } + else if ((frac64Z<<(64+shiftRight))!=0) bitsMore = 1; + if (signZ==signC) + frac64Z = frac64C + (frac64Z>>-shiftRight); + else {//different signs + frac64Z = frac64C - (frac64Z>>-shiftRight) ; + signZ=signC; + if (bitsMore) frac64Z-=1; + } + kZ = kC; + expZ = expC; + + } + else if (shiftRight>0){// |uiC| < |Prod| + //if (frac32C&((1<<shiftRight)-1)) bitsMore = 1; + if(shiftRight>=63) { + bitsMore = 1; + frac64C = 0; + } + else if ((frac64C<<(64-shiftRight))!=0) bitsMore = 1; + if (signZ==signC) + frac64Z = frac64Z + (frac64C>>shiftRight); + else{ + frac64Z = frac64Z - (frac64C>>shiftRight); + if (bitsMore) frac64Z-=1; + } + kZ = kA; + expZ = expA; + + } + else{ + if(frac64C==frac64Z && signZ!=signC ){ //check if same number + uZ.ui = 0; + return uZ.p; + } + else{ + if (signZ==signC) + frac64Z += frac64C; + else{ + if (frac64Z<frac64C){ + frac64Z = frac64C - frac64Z; + signZ = signC; + } + else{ + frac64Z -= frac64C; + } + } + } + kZ = kA;// actually can be kC too, no diff + expZ = expA; //same here + } + rcarry = (uint64_t)frac64Z>>63; //first left bit + + if(rcarry){ + expZ++; + if (expZ>3){ + kZ++; + expZ&=0x3; + } + frac64Z=(frac64Z>>1)&0x7FFFFFFFFFFFFFFF; + } + else { + //for subtract cases + if (frac64Z!=0){ + while((frac64Z>>59)==0){ + kZ--; + frac64Z<<=4; + } + while((frac64Z>>62)==0){ + expZ--; + frac64Z<<=1; + if (expZ<0){ + kZ--; + expZ=3; + } + } + } + } + + } + else{ + kZ = kA; + expZ=expA; + } + if(kZ<0){ + regZ = -kZ; + regSZ = 0; + regime = 0x40000000>>regZ; + } + else{ + regZ = kZ+1; + regSZ=1; + regime = 
0x7FFFFFFF - (0x7FFFFFFF>>regZ); + } + + if(regZ>30){ + //max or min pos. exp and frac does not matter. + (regSZ) ? (uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); + } + else{ + + if (regZ<=28){ + //remove hidden bits + frac64Z &= 0x3FFFFFFFFFFFFFFF; + fracZ = frac64Z >> (regZ + 34);//frac32Z>>16; + bitNPlusOne |= (0x200000000 & (frac64Z >>regZ ) ) ; + expZ <<= (28-regZ); + } + else { + if (regZ==30){ + bitNPlusOne = expZ&0x2; + bitsMore = (expZ&0x1); + expZ = 0; + } + else if (regZ==29){ + bitNPlusOne = expZ&0x1; + expZ>>=1; + } + if (fracZ>0){ + fracZ=0; + bitsMore =1; + + } + } + uZ.ui = packToP32UI(regime, expZ, fracZ); + + if (bitNPlusOne){ + if ( (frac64Z<<(32-regZ)) &0xFFFFFFFFFFFFFFFF ) bitsMore =1; + uZ.ui += (uZ.ui&1) | bitsMore; + } + + } + if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFFFFFFFFFF; + return uZ.p; + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/s_mulAddP8.c b/source/luametatex/source/libraries/softposit/source/s_mulAddP8.c new file mode 100644 index 000000000..a99756678 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/s_mulAddP8.c @@ -0,0 +1,243 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +//softposit_mulAdd_subC => (uiA*uiB)-uiC +//softposit_mulAdd_subProd => uiC - (uiA*uiB) +//Default is always op==0 +posit8_t softposit_mulAddP8( uint_fast8_t uiA, uint_fast8_t uiB, uint_fast8_t uiC, uint_fast8_t op ){ + + + union ui8_p8 uZ; + uint_fast8_t regZ, fracA, fracZ, regime, tmp; + bool signA, signB, signC, signZ, regSA, regSB, regSC, regSZ, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast8_t kA=0, kC=0, kZ=0, shiftRight; + uint_fast16_t frac16C, frac16Z; + + //NaR + if ( uiA==0x80 || uiB==0x80 || uiC==0x80 ){ + uZ.ui = 0x80; + return uZ.p; + } + else if (uiA==0 || uiB==0){ + if (op == softposit_mulAdd_subC) + uZ.ui = -uiC; + else + uZ.ui = uiC; + return uZ.p; + } + + signA = signP8UI( uiA ); + signB = signP8UI( uiB ); + signC = signP8UI( uiC );//^ (op == softposit_mulAdd_subC); + signZ = signA ^ signB;// ^ (op == softposit_mulAdd_subProd); + + if(signA) uiA = (-uiA & 0xFF); + if(signB) uiB = (-uiB & 0xFF); + 
if(signC) uiC = (-uiC & 0xFF); + + regSA = signregP8UI(uiA); + regSB = signregP8UI(uiB); + regSC = signregP8UI(uiC); + + tmp = (uiA<<2) & 0xFF; + if (regSA){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA=-1; + while (!(tmp>>7)){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + fracA = (0x80 | tmp); //use first bit here for hidden bit to get more bits + + tmp = (uiB<<2) & 0xFF; + if (regSB){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA--; + while (!(tmp>>7)){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + frac16Z = (uint_fast16_t) fracA * (0x80 | tmp); + + rcarry = frac16Z>>15;//1st bit of frac16Z + if (rcarry){ + kA++; + frac16Z>>=1; + } + + if (uiC!=0){ + tmp = (uiC<<2) & 0xFF; + if (regSC){ + while (tmp>>7){ + kC++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kC=-1; + while (!(tmp>>7)){ + kC--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + frac16C = (0x80 | tmp) <<7 ; + shiftRight = (kA-kC); + + if (shiftRight<0){ // |uiC| > |Prod| + if (shiftRight<=-15) { + bitsMore = 1; + frac16Z=0; + } + else if (((frac16Z<<(16+shiftRight))&0xFFFF)!=0) bitsMore = 1; + if (signZ==signC) + frac16Z = frac16C + (frac16Z>>-shiftRight); + else {//different signs + frac16Z = frac16C - (frac16Z>>-shiftRight) ; + signZ=signC; + if (bitsMore) frac16Z-=1; + } + kZ = kC; + + } + else if (shiftRight>0){// |uiC| < |Prod| + + if(shiftRight>=15){ + bitsMore = 1; + frac16C = 0; + } + else if (((frac16C<<(16-shiftRight))&0xFFFF)!=0) bitsMore = 1; + if (signZ==signC) + frac16Z += (frac16C>>shiftRight); + else{ + frac16Z -= (frac16C>>shiftRight); + if (bitsMore) frac16Z-=1; + } + kZ = kA; + } + else{ + if(frac16C==frac16Z && signZ!=signC ){ //check if same number + uZ.ui = 0; + return uZ.p; + } + else{ + if (signZ==signC) + frac16Z += frac16C; + else{ + if (frac16Z<frac16C){ + frac16Z = frac16C - frac16Z; + signZ = signC; + } + else{ + frac16Z -= frac16C; + } + } + } + kZ = kA;// actually can be kC too, no diff + } + + rcarry = 
0x8000 & frac16Z; //first left bit + if(rcarry){ + kZ ++; + frac16Z=(frac16Z>>1)&0x7FFF; + } + else { + + //for subtract cases + if (frac16Z!=0){ + while((frac16Z>>14)==0){ + kZ--; + frac16Z<<=1; + } + } + } + + } + else{ + kZ = kA; + } + + if(kZ<0){ + regZ = (-kZ & 0xFF); + regSZ = 0; + regime = 0x40>>regZ; + } + else{ + regZ = kZ+1; + regSZ=1; + regime = 0x7F - (0x7F>>regZ); + } + + if(regZ>6){ + //max or min pos. exp and frac does not matter. + (regSZ) ? (uZ.ui= 0x7F): (uZ.ui=0x1); + } + else{ + //remove hidden bits + frac16Z &= 0x3FFF; + + fracZ = (frac16Z >> regZ) >> 8; + + bitNPlusOne = ((frac16Z>>regZ) & 0x80); + uZ.ui = packToP8UI(regime, fracZ); + + if (bitNPlusOne){ + if ( (frac16Z<<(9-regZ)) &0xFFFF ) bitsMore =1; + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + + if (signZ) uZ.ui = -uZ.ui & 0xFF; + return uZ.p; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/s_mulAddPX1.c b/source/luametatex/source/libraries/softposit/source/s_mulAddPX1.c new file mode 100644 index 000000000..78482d192 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/s_mulAddPX1.c @@ -0,0 +1,341 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +//a*b+c +posit_1_t + softposit_mulAddPX1( + uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC, uint_fast32_t op, int x ){ + + union ui32_pX1 uZ; + int regZ; + uint_fast32_t fracA, fracZ, regime, tmp; + bool signA, signB, signC, signZ, regSA, regSB, regSC, regSZ, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast32_t expA, expC, expZ; + int_fast16_t kA=0, kC=0, kZ=0, shiftRight; + uint_fast64_t frac64C, frac64Z; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + //NaR + if ( uiA==0x80000000 || uiB==0x80000000 || uiC==0x80000000 ){ + uZ.ui = 0x80000000; + return uZ.p; + } + else if (uiA==0 || uiB==0){ + if (op == softposit_mulAdd_subC) + uZ.ui = -uiC; + else + uZ.ui = uiC; + return uZ.p; + } + + signA = signP32UI( uiA ); + signB = signP32UI( uiB ); + signC = signP32UI( uiC );//^ (op == softposit_mulAdd_subC); + signZ = signA ^ signB;// ^ (op == softposit_mulAdd_subProd); + + if(signA) uiA = (-uiA & 0xFFFFFFFF); + if(signB) uiB = (-uiB & 0xFFFFFFFF); + if(signC) uiC = (-uiC & 0xFFFFFFFF); + + regSA = signregP32UI(uiA); + regSB = signregP32UI(uiB); + regSC = signregP32UI(uiC); + + if (x==2){ + uZ.ui = (regSA & regSB) ? (0x40000000) : (0x0); + if (signZ){// i.e. negative prod + if (signC){ + uZ.ui |= uiC; + uZ.ui = -uZ.ui & 0xFFFFFFFF; + } + else{//prod is negative + if (uiC==uZ.ui) uZ.ui = 0; + else uZ.ui =(uZ.ui>0)?( 0xC0000000):(0x40000000); + } + } + else{ //prod : same sign signZ=0 + if (signC){ + if (uiC==uZ.ui) uZ.ui = 0; + else uZ.ui = (uZ.ui>0) ? 
(0x40000000) : (0xC0000000); + } + else{//C is positive + uZ.ui |= uiC; + } + } + return uZ.p; + } + else{ + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA = tmp>>30; //to get 2 bits + fracA = ((tmp<<1) | 0x80000000) & 0xFFFFFFFF; + + tmp = (uiB<<2)&0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA--; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA += tmp>>30; + frac64Z = (uint_fast64_t) fracA * (((tmp<<1) | 0x80000000) & 0xFFFFFFFF); + + if (expA>1){ + kA++; + expA^=0x2; + } + + rcarry = frac64Z>>63;//1st bit of frac64Z + if (rcarry){ + if (expA) kA ++; + expA^=1; + frac64Z>>=1; + } + + if (uiC!=0){ + tmp = (uiC<<2)&0xFFFFFFFF; + if (regSC){ + while (tmp>>31){ + kC++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kC=-1; + while (!(tmp>>31)){ + kC--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } +//printBinary(&expC, 32); + expC = tmp>>30; //to get 1 bits + frac64C = ((tmp | 0x40000000ULL) & 0x7FFFFFFFULL)<<32; + shiftRight = ((kA-kC)<<1) + (expA-expC); +//printf("shiftRight: %d kA: %d kC: %d\n", shiftRight, kA, kC); +//printBinary(&frac64Z, 64); +//printBinary(&frac64C, 64); + if (shiftRight<0){ // |uiC| > |Prod| + if (shiftRight<=-63){ + bitsMore = 1; + frac64Z = 0; + //set bitsMore to one? 
+ } + else if ((frac64Z<<(64+shiftRight))!=0) bitsMore = 1; +//printf("bitsMore: %d\n", bitsMore); + if (signZ==signC) + frac64Z = frac64C + (frac64Z>>-shiftRight); + else {//different signs + frac64Z = frac64C - (frac64Z>>-shiftRight) ; + signZ=signC; + if (bitsMore) frac64Z-=1; + } + kZ = kC; + expZ = expC; +//printf("kZ: %d expZ: %d\n", kZ, expZ); +//printBinary(&frac64Z, 64); + } + else if (shiftRight>0){// |uiC| < |Prod| + //if (frac32C&((1<<shiftRight)-1)) bitsMore = 1; + if(shiftRight>=63) { + bitsMore = 1; + frac64C = 0; + } + else if ((frac64C<<(64-shiftRight))!=0) bitsMore = 1; + if (signZ==signC) + frac64Z = frac64Z + (frac64C>>shiftRight); + else{ + frac64Z = frac64Z - (frac64C>>shiftRight); + if (bitsMore) frac64Z-=1; + } + kZ = kA; + expZ = expA; + + } + else{ + if(frac64C==frac64Z && signZ!=signC ){ //check if same number + uZ.ui = 0; + return uZ.p; + } + else{ + if (signZ==signC) + frac64Z += frac64C; + else{ + if (frac64Z<frac64C){ + frac64Z = frac64C - frac64Z; + signZ = signC; + } + else{ + frac64Z -= frac64C; + } + } + } + kZ = kA;// actually can be kC too, no diff + expZ = expA; //same here + } + rcarry = (uint64_t)frac64Z>>63; //first left bit + + if(rcarry){ + if (expZ) kZ ++; + expZ^=1; + if (frac64Z&0x1) bitsMore = 1; + frac64Z=(frac64Z>>1)&0x7FFFFFFFFFFFFFFF; + } + else { + //for subtract cases + if (frac64Z!=0){ + while((frac64Z>>61)==0){ + kZ--; + frac64Z<<=2; + } + } + bool ecarry = (0x4000000000000000 & frac64Z)>>62; + + if(!ecarry){ + if (expZ==0) kZ--; + expZ^=1; + frac64Z<<=1; + } + +//printf("kZ: %d expZ: %d\n", kZ, expZ); +//printf("frac64Z:\n"); +//printBinary(&frac64Z,64); + + } + + } + else{ + kZ = kA; + expZ=expA; + } + + if(kZ<0){ + regZ = -kZ; + regSZ = 0; + regime = 0x40000000>>regZ; + } + else{ + regZ = kZ+1; + regSZ=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regZ); + } +//printf("regZ: %d regSZ: %d kZ: %d expZ: %d\n", regZ, regSZ, kZ, expZ); +//printBinary(&frac64Z,64); + if(regZ>(x-2)){ + //max or min pos. 
exp and frac does not matter. + uZ.ui=(regSZ) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + + if (regZ<x){ + //remove hidden bits + frac64Z &= 0x3FFFFFFFFFFFFFFF; + fracZ = frac64Z >> (regZ + 33);//frac32Z>>16; +//printBinary(&frac64Z,64); +//printBinary(&fracZ,32); + if (regZ!=(x-2)){ + bitNPlusOne |= (((uint64_t)0x8000000000000000>>(x-regZ-1)) & frac64Z); + bitsMore = ((0x7FFFFFFFFFFFFFFF>>(x-regZ-1)) & frac64Z); + fracZ&=((int32_t)0x80000000>>(x-1)); + } + else if (frac64Z>0){ + fracZ=0; + bitsMore=1; + } + if(regZ==(x-2) && expZ){ + bitNPlusOne=1; + expZ=0; + } + } + else{ + regime=(regSZ) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); + expZ=0; + fracZ=0; + } +//printBinary(&fracZ, 32); +//printf("expZ: %d bitNPlusOne: %d bitsMore; %d\n", expZ, bitNPlusOne, bitsMore); + expZ <<= (29-regZ); + + uZ.ui = packToP32UI(regime, expZ, fracZ); +//printBinary(&uZ.ui, 32); + if (bitNPlusOne){ + //if (((uint64_t)0xFFFFFFFFFFFFFFFF>>(x+1)) & frac64Z) bitsMore=1; + uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; + } + + } + if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; + } +//printf("expA: %d expC: %d expZ: %d kZ: %d\n", expA, expC, expZ, kZ); + + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/s_mulAddPX2.c b/source/luametatex/source/libraries/softposit/source/s_mulAddPX2.c new file mode 100644 index 000000000..3401422e6 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/s_mulAddPX2.c @@ -0,0 +1,345 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. 
All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +//a*b+c +posit_2_t + softposit_mulAddPX2( + uint_fast32_t uiA, uint_fast32_t uiB, uint_fast32_t uiC, uint_fast32_t op, int x ){ + + union ui32_pX2 uZ; + int regZ; + uint_fast32_t fracA, fracZ, regime, tmp; + bool signA, signB, signC, signZ, regSA, regSB, regSC, regSZ, bitNPlusOne=0, bitsMore=0, rcarry; + int_fast32_t expA, expC, expZ; + int_fast16_t kA=0, kC=0, kZ=0, shiftRight; + uint_fast64_t frac64C, frac64Z; + + if (x<2 || x>32){ + uZ.ui = 0x80000000; + return uZ.p; + } + + //NaR + if ( uiA==0x80000000 || uiB==0x80000000 || uiC==0x80000000 ){ + uZ.ui = 0x80000000; + return uZ.p; + } + else if (uiA==0 || uiB==0){ + if (op == softposit_mulAdd_subC) + uZ.ui = -uiC; + else + uZ.ui = uiC; + return uZ.p; + } + + signA = signP32UI( uiA ); + signB = signP32UI( uiB ); + signC = signP32UI( uiC );//^ (op == softposit_mulAdd_subC); + signZ = signA ^ signB;// ^ (op == softposit_mulAdd_subProd); + + if(signA) uiA = (-uiA & 0xFFFFFFFF); + if(signB) uiB = (-uiB & 0xFFFFFFFF); + if(signC) uiC = (-uiC & 0xFFFFFFFF); + + regSA = signregP32UI(uiA); + regSB = signregP32UI(uiB); + regSC = signregP32UI(uiC); + + if (x==2){ + uZ.ui = (regSA & regSB) ? (0x40000000) : (0x0); + if (signZ){// i.e. negative prod + if (signC){ + uZ.ui |= uiC; + uZ.ui = -uZ.ui & 0xFFFFFFFF; + } + else{//prod is negative + if (uiC==uZ.ui) uZ.ui = 0; + else uZ.ui =(uZ.ui>0)?( 0xC0000000):(0x40000000); + } + } + else{ //prod : same sign signZ=0 + if (signC){ + if (uiC==uZ.ui) uZ.ui = 0; + else uZ.ui = (uZ.ui>0) ? 
(0x40000000) : (0xC0000000); + } + else{//C is positive + uZ.ui |= uiC; + } + } + return uZ.p; + } + else{ + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA = tmp>>29; //to get 2 bits + fracA = ((tmp<<2) | 0x80000000) & 0xFFFFFFFF; + + tmp = (uiB<<2)&0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA--; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + expA += tmp>>29; + frac64Z = (uint_fast64_t) fracA * (((tmp<<2) | 0x80000000) & 0xFFFFFFFF); + + if (expA>3){ + kA++; + expA&=0x3; // -=4 + } + + rcarry = frac64Z>>63;//1st bit of frac64Z + if (rcarry){ + expA++; + if (expA>3){ + kA ++; + expA&=0x3; + } + frac64Z>>=1; + } + + if (uiC!=0){ + tmp = (uiC<<2)&0xFFFFFFFF; + if (regSC){ + while (tmp>>31){ + kC++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kC=-1; + while (!(tmp>>31)){ + kC--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + + expC = tmp>>29; //to get 2 bits + frac64C = (((tmp<<1) | 0x40000000ULL) & 0x7FFFFFFFULL)<<32; + shiftRight = ((kA-kC)<<2) + (expA-expC); + + if (shiftRight<0){ // |uiC| > |Prod| + if (shiftRight<=-63){ + bitsMore = 1; + frac64Z = 0; + //set bitsMore to one? 
+ } + else if ((frac64Z<<(64+shiftRight))!=0) bitsMore = 1; + + if (signZ==signC) + frac64Z = frac64C + (frac64Z>>-shiftRight); + else {//different signs + frac64Z = frac64C - (frac64Z>>-shiftRight) ; + signZ=signC; + if (bitsMore) frac64Z-=1; + } + kZ = kC; + expZ = expC; + } + else if (shiftRight>0){// |uiC| < |Prod| + //if (frac32C&((1<<shiftRight)-1)) bitsMore = 1; + if(shiftRight>=63) { + bitsMore = 1; + frac64C = 0; + } + else if ((frac64C<<(64-shiftRight))!=0) bitsMore = 1; + if (signZ==signC) + frac64Z = frac64Z + (frac64C>>shiftRight); + else{ + frac64Z = frac64Z - (frac64C>>shiftRight); + if (bitsMore) frac64Z-=1; + } + kZ = kA; + expZ = expA; + + } + else{ + if(frac64C==frac64Z && signZ!=signC ){ //check if same number + uZ.ui = 0; + return uZ.p; + } + else{ + if (signZ==signC) + frac64Z += frac64C; + else{ + if (frac64Z<frac64C){ + frac64Z = frac64C - frac64Z; + signZ = signC; + } + else{ + frac64Z -= frac64C; + } + } + } + kZ = kA;// actually can be kC too, no diff + expZ = expA; //same here + } + rcarry = (uint64_t)frac64Z>>63; //first left bit + + if(rcarry){ + expZ++; + if (expZ>3){ + kZ++; + expZ&=0x3; + } + frac64Z=(frac64Z>>1)&0x7FFFFFFFFFFFFFFF; + } + else { + //for subtract cases + if (frac64Z!=0){ + while((frac64Z>>59)==0){ + kZ--; + frac64Z<<=4; + } + while((frac64Z>>62)==0){ + expZ--; + frac64Z<<=1; + if (expZ<0){ + kZ--; + expZ=3; + } + } + } + + } + + } + else{ + kZ = kA; + expZ=expA; + } + + if(kZ<0){ + regZ = -kZ; + regSZ = 0; + regime = 0x40000000>>regZ; + } + else{ + regZ = kZ+1; + regSZ=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regZ); + } + + if(regZ>(x-2)){ + //max or min pos. exp and frac does not matter. + uZ.ui=(regSZ) ? 
(0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + + if (regZ<x){ + //remove hidden bits + frac64Z &= 0x3FFFFFFFFFFFFFFF; + fracZ = frac64Z >> (regZ + 34);//frac32Z>>16; + + if (regZ<=(x-4)){ + bitNPlusOne |= (((uint64_t)0x8000000000000000>>(x-regZ-2)) & frac64Z) ; + bitsMore = (((uint64_t)0x7FFFFFFFFFFFFFFF>>(x-regZ-2)) & frac64Z) ; + fracZ &=((int32_t)0x80000000>>(x-1)); + } + else { + + if (regZ==(x-2)){ + + bitNPlusOne = expZ&0x2; + bitsMore = (expZ&0x1); + expZ = 0; + } + else if (regZ==(x-3)){ + + bitNPlusOne = expZ&0x1; + expZ &=0x2; + + } + if (frac64Z>0){ + fracZ=0; + bitsMore =1; + + } + } + } + else{ + regime=(regSZ) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); + expZ=0; + fracZ=0; + } + + expZ <<= (28-regZ); + + uZ.ui = packToP32UI(regime, expZ, fracZ); + + if (bitNPlusOne){ + uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; + } + + } + if (signZ) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; + } + + +} + diff --git a/source/luametatex/source/libraries/softposit/source/s_subMagsP16.c b/source/luametatex/source/libraries/softposit/source/s_subMagsP16.c new file mode 100644 index 000000000..a0c9c33e2 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/s_subMagsP16.c @@ -0,0 +1,175 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. 
Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +#ifdef SOFTPOSIT_EXACT +posit16_t softposit_subMagsP16( uint_fast16_t uiA, uint_fast16_t uiB, bool isExact){ +#else +posit16_t softposit_subMagsP16( uint_fast16_t uiA, uint_fast16_t uiB ){ +#endif + uint_fast16_t regA; + uint_fast32_t frac32A, frac32B; + uint_fast16_t fracA=0, regime, tmp; + bool sign=0, regSA, regSB, ecarry=0, bitNPlusOne=0, bitsMore=0; + int_fast16_t shiftRight; + int_fast8_t kA=0, expA; + union ui16_p16 uZ; + + //Both uiA and uiB are actually the same signs if uiB inherits sign of sub + //Make both positive + sign = signP16UI( uiA ); + (sign)?(uiA = (-uiA & 0xFFFF)): (uiB = (-uiB & 0xFFFF)); + + if (uiA==uiB){ //essential, if not need special handling + uZ.ui = 0; + return uZ.p; + } + if(uiA<uiB){ + uiA ^= uiB; + uiB ^= uiA; + uiA ^= uiB; + (sign) ? (sign = 0 ) : (sign=1); //A becomes B + } + + regSA = signregP16UI( uiA ); + regSB = signregP16UI( uiB ); + + tmp = (uiA<<2) & 0xFFFF; + if (regSA){ + while (tmp>>15){ + kA++; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>15)){ + kA--; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + expA = tmp>>14; + frac32A = (0x4000 | tmp) << 16; + shiftRight = kA; + + tmp = (uiB<<2) & 0xFFFF; + if (regSB){ + while (tmp>>15){ + shiftRight--; + tmp= (tmp<<1) & 0xFFFF; + } + } + else{ + shiftRight++; + while (!(tmp>>15)){ + shiftRight++; + tmp= (tmp<<1) & 0xFFFF; + } + tmp&=0x7FFF; + } + frac32B = (0x4000 | tmp) <<16; + //This is 2kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) + + shiftRight = (shiftRight<<1) + expA - (tmp>>14); + + if (shiftRight!=0){ + if (shiftRight>=29){ + uZ.ui = uiA; + if (sign) uZ.ui = -uZ.ui & 0xFFFF; + return uZ.p; + } + else + frac32B >>= shiftRight; + } + + frac32A -= frac32B; + + while((frac32A>>29)==0){ + kA--; + frac32A<<=2; + } + ecarry = (0x40000000 & frac32A)>>30; + if(!ecarry){ + if (expA==0) kA--; + expA^=1; + frac32A<<=1; + 
} + + if(kA<0){ + regA = (-kA & 0xFFFF); + regSA = 0; + regime = 0x4000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFF - (0x7FFF>>regA); + } + + if(regA>14){ + //max or min pos. exp and frac does not matter. + (regSA) ? (uZ.ui= 0x7FFF): (uZ.ui=0x1); + } + else{ + //remove hidden bits + frac32A = (frac32A & 0x3FFFFFFF) >>(regA + 1) ; + fracA = frac32A>>16; + if (regA!=14) bitNPlusOne = (frac32A>>15) & 0x1; + else if (frac32A>0){ + fracA=0; + bitsMore =1; + } + if (regA==14 && expA) bitNPlusOne = 1; + uZ.ui = packToP16UI(regime, regA, expA, fracA); + if (bitNPlusOne){ + if ( frac32A&0x7FFF ) bitsMore=1; + //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + if (sign) uZ.ui = -uZ.ui & 0xFFFF; + return uZ.p; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/s_subMagsP32.c b/source/luametatex/source/libraries/softposit/source/s_subMagsP32.c new file mode 100644 index 000000000..4771626de --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/s_subMagsP32.c @@ -0,0 +1,194 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit32_t softposit_subMagsP32( uint_fast32_t uiA, uint_fast32_t uiB ) { + + uint_fast16_t regA, regB; + uint_fast64_t frac64A=0, frac64B=0; + uint_fast32_t fracA=0, regime, tmp; + bool sign, regSA, regSB, ecarry=0, bitNPlusOne=0, bitsMore=0; + int_fast8_t kA=0; + int_fast32_t expA=0; + int_fast16_t shiftRight; + union ui32_p32 uZ; + + sign = signP32UI( uiA ); + if (sign) + uiA = -uiA & 0xFFFFFFFF; + else + uiB = -uiB & 0xFFFFFFFF; + + if (uiA==uiB){ //essential, if not need special handling + uZ.ui = 0; + return uZ.p; + } + if ((int_fast32_t)uiA < (int_fast32_t)uiB){ + uiA ^= uiB; + uiB ^= uiA; + uiA ^= uiB; + (sign) ? 
(sign = 0 ) : (sign=1); //A becomes B + } + regSA = signregP32UI( uiA ); + regSB = signregP32UI( uiB ); + + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + + expA = tmp>>29; //to get 2 bits + frac64A = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32; + shiftRight = kA; + + + tmp = (uiB<<2) & 0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + shiftRight--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + + } + else{ + shiftRight++; + while (!(tmp>>31)){ + shiftRight++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + + } + frac64B = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32; + + //This is 4kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) + shiftRight = (shiftRight<<2) + expA - (tmp>>29); + if (shiftRight>63){ + uZ.ui = uiA; + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; + } + else + (frac64B >>= shiftRight); + + frac64A -= frac64B; + + while((frac64A>>59)==0){ + kA--; + frac64A<<=4; + } + ecarry = (0x4000000000000000 & frac64A);//(0x4000000000000000 & frac64A)>>62; + while (!ecarry){ + if (expA==0){ + kA--; + expA=3; + } + else + expA--; + frac64A<<=1; + ecarry = (0x4000000000000000 & frac64A); + } + + if(kA<0){ + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + if(regA>30){ + //max or min pos. exp and frac does not matter. + (regSA) ? 
(uZ.ui= 0x7FFFFFFF): (uZ.ui=0x1); + } + else{ + //remove hidden bits + frac64A = (frac64A & 0x3FFFFFFFFFFFFFFF) >>(regA + 2) ; // 2 bits exp + + fracA = frac64A>>32; + + if (regA<=28){ + bitNPlusOne |= (0x80000000 & frac64A) ; + expA <<= (28-regA); + } + else { + if (regA==30){ + bitNPlusOne = expA&0x2; + bitsMore = (expA&0x1); + expA = 0; + } + else if (regA==29){ + bitNPlusOne = expA&0x1; + expA>>=1; + } + if (fracA>0){ + fracA=0; + bitsMore =1; + } + + } + + uZ.ui = packToP32UI(regime, expA, fracA); + //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + if (bitNPlusOne){ + if (0x7FFFFFFF & frac64A)bitsMore=1; + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/s_subMagsP8.c b/source/luametatex/source/libraries/softposit/source/s_subMagsP8.c new file mode 100644 index 000000000..dd48c4225 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/s_subMagsP8.c @@ -0,0 +1,160 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + + +#include "platform.h" +#include "internals.h" + + +#ifdef SOFTPOSIT_EXACT +posit8_t softposit_subMagsP8( uint_fast8_t uiA, uint_fast8_t uiB, bool isExact){ +#else +posit8_t softposit_subMagsP8( uint_fast8_t uiA, uint_fast8_t uiB ){ +#endif + uint_fast8_t regA; + uint_fast16_t frac16A, frac16B; + uint_fast8_t fracA=0, regime, tmp; + bool sign=0, regSA, regSB, ecarry=0, bitNPlusOne=0, bitsMore=0; + int_fast16_t shiftRight; + int_fast8_t kA=0; + union ui8_p8 uZ; + + + //Both uiA and uiB are actually the same signs if uiB inherits sign of sub + //Make both positive + sign = signP8UI( uiA ); + (sign)? (uiA = (-uiA & 0xFF)): (uiB = (-uiB & 0xFF)); + + if (uiA==uiB){ //essential, if not need special handling + uZ.ui = 0; + return uZ.p; + } + if(uiA<uiB){ + uiA ^= uiB; + uiB ^= uiA; + uiA ^= uiB; + (sign) ? 
(sign = 0 ) : (sign=1); //A becomes B + } + + regSA = signregP8UI( uiA ); + regSB = signregP8UI( uiB ); + + tmp = (uiA<<2) & 0xFF; + if (regSA){ + while (tmp>>7){ + kA++; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + kA=-1; + while (!(tmp>>7)){ + kA--; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + frac16A = (0x80 | tmp) << 7; + shiftRight = kA; + + tmp = (uiB<<2) & 0xFF; + if (regSB){ + while (tmp>>7){ + shiftRight--; + tmp= (tmp<<1) & 0xFF; + } + } + else{ + shiftRight++; + while (!(tmp>>7)){ + shiftRight++; + tmp= (tmp<<1) & 0xFF; + } + tmp&=0x7F; + } + frac16B = (0x80 | tmp) <<7; + + + if (shiftRight>=14){ + uZ.ui = uiA; + if (sign) uZ.ui = -uZ.ui & 0xFFFF; + return uZ.p; + } + else + frac16B >>= shiftRight; + + frac16A -= frac16B; + + while((frac16A>>14)==0){ + kA--; + frac16A<<=1; + } + ecarry = (0x4000 & frac16A)>>14; + if(!ecarry){ + kA--; + frac16A<<=1; + } + + if(kA<0){ + regA = (-kA & 0xFF); + regSA = 0; + regime = 0x40>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7F-(0x7F>>regA); + } + + if(regA>6){ + //max or min pos. exp and frac does not matter. + (regSA) ? (uZ.ui= 0x7F): (uZ.ui=0x1); + } + else{ + frac16A = (frac16A&0x3FFF) >> regA; + fracA = (uint_fast8_t) (frac16A>>8); + bitNPlusOne = (0x80 & frac16A) ; + uZ.ui = packToP8UI(regime, fracA); + + if (bitNPlusOne){ + if (0x7F & frac16A) bitsMore=1; + uZ.ui += (uZ.ui&1) | bitsMore; + } + } + if (sign) uZ.ui = -uZ.ui & 0xFF; + return uZ.p; + +} + diff --git a/source/luametatex/source/libraries/softposit/source/s_subMagsPX1.c b/source/luametatex/source/libraries/softposit/source/s_subMagsPX1.c new file mode 100644 index 000000000..e6b6b4837 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/s_subMagsPX1.c @@ -0,0 +1,202 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_1_t softposit_subMagsPX1( uint_fast32_t uiA, uint_fast32_t uiB, int x ) { + + int regA; + uint_fast64_t frac64A=0, frac64B=0; + uint_fast32_t fracA=0, regime, tmp; + bool sign, regSA, regSB, ecarry=0, bitNPlusOne=0, bitsMore=0; + int_fast8_t kA=0; + int_fast32_t expA=0; + int_fast16_t shiftRight; + union ui32_pX1 uZ; + + sign = signP32UI( uiA ); + if (sign) + uiA = -uiA & 0xFFFFFFFF; + else + uiB = -uiB & 0xFFFFFFFF; + + if (uiA==uiB){ //essential, if not need special handling + uZ.ui = 0; + return uZ.p; + } + if ((int_fast32_t)uiA < (int_fast32_t)uiB){ + uiA ^= uiB; + uiB ^= uiA; + uiA ^= uiB; + (sign) ? (sign = 0 ) : (sign=1); //A becomes B + } + regSA = signregP32UI( uiA ); + regSB = signregP32UI( uiB ); + + if (x==2){ + uZ.ui = (regSA==regSB) ? (0x0): (0x40000000) ; + } + else{ + + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + + + expA = tmp>>30; //to get 1 bits + frac64A = ((0x40000000ULL | tmp) & 0x7FFFFFFFULL) <<32; + shiftRight = kA; + + tmp = (uiB<<2) & 0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + shiftRight--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + + } + else{ + shiftRight++; + while (!(tmp>>31)){ + shiftRight++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + + } + frac64B = ((0x40000000ULL | tmp) & 0x7FFFFFFFULL) <<32; + //This is 4kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) + shiftRight = (shiftRight<<1) + expA - (tmp>>30); + if (shiftRight>60){ + uZ.ui = uiA; + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; + } + else + (frac64B >>= shiftRight); + + frac64A -= frac64B; + + while((frac64A>>61)==0){ + kA--; + frac64A<<=2; + } + ecarry = (0x4000000000000000 & frac64A);//(0x4000000000000000 & frac64A)>>62; + if (!ecarry){ + if (expA==0) 
kA--; + expA^=1; + frac64A<<=1; + } + + if(kA<0){ + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + if(regA>(x-2)){ + //max or min pos. exp and frac does not matter. + uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + //remove hidden bits + frac64A = (frac64A & 0x3FFFFFFFFFFFFFFF) >>(regA + 1) ; // 2 bits exp + fracA = frac64A>>32; + + //regime length is smaller than length of posit + if (regA<x){ + if (regA<=(x-4)){ + bitNPlusOne |= (((uint64_t)0x80000000<<(32-x)) & frac64A) ; + //expA <<= (28-regA); + } + else { + if (regA!=(x-2)) + bitNPlusOne |= (((uint64_t)0x8000000000000000>>x) & frac64A); + else if (frac64A>0){ + fracA=0; + bitsMore =1; + } + if (regA==(x-2) && expA){ + bitNPlusOne = 1; + expA=0; + } + + } + } + else{ + regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); + expA=0; + fracA=0; + } + fracA &=((int32_t)0x80000000>>(x-1)); + + expA <<= (29-regA); + uZ.ui = packToP32UI(regime, expA, fracA); + + //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + if (bitNPlusOne){ + if ((0x7FFFFFFFFFFFFFFF>>x) & frac64A) bitsMore=1; + uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; + } + } + } + + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/s_subMagsPX2.c b/source/luametatex/source/libraries/softposit/source/s_subMagsPX2.c new file mode 100644 index 000000000..4a1106c23 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/s_subMagsPX2.c @@ -0,0 +1,213 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017, 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016, 2017 The Regents of the +University of California. All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include "platform.h" +#include "internals.h" + +posit_2_t softposit_subMagsPX2( uint_fast32_t uiA, uint_fast32_t uiB, int x ) { + + int regA; + uint_fast64_t frac64A=0, frac64B=0; + uint_fast32_t fracA=0, regime, tmp; + bool sign, regSA, regSB, ecarry=0, bitNPlusOne=0, bitsMore=0; + int_fast8_t kA=0; + int_fast32_t expA=0; + int_fast16_t shiftRight; + union ui32_pX2 uZ; + + sign = signP32UI( uiA ); + if (sign) + uiA = -uiA & 0xFFFFFFFF; + else + uiB = -uiB & 0xFFFFFFFF; + + if (uiA==uiB){ //essential, if not need special handling + uZ.ui = 0; + return uZ.p; + } + if ((int_fast32_t)uiA < (int_fast32_t)uiB){ + uiA ^= uiB; + uiB ^= uiA; + uiA ^= uiB; + (sign) ? (sign = 0 ) : (sign=1); //A becomes B + } + regSA = signregP32UI( uiA ); + regSB = signregP32UI( uiB ); + + if (x==2){ + uZ.ui = (regSA==regSB) ? (0x0): (0x40000000) ; + } + else{ + + tmp = (uiA<<2)&0xFFFFFFFF; + if (regSA){ + while (tmp>>31){ + kA++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + } + else{ + kA=-1; + while (!(tmp>>31)){ + kA--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + } + + + expA = tmp>>29; //to get 2 bits + frac64A = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32; + shiftRight = kA; + + tmp = (uiB<<2) & 0xFFFFFFFF; + if (regSB){ + while (tmp>>31){ + shiftRight--; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + + } + else{ + shiftRight++; + while (!(tmp>>31)){ + shiftRight++; + tmp= (tmp<<1) & 0xFFFFFFFF; + } + tmp&=0x7FFFFFFF; + + } + frac64B = ((0x40000000ULL | tmp<<1) & 0x7FFFFFFFULL) <<32; + //This is 4kZ + expZ; (where kZ=kA-kB and expZ=expA-expB) + shiftRight = (shiftRight<<2) + expA - (tmp>>29); + if (shiftRight>63){ + uZ.ui = uiA; + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; + } + else + (frac64B >>= shiftRight); + + frac64A -= frac64B; + + while((frac64A>>59)==0){ + kA--; + frac64A<<=4; + } + ecarry = (0x4000000000000000 & frac64A);//(0x4000000000000000 & frac64A)>>62; + while (!ecarry){ + if 
(expA==0){ + kA--; + expA=3; + } + else + expA--; + frac64A<<=1; + ecarry = (0x4000000000000000 & frac64A); + } + + + + if(kA<0){ + regA = -kA; + regSA = 0; + regime = 0x40000000>>regA; + } + else{ + regA = kA+1; + regSA=1; + regime = 0x7FFFFFFF - (0x7FFFFFFF>>regA); + } + if(regA>(x-2)){ + //max or min pos. exp and frac does not matter. + uZ.ui=(regSA) ? (0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)) ): (0x1 << (32-x)); + } + else{ + //remove hidden bits + frac64A = (frac64A & 0x3FFFFFFFFFFFFFFF) >>(regA + 2) ; // 2 bits exp + fracA = frac64A>>32; + + //regime length is smaller than length of posit + if (regA<x){ + if (regA<=(x-4)){ + bitNPlusOne |= (((uint64_t)0x80000000<<(32-x)) & frac64A) ; + //expA <<= (28-regA); + } + else { + if (regA==(x-2)){ + bitNPlusOne = expA&0x2; + bitsMore = (expA&0x1); + expA = 0; + } + else if (regA==(x-3)){ + bitNPlusOne = expA&0x1; + //expA>>=1; + expA &=0x2; + } + if (fracA>0){ + fracA=0; + bitsMore =1; + } + + } + } + else{ + regime=(regSA) ? (regime & ((int32_t)0x80000000>>(x-1)) ): (regime << (32-x)); + expA=0; + fracA=0; + } + fracA &=((int32_t)0x80000000>>(x-1)); + + expA <<= (28-regA); + uZ.ui = packToP32UI(regime, expA, fracA); + + //n+1 frac bit is 1. Need to check if another bit is 1 too if not round to even + if (bitNPlusOne){ + if (((uint64_t)0xFFFFFFFFFFFFFFFF>>(x+1)) & frac64A) bitsMore=1; + uZ.ui += (uint32_t)(((uZ.ui>>(32-x))&1) | bitsMore) << (32-x) ; + } + } + } + + if (sign) uZ.ui = -uZ.ui & 0xFFFFFFFF; + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/ui32_to_p16.c b/source/luametatex/source/libraries/softposit/source/ui32_to_p16.c new file mode 100644 index 000000000..3e1f0b546 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/ui32_to_p16.c @@ -0,0 +1,74 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. 
+ +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit16_t ui32_to_p16( uint32_t a ){ + int_fast8_t k, log2 = 25; + union ui16_p16 uZ; + uint_fast16_t uiA; + uint_fast32_t expA, mask = 0x02000000, fracA; + + if ( a > 0x08000000 ) uiA = 0x7FFF; //134217729 to MAX_INT round to maxpos + else if ( a > 0x02FFFFFF ) uiA = 0x7FFE; + else if ( a < 2 ) uiA = (a << 14); + else { + fracA = a; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + k = log2 >> 1; + expA = (log2 & 0x1) << (12 - k); + fracA = (fracA ^ mask); + + uiA = (0x7FFF ^ (0x3FFF >> k)) | expA | ( fracA >> (k + 13)); + mask = 0x1000 << k; //bitNPlusOne + if (mask & fracA) { + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + } + } + uZ.ui = uiA; + return uZ.p; +} diff --git a/source/luametatex/source/libraries/softposit/source/ui32_to_p32.c b/source/luametatex/source/libraries/softposit/source/ui32_to_p32.c new file mode 100644 index 000000000..d35ca0429 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/ui32_to_p32.c @@ -0,0 +1,77 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit32_t ui32_to_p32( uint32_t a ) { + int_fast8_t k, log2 = 31;//length of bit (e.g. 
4294966271) in int (32 but because we have only 32 bits, so one bit off to accommodate that fact) + union ui32_p32 uZ; + uint_fast32_t uiA; + uint_fast32_t expA, mask = 0x80000000, fracA; + + if ( a > 4294966271) + uiA = 0x7FC00000; // 4294967296 + else if ( a < 0x2 ) + uiA = (a << 30); + else { + fracA = a; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + k = (log2 >> 2); + expA = (log2 & 0x3) << (27 - k); + fracA = (fracA ^ mask); + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | expA | fracA>>(k+4); + + mask = 0x8 << k; //bitNPlusOne + + if (mask & fracA) + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + + } + uZ.ui = uiA; + return uZ.p; +} + diff --git a/source/luametatex/source/libraries/softposit/source/ui32_to_p8.c b/source/luametatex/source/libraries/softposit/source/ui32_to_p8.c new file mode 100644 index 000000000..5b0c3fefc --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/ui32_to_p8.c @@ -0,0 +1,75 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3.
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit8_t ui32_to_p8( uint32_t a ){ + int_fast8_t k, log2 = 6;//length of bit + union ui8_p8 uZ; + uint_fast8_t uiA; + uint_fast32_t mask = 0x40, fracA; + + if ( a > 48 ) uiA = 0x7F; + else if ( a < 2 ) uiA = (a << 6); + else { + fracA = a; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + + k = log2; + + fracA = (fracA ^ mask); + + uiA = (0x7F ^ (0x3F >> k)) | ( fracA >> (k+1) ) ; + + mask = 0x1 << k; //bitNPlusOne + if (mask & fracA) { + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + } + } + uZ.ui = uiA; + return uZ.p; +} diff --git a/source/luametatex/source/libraries/softposit/source/ui32_to_pX2.c b/source/luametatex/source/libraries/softposit/source/ui32_to_pX2.c new file mode 100644 index 000000000..8cb6c635b --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/ui32_to_pX2.c @@ -0,0 +1,113 @@ + 
+/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit_2_t ui32_to_pX2( uint32_t a, int x ) { + int_fast8_t k, log2 = 31;//length of bit (e.g. 4294966271) in int (32 but because we have only 32 bits, so one bit off to accommodate that fact) + union ui32_pX2 uZ; + uint_fast32_t uiA=0; + uint_fast32_t expA, mask = 0x80000000, fracA; + + //NaR + if (a == 0x80000000 || x<2 || x>32) + uiA = 0x80000000; + else if (x==2){ + if (a>0) uiA=0x40000000; + } + else if ( a > 0xFFFFFBFF){//4294966271 + uiA = 0x7FC00000; // 4294967296 + if (x<12) uiA&=((int32_t)0x80000000>>(x-1)); + } + else if ( a < 0x2 ){ + uiA = (a << 30); + } + else { + fracA = a; + + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + k = (log2 >> 2); + expA = (log2 & 0x3) ; + fracA = (fracA ^ mask); + + if(k>=(x-2)){//maxpos + uiA = 0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)); + + } + else if (k==(x-3)){//bitNPlusOne-> first exp bit //bitLast is zero + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)); + if( (expA & 0x2) && ((expA&0x1) | fracA) ) //bitNPlusOne //bitsMore + uiA |= ((uint32_t)0x80000000>>(x-1)); + } + else if (k==(x-4)){ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | ((expA &0x2)<< (27 - k)); + if(expA&0x1){ + if( (((uint32_t)0x80000000>>(x-1)) & uiA)| fracA) + uiA += ((uint32_t)0x80000000>>(x-1)); + } + } + else if (k==(x-5)){ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)); + mask = 0x8 << (k -x); + if (mask & fracA){ //bitNPlusOne + if (((mask - 1) & fracA) | (expA&0x1)) { + uiA+= ((uint32_t)0x80000000>>(x-1)); + } + } + } + else{ + uiA = ((0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)) | fracA>>(k+4)) & ((int32_t)0x80000000>>(x-1));; + mask = 0x8 << (k-x); //bitNPlusOne + if (mask & fracA) + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA+= ((uint32_t)0x80000000>>(x-1)); + } + + } + uZ.ui = uiA; + return uZ.p; +} + + diff --git
a/source/luametatex/source/libraries/softposit/source/ui64_to_p16.c b/source/luametatex/source/libraries/softposit/source/ui64_to_p16.c new file mode 100644 index 000000000..b6620114c --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/ui64_to_p16.c @@ -0,0 +1,74 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit16_t ui64_to_p16( uint64_t a ) { + int_fast8_t k, log2 = 25; + union ui16_p16 uZ; + uint_fast16_t uiA; + uint_fast64_t expA, mask = 0x0000000002000000, fracA; + + if ( a > 0x0000000008000000 ) uiA = 0x7FFF; //134217729 to MAX_INT round to maxpos + else if ( a > 0x0000000002FFFFFF ) uiA = 0x7FFE; + else if ( a < 2 ) uiA = (a << 14); + else { + fracA = a; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + k = log2 >> 1; + expA = (log2 & 0x1) << (12 - k); + fracA = fracA ^ mask; + uiA = (0x7FFF ^ (0x3FFF >> k)) | expA | (fracA >> (k + 13)); + mask = 0x1000 << k; + if (mask & fracA) { + if ( ((mask - 1) & fracA) | ((mask << 1) & fracA) ) uiA++; + } + } + uZ.ui = uiA; + return uZ.p; + +} diff --git a/source/luametatex/source/libraries/softposit/source/ui64_to_p32.c b/source/luametatex/source/libraries/softposit/source/ui64_to_p32.c new file mode 100644 index 000000000..c32a00548 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/ui64_to_p32.c @@ -0,0 +1,82 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. 
+ +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit32_t ui64_to_p32( uint64_t a ) { + + int_fast8_t k, log2 = 63;//length of bit (e.g. 
18445618173802708992) in int (64 but because we have only 64 bits, so one bit off to accommodate that fact) + union ui32_p32 uZ; + uint_fast64_t uiA; + uint_fast64_t mask = 0x8000000000000000, fracA; + uint_fast32_t expA; + + + if ( a > 18445618173802708991ULL)//0xFFFBFFFFFFFFFFFF is the midpoint + uiA = 0x7FFFC000; // P32: 18446744073709552000 + else if ( a < 0x2 ) + uiA = (a << 30); + else { + fracA = a; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + + k = (log2 >> 2); + + expA = (log2 & 0x3) << (27 - k); + fracA = (fracA ^ mask); + + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | expA | fracA>>(k+36); + + mask = 0x800000000 << k; //bitNPlusOne + + if (mask & fracA) { + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + } + } + uZ.ui = uiA; + return uZ.p; +} diff --git a/source/luametatex/source/libraries/softposit/source/ui64_to_p8.c b/source/luametatex/source/libraries/softposit/source/ui64_to_p8.c new file mode 100644 index 000000000..ec931ffdf --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/ui64_to_p8.c @@ -0,0 +1,73 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane) and John Gustafson. + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit8_t ui64_to_p8( uint64_t a ){ + int_fast8_t k, log2 = 6;//length of bit + union ui8_p8 uZ; + uint_fast8_t uiA; + uint_fast64_t mask = 0x40, fracA; + + if ( a > 48 ) uiA = 0x7F; + else if ( a < 2 ) uiA = (a << 6); + else { + fracA = a; + while ( !(fracA & mask) ) { + log2--; + fracA <<= 1; + } + + k = log2; + fracA = (fracA ^ mask); + uiA = (0x7F ^ (0x3F >> k)) | ( fracA >> (k+1) ) ; + + mask = 0x1 << k; //bitNPlusOne + if (mask & fracA) { + if (((mask - 1) & fracA) | ((mask << 1) & fracA)) uiA++; + } + } + uZ.ui = uiA; + return uZ.p; +} diff --git a/source/luametatex/source/libraries/softposit/source/ui64_to_pX1.c b/source/luametatex/source/libraries/softposit/source/ui64_to_pX1.c new file mode 100644 index 000000000..663983084 --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/ui64_to_pX1.c @@ -0,0 +1,124 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. 
Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit_1_t ui64_to_pX1 ( uint64_t a, int x ) { + int_fast8_t k, log2 = 63;//60;//length of bit (e.g. 
576460752303423488 = 2^59) in int (64 but because we have only 64 bits, so one bit off to accommodate that fact) + union ui32_pX1 uZ; + uint_fast64_t uiA=0; + uint_fast64_t mask = 0x8000000000000000, frac64A; + uint_fast32_t expA; + + //NaR + if (a == 0x8000000000000000 || x<2 || x>32) + uiA = 0x80000000; + else if (x==2){ + if (a>0) uiA=0x40000000; + } + else if ( a > 0x8000000000000000){//576460752303423488 -> wrong number need to change +uint64_t test = ((uint64_t)0x80000000>>(x-1)); +printBinary(&test, 32); + uiA = 0x7FFFFFFF & ((uint64_t)0x80000000>>(x-1)); // 1152921504606847000 + } + else if ( a < 0x2 ) + uiA = (a << 30); + else { + frac64A = a; +//printBinary(&frac64A, 64); + while ( !(frac64A & mask) ) { + log2--; + frac64A <<= 1; + } +//printf("after regime:\n"); +//printBinary(&frac64A, 64); + k = (log2 >> 1); + + expA = (log2 & 0x1) ; +//printf("expA:\n"); +//printBinary(&expA, 32); + frac64A = (frac64A ^ mask) <<1; +//printf("frac64A:\n"); +//printBinary(&frac64A, 64); +//printf("log2: %d k: %d\n", log2, k); + + + + if(k>=(x-2)){//maxpos + uiA = 0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)); + } + else if (k==(x-3)){//bitNPlusOne-> exp bit //bitLast is zero + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)); +//printBinary(&a, 64); +//printBinary(&uiA, 32); + if( (expA & 0x1) && frac64A ) //bitNPlusOne //bitsMore + uiA |= ((uint32_t)0x80000000>>(x-1)); + } + else if (k==(x-4)){ //bitLast = regime terminating bit + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (28 - k)); +//printBinary(&a, 64); +//printBinary(&uiA, 32); + mask = (uint64_t)0x800000000 << (k + 32-x); +//printBinary(&mask, 64); +//printBinary(&frac64A, 64); + if (mask & frac64A){ //bitNPlusOne + if (((mask - 1) & frac64A) | (expA&0x1)) { + uiA+= ((uint32_t)0x80000000>>(x-1)); + } + } + } + else{ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (28 - k)) | ((frac64A>>(k+36)) & ((int32_t)0x80000000>>(x-1))); +//printBinary(&uiA, 32); + mask = (uint64_t)0x800000000 << (k + 32-x); //bitNPlusOne 
position + if (mask & frac64A) { + if (((mask - 1) & frac64A) | ((mask << 1) & frac64A)) { + uiA+= ((uint32_t)0x80000000>>(x-1)); + } + } + } + } + uZ.ui = uiA; + return uZ.p; +} diff --git a/source/luametatex/source/libraries/softposit/source/ui64_to_pX2.c b/source/luametatex/source/libraries/softposit/source/ui64_to_pX2.c new file mode 100644 index 000000000..1250d26fd --- /dev/null +++ b/source/luametatex/source/libraries/softposit/source/ui64_to_pX2.c @@ -0,0 +1,115 @@ + +/*============================================================================ + +This C source file is part of the SoftPosit Posit Arithmetic Package +by S. H. Leong (Cerlane). + +Copyright 2017 2018 A*STAR. All rights reserved. + +This C source file was based on SoftFloat IEEE Floating-Point Arithmetic +Package, Release 3d, by John R. Hauser. + +Copyright 2011, 2012, 2013, 2014, 2015, 2016 The Regents of the University of +California. All Rights Reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions, and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions, and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + 3. Neither the name of the University nor the names of its contributors may + be used to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS "AS IS", AND ANY +EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED +WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE, ARE +DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE FOR ANY +DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES +(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; +LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND +ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +=============================================================================*/ + +#include <stdint.h> + +#include "platform.h" +#include "internals.h" + +posit_2_t ui64_to_pX2 ( uint64_t a, int x ) { + int_fast8_t k, log2 = 63;//length of bit (e.g. 18445618173802708991) in int (64 but because we have only 64 bits, so one bit off to accommodate that fact) + union ui32_pX2 uZ; + uint_fast64_t uiA=0; + uint_fast64_t mask = 0x8000000000000000, frac64A; + uint_fast32_t expA; + + //NaR + if (a == 0x8000000000000000 || x<2 || x>32) + uiA = 0x80000000; + else if (x==2){ + if (a>0) uiA=0x40000000; + } + else if ( a > 0xFFFBFFFFFFFFFFFF){//18445618173802708991 + uiA = 0x7FFFC000; // 18446744073709552000 + if (x<18) uiA&=((int32_t)0x80000000>>(x-1)); + } + else if ( a < 0x2 ) + uiA = (a << 30); + else { + frac64A = a; + while ( !(frac64A & mask) ) { + log2--; + frac64A <<= 1; + } + + k = (log2 >> 2); + + expA = (log2 & 0x3) ; + frac64A = (frac64A ^ mask); + + if(k>=(x-2)){//maxpos + uiA = 0x7FFFFFFF & ((int32_t)0x80000000>>(x-1)); + } + else if (k==(x-3)){//bitNPlusOne-> first exp bit //bitLast is zero + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)); + if( (expA & 0x2) && ((expA&0x1) | frac64A) ) //bitNPlusOne //bitsMore + uiA |= ((uint32_t)0x80000000>>(x-1)); + } + else if (k==(x-4)){ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | ((expA &0x2)<< (27 - k)); + if(expA&0x1){ + if( (((uint32_t)0x80000000>>(x-1)) & uiA)|| frac64A) + uiA += ((uint32_t)0x80000000>>(x-1)); + } + + } + 
else if (k==(x-5)){ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)); + mask = (uint64_t)0x800000000 << (k + 32-x); + if (mask & frac64A){ //bitNPlusOne + if (((mask - 1) & frac64A) | (expA&0x1)) { + uiA+= ((uint32_t)0x80000000>>(x-1)); + } + } + } + else{ + uiA = (0x7FFFFFFF ^ (0x3FFFFFFF >> k)) | (expA<< (27 - k)) | ((frac64A>>(k+36)) & ((int32_t)0x80000000>>(x-1))); + mask = (uint64_t)0x800000000 << (k + 32-x); //bitNPlusOne position + if (mask & frac64A) { + if (((mask - 1) & frac64A) | ((mask << 1) & frac64A)) { + uiA+= ((uint32_t)0x80000000>>(x-1)); + } + } + } + } + uZ.ui = uiA; + return uZ.p; +} diff --git a/source/luametatex/source/lua/lmtenginelib.c b/source/luametatex/source/lua/lmtenginelib.c index 8e99aa29a..b9a647775 100644 --- a/source/luametatex/source/lua/lmtenginelib.c +++ b/source/luametatex/source/lua/lmtenginelib.c @@ -313,6 +313,7 @@ static void enginelib_show_credits(void) " decnumber : Mike Cowlishaw from IBM (one of the number models in MP)\n" " avl : Richard (adapted a bit to fit in)\n" " hjn : Raph Levien (derived from TeX's hyphenator, but adapted again)\n" + " softposit : S. H. Leong (Cerlane)\n" "\n" "The code base contains more names and references. Some libraries are partially adapted or\n" "have been replaced. 
The MetaPost library has additional functionality, some of which is\n" @@ -899,6 +900,7 @@ static const luaL_Reg lmt_libs_extra_function_list[] = { { "xmath", luaopen_xmath }, { "xcomplex", luaopen_xcomplex }, { "xdecimal", luaopen_xdecimal }, + { "posit", luaopen_posit }, { NULL, NULL }, }; diff --git a/source/luametatex/source/lua/lmtfontlib.c b/source/luametatex/source/lua/lmtfontlib.c index 9850d59e7..f743f960e 100644 --- a/source/luametatex/source/lua/lmtfontlib.c +++ b/source/luametatex/source/lua/lmtfontlib.c @@ -332,6 +332,10 @@ static void fontlib_aux_font_char_from_lua(lua_State *L, halfword f, int i, int if (target) { set_charinfo_tag(co, extend_last_tag); } + set_boolean_field_by_index(target, keepbase, 0); + if (target) { + set_charinfo_tag(co, keep_base_tag); + } lua_push_key(parts); if (lua_rawget(L, -2) == LUA_TTABLE) { set_charinfo_tag(co, extensible_tag); diff --git a/source/luametatex/source/lua/lmtinterface.h b/source/luametatex/source/lua/lmtinterface.h index 2636ea2d7..47460acac 100644 --- a/source/luametatex/source/lua/lmtinterface.h +++ b/source/luametatex/source/lua/lmtinterface.h @@ -94,6 +94,8 @@ extern int luaextend_io (lua_State *L); extern int luaextend_string (lua_State *L); extern int luaextend_xcomplex (lua_State *L); +extern int luaopen_posit (lua_State *L); + /*tex We finetune the string hasher. 
When playing with \LUAJIT\ we found that its hashes was pretty @@ -784,6 +786,8 @@ make_lua_key(L, internal_int);\ make_lua_key(L, internal_int_reference);\ make_lua_key(L, internal_mu_glue);\ make_lua_key(L, internal_mu_glue_reference);\ +make_lua_key(L, internal_posit);\ +make_lua_key(L, internal_posit_reference);\ make_lua_key(L, internal_toks);\ make_lua_key(L, internal_toks_reference);\ make_lua_key(L, internaldimension);\ @@ -795,6 +799,7 @@ make_lua_key(L, italic);\ make_lua_key(L, italic_correction);\ make_lua_key(L, italiccorrection);\ make_lua_key(L, iterator_value);\ +make_lua_key(L, keepbase);\ make_lua_key(L, kern);\ make_lua_key(L, kerns);\ make_lua_key(L, language);\ @@ -1008,6 +1013,7 @@ make_lua_key(L, permitall);\ make_lua_key(L, permitglue);\ make_lua_key(L, permitmathreplace);\ make_lua_key(L, phantom);\ +make_lua_key(L, posit);\ make_lua_key(L, post);\ make_lua_key(L, post_linebreak);\ make_lua_key(L, postadjust);\ @@ -1093,6 +1099,8 @@ make_lua_key(L, register_int);\ make_lua_key(L, register_int_reference);\ make_lua_key(L, register_mu_glue);\ make_lua_key(L, register_mu_glue_reference);\ +make_lua_key(L, register_posit);\ +make_lua_key(L, register_posit_reference);\ make_lua_key(L, register_toks);\ make_lua_key(L, register_toks_reference);\ make_lua_key(L, registerdimension);\ @@ -1577,15 +1585,21 @@ inline static int lmt_roundnumber(lua_State *L, int i) return n == 0.0 ? 0 : lround(n); } +inline static unsigned int lmt_uroundnumber(lua_State *L, int i) +{ + double n = lua_tonumber(L, i); + return n == 0.0 ? 0 : (unsigned int) lround(n); +} + inline static int lmt_optroundnumber(lua_State *L, int i, int dflt) { double n = luaL_optnumber(L, i, dflt); return n == 0.0 ? 0 : lround(n); } -inline static unsigned int lmt_uroundnumber(lua_State *L, int i) +inline static int lmt_opturoundnumber(lua_State *L, int i, int dflt) { - double n = lua_tonumber(L, i); + double n = luaL_optnumber(L, i, dflt); return n == 0.0 ? 
0 : (unsigned int) lround(n); } @@ -1609,7 +1623,7 @@ inline static void lua_set_string_by_key(lua_State *L, const char *a, const char lua_setfield(L, -2, a); } -inline static void lua_set_string_by_index(lua_State *L, int a, const char *b) +inline static void lua_set_string_by_index(lua_State *L, lua_Integer a, const char *b) { lua_pushstring(L, b ? b : ""); lua_rawseti(L, -2, a); diff --git a/source/luametatex/source/lua/lmtmplib.c b/source/luametatex/source/lua/lmtmplib.c index e7df5e963..660499e7f 100644 --- a/source/luametatex/source/lua/lmtmplib.c +++ b/source/luametatex/source/lua/lmtmplib.c @@ -49,6 +49,7 @@ static const char *mplib_math_options[] = { "double", "binary", /* not available in luatex */ "decimal", + "posit", NULL }; diff --git a/source/luametatex/source/lua/lmtstatuslib.c b/source/luametatex/source/lua/lmtstatuslib.c index 841ddeec0..ee785e806 100644 --- a/source/luametatex/source/lua/lmtstatuslib.c +++ b/source/luametatex/source/lua/lmtstatuslib.c @@ -329,6 +329,7 @@ static int statslib_getconstants(lua_State *L) lua_set_integer_by_key(L, "max_toks_register_index", max_toks_register_index); lua_set_integer_by_key(L, "max_box_register_index", max_box_register_index); lua_set_integer_by_key(L, "max_int_register_index", max_int_register_index); + lua_set_integer_by_key(L, "max_float_register_index", max_posit_register_index); lua_set_integer_by_key(L, "max_dimen_register_index", max_dimen_register_index); lua_set_integer_by_key(L, "max_attribute_register_index", max_attribute_register_index); lua_set_integer_by_key(L, "max_glue_register_index", max_glue_register_index); diff --git a/source/luametatex/source/lua/lmttexlib.c b/source/luametatex/source/lua/lmttexlib.c index 426ca222b..39afd94fb 100644 --- a/source/luametatex/source/lua/lmttexlib.c +++ b/source/luametatex/source/lua/lmttexlib.c @@ -37,6 +37,7 @@ # define TEX_METATABLE_MUSKIP "tex.muskip" # define TEX_METATABLE_MUGLUE "tex.muglue" # define TEX_METATABLE_DIMEN "tex.dimen" +# define 
TEX_METATABLE_FLOAT "tex.float" # define TEX_METATABLE_COUNT "tex.count" # define TEX_METATABLE_TOKS "tex.toks" # define TEX_METATABLE_BOX "tex.box" @@ -1709,6 +1710,39 @@ static int texlib_getcount(lua_State *L) return 1; } +static int texlib_isfloat(lua_State *L) +{ + return texlib_aux_checked_register(L, register_posit_cmd, register_posit_base, max_posit_register_index, posit_cmd); +} + +static int texlib_setfloat(lua_State *L) +{ + int flags = 0; + int index = 0; + int slot = lmt_check_for_flags(L, 1, &flags, 1, 0); + int state = texlib_aux_check_for_index(L, slot++, "float", &index, internal_posit_cmd, register_posit_cmd, internal_posit_base, register_posit_base, max_posit_register_index, posit_cmd); + if (state >= 0) { + halfword value = tex_double_to_posit(luaL_optnumber(L, slot++, 0)).v; + if (state == 2) { + tex_define(flags, index, posit_cmd, value); + } else { + tex_set_tex_count_register(index, value, flags, state); + if (state == 1 && lua_toboolean(L, slot)) { + tex_update_par_par(internal_posit_cmd, index); + } + } + } + return 0; +} + +static int texlib_getfloat(lua_State *L) +{ + int index; + int state = texlib_aux_check_for_index(L, 1, "float", &index, internal_posit_cmd, register_posit_cmd, internal_posit_base, register_posit_base, max_posit_register_index, posit_cmd); + lua_pushnumber(L, tex_posit_to_double(state >= 0 ? (state == 2 ? eq_value(index) : tex_get_tex_posit_register(index, state)) : 0)); + return 1; +} + static int texlib_isattribute(lua_State *L) { return texlib_aux_checked_register(L, register_attribute_cmd, register_attribute_base, max_attribute_register_index, -1); @@ -2561,6 +2595,29 @@ static int texlib_set_item(lua_State* L, int index, int prefixes) break; } return 1; + case internal_posit_cmd: + case register_posit_cmd: /* ? 
*/ + switch (lua_type(L, slot)) { + case LUA_TNUMBER: + { + int n = tex_double_to_posit(lua_tonumber(L, slot++)).v; + if (cmd == register_posit_cmd) { + tex_word_define(flags, eq_value(cs), n); + } else { + tex_assign_internal_posit_value(lua_toboolean(L, slot) ? add_frozen_flag(flags) : flags, eq_value(cs), n); + } + break; + } + // case userdata: + // { + // /* todo */ + // break; + // } + default: + luaL_error(L, "number expected"); + break; + } + return 1; case internal_dimen_cmd: case register_dimen_cmd: { @@ -2781,6 +2838,9 @@ static int texlib_aux_scan_internal(lua_State *L, int cmd, int code, int values) case attr_val_level: lua_pushinteger(L, cur_val); break; + case posit_val_level: + lua_pushnumber(L, tex_posit_to_double(cur_val)); + break; case glue_val_level: case mu_val_level: switch (values) { @@ -2874,6 +2934,7 @@ static int texlib_aux_someitem(lua_State *L, int code) case font_char_dp_code: case font_char_ic_code: case font_char_ta_code: + case font_char_ba_code: /* these read a char, todo */ break; case font_size_code: @@ -3126,6 +3187,8 @@ static int texlib_get_internal(lua_State *L, int index, int all) case register_int_cmd: case internal_attribute_cmd: case register_attribute_cmd: + case internal_posit_cmd: + case register_posit_cmd: case internal_dimen_cmd: case register_dimen_cmd: case lua_value_cmd: @@ -3134,6 +3197,7 @@ static int texlib_get_internal(lua_State *L, int index, int all) case set_page_property_cmd: case char_given_cmd: case integer_cmd: + case posit_cmd: case dimension_cmd: case gluespec_cmd: case mugluespec_cmd: @@ -4695,6 +4759,46 @@ static int texlib_setintegervalue(lua_State *L) return 0; } +static int texlib_setfloatvalue(lua_State *L) +{ + size_t len; + const char *str = lua_tolstring(L, 1, &len); + if (len > 0) { + int cs = tex_string_locate(str, len, 1); + int flags = 0; + lmt_check_for_flags(L, 3, &flags, 1, 0); + if (tex_define_permitted(cs, flags)) { + unsigned value = tex_double_to_posit(luaL_optnumber(L, 2, 0)).v; + 
if (value >= min_posit && value <= max_posit) { + tex_define(flags, cs, (quarterword) posit_cmd, value); + } else { + tex_formatted_error("lua", "posit only accepts values in the range %i-%i", min_posit, max_posit); + } + } + } + return 0; +} + +static int texlib_setcardinalvalue(lua_State *L) +{ + size_t len; + const char *str = lua_tolstring(L, 1, &len); + if (len > 0) { + int cs = tex_string_locate(str, len, 1); + int flags = 0; + lmt_check_for_flags(L, 3, &flags, 1, 0); + if (tex_define_permitted(cs, flags)) { + unsigned value = lmt_opturoundnumber(L, 2, 0); + if (value >= min_cardinal && value <= max_cardinal) { + tex_define(flags, cs, (quarterword) integer_cmd, value); + } else { + tex_formatted_error("lua", "cardinal only accepts values in the range %d-%d", min_cardinal, max_cardinal); + } + } + } + return 0; +} + static int texlib_setdimensionvalue(lua_State *L) { size_t len; @@ -4727,7 +4831,11 @@ static int texlib_aux_getvalue(lua_State *L, halfword level, halfword cs) halfword value = 0; tex_begin_inserted_list(tex_get_available_token(cs_token_flag + cs)); if (tex_scan_tex_value(level, &value)) { - lua_pushinteger(L, value); + if (level == posit_val_level) { + lua_pushnumber(L, tex_posit_to_double(value)); + } else { + lua_pushinteger(L, value); + } return 1; } } @@ -4760,6 +4868,31 @@ static int texlib_getintegervalue(lua_State *L) /* todo, now has duplicate in to return 1; } +static int texlib_getfloatvalue(lua_State *L) /* todo, now has duplicate in tokenlib */ +{ + if (lua_type(L, 1) == LUA_TSTRING) { + size_t len; + const char *str = lua_tolstring(L, 1, &len); + if (len > 0) { + int cs = tex_string_locate(str, len, 0); + switch (eq_type(cs)) { + case posit_cmd: + lua_pushnumber(L, tex_posit_to_double(eq_value(cs))); + return 1; + case call_cmd: + case protected_call_cmd: + case semi_protected_call_cmd: + return texlib_aux_getvalue(L, posit_val_level, cs); + default: + /* twice a lookup but fast enough for now */ + return texlib_getfloat(L); + } + } 
+ } + lua_pushnil(L); + return 1; +} + static int texlib_getdimensionvalue(lua_State *L) /* todo, now has duplicate in tokenlib */ { if (lua_type(L, 1) == LUA_TSTRING) { @@ -4771,6 +4904,9 @@ static int texlib_getdimensionvalue(lua_State *L) /* todo, now has duplicate in case dimension_cmd: lua_pushinteger(L, eq_value(cs)); return 1; + case posit_cmd: + lua_pushinteger(L, tex_posit_to_dimension(eq_value(cs))); + return 1; case call_cmd: case protected_call_cmd: case semi_protected_call_cmd: @@ -4831,6 +4967,12 @@ static int texlib_setrunstate(lua_State *L) return 0; } +/*tex + todo: Some of these keywords can be removed from the interface keys, saves bytes and never accessed + as key. +*/ + + static int texlib_gethyphenationvalues(lua_State *L) { lua_createtable(L, 2, 17); @@ -4915,6 +5057,9 @@ static int texlib_getnoadoptionvalues(lua_State *L) lua_push_key_at_index(L, stretch, noad_option_stretch); lua_push_key_at_index(L, center, noad_option_center); lua_push_key_at_index(L, scale, noad_option_scale); + lua_push_key_at_index(L, keepbase, noad_option_keep_base); + + // lua_set_string_by_index(L, noad_option_keep_base, "keepbase"); return 1; } @@ -5483,6 +5628,9 @@ static const struct luaL_Reg texlib_function_list[] = { { "isdimen", texlib_isdimen }, { "setdimen", texlib_setdimen }, { "getdimen", texlib_getdimen }, + { "isfloat", texlib_isfloat }, + { "setfloat", texlib_setfloat }, + { "getfloat", texlib_getfloat }, { "isskip", texlib_isskip }, { "setskip", texlib_setskip }, { "getskip", texlib_getskip }, @@ -5597,6 +5745,11 @@ static const struct luaL_Reg texlib_function_list[] = { { "integerdef", texlib_setintegervalue }, { "setintegervalue", texlib_setintegervalue }, { "getintegervalue", texlib_getintegervalue }, + { "positdef", texlib_setfloatvalue }, + { "setpositvalue", texlib_setfloatvalue }, + { "getpositvalue", texlib_getfloatvalue }, + { "setcardinalvalue", texlib_setcardinalvalue }, + { "getcardinalvalue", texlib_getintegervalue }, { "dimensiondef", 
texlib_setdimensionvalue }, { "setdimensionvalue", texlib_setdimensionvalue }, { "getdimensionvalue", texlib_getdimensionvalue }, @@ -5680,6 +5833,7 @@ defineindexers(muskip) defineindexers(muglue) defineindexers(dimen) defineindexers(count) +defineindexers(float) defineindexers(toks) defineindexers(box) defineindexers(sfcode) @@ -5712,6 +5866,7 @@ int luaopen_tex(lua_State *L) lmt_make_table(L, "muglue", TEX_METATABLE_MUGLUE, texlib_index_muglue, texlib_newindex_muglue); lmt_make_table(L, "dimen", TEX_METATABLE_DIMEN, texlib_index_dimen, texlib_newindex_dimen); lmt_make_table(L, "count", TEX_METATABLE_COUNT, texlib_index_count, texlib_newindex_count); + lmt_make_table(L, "posit", TEX_METATABLE_FLOAT, texlib_index_float, texlib_newindex_float); lmt_make_table(L, "toks", TEX_METATABLE_TOKS, texlib_index_toks, texlib_newindex_toks); lmt_make_table(L, "box", TEX_METATABLE_BOX, texlib_index_box, texlib_newindex_box); lmt_make_table(L, "sfcode", TEX_METATABLE_SFCODE, texlib_index_sfcode, texlib_newindex_sfcode); diff --git a/source/luametatex/source/lua/lmttokenlib.c b/source/luametatex/source/lua/lmttokenlib.c index 5259a1478..1b50f18d2 100644 --- a/source/luametatex/source/lua/lmttokenlib.c +++ b/source/luametatex/source/lua/lmttokenlib.c @@ -163,6 +163,8 @@ void lmt_tokenlib_initialize(void) lmt_interface.command_names[register_int_cmd] = (command_item) { .id = register_int_cmd, .lua = lua_key_index(register_int), .name = lua_key(register_int), .kind = register_command_item, .min = 0, .max = max_int_register_index, .base = register_int_base, .fixedvalue = 0 }; lmt_interface.command_names[internal_attribute_cmd] = (command_item) { .id = internal_attribute_cmd, .lua = lua_key_index(internal_attribute), .name = lua_key(internal_attribute), .kind = unused_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[register_attribute_cmd] = (command_item) { .id = register_attribute_cmd, .lua = 
lua_key_index(register_attribute), .name = lua_key(register_attribute), .kind = register_command_item, .min = 0, .max = max_attribute_register_index, .base = register_attribute_base, .fixedvalue = 0 }; + lmt_interface.command_names[internal_posit_cmd] = (command_item) { .id = internal_posit_cmd, .lua = lua_key_index(internal_posit), .name = lua_key(internal_posit), .kind = unused_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; + lmt_interface.command_names[register_posit_cmd] = (command_item) { .id = register_posit_cmd, .lua = lua_key_index(register_posit), .name = lua_key(register_posit), .kind = register_command_item, .min = 0, .max = max_posit_register_index, .base = register_posit_base, .fixedvalue = 0 }; lmt_interface.command_names[internal_dimen_cmd] = (command_item) { .id = internal_dimen_cmd, .lua = lua_key_index(internal_dimen), .name = lua_key(internal_dimen), .kind = internal_command_item, .min = first_dimen_code, .max = last_dimen_code, .base = internal_dimen_base, .fixedvalue = 0 }; lmt_interface.command_names[register_dimen_cmd] = (command_item) { .id = register_dimen_cmd, .lua = lua_key_index(register_dimen), .name = lua_key(register_dimen), .kind = register_command_item, .min = 0, .max = max_dimen_register_index, .base = register_dimen_base, .fixedvalue = 0 }; lmt_interface.command_names[internal_glue_cmd] = (command_item) { .id = internal_glue_cmd, .lua = lua_key_index(internal_glue), .name = lua_key(internal_glue), .kind = internal_command_item, .min = first_glue_code, .max = last_glue_code, .base = internal_glue_base, .fixedvalue = 0 }; @@ -183,6 +185,7 @@ void lmt_tokenlib_initialize(void) lmt_interface.command_names[set_font_cmd] = (command_item) { .id = set_font_cmd, .lua = lua_key_index(set_font), .name = lua_key(set_font), .kind = data_command_item, .min = 0, .max = max_font_size, .base = 0, .fixedvalue = 0 }; lmt_interface.command_names[define_font_cmd] = (command_item) { .id = 
define_font_cmd, .lua = lua_key_index(define_font), .name = lua_key(define_font), .kind = token_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[integer_cmd] = (command_item) { .id = integer_cmd, .lua = lua_key_index(integer), .name = lua_key(integer), .kind = data_command_item, .min = min_integer, .max = max_integer, .base = direct_entry, .fixedvalue = 0 }; + lmt_interface.command_names[posit_cmd] = (command_item) { .id = posit_cmd, .lua = lua_key_index(posit), .name = lua_key(posit), .kind = data_command_item, .min = min_posit, .max = max_posit, .base = direct_entry, .fixedvalue = 0 }; lmt_interface.command_names[dimension_cmd] = (command_item) { .id = dimension_cmd, .lua = lua_key_index(dimension), .name = lua_key(dimension), .kind = data_command_item, .min = min_dimen, .max = max_dimen, .base = direct_entry, .fixedvalue = 0 }; lmt_interface.command_names[gluespec_cmd] = (command_item) { .id = gluespec_cmd, .lua = lua_key_index(gluespec), .name = lua_key(gluespec), .kind = regular_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[mugluespec_cmd] = (command_item) { .id = mugluespec_cmd, .lua = lua_key_index(mugluespec), .name = lua_key(mugluespec), .kind = regular_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; @@ -232,6 +235,8 @@ void lmt_tokenlib_initialize(void) lmt_interface.command_names[register_int_reference_cmd] = (command_item) { .id = register_int_reference_cmd, .lua = lua_key_index(register_int_reference), .name = lua_key(register_int_reference), .kind = regular_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[internal_attribute_reference_cmd] = (command_item) { .id = internal_attribute_reference_cmd, .lua = lua_key_index(internal_attribute_reference), .name = 
lua_key(internal_attribute_reference), .kind = regular_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[register_attribute_reference_cmd] = (command_item) { .id = register_attribute_reference_cmd, .lua = lua_key_index(register_attribute_reference), .name = lua_key(register_attribute_reference), .kind = regular_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; + lmt_interface.command_names[internal_posit_reference_cmd] = (command_item) { .id = internal_posit_reference_cmd, .lua = lua_key_index(internal_posit_reference), .name = lua_key(internal_posit_reference), .kind = regular_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; + lmt_interface.command_names[register_posit_reference_cmd] = (command_item) { .id = register_posit_reference_cmd, .lua = lua_key_index(register_posit_reference), .name = lua_key(register_posit_reference), .kind = regular_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[internal_dimen_reference_cmd] = (command_item) { .id = internal_dimen_reference_cmd, .lua = lua_key_index(internal_dimen_reference), .name = lua_key(internal_dimen_reference), .kind = regular_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[register_dimen_reference_cmd] = (command_item) { .id = register_dimen_reference_cmd, .lua = lua_key_index(register_dimen_reference), .name = lua_key(register_dimen_reference), .kind = regular_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; lmt_interface.command_names[register_dimen_reference_cmd + 1] = (command_item) { .id = unknown_value, .lua = 0, .name = NULL, .kind = unused_command_item, .min = ignore_entry, .max = ignore_entry, .base = ignore_entry, .fixedvalue = 0 }; @@ 
-900,8 +905,9 @@ static int tokenlib_scan_integer(lua_State *L) static int tokenlib_scan_cardinal(lua_State *L) { saved_tex_scanner texstate = tokenlib_aux_save_tex_scanner(); + int eq = lua_toboolean(L, 1); unsigned int v = 0; - tex_scan_cardinal(&v, 0); + tex_scan_cardinal(eq, &v, 0); lua_pushinteger(L, (unsigned int) v); tokenlib_aux_unsave_tex_scanner(texstate); return 1; @@ -1145,20 +1151,36 @@ static int tokenlib_scan_integer_indeed(lua_State *L, int cardinal) tokenlib_aux_goto_first_candidate_x(); } /*tex we collapse as in |scan_dimen| */ - if (! cardinal) { - while(1) { - if (cur_tok == minus_token) { - negative = ! negative; - } else if (cur_tok != plus_token) { - break; - } - tokenlib_aux_goto_first_candidate_x(); +// if (! cardinal) { +// while(1) { +// if (cur_tok == minus_token) { +// negative = ! negative; +// } else if (cur_tok != plus_token) { +// break; +// } +// tokenlib_aux_goto_first_candidate_x(); +// } +// if (negative) { +// luaL_addchar(&b, '-'); +// } +// } else if (cur_tok == minus_token) { +// tex_normal_warning("scanner", "positive number expected, ignoring minus sign"); +// tokenlib_aux_goto_first_candidate_x(); +// } + while(1) { + if (cur_tok == minus_token) { + negative = ! 
negative; + } else if (cur_tok != plus_token) { + break; } - if (negative) { + tokenlib_aux_goto_first_candidate_x(); + } + if (negative) { + if (cardinal) { + tex_normal_warning("scanner", "positive number expected, ignoring minus sign"); + } else { luaL_addchar(&b, '-'); } - } else if (cur_tok == minus_token) { - tex_normal_warning("scanner", "positive number expected, ignoring minus sign"); tokenlib_aux_goto_first_candidate_x(); } if (cur_tok == zero_token) { @@ -3009,9 +3031,9 @@ static int tokenlib_get_meaning(lua_State *L) int chr = eq_value(cs); if (lua_toboolean(L, 2)) { if (lua_toboolean(L, 3)) { - lmt_token_list_to_lua(L, token_link(chr)); + lmt_token_list_to_lua(L, token_link(chr)); /* makes table sub tables */ } else { - lmt_token_register_to_lua(L, chr); + lmt_token_register_to_lua(L, chr); /* makes table */ } } else { char *str = tex_tokenlist_to_tstring(chr, 1, NULL, 0, 0, 0, 0); @@ -3069,6 +3091,8 @@ static void tokenlib_aux_expand_macros_in_tokenlist(halfword p) tex_end_token_list(); } +/* token.getmacro(t[,true][,true] : [also preamble] [only preamble] */ + static int tokenlib_get_macro(lua_State *L) { if (lua_type(L, 1) == LUA_TSTRING) { @@ -3083,7 +3107,7 @@ static int tokenlib_get_macro(lua_State *L) tokenlib_aux_expand_macros_in_tokenlist(chr); // todo: use return value instead of def_ref str = tex_tokenlist_to_tstring(lmt_input_state.def_ref, 1, NULL, 0, 0, 0, 1); } else { - str = tex_tokenlist_to_tstring(chr, 1, NULL, 1, 0, 0, 0); + str = tex_tokenlist_to_tstring(chr, 1, NULL, lua_toboolean(L, 3) ? 2 : 1, 0, 0, 0); } lua_pushstring(L, str ? 
str : ""); return 1; diff --git a/source/luametatex/source/luametatex.h b/source/luametatex/source/luametatex.h index e9a2d3724..eac028cb2 100644 --- a/source/luametatex/source/luametatex.h +++ b/source/luametatex/source/luametatex.h @@ -92,7 +92,7 @@ # define luametatex_version 210 # define luametatex_revision 8 # define luametatex_version_string "2.10.08" -# define luametatex_development_id 20230407 +# define luametatex_development_id 20230426 # define luametatex_name_camelcase "LuaMetaTeX" # define luametatex_name_lowercase "luametatex" @@ -259,6 +259,7 @@ extern version_state_info lmt_version_state; # include "utilities/auxarithmetic.h" # include "utilities/auxmemory.h" +# include "utilities/auxposit.h" # include "utilities/auxzlib.h" # include "tex/texmainbody.h" diff --git a/source/luametatex/source/luaoptional/lmtzint.c b/source/luametatex/source/luaoptional/lmtzint.c index 0783238c0..aa3ebceea 100644 --- a/source/luametatex/source/luaoptional/lmtzint.c +++ b/source/luametatex/source/luaoptional/lmtzint.c @@ -103,6 +103,46 @@ typedef struct { struct zint_vector *vector; } zint_symbol_211; +typedef struct { + int symbology; + float height; + float scale; + int whitespace_width; + int whitespace_height; + int border_width; + int output_options; + char fgcolour[10]; + char bgcolour[10]; + char *fgcolor; + char *bgcolor; + char outfile[256]; + char primary[128]; + int option_1; + int option_2; + int option_3; + int show_hrt; + int fontsize; + int input_mode; + float dpmm; + int eci; + float dot_size; + float guard_descent; + struct zint_structapp structapp; + int warn_level; + int debug; + unsigned char text[128]; + int rows; + int width; + unsigned char encoded_data[200][144]; + float row_height[200]; + char errtxt[100]; + unsigned char *bitmap; + int bitmap_width; + int bitmap_height; + unsigned char *alphamap; + unsigned int bitmap_byte_length; + struct zint_vector *vector; +} zint_symbol_212; typedef struct zint_rectangle zint_rectangle; @@ -182,6 +222,26 @@ 
static void lmt_zint_get_circle_211 lmt->next = zint->next; } +static void lmt_zint_get_circle_212 +( + zint_circle *zint_, + lmt_zint_circle *lmt +) +{ + struct { + float x; + float y; + float diameter; + float width; + int colour; + zint_circle *next; + } *zint = (void*) zint_; + lmt->x = (double) zint->x; + lmt->y = (double) zint->y; + lmt->d = (double) zint->diameter; + lmt->next = zint->next; +} + typedef struct zint_hexagon zint_hexagon; typedef struct { @@ -263,6 +323,12 @@ static zint_vector *lmt_zint_vector_211(zint_symbol *symbol_) return symbol->vector; } +static zint_vector *lmt_zint_vector_212(zint_symbol *symbol_) +{ + zint_symbol_212 *symbol = (void*) symbol_; + return symbol->vector; +} + static void lmt_zint_symbol_set_options_210(zint_symbol *symbol_, int symbology, int input_mode, int output_options, int square) { zint_symbol_210 *symbol = (void*) symbol_; @@ -284,6 +350,17 @@ static void lmt_zint_symbol_set_options_211(zint_symbol *symbol_, int symbology, } } +static void lmt_zint_symbol_set_options_212(zint_symbol *symbol_, int symbology, int input_mode, int output_options, int square) +{ + zint_symbol_212 *symbol = (void*) symbol_; + symbol->symbology = symbology; + symbol->input_mode = input_mode; + symbol->output_options = output_options; + if (square) { + symbol->option_3 = ZINT_DM_SQUARE; + } +} + typedef struct zintlib_state_info { int initialized; @@ -363,10 +440,14 @@ static int zintlib_initialize(lua_State * L) lmt_zint_get_circle = lmt_zint_get_circle_210; lmt_zint_vector = lmt_zint_vector_210; lmt_zint_symbol_set_options = lmt_zint_symbol_set_options_210; - } else { + } else if (zintlib_state.version < 212) { lmt_zint_get_circle = lmt_zint_get_circle_211; lmt_zint_vector = lmt_zint_vector_211; lmt_zint_symbol_set_options = lmt_zint_symbol_set_options_211; + } else { + lmt_zint_get_circle = lmt_zint_get_circle_212; + lmt_zint_vector = lmt_zint_vector_212; + lmt_zint_symbol_set_options = lmt_zint_symbol_set_options_212; } } } diff 
--git a/source/luametatex/source/luarest/lmtposit.c b/source/luametatex/source/luarest/lmtposit.c new file mode 100644 index 000000000..e7f12b7d2 --- /dev/null +++ b/source/luametatex/source/luarest/lmtposit.c @@ -0,0 +1,654 @@ +/* + See license.txt in the root of this project. +*/ + +/*tex + This is an experiment using the posit (unum) implementation from https://gitlab.com/cerlane/SoftPosit#known, which is + afaik the standard. At some point it might be interesting to have this as a MetaPost number plugin too, but first I need + to figure out some helpers (sin, cos, pow etc). + + Watch out: this is just a playground for me and a few others. There are \CONTEXT\ interfaces but these are also quite + experimental. For instance we might move to 64 bit posits. And how about quires? It all depends on developments in + this area. + + The standard is at: + + https://posithub.org/docs/posit_standard-2.pdf + + The reference code can be found here: + + https://gitlab.com/cerlane/SoftPosit + + However, the implementation lags behind the standard: no posit64 and no functions except for a few that add, subtract, + multiply, divide etc. But I will keep an eye on it. + + Todo: check if we used the right functions (also in auxposit). 
+ +*/ + +# include <luametatex.h> + +# define POSIT_METATABLE "posit number" + +inline static posit_t *positlib_push(lua_State *L) +{ + posit p = lua_newuserdatauv(L, sizeof(posit_t), 0); + luaL_setmetatable(L, POSIT_METATABLE); + return p; +} + +inline static int positlib_new(lua_State *L) +{ + posit p = positlib_push(L); + switch (lua_type(L, 1)) { + case LUA_TSTRING: + *p = double_to_posit(lua_tonumber(L, 1)); + break; + case LUA_TNUMBER: + if (lua_isinteger(L, 1)) { + *p = i64_to_posit(lua_tointeger(L, 1)); + } else { + *p = double_to_posit(lua_tonumber(L, 1)); + } + break; + default: + p->v = 0; + break; + } + return 1; +} + +inline static int positlib_toposit(lua_State *L) +{ + if (lua_type(L, 1) == LUA_TNUMBER) { + posit_t p = double_to_posit(lua_tonumber(L, 1)); + lua_pushinteger(L, p.v); + } else { + lua_pushinteger(L, 0); + } + return 1; +} + +inline static int positlib_fromposit(lua_State *L) +{ + if (lua_type(L, 1) == LUA_TNUMBER) { + posit_t p = { .v = lmt_roundnumber(L, 1) }; + lua_pushnumber(L, posit_to_double(p)); + } else { + lua_pushinteger(L, 0); + } + return 1; +} + +/* + This is nicer for the user. Beware, we create a userdata object on the stack so we need to + replace the original non userdata. 
+*/ + +static posit_t *positlib_get(lua_State *L, int i) +{ + switch (lua_type(L, i)) { + case LUA_TUSERDATA: + return (posit) luaL_checkudata(L, i, POSIT_METATABLE); + case LUA_TSTRING: + { + posit p = positlib_push(L); + *p = double_to_posit(lua_tonumber(L, i)); + lua_replace(L, i); + return p; + } + case LUA_TNUMBER: + { + posit p = positlib_push(L); + if (lua_isinteger(L, i)) { + /* take the integer from slot |i|, the slot that is being converted, and not from slot 1 */ + *p = i64_to_posit(lua_tointeger(L, i)); + } else { + *p = double_to_posit(lua_tonumber(L, i)); + } + lua_replace(L, i); + return p; + } + default: + { + posit p = positlib_push(L); + /* no usable value in this slot: start from zero, like |positlib_new| does */ + p->v = 0; + lua_replace(L, i); + return p; + } + } +} + +static int positlib_tostring(lua_State *L) +{ + posit p = positlib_get(L, 1); + double d = posit_to_double(*p); + lua_pushnumber(L, d); + lua_tostring(L, -1); + return 1; +} + + +static int positlib_tonumber(lua_State *L) +{ + posit p = positlib_get(L, 1); + double d = posit_to_double(*p); + lua_pushnumber(L, d); + return 1; +} + +static int positlib_copy(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = *a; + return 1; +} + +static int positlib_eq(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + lua_pushboolean(L, posit_eq(*a, *b)); + return 1; +} + +static int positlib_le(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + lua_pushboolean(L, posit_le(*a, *b)); + return 1; +} + +static int positlib_lt(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + lua_pushboolean(L, posit_lt(*a, *b)); + return 1; +} + +static int positlib_add(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + posit p = positlib_push(L); + *p = posit_add(*a, *b); + return 1; +} + +static int positlib_sub(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + posit p = positlib_push(L); + *p = posit_sub(*a, *b); + return 1; +} + +static int positlib_mul(lua_State *L) +{ + posit a = positlib_get(L, 1); + 
posit b = positlib_get(L, 2); + posit p = positlib_push(L); + *p = posit_mul(*a, *b); + return 1; +} + +static int positlib_div(lua_State *L) { + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + posit p = positlib_push(L); + *p = posit_div(*a, *b); + return 1; +} + +static int positlib_round(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = posit_round_to_integer(*a); + return 1; +} + +static int positlib_rounded(lua_State *L) +{ + posit a = positlib_get(L, 1); + lua_pushinteger(L, posit_to_integer(*a)); + return 1; +} + +static int positlib_integer(lua_State *L) +{ + posit p = positlib_get(L, 1); + lua_pushinteger(L, (lua_Integer) posit_to_i64(*p)); + return 1; +} + +static int positlib_NaN(lua_State *L) +{ + posit p = positlib_get(L, 1); + lua_pushboolean(L, p->v == (uint32_t) 0x80000000); + return 1; +} + +static int positlib_NaR(lua_State *L) +{ + posit p = positlib_get(L, 1); + lua_pushboolean(L, posit_is_NaR(p->v)); + return 1; +} + +// static int positlib_idiv(lua_State *L) { +// return 0; +// } + +// static int positlib_mod(lua_State *L) { +// return 0; +// } + +static int positlib_neg(lua_State* L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = posit_neg(*a); + return 1; +} + +static int positlib_min(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + posit p = positlib_push(L); + *p = posit_lt(*a, *b) ? *a : *b; + return 1; +} + +static int positlib_max(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + posit p = positlib_push(L); + *p = posit_lt(*a, *b) ? 
*b : *a; + return 1; +} + +/* Power operator: the base comes from slot 1 and the exponent from slot 2; the power itself is computed in doubles. */ + +static int positlib_pow(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + posit p = positlib_push(L); + *p = double_to_posit(pow(posit_to_double(*a),posit_to_double(*b))); + return 1; +} + +static int positlib_abs(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = posit_abs(*a); + return 1; +} + +static int positlib_sqrt(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = posit_sqrt(*a); + return 1; +} + +// static int positlib_ln(lua_State *L) +// { +// posit a = positlib_get(L, 1); +// posit p = positlib_push(L); +// *p = double_to_posit(ln(posit_to_double(*a))); +// return 1; +// } + +static int positlib_log10(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(log10(posit_to_double(*a))); + return 1; +} + +static int positlib_log1p(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(log1p(posit_to_double(*a))); + return 1; +} + +static int positlib_log2(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(log2(posit_to_double(*a))); + return 1; +} + +static int positlib_logb(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(logb(posit_to_double(*a))); + return 1; +} + +static int positlib_log(lua_State *L) +{ + if (lua_gettop(L) == 1) { + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(log(posit_to_double(*a))); + } else { + posit a = positlib_get(L, 1); + posit b = positlib_get(L, 2); + posit p = positlib_push(L); + double d = posit_to_double(*a); + double n = posit_to_double(*b); + if (n == 10.0) { + n = (lua_Number) log10(d); + } else if (n == 2.0) { + n = (lua_Number) log2(d); + } else { + n = (lua_Number) log(d) / (lua_Number) log(n); + } + *p = double_to_posit(n); + } + return 1; +} + +static int 
positlib_exp(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(exp(posit_to_double(*a))); + return 1; +} + +static int positlib_exp2(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(exp2(posit_to_double(*a))); + return 1; +} + +static int positlib_ceil(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(ceil(posit_to_double(*a))); + return 1; +} + +static int positlib_floor(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(floor(posit_to_double(*a))); + return 1; +} + +static int positlib_modf(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + posit q = positlib_push(L); + double d; + *q = double_to_posit(modf(posit_to_double(*a),&d)); + *p = double_to_posit(d); + return 2; +} + +static int positlib_sin(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(sin(posit_to_double(*a))); + return 1; +} + +static int positlib_cos(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(cos(posit_to_double(*a))); + return 1; +} + +static int positlib_tan(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(tan(posit_to_double(*a))); + return 1; +} + +static int positlib_asin(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(asin(posit_to_double(*a))); + return 1; +} + +static int positlib_acos(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(acos(posit_to_double(*a))); + return 1; +} + +static int positlib_atan(lua_State *L) +{ + posit a = positlib_get(L, 1); + posit p = positlib_push(L); + *p = double_to_posit(atan(posit_to_double(*a))); + return 1; +} + +static int positlib_rotate(lua_State *L) 
+{
+    /*
+        Rotates the raw bit pattern of the posit at index 1. A positive count (argument
+        2, default 1) rotates to the right, a negative count to the left. The count is
+        reduced modulo the bit width so that no shift by a negative amount or by the
+        full width (both undefined in C) can occur; a left rotation by k is performed
+        as a right rotation by (width - k).
+    */
+    posit a = positlib_get(L, 1);
+    const lua_Integer bits = posit_bits; /* NOTE(review): assumed to be the width of |v| in bits; confirm */
+    lua_Integer n = luaL_optinteger(L, 2, 1) % bits;
+    posit p = positlib_push(L);
+    if (n < 0) {
+        n += bits;
+    }
+    if (n != 0) {
+        p->v = (a->v >> n) | (a->v << (bits - n));
+    } else {
+        p->v = a->v;
+    }
+    return 1;
+}
+
+/*
+    Shifts the raw bit pattern: a positive count shifts right, a negative count shifts
+    left, zero copies. Counts whose magnitude reaches the bit width give zero instead of
+    running into an undefined shift.
+*/
+
+static int positlib_shift(lua_State *L)
+{
+    posit a = positlib_get(L, 1);
+    const lua_Integer bits = posit_bits;
+    lua_Integer shift = luaL_optinteger(L, 2, 1);
+    posit p = positlib_push(L);
+    if (shift >= bits || shift <= -bits) {
+        p->v = 0;
+    } else if (shift > 0) {
+        p->v = (a->v >> shift) & 0xFFFFFFFF;
+    } else if (shift < 0) {
+        p->v = (a->v << -shift) & 0xFFFFFFFF;
+    } else {
+        p->v = a->v;
+    }
+    return 1;
+}
+
+/*
+    Left shift, registered as "__shl". As with Lua's own << operator a negative count
+    shifts in the opposite direction and a count of at least the bit width gives zero.
+*/
+
+static int positlib_left(lua_State *L)
+{
+    posit a = positlib_get(L, 1);
+    const lua_Integer bits = posit_bits;
+    lua_Integer shift = luaL_optinteger(L, 2, 1);
+    posit p = positlib_push(L);
+    if (shift >= bits || shift <= -bits) {
+        p->v = 0;
+    } else if (shift >= 0) {
+        p->v = (a->v << shift) & 0xFFFFFFFF;
+    } else {
+        p->v = (a->v >> -shift) & 0xFFFFFFFF;
+    }
+    return 1;
+}
+
+/*
+    Right shift, registered as "__shr"; the mirror image of positlib_left. The count is
+    used as given: negating it first (as was done before) made every ordinary call shift
+    by a negative amount.
+*/
+
+static int positlib_right(lua_State *L)
+{
+    posit_t *a = positlib_get(L, 1);
+    const lua_Integer bits = posit_bits;
+    lua_Integer shift = luaL_optinteger(L, 2, 1);
+    posit_t *p = positlib_push(L);
+    if (shift >= bits || shift <= -bits) {
+        p->v = 0;
+    } else if (shift >= 0) {
+        p->v = (a->v >> shift) & 0xFFFFFFFF;
+    } else {
+        p->v = (a->v << -shift) & 0xFFFFFFFF;
+    }
+    return 1;
+}
+
+/* Bitwise and, or and xor on the raw bit patterns of the posits at index 1 and 2. */
+
+static int positlib_and(lua_State *L)
+{
+    posit a = positlib_get(L, 1);
+    posit b = positlib_get(L, 2);
+    posit p = positlib_push(L);
+    p->v = (a->v) & (b->v);
+    return 1;
+}
+
+static int positlib_or(lua_State *L)
+{
+    posit a = positlib_get(L, 1);
+    posit b = positlib_get(L, 2);
+    posit p = positlib_push(L);
+    p->v = (a->v) | (b->v);
+    return 1;
+}
+
+static int positlib_xor(lua_State *L)
+{
+    posit a = positlib_get(L, 1);
+    posit b = positlib_get(L, 2);
+    posit p = positlib_push(L);
+    p->v = (a->v) ^ (b->v);
+    return 1;
+}
+
+static const luaL_Reg positlib_function_list[] =
+{
+    /* management */
+    { "new", positlib_new },
+    { "copy", positlib_copy },
+    { "tostring", positlib_tostring },
+    { "tonumber", positlib_tonumber },
+    { "integer", positlib_integer },
+    { "rounded", positlib_rounded },
+    { "toposit", positlib_toposit },
+    { "fromposit", positlib_fromposit },
+    /* operators */
+    { "__add", positlib_add },
+    // { "__idiv", 
positlib_idiv },
+    { "__div", positlib_div },
+    // { "__mod", positlib_mod },
+    { "__eq", positlib_eq },
+    { "__le", positlib_le },
+    { "__lt", positlib_lt },
+    { "__mul", positlib_mul },
+    { "__sub", positlib_sub },
+    { "__unm", positlib_neg },
+    { "__pow", positlib_pow },
+    { "__bor", positlib_or },
+    { "__bxor", positlib_xor },
+    { "__band", positlib_and },
+    { "__shl", positlib_left },
+    { "__shr", positlib_right },
+    /* special values: each function needs its own key, a repeated key lets the later entry replace the earlier one */
+    { "NaN", positlib_NaN },
+    { "NaR", positlib_NaR },
+    /* bitwise helpers */
+    { "bor", positlib_or },
+    { "bxor", positlib_xor },
+    { "band", positlib_and },
+    { "shift", positlib_shift },
+    { "rotate", positlib_rotate },
+    /* */
+    { "min", positlib_min },
+    { "max", positlib_max },
+    { "abs", positlib_abs },
+    /* NOTE(review): "conj" shares the negation function with "__unm"; confirm that this is intended */
+    { "conj", positlib_neg },
+    { "modf", positlib_modf },
+    /* math functions; the commented entries have no implementation yet */
+    { "acos", positlib_acos },
+    // { "acosh", positlib_acosh },
+    { "asin", positlib_asin },
+    // { "asinh", positlib_asinh },
+    { "atan", positlib_atan },
+    // { "atan2", positlib_atan2 },
+    // { "atanh", positlib_atanh },
+    // { "cbrt", positlib_cbrt },
+    { "ceil", positlib_ceil },
+    // { "copysign", positlib_copysign },
+    { "cos", positlib_cos },
+    // { "cosh", positlib_cosh },
+    // { "deg", positlib_deg },
+    // { "erf", positlib_erf },
+    // { "erfc", positlib_erfc },
+    { "exp", positlib_exp },
+    { "exp2", positlib_exp2 },
+    // { "expm1", positlib_expm1 },
+    // { "fabs", positlib_fabs },
+    // { "fdim", positlib_fdim },
+    { "floor", positlib_floor },
+    // { "fma", positlib_fma },
+    // { "fmax", positlib_fmax },
+    // { "fmin", positlib_fmin },
+    // { "fmod", positlib_fmod },
+    // { "frexp", positlib_frexp },
+    // { "gamma", positlib_gamma },
+    // { "hypot", positlib_hypot },
+    // { "isfinite", positlib_isfinite },
+    // { "isinf", positlib_isinf },
+    // { "isnan", positlib_isnan },
+    // { "isnormal", positlib_isnormal },
+    // { "j0", positlib_j0 },
+    // { "j1", positlib_j1 },
+    // { "jn", positlib_jn },
+    // { "ldexp", positlib_ldexp },
+    // { "lgamma", positlib_lgamma },
+    { "log", 
positlib_log },
+    { "log10", positlib_log10 },
+    { "log1p", positlib_log1p },
+    { "log2", positlib_log2 },
+    { "logb", positlib_logb },
+    // { "modf", positlib_modf },
+    // { "nearbyint", positlib_nearbyint },
+    // { "nextafter", positlib_nextafter },
+    { "pow", positlib_pow },
+    // { "rad", positlib_rad },
+    // { "remainder", positlib_remainder },
+    // { "remquo", positlib_fremquo },
+    { "round", positlib_round },
+    // { "scalbn", positlib_scalbn },
+    { "sin", positlib_sin },
+    // { "sinh", positlib_sinh },
+    { "sqrt", positlib_sqrt },
+    { "tan", positlib_tan },
+    // { "tanh", positlib_tanh },
+    // { "tgamma", positlib_tgamma },
+    // { "trunc", positlib_trunc },
+    // { "y0", positlib_y0 },
+    // { "y1", positlib_y1 },
+    // { "yn", positlib_yn },
+    /* */
+    { NULL, NULL },
+};
+
+/*
+    Library entry point. The metatable registered under POSIT_METATABLE receives all
+    functions from positlib_function_list and then doubles as method table: it becomes
+    its own "__index", "__tostring" is an alias for its "tostring" entry and "__name" is
+    set to "posit". The metatable is left on the stack as the single result.
+*/
+
+int luaopen_posit(lua_State *L)
+{
+    luaL_newmetatable(L, POSIT_METATABLE);
+    luaL_setfuncs(L, positlib_function_list, 0);
+    /* metatable.__index = metatable */
+    lua_pushvalue(L, -1);
+    lua_setfield(L, -2, "__index");
+    /* metatable.__tostring = metatable.tostring */
+    lua_getfield(L, -1, "tostring");
+    lua_setfield(L, -2, "__tostring");
+    /* metatable.__name = "posit" */
+    lua_pushliteral(L, "posit");
+    lua_setfield(L, -2, "__name");
+    return 1;
+}
diff --git a/source/luametatex/source/mp/mpc/mp.c b/source/luametatex/source/mp/mpc/mp.c index 2d0320868..ec8b815e2 100644 --- a/source/luametatex/source/mp/mpc/mp.c +++ b/source/luametatex/source/mp/mpc/mp.c @@ -7,6 +7,7 @@ # include "mpmathdouble.h" # include "mpmathbinary.h" # include "mpmathdecimal.h" +# include "mpmathposit.h" # include "mpstrings.h" @@ -1071,6 +1072,9 @@ MP mp_initialize (MP_options * opt) case mp_math_binary_mode: mp->math = mp_initialize_binary_math(mp); break; + case mp_math_posit_mode: + mp->math = mp_initialize_posit_math(mp); + break; default: mp->math = mp_initialize_double_math(mp); break; @@ -1152,6 +1156,9 @@ MP mp_initialize (MP_options * opt) case mp_math_decimal_mode: set_internal_string(mp_number_system_internal, mp_intern(mp, "decimal")); 
break; + case mp_math_posit_mode: + set_internal_string(mp_number_system_internal, mp_intern(mp, "posit")); + break; case mp_math_binary_mode: set_internal_string(mp_number_system_internal, mp_intern(mp, "binary")); break; diff --git a/source/luametatex/source/mp/mpc/mp.h b/source/luametatex/source/mp/mpc/mp.h index 0b8e3def9..d5de57d35 100644 --- a/source/luametatex/source/mp/mpc/mp.h +++ b/source/luametatex/source/mp/mpc/mp.h @@ -6,6 +6,7 @@ # include "avl.h" # include "auxmemory.h" +# include "auxposit.h" # include <string.h> # include <setjmp.h> @@ -19,12 +20,14 @@ typedef enum mp_number_type { mp_angle_type, mp_double_type, mp_binary_type, - mp_decimal_type + mp_decimal_type, + mp_posit_type } mp_number_type; typedef union mp_number_store { void *num; double dval; int val; + posit_t pval; } mp_number_store; typedef struct mp_number_data { mp_number_store data; @@ -76,7 +79,8 @@ typedef enum mp_math_mode { mp_math_scaled_mode, mp_math_double_mode, mp_math_binary_mode, - mp_math_decimal_mode + mp_math_decimal_mode, + mp_math_posit_mode } mp_math_mode; typedef struct mp_knot_data *mp_knot; typedef struct mp_knot_data { diff --git a/source/luametatex/source/mp/mpc/mpmathdecimal.c b/source/luametatex/source/mp/mpc/mpmathdecimal.c index 268217f3e..597fe9f61 100644 --- a/source/luametatex/source/mp/mpc/mpmathdecimal.c +++ b/source/luametatex/source/mp/mpc/mpmathdecimal.c @@ -152,24 +152,63 @@ mp_decimal_info mp_decimal_data = { .last_cached_factorial = 0, .initialized = 0, }; + +void mp_decnumber_check(MP mp, decNumber *dec, decContext *context) +{ + int test = 0; + (void) mp; + if (context->status & DEC_Overflow) { + test = 1; + context->status &= ~DEC_Overflow; + } + if (context->status & DEC_Underflow) { + test = 1; + context->status &= ~DEC_Underflow; + } + if (context->status & DEC_Errors) { + test = 1; + decNumberZero(dec); + } + context->status = 0; + if (decNumberIsSpecial(dec)) { + test = 1; + if (decNumberIsInfinite(dec)) { + if (decNumberIsNegative(dec)) { 
+ decNumberCopyNegate(dec, &mp_decimal_data.EL_GORDO_decNumber); + } else { + decNumberCopy(dec, &mp_decimal_data.EL_GORDO_decNumber); + } + } else { + decNumberZero(dec); + } + } + if (decNumberIsZero(dec) && decNumberIsNegative(dec)) { + decNumberZero(dec); + } + mp->arith_error = test; +} + static void checkZero(decNumber *ret) { if (decNumberIsZero(ret) && decNumberIsNegative(ret)) { decNumberZero(ret); } } + static int decNumberLess(decNumber *a, decNumber *b) { decNumber comp; decNumberCompare(&comp, a, b, &mp_decimal_data.set); return decNumberIsNegative(&comp); } + static int decNumberGreater(decNumber *a, decNumber *b) { decNumber comp; decNumberCompare(&comp, a, b, &mp_decimal_data.set); return decNumberIsPositive(&comp); } + static void decNumberFromDouble(decNumber *A, double B) { char buffer[1000]; @@ -183,6 +222,7 @@ static void decNumberFromDouble(decNumber *A, double B) } decNumberFromString(A, buffer, &mp_decimal_data.set); } + static double decNumberToDouble(decNumber *A) { char *buffer = mp_memory_allocate(A->digits + 14); @@ -197,40 +237,6 @@ static double decNumberToDouble(decNumber *A) } } -void mp_decnumber_check(MP mp, decNumber *dec, decContext *context) -{ - int test = 0; - (void) mp; - if (context->status & DEC_Overflow) { - test = 1; - context->status &= ~DEC_Overflow; - } - if (context->status & DEC_Underflow) { - test = 1; - context->status &= ~DEC_Underflow; - } - if (context->status & DEC_Errors) { - test = 1; - decNumberZero(dec); - } - context->status = 0; - if (decNumberIsSpecial(dec)) { - test = 1; - if (decNumberIsInfinite(dec)) { - if (decNumberIsNegative(dec)) { - decNumberCopyNegate(dec, &mp_decimal_data.EL_GORDO_decNumber); - } else { - decNumberCopy(dec, &mp_decimal_data.EL_GORDO_decNumber); - } - } else { - decNumberZero(dec); - } - } - if (decNumberIsZero(dec) && decNumberIsNegative(dec)) { - decNumberZero(dec); - } - mp->arith_error = test; -} static void decNumberAtan(decNumber *result, decNumber *x_orig, decContext 
*localset) { @@ -1436,7 +1442,6 @@ static void ran_array(long aa[],int n) } } - static void ran_start(long seed) { int t, j; @@ -1484,8 +1489,6 @@ static void ran_start(long seed) mp_decimal_random_data.ptr = &mp_decimal_random_data.started; } -# define ran_arr_next() (*mp_decimal_random_data.ptr>=0? *mp_decimal_random_data.ptr++: ran_arr_cycle()) - static long ran_arr_cycle(void) { if (mp_decimal_random_data.ptr == &mp_decimal_random_data.dummy) { @@ -1528,7 +1531,7 @@ static void mp_next_unif_random (MP mp, mp_number *ret) { decNumber a; decNumber b; - unsigned long int op = (unsigned)ran_arr_next(); + unsigned long int op = (unsigned) (*mp_decimal_random_data.ptr>=0? *mp_decimal_random_data.ptr++: ran_arr_cycle()); (void) mp; decNumberFromInt32(&a, op); decNumberFromInt32(&b, MM); diff --git a/source/luametatex/source/mp/mpc/mpmathdouble.c b/source/luametatex/source/mp/mpc/mpmathdouble.c index ab661d96b..b633cd1bc 100644 --- a/source/luametatex/source/mp/mpc/mpmathdouble.c +++ b/source/luametatex/source/mp/mpc/mpmathdouble.c @@ -42,7 +42,6 @@ # define set_cur_cmd(A) mp->cur_mod_->command = (A) # define set_cur_mod(A) mp->cur_mod_->data.n.data.dval = (A) -static int mp_ab_vs_cd (mp_number *a, mp_number *b, mp_number *c, mp_number *d); static void mp_allocate_abs (MP mp, mp_number *n, mp_number_type t, mp_number *v); static void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v); static void mp_allocate_double (MP mp, mp_number *n, double v); @@ -266,7 +265,7 @@ math_data *mp_initialize_double_math(MP mp) math->md_print = mp_double_print_number; math->md_tostring = mp_double_number_tostring; math->md_modulo = mp_number_modulo; - math->md_ab_vs_cd = mp_ab_vs_cd; + math->md_ab_vs_cd = mp_double_ab_vs_cd; math->md_crossing_point = mp_double_crossing_point; math->md_scan_numeric = mp_double_scan_numeric_token; math->md_scan_fractional = mp_double_scan_fractional_token; @@ -719,9 +718,17 @@ void mp_double_velocity (MP mp, mp_number *ret, mp_number 
*st, mp_number *ct, mp } } -int mp_ab_vs_cd (mp_number *a_orig, mp_number *b_orig, mp_number *c_orig, mp_number *d_orig) +int mp_double_ab_vs_cd (mp_number *a_orig, mp_number *b_orig, mp_number *c_orig, mp_number *d_orig) { - return mp_double_ab_vs_cd(a_orig, b_orig, c_orig, d_orig); + double ab = a_orig->data.dval * b_orig->data.dval; + double cd = c_orig->data.dval * d_orig->data.dval; + if (ab > cd) { + return 1; + } else if (ab < cd) { + return -1; + } else { + return 0; + } } static void mp_double_crossing_point (MP mp, mp_number *ret, mp_number *aa, mp_number *bb, mp_number *cc) @@ -918,8 +925,9 @@ void mp_double_n_arg (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_ori } else { ret->type = mp_angle_type; ret->data.dval = atan2(y_orig->data.dval, x_orig->data.dval) * (180.0 / PI) * angle_multiplier; - if (ret->data.dval == -0.0) - ret->data.dval = 0.0; + if (ret->data.dval == -0.0) { + ret->data.dval = 0.0; + } } } @@ -1031,8 +1039,6 @@ static void mp_double_aux_ran_start(long seed) mp_double_random_data.ptr = &mp_double_random_data.started; } -# define mp_double_aux_ran_arr_next() (*mp_double_random_data.ptr>=0? *mp_double_random_data.ptr++: mp_double_aux_ran_arr_cycle()) - static long mp_double_aux_ran_arr_cycle(void) { if (mp_double_random_data.ptr == &mp_double_random_data.dummy) { @@ -1075,7 +1081,7 @@ void mp_number_modulo(mp_number *a, mp_number *b) static void mp_next_unif_random (MP mp, mp_number *ret) { - unsigned long int op = (unsigned) mp_double_aux_ran_arr_next(); + unsigned long int op = (unsigned) (*mp_double_random_data.ptr>=0? 
*mp_double_random_data.ptr++: mp_double_aux_ran_arr_cycle()); double a = op / (MM * 1.0); (void) mp; ret->data.dval = a; @@ -1146,15 +1152,3 @@ static void mp_double_m_norm_rand (MP mp, mp_number *ret) mp_free_number(mp, &u); } -int mp_double_ab_vs_cd (mp_number *a_orig, mp_number *b_orig, mp_number *c_orig, mp_number *d_orig) -{ - double ab = a_orig->data.dval * b_orig->data.dval; - double cd = c_orig->data.dval * d_orig->data.dval; - if (ab > cd) { - return 1; - } else if (ab < cd) { - return -1; - } else { - return 0; - } -} diff --git a/source/luametatex/source/mp/mpc/mpmathposit.c b/source/luametatex/source/mp/mpc/mpmathposit.c new file mode 100644 index 000000000..138f2a02a --- /dev/null +++ b/source/luametatex/source/mp/mpc/mpmathposit.c @@ -0,0 +1,1308 @@ +/* This file is generated by "mtxrun --script "mtx-wtoc.lua" from the metapost cweb files. */ + + +# include "mpconfig.h" +# include "mpmathposit.h" + + +# define mp_fraction_multiplier 4096 +# define mp_angle_multiplier 16 +# define mp_warning_limit pow(2.0,52) +# define odd(A) (abs(A)%2==1) +# define two_to_the(A) (1<<(unsigned)(A)) +# define set_cur_cmd(A) mp->cur_mod_->command = (A) +# define set_cur_mod(A) mp->cur_mod_->data.n.data.pval = (A) + +static void mp_allocate_abs (MP mp, mp_number *n, mp_number_type t, mp_number *v); +static void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v); +static void mp_allocate_double (MP mp, mp_number *n, double v); +static void mp_allocate_number (MP mp, mp_number *n, mp_number_type t); +static int mp_posit_ab_vs_cd (mp_number *a, mp_number *b, mp_number *c, mp_number *d); +static void mp_posit_abs (mp_number *A); +static void mp_posit_crossing_point (MP mp, mp_number *ret, mp_number *a, mp_number *b, mp_number *c); +static void mp_posit_fraction_to_round_scaled (mp_number *x); +static void mp_posit_m_exp (MP mp, mp_number *ret, mp_number *x_orig); +static void mp_posit_m_log (MP mp, mp_number *ret, mp_number *x_orig); +static void 
mp_posit_m_norm_rand (MP mp, mp_number *ret); +static void mp_posit_m_unif_rand (MP mp, mp_number *ret, mp_number *x_orig); +static void mp_posit_n_arg (MP mp, mp_number *ret, mp_number *x, mp_number *y); +static void mp_posit_number_make_fraction (MP mp, mp_number *r, mp_number *p, mp_number *q); +static void mp_posit_number_make_scaled (MP mp, mp_number *r, mp_number *p, mp_number *q); +static void mp_posit_number_take_fraction (MP mp, mp_number *r, mp_number *p, mp_number *q); +static void mp_posit_number_take_scaled (MP mp, mp_number *r, mp_number *p, mp_number *q); +static void mp_posit_power_of (MP mp, mp_number *r, mp_number *a, mp_number *b); +static void mp_posit_print_number (MP mp, mp_number *n); +static void mp_posit_pyth_add (MP mp, mp_number *r, mp_number *a, mp_number *b); +static void mp_posit_pyth_sub (MP mp, mp_number *r, mp_number *a, mp_number *b); +static void mp_posit_scan_fractional_token (MP mp, int n); +static void mp_posit_scan_numeric_token (MP mp, int n); +static void mp_posit_set_precision (MP mp); +static void mp_posit_sin_cos (MP mp, mp_number *z_orig, mp_number *n_cos, mp_number *n_sin); +static void mp_posit_slow_add (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_orig); +static void mp_posit_square_rt (MP mp, mp_number *ret, mp_number *x_orig); +static void mp_posit_velocity (MP mp, mp_number *ret, mp_number *st, mp_number *ct, mp_number *sf, mp_number *cf, mp_number *t); +static void mp_free_posit_math (MP mp); +static void mp_free_number (MP mp, mp_number *n); +static void mp_init_randoms (MP mp, int seed); +static void mp_number_abs_clone (mp_number *A, mp_number *B); +static void mp_number_add (mp_number *A, mp_number *B); +static void mp_number_add_scaled (mp_number *A, int B); +static void mp_number_angle_to_scaled (mp_number *A); +static void mp_number_clone (mp_number *A, mp_number *B); +static void mp_number_divide_int (mp_number *A, int B); +static void mp_number_double (mp_number *A); +static int mp_number_equal 
(mp_number *A, mp_number *B); +static void mp_number_floor (mp_number *i); +static void mp_number_fraction_to_scaled (mp_number *A); +static int mp_number_greater (mp_number *A, mp_number *B); +static void mp_number_half (mp_number *A); +static int mp_number_less (mp_number *A, mp_number *B); +static void mp_number_modulo (mp_number *a, mp_number *b); +static void mp_number_multiply_int (mp_number *A, int B); +static void mp_number_negate (mp_number *A); +static void mp_number_negated_clone (mp_number *A, mp_number *B); +static int mp_number_nonequalabs (mp_number *A, mp_number *B); +static int mp_number_odd (mp_number *A); +static void mp_number_scaled_to_angle (mp_number *A); +static void mp_number_scaled_to_fraction (mp_number *A); +static void mp_number_subtract (mp_number *A, mp_number *B); +static void mp_number_swap (mp_number *A, mp_number *B); +static int mp_number_to_boolean (mp_number *A); +static double mp_number_to_double (mp_number *A); +static int mp_number_to_int (mp_number *A); +static int mp_number_to_scaled (mp_number *A); +static int mp_round_unscaled (mp_number *x_orig); +static void mp_set_posit_from_addition (mp_number *A, mp_number *B, mp_number *C); +static void mp_set_posit_from_boolean (mp_number *A, int B); +static void mp_set_posit_from_div (mp_number *A, mp_number *B, mp_number *C); +static void mp_set_posit_from_double (mp_number *A, double B); +static void mp_set_posit_from_int (mp_number *A, int B); +static void mp_set_posit_from_int_div (mp_number *A, mp_number *B, int C); +static void mp_set_posit_from_int_mul (mp_number *A, mp_number *B, int C); +static void mp_set_posit_from_mul (mp_number *A, mp_number *B, mp_number *C); +static void mp_set_posit_from_of_the_way (MP mp, mp_number *A, mp_number *t, mp_number *B, mp_number *C); +static void mp_set_posit_from_scaled (mp_number *A, int B); +static void mp_set_posit_from_subtraction (mp_number *A, mp_number *B, mp_number *C); +static void mp_set_posit_half_from_addition (mp_number 
*A, mp_number *B, mp_number *C);
+static void mp_set_posit_half_from_subtraction (mp_number *A, mp_number *B, mp_number *C);
+static void mp_wrapup_numeric_token (MP mp, unsigned char *start, unsigned char *stop);
+static char *mp_posit_number_tostring (MP mp, mp_number *n);
+/*
+    Posit constants shared by this number backend. They are computed by
+    mp_initialize_posit_math, which uses |initialized| as a guard so that the work is
+    done only once.
+*/
+typedef struct mp_posit_info {
+    /* small integers */
+    posit_t unity;
+    posit_t zero;
+    posit_t one;
+    posit_t two;
+    posit_t three;
+    posit_t four;
+    posit_t five;
+    posit_t eight;
+    posit_t seven;
+    posit_t sixteen;
+    posit_t half_unit;
+    posit_t minusone;
+    posit_t three_quarter_unit;
+    /* powers of two used as divisors and multipliers */
+    posit_t d16;
+    posit_t d64;
+    posit_t d256;
+    posit_t d4096;
+    posit_t d65536;
+    /* plus (dp) and minus (dm) 90, 180, 270 and 360 */
+    posit_t dp90;
+    posit_t dp180;
+    posit_t dp270;
+    posit_t dp360;
+    posit_t dm90;
+    posit_t dm180;
+    posit_t dm270;
+    posit_t dm360;
+    /* the scale factors (mp_fraction_multiplier, mp_angle_multiplier) and values derived from them */
+    posit_t fraction_multiplier;
+    posit_t negative_fraction_multiplier;
+    posit_t angle_multiplier;
+    posit_t fraction_one;
+    posit_t fraction_two;
+    posit_t fraction_three;
+    posit_t fraction_four;
+    posit_t fraction_half;
+    posit_t fraction_one_and_half;
+    posit_t one_eighty_degrees;
+    posit_t negative_one_eighty_degrees;
+    posit_t three_sixty_degrees;
+    posit_t no_crossing;
+    posit_t one_crossing;
+    posit_t zero_crossing;
+    posit_t error_correction;
+    posit_t pi;
+    posit_t pi_divided_by_180;
+    posit_t epsilon;
+    /* the largest value that is handed out (as md_inf_t), its negation and a third of it */
+    posit_t EL_GORDO;
+    posit_t negative_EL_GORDO;
+    posit_t one_third_EL_GORDO;
+    posit_t coef;
+    posit_t coef_bound;
+    /* thresholds */
+    posit_t scaled_threshold;
+    posit_t fraction_threshold;
+    posit_t equation_threshold;
+    posit_t near_zero_angle;
+    posit_t p_over_v_threshold;
+    posit_t warning_limit;
+    /* precomputed products, named after the expression they hold */
+    posit_t sqrt_two_mul_fraction_one;
+    posit_t sqrt_five_minus_one_mul_fraction_one_and_half;
+    posit_t three_minus_sqrt_five_mul_fraction_one_and_half;
+    posit_t d180_divided_by_pi_mul_angle;
+    /* nonzero once the members above have been computed */
+    int initialized;
+} mp_posit_info;
+/* Only |initialized| is spelled out; the other members of this static object start out zeroed. */
+mp_posit_info mp_posit_data = {
+    .initialized = 0,
+};
+/* (p / q) multiplied by the fraction multiplier */
+inline static posit_t mp_posit_make_fraction (posit_t p, posit_t q) { return posit_mul(posit_div(p,q), mp_posit_data.fraction_multiplier); } 
+/* (p * q) divided by the fraction multiplier */
+inline static posit_t mp_posit_take_fraction (posit_t p, posit_t q) { return posit_div(posit_mul(p,q), mp_posit_data.fraction_multiplier); }
+/* plain division: in this backend scaled values carry no extra factor (unity is 1) */
+inline static posit_t mp_posit_make_scaled (posit_t p, posit_t q) { return posit_div(p,q); }
+/*
+    Creates the function and constant table for the posit number system. The shared
+    constants in |mp_posit_data| are computed on the first call only; every call
+    allocates a fresh |math_data|, allocates and fills its numeric constants and hooks
+    up the posit implementations of all operations. The caller owns the result; it is
+    released through |md_free_math| (mp_free_posit_math).
+*/
+math_data *mp_initialize_posit_math(MP mp)
+{
+    math_data *math = (math_data *) mp_memory_allocate(sizeof(math_data));
+    if (! mp_posit_data.initialized) {
+        mp_posit_data.initialized = 1;
+        mp_posit_data.unity = integer_to_posit(1);
+        mp_posit_data.zero = integer_to_posit(0);
+        mp_posit_data.one = integer_to_posit(1);
+        mp_posit_data.two = integer_to_posit(2);
+        mp_posit_data.three = integer_to_posit(3);
+        mp_posit_data.four = integer_to_posit(4);
+        mp_posit_data.five = integer_to_posit(5);
+        mp_posit_data.seven = integer_to_posit(7);
+        mp_posit_data.eight = integer_to_posit(8);
+        mp_posit_data.sixteen = integer_to_posit(16);
+        mp_posit_data.dp90 = integer_to_posit(90);
+        mp_posit_data.dp180 = integer_to_posit(180);
+        mp_posit_data.dp270 = integer_to_posit(270);
+        mp_posit_data.dp360 = integer_to_posit(360);
+        mp_posit_data.dm90 = integer_to_posit(-90);
+        mp_posit_data.dm180 = integer_to_posit(-180);
+        mp_posit_data.dm270 = integer_to_posit(-270);
+        mp_posit_data.dm360 = integer_to_posit(-360);
+        mp_posit_data.d16 = integer_to_posit(16);
+        mp_posit_data.d64 = integer_to_posit(64);
+        mp_posit_data.d256 = integer_to_posit(256);
+        mp_posit_data.d4096 = integer_to_posit(4096);
+        mp_posit_data.d65536 = integer_to_posit(65536);
+        mp_posit_data.minusone = posit_neg(mp_posit_data.one);
+        mp_posit_data.half_unit = posit_div(mp_posit_data.unity, mp_posit_data.two);
+        mp_posit_data.three_quarter_unit = posit_mul(mp_posit_data.three, posit_div(mp_posit_data.unity,mp_posit_data.four));
+        mp_posit_data.fraction_multiplier = integer_to_posit(mp_fraction_multiplier);
+        mp_posit_data.negative_fraction_multiplier = posit_neg(mp_posit_data.fraction_multiplier);
+        mp_posit_data.angle_multiplier = integer_to_posit(mp_angle_multiplier);
+        mp_posit_data.fraction_one = mp_posit_data.fraction_multiplier;
+        mp_posit_data.fraction_two = posit_mul(mp_posit_data.fraction_multiplier, mp_posit_data.two);
+        mp_posit_data.fraction_three = posit_mul(mp_posit_data.fraction_multiplier, mp_posit_data.three);
+        mp_posit_data.fraction_four = posit_mul(mp_posit_data.fraction_multiplier, mp_posit_data.four);
+        mp_posit_data.fraction_half = posit_div(mp_posit_data.fraction_multiplier, mp_posit_data.two);
+        mp_posit_data.fraction_one_and_half = posit_add(mp_posit_data.fraction_multiplier, mp_posit_data.fraction_half);
+        mp_posit_data.one_eighty_degrees = posit_mul(mp_posit_data.angle_multiplier, mp_posit_data.dp180);
+        mp_posit_data.negative_one_eighty_degrees = posit_mul(mp_posit_data.angle_multiplier, mp_posit_data.dm180);
+        mp_posit_data.three_sixty_degrees = posit_mul(mp_posit_data.angle_multiplier, mp_posit_data.dp360);
+        mp_posit_data.no_crossing = posit_add(mp_posit_data.fraction_multiplier, mp_posit_data.one);
+        mp_posit_data.one_crossing = mp_posit_data.fraction_multiplier;
+        mp_posit_data.zero_crossing = mp_posit_data.zero;
+        mp_posit_data.error_correction = double_to_posit(1E-12);
+        mp_posit_data.warning_limit = posit_pow(mp_posit_data.two, integer_to_posit(52));
+        mp_posit_data.pi = double_to_posit(3.1415926535897932384626433832795028841971);
+        mp_posit_data.pi_divided_by_180 = posit_div(mp_posit_data.pi, mp_posit_data.dp180);
+        mp_posit_data.epsilon = posit_pow(mp_posit_data.two, integer_to_posit(-52));
+        mp_posit_data.EL_GORDO = posit_sub(posit_div(double_to_posit(DBL_MAX),mp_posit_data.two), mp_posit_data.one);
+        mp_posit_data.negative_EL_GORDO = posit_neg(mp_posit_data.EL_GORDO);
+        mp_posit_data.one_third_EL_GORDO = posit_div(mp_posit_data.EL_GORDO, mp_posit_data.three);
+        mp_posit_data.coef = posit_div(mp_posit_data.seven, mp_posit_data.three);
+        mp_posit_data.coef_bound = posit_mul(mp_posit_data.coef, mp_posit_data.fraction_multiplier);
+        mp_posit_data.scaled_threshold = double_to_posit(0.000122);
+        /* this member has to be set here: the half fraction threshold below is derived from it */
+        mp_posit_data.fraction_threshold = double_to_posit(0.04096);
+        mp_posit_data.near_zero_angle = posit_mul(double_to_posit(0.0256), mp_posit_data.angle_multiplier);
+        mp_posit_data.p_over_v_threshold = integer_to_posit(0x80000);
+        mp_posit_data.equation_threshold = double_to_posit(0.001);
+        mp_posit_data.sqrt_two_mul_fraction_one =
+            posit_mul(
+                posit_sqrt(mp_posit_data.two),
+                mp_posit_data.fraction_one
+            );
+        mp_posit_data.sqrt_five_minus_one_mul_fraction_one_and_half =
+            posit_mul(
+                posit_mul(
+                    mp_posit_data.three,
+                    mp_posit_data.fraction_half
+                ),
+                posit_sub(
+                    posit_sqrt(mp_posit_data.five),
+                    mp_posit_data.one
+                )
+            );
+        mp_posit_data.three_minus_sqrt_five_mul_fraction_one_and_half =
+            posit_mul(
+                posit_mul(
+                    mp_posit_data.three,
+                    mp_posit_data.fraction_half
+                ),
+                posit_sub(
+                    mp_posit_data.three,
+                    posit_sqrt(mp_posit_data.five)
+                )
+            );
+        mp_posit_data.d180_divided_by_pi_mul_angle =
+            posit_mul(
+                posit_div(
+                    mp_posit_data.dp180,
+                    mp_posit_data.pi
+                ),
+                mp_posit_data.angle_multiplier
+            );
+    }
+    /* allocators */
+    math->md_allocate = mp_allocate_number;
+    math->md_free = mp_free_number;
+    math->md_allocate_clone = mp_allocate_clone;
+    math->md_allocate_abs = mp_allocate_abs;
+    math->md_allocate_double = mp_allocate_double;
+    /* the numeric constants: allocation sets each of them to zero and tags it with a type */
+    mp_allocate_number(mp, &math->md_precision_default, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_precision_max, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_precision_min, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_epsilon_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_inf_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_negative_inf_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_warning_limit_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_one_third_inf_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_unity_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_two_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_three_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_half_unit_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_three_quarter_unit_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_zero_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_arc_tol_k, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_fraction_one_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_fraction_half_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_fraction_three_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_fraction_four_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_three_sixty_deg_t, mp_angle_type);
+    mp_allocate_number(mp, &math->md_one_eighty_deg_t, mp_angle_type);
+    mp_allocate_number(mp, &math->md_negative_one_eighty_deg_t, mp_angle_type);
+    mp_allocate_number(mp, &math->md_one_k, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_sqrt_8_e_k, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_twelve_ln_2_k, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_coef_bound_k, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_coef_bound_minus_1, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_twelvebits_3, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_twentysixbits_sqrt2_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_twentyeightbits_d_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_twentysevenbits_sqrt2_d_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_fraction_threshold_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_half_fraction_threshold_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_scaled_threshold_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_half_scaled_threshold_t, mp_scaled_type);
+    mp_allocate_number(mp, &math->md_near_zero_angle_t, mp_angle_type);
+    mp_allocate_number(mp, &math->md_p_over_v_threshold_t, mp_fraction_type);
+    mp_allocate_number(mp, &math->md_equation_threshold_t, mp_scaled_type);
+    /* their values; md_zero_t keeps the zero it got when it was allocated */
+    math->md_precision_default.data.pval = posit_mul(mp_posit_data.d16, mp_posit_data.unity);
+    math->md_precision_max.data.pval = posit_mul(mp_posit_data.d16, mp_posit_data.unity);
+    math->md_precision_min.data.pval = posit_mul(mp_posit_data.d16, mp_posit_data.unity);
+    math->md_epsilon_t.data.pval = mp_posit_data.epsilon;
+    math->md_inf_t.data.pval = mp_posit_data.EL_GORDO;
+    math->md_negative_inf_t.data.pval = mp_posit_data.negative_EL_GORDO;
+    math->md_one_third_inf_t.data.pval = mp_posit_data.one_third_EL_GORDO;
+    math->md_warning_limit_t.data.pval = mp_posit_data.warning_limit;
+    math->md_unity_t.data.pval = mp_posit_data.unity;
+    math->md_two_t.data.pval = mp_posit_data.two;
+    math->md_three_t.data.pval = mp_posit_data.three;
+    math->md_half_unit_t.data.pval = mp_posit_data.half_unit;
+    math->md_three_quarter_unit_t.data.pval = mp_posit_data.three_quarter_unit;
+    math->md_arc_tol_k.data.pval = posit_div(mp_posit_data.unity, mp_posit_data.d4096);
+    math->md_fraction_one_t.data.pval = mp_posit_data.fraction_one;
+    math->md_fraction_half_t.data.pval = mp_posit_data.fraction_half;
+    math->md_fraction_three_t.data.pval = mp_posit_data.fraction_three;
+    math->md_fraction_four_t.data.pval = mp_posit_data.fraction_four;
+    math->md_three_sixty_deg_t.data.pval = mp_posit_data.three_sixty_degrees;
+    math->md_one_eighty_deg_t.data.pval = mp_posit_data.one_eighty_degrees;
+    math->md_negative_one_eighty_deg_t.data.pval = mp_posit_data.negative_one_eighty_degrees;
+    math->md_one_k.data.pval = posit_div(mp_posit_data.one, mp_posit_data.d64);
+    math->md_sqrt_8_e_k.data.pval = double_to_posit(1.71552776992141359295);
+    math->md_twelve_ln_2_k.data.pval = posit_mul(double_to_posit(8.31776616671934371292), mp_posit_data.d256);
+    /* NOTE(review): the divisor is unity (1) here while the neighbouring constants divide by d65536; confirm */
+    math->md_twelvebits_3.data.pval = posit_div(integer_to_posit(1365), mp_posit_data.unity);
+    math->md_twentysixbits_sqrt2_t.data.pval = posit_div(integer_to_posit(94906266), mp_posit_data.d65536);
+    math->md_twentyeightbits_d_t.data.pval = posit_div(integer_to_posit(35596755), mp_posit_data.d65536);
+    math->md_twentysevenbits_sqrt2_d_t.data.pval = posit_div(integer_to_posit(25170707), mp_posit_data.d65536);
+    math->md_coef_bound_k.data.pval = mp_posit_data.coef_bound;
+    math->md_coef_bound_minus_1.data.pval = posit_sub(mp_posit_data.coef_bound, posit_div(mp_posit_data.one, mp_posit_data.d65536));
+    math->md_fraction_threshold_t.data.pval = mp_posit_data.fraction_threshold;
+    math->md_half_fraction_threshold_t.data.pval = posit_div(mp_posit_data.fraction_threshold, mp_posit_data.two);
+    math->md_scaled_threshold_t.data.pval = mp_posit_data.scaled_threshold;
+    math->md_half_scaled_threshold_t.data.pval = posit_div(mp_posit_data.scaled_threshold,mp_posit_data.two);
+    math->md_near_zero_angle_t.data.pval = mp_posit_data.near_zero_angle;
+    math->md_p_over_v_threshold_t.data.pval = mp_posit_data.p_over_v_threshold;
+    math->md_equation_threshold_t.data.pval = mp_posit_data.equation_threshold;
+    /* the operations */
+    math->md_from_int = mp_set_posit_from_int;
+    math->md_from_boolean = mp_set_posit_from_boolean;
+    math->md_from_scaled = mp_set_posit_from_scaled;
+    math->md_from_double = mp_set_posit_from_double;
+    math->md_from_addition = mp_set_posit_from_addition;
+    math->md_half_from_addition = mp_set_posit_half_from_addition;
+    math->md_from_subtraction = mp_set_posit_from_subtraction;
+    math->md_half_from_subtraction = mp_set_posit_half_from_subtraction;
+    math->md_from_oftheway = mp_set_posit_from_of_the_way;
+    math->md_from_div = mp_set_posit_from_div;
+    math->md_from_mul = mp_set_posit_from_mul;
+    math->md_from_int_div = mp_set_posit_from_int_div;
+    math->md_from_int_mul = mp_set_posit_from_int_mul;
+    math->md_negate = mp_number_negate;
+    math->md_add = mp_number_add;
+    math->md_subtract = mp_number_subtract;
+    math->md_half = mp_number_half;
+    math->md_do_double = mp_number_double;
+    math->md_abs = mp_posit_abs;
+    math->md_clone = mp_number_clone;
+    math->md_negated_clone = mp_number_negated_clone;
+    math->md_abs_clone = mp_number_abs_clone;
+    math->md_swap = mp_number_swap;
+    math->md_add_scaled = mp_number_add_scaled;
+    math->md_multiply_int = mp_number_multiply_int;
+    math->md_divide_int = mp_number_divide_int;
+    math->md_to_boolean = mp_number_to_boolean;
+    math->md_to_scaled = mp_number_to_scaled;
+    math->md_to_double = mp_number_to_double;
+    math->md_to_int = mp_number_to_int;
+    math->md_odd = mp_number_odd;
+    math->md_equal = mp_number_equal;
+    math->md_less = mp_number_less;
+    math->md_greater = mp_number_greater;
+    math->md_nonequalabs = mp_number_nonequalabs;
+    math->md_round_unscaled = mp_round_unscaled;
+    math->md_floor_scaled = mp_number_floor;
+    math->md_fraction_to_round_scaled = mp_posit_fraction_to_round_scaled;
+    math->md_make_scaled = mp_posit_number_make_scaled;
+    math->md_make_fraction = mp_posit_number_make_fraction;
+    math->md_take_fraction = mp_posit_number_take_fraction;
+    math->md_take_scaled = mp_posit_number_take_scaled;
+    math->md_velocity = mp_posit_velocity;
+    math->md_n_arg = mp_posit_n_arg;
+    math->md_m_log = mp_posit_m_log;
+    math->md_m_exp = mp_posit_m_exp;
+    math->md_m_unif_rand = mp_posit_m_unif_rand;
+    math->md_m_norm_rand = mp_posit_m_norm_rand;
+    math->md_pyth_add = mp_posit_pyth_add;
+    math->md_pyth_sub = mp_posit_pyth_sub;
+    math->md_power_of = mp_posit_power_of;
+    math->md_fraction_to_scaled = mp_number_fraction_to_scaled;
+    math->md_scaled_to_fraction = mp_number_scaled_to_fraction;
+    math->md_scaled_to_angle = mp_number_scaled_to_angle;
+    math->md_angle_to_scaled = mp_number_angle_to_scaled;
+    math->md_init_randoms = mp_init_randoms;
+    math->md_sin_cos = mp_posit_sin_cos;
+    math->md_slow_add = mp_posit_slow_add;
+    math->md_sqrt = mp_posit_square_rt;
+    math->md_print = mp_posit_print_number;
+    math->md_tostring = mp_posit_number_tostring;
+    math->md_modulo = mp_number_modulo;
+    math->md_ab_vs_cd = mp_posit_ab_vs_cd;
+    math->md_crossing_point = mp_posit_crossing_point;
+    math->md_scan_numeric = mp_posit_scan_numeric_token;
+    math->md_scan_fractional = mp_posit_scan_fractional_token;
+    math->md_free_math = mp_free_posit_math;
+    math->md_set_precision = mp_posit_set_precision;
+    return math;
+}
+void 
mp_posit_set_precision (MP mp) +{ + (void) mp; +} +void mp_free_posit_math (MP mp) +{ + mp_free_number(mp, &(mp->math->md_three_sixty_deg_t)); + mp_free_number(mp, &(mp->math->md_one_eighty_deg_t)); + mp_free_number(mp, &(mp->math->md_negative_one_eighty_deg_t)); + mp_free_number(mp, &(mp->math->md_fraction_one_t)); + mp_free_number(mp, &(mp->math->md_zero_t)); + mp_free_number(mp, &(mp->math->md_half_unit_t)); + mp_free_number(mp, &(mp->math->md_three_quarter_unit_t)); + mp_free_number(mp, &(mp->math->md_unity_t)); + mp_free_number(mp, &(mp->math->md_two_t)); + mp_free_number(mp, &(mp->math->md_three_t)); + mp_free_number(mp, &(mp->math->md_one_third_inf_t)); + mp_free_number(mp, &(mp->math->md_inf_t)); + mp_free_number(mp, &(mp->math->md_negative_inf_t)); + mp_free_number(mp, &(mp->math->md_warning_limit_t)); + mp_free_number(mp, &(mp->math->md_one_k)); + mp_free_number(mp, &(mp->math->md_sqrt_8_e_k)); + mp_free_number(mp, &(mp->math->md_twelve_ln_2_k)); + mp_free_number(mp, &(mp->math->md_coef_bound_k)); + mp_free_number(mp, &(mp->math->md_coef_bound_minus_1)); + mp_free_number(mp, &(mp->math->md_fraction_threshold_t)); + mp_free_number(mp, &(mp->math->md_half_fraction_threshold_t)); + mp_free_number(mp, &(mp->math->md_scaled_threshold_t)); + mp_free_number(mp, &(mp->math->md_half_scaled_threshold_t)); + mp_free_number(mp, &(mp->math->md_near_zero_angle_t)); + mp_free_number(mp, &(mp->math->md_p_over_v_threshold_t)); + mp_free_number(mp, &(mp->math->md_equation_threshold_t)); + mp_memory_free(mp->math); +} + +void mp_allocate_number (MP mp, mp_number *n, mp_number_type t) +{ + (void) mp; + n->data.pval = mp_posit_data.zero; + n->type = t; +} + +void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v) +{ + (void) mp; + n->type = t; + n->data.pval = v->data.pval; +} + +void mp_allocate_abs (MP mp, mp_number *n, mp_number_type t, mp_number *v) +{ + (void) mp; + n->type = t; + n->data.pval = posit_fabs(v->data.pval); +} + +void mp_allocate_double 
(MP mp, mp_number *n, double v) +{ + (void) mp; + n->type = mp_scaled_type; + n->data.pval = double_to_posit(v); +} + +void mp_free_number (MP mp, mp_number *n) +{ + (void) mp; + n->type = mp_nan_type; +} + +void mp_set_posit_from_int(mp_number *A, int B) +{ + A->data.pval = integer_to_posit(B); +} + +void mp_set_posit_from_boolean(mp_number *A, int B) +{ + A->data.pval = integer_to_posit(B); +} + +void mp_set_posit_from_scaled(mp_number *A, int B) +{ + A->data.pval = posit_div(integer_to_posit(B), mp_posit_data.d65536); +} + +void mp_set_posit_from_double(mp_number *A, double B) +{ + A->data.pval = double_to_posit(B); +} + +void mp_set_posit_from_addition(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_add(B->data.pval, C->data.pval); +} + +void mp_set_posit_half_from_addition(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_div(posit_add(B->data.pval,C->data.pval), mp_posit_data.two); +} + +void mp_set_posit_from_subtraction(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_sub(B->data.pval, C->data.pval); +} + +void mp_set_posit_half_from_subtraction(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_div(posit_sub(B->data.pval, C->data.pval), mp_posit_data.two); +} + +void mp_set_posit_from_div(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_div(B->data.pval, C->data.pval); +} + +void mp_set_posit_from_mul(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_mul(B->data.pval, C->data.pval); +} + +void mp_set_posit_from_int_div(mp_number *A, mp_number *B, int C) +{ + A->data.pval = posit_div(B->data.pval, integer_to_posit(C)); +} + +void mp_set_posit_from_int_mul(mp_number *A, mp_number *B, int C) +{ + A->data.pval = posit_mul(A->data.pval, integer_to_posit(C)); +} + +void mp_set_posit_from_of_the_way (MP mp, mp_number *A, mp_number *t, mp_number *B, mp_number *C) +{ + (void) mp; + A->data.pval = posit_sub(B->data.pval, 
mp_posit_take_fraction(posit_sub(B->data.pval, C->data.pval), t->data.pval)); +} + +void mp_number_negate(mp_number *A) +{ + A->data.pval = posit_neg(A->data.pval); +} + +void mp_number_add(mp_number *A, mp_number *B) +{ + A->data.pval = posit_add(A->data.pval, B->data.pval); +} + +void mp_number_subtract(mp_number *A, mp_number *B) +{ + A->data.pval = posit_sub(A->data.pval, B->data.pval); +} + +void mp_number_half(mp_number *A) +{ + A->data.pval = posit_div(A->data.pval, mp_posit_data.two); +} + +void mp_number_double(mp_number *A) +{ + A->data.pval = posit_mul(A->data.pval, mp_posit_data.two); +} + +void mp_number_add_scaled(mp_number *A, int B) +{ + A->data.pval = posit_add(A->data.pval, posit_div(integer_to_posit(B), mp_posit_data.d65536)); +} + +void mp_number_multiply_int(mp_number *A, int B) +{ + A->data.pval = posit_mul(A->data.pval, integer_to_posit(B)); +} + +void mp_number_divide_int(mp_number *A, int B) +{ + A->data.pval = posit_div(A->data.pval, integer_to_posit(B)); +} + +void mp_posit_abs(mp_number *A) +{ + A->data.pval = posit_fabs(A->data.pval); +} + +void mp_number_clone(mp_number *A, mp_number *B) +{ + A->data.pval = B->data.pval; +} + +void mp_number_negated_clone(mp_number *A, mp_number *B) +{ + A->data.pval = posit_neg(B->data.pval); +} + +void mp_number_abs_clone(mp_number *A, mp_number *B) +{ + A->data.pval = posit_fabs(B->data.pval); +} + +void mp_number_swap(mp_number *A, mp_number *B) +{ + posit_t swap_tmp = A->data.pval; + A->data.pval = B->data.pval; + B->data.pval = swap_tmp; +} + +void mp_number_fraction_to_scaled(mp_number *A) +{ + A->type = mp_scaled_type; + A->data.pval = posit_div(A->data.pval, mp_posit_data.fraction_multiplier); +} + +void mp_number_angle_to_scaled(mp_number *A) +{ + A->type = mp_scaled_type; + A->data.pval = posit_div(A->data.pval, mp_posit_data.angle_multiplier); +} + +void mp_number_scaled_to_fraction(mp_number *A) +{ + A->type = mp_fraction_type; + A->data.pval = posit_mul(A->data.pval, 
mp_posit_data.fraction_multiplier); +} + +void mp_number_scaled_to_angle(mp_number *A) +{ + A->type = mp_angle_type; + A->data.pval = posit_mul(A->data.pval, mp_posit_data.angle_multiplier); +} + +int mp_number_to_scaled(mp_number *A) +{ + return posit_to_integer(posit_mul(A->data.pval, mp_posit_data.d65536)); +} + +int mp_number_to_int(mp_number *A) +{ + return posit_to_integer(A->data.pval); +} + +int mp_number_to_boolean(mp_number *A) +{ + return posit_eq_zero(A->data.pval) ? 0 : 1; +} + +double mp_number_to_double(mp_number *A) +{ + return posit_to_double(A->data.pval); +} + +int mp_number_odd(mp_number *A) +{ + return odd(posit_to_integer(A->data.pval)); +} + +int mp_number_equal(mp_number *A, mp_number *B) +{ + return posit_eq(A->data.pval, B->data.pval); +} + +int mp_number_greater(mp_number *A, mp_number *B) +{ + return posit_gt(A->data.pval, B->data.pval); +} + +int mp_number_less(mp_number *A, mp_number *B) +{ + return posit_lt(A->data.pval, B->data.pval); +} + +int mp_number_nonequalabs(mp_number *A, mp_number *B) +{ + return ! 
posit_eq(posit_fabs(A->data.pval), posit_fabs(B->data.pval)); +} + +char *mp_posit_number_tostring (MP mp, mp_number *n) +{ + static char set[64]; + int l = 0; + char *ret = mp_memory_allocate(64); + (void) mp; + snprintf(set, 64, "%.20g", posit_to_double(n->data.pval)); + while (set[l] == ' ') { + l++; + } + strcpy(ret, set+l); + return ret; +} + +void mp_posit_print_number (MP mp, mp_number *n) +{ + char *str = mp_posit_number_tostring(mp, n); + mp_print_e_str(mp, str); + mp_memory_free(str); +} + + +void mp_posit_slow_add (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_orig) +{ + if (posit_gt(x_orig->data.pval, mp_posit_data.zero)) { + if (posit_le(y_orig->data.pval, posit_sub(mp_posit_data.EL_GORDO, x_orig->data.pval))) { + ret->data.pval = posit_add(x_orig->data.pval, y_orig->data.pval); + } else { + mp->arith_error = 1; + ret->data.pval = mp_posit_data.EL_GORDO; + } + } else if (posit_le(posit_neg(y_orig->data.pval), posit_add(mp_posit_data.EL_GORDO, x_orig->data.pval))) { + ret->data.pval = posit_add(x_orig->data.pval, y_orig->data.pval); + } else { + mp->arith_error = 1; + ret->data.pval = mp_posit_data.negative_EL_GORDO; + } +} + +void mp_posit_number_make_fraction (MP mp, mp_number *ret, mp_number *p, mp_number *q) { + (void) mp; + ret->data.pval = mp_posit_make_fraction(p->data.pval, q->data.pval); +} + +void mp_posit_number_take_fraction (MP mp, mp_number *ret, mp_number *p, mp_number *q) { + (void) mp; + ret->data.pval = mp_posit_take_fraction(p->data.pval, q->data.pval); +} + +void mp_posit_number_take_scaled (MP mp, mp_number *ret, mp_number *p_orig, mp_number *q_orig) +{ + (void) mp; + ret->data.pval = posit_mul(p_orig->data.pval, q_orig->data.pval); +} + +void mp_posit_number_make_scaled (MP mp, mp_number *ret, mp_number *p_orig, mp_number *q_orig) +{ + (void) mp; + ret->data.pval = posit_div(p_orig->data.pval, q_orig->data.pval); +} + +void mp_wrapup_numeric_token (MP mp, unsigned char *start, unsigned char *stop) +{ + double result; + 
char *end = (char *) stop; + errno = 0; + result = strtod((char *) start, &end); + if (errno == 0) { + set_cur_mod(double_to_posit(result)); + if (result >= mp_warning_limit) { + if (posit_gt(internal_value(mp_warning_check_internal).data.pval, mp_posit_data.zero) && (mp->scanner_status != mp_tex_flushing_state)) { + char msg[256]; + mp_snprintf(msg, 256, "Number is too large (%g)", result); + mp_error( + mp, + msg, + "Continue and I'll try to cope with that big value; but it might be dangerous." + "(Set warningcheck := 0 to suppress this message.)" + ); + } + } + } else if (mp->scanner_status != mp_tex_flushing_state) { + mp_error( + mp, + "Enormous number has been reduced.", + "I could not handle this number specification probably because it is out of" + "range." + ); + set_cur_mod(mp_posit_data.EL_GORDO); + } + set_cur_cmd(mp_numeric_command); +} + +static void mp_posit_aux_find_exponent (MP mp) +{ + if (mp->buffer[mp->cur_input.loc_field] == 'e' || mp->buffer[mp->cur_input.loc_field] == 'E') { + mp->cur_input.loc_field++; + if (!(mp->buffer[mp->cur_input.loc_field] == '+' + || mp->buffer[mp->cur_input.loc_field] == '-' + || mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class)) { + mp->cur_input.loc_field--; + return; + } + if (mp->buffer[mp->cur_input.loc_field] == '+' + || mp->buffer[mp->cur_input.loc_field] == '-') { + mp->cur_input.loc_field++; + } + while (mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class) { + mp->cur_input.loc_field++; + } + } +} + +void mp_posit_scan_fractional_token (MP mp, int n) +{ + unsigned char *start = &mp->buffer[mp->cur_input.loc_field -1]; + unsigned char *stop; + (void) n; + while (mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class) { + mp->cur_input.loc_field++; + } + mp_posit_aux_find_exponent(mp); + stop = &mp->buffer[mp->cur_input.loc_field-1]; + mp_wrapup_numeric_token(mp, start, stop); +} + +void mp_posit_scan_numeric_token (MP mp, int n) +{ + unsigned char *start = 
&mp->buffer[mp->cur_input.loc_field -1]; + unsigned char *stop; + (void) n; + while (mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class) { + mp->cur_input.loc_field++; + } + if (mp->buffer[mp->cur_input.loc_field] == '.' && mp->buffer[mp->cur_input.loc_field+1] != '.') { + mp->cur_input.loc_field++; + while (mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class) { + mp->cur_input.loc_field++; + } + } + mp_posit_aux_find_exponent(mp); + stop = &mp->buffer[mp->cur_input.loc_field-1]; + mp_wrapup_numeric_token(mp, start, stop); +} + +void mp_posit_velocity (MP mp, mp_number *ret, mp_number *st, mp_number *ct, mp_number *sf, mp_number *cf, mp_number *t) +{ + posit_t acc, num, denom; + (void) mp; + acc = mp_posit_take_fraction( + mp_posit_take_fraction( + posit_sub(st->data.pval, posit_div(sf->data.pval, mp_posit_data.sixteen)), + posit_sub(sf->data.pval, posit_div(st->data.pval, mp_posit_data.sixteen)) + ), + posit_sub(ct->data.pval,cf->data.pval) + ); + num = posit_add( + mp_posit_data.fraction_two, + mp_posit_take_fraction( + acc, + mp_posit_data.sqrt_two_mul_fraction_one + ) + ); + denom = posit_add( + mp_posit_data.fraction_three, + posit_add( + mp_posit_take_fraction( + ct->data.pval, + mp_posit_data.sqrt_five_minus_one_mul_fraction_one_and_half + ), + mp_posit_take_fraction( + cf->data.pval, + mp_posit_data.three_minus_sqrt_five_mul_fraction_one_and_half + ) + ) + ); + if (posit_ne(t->data.pval, mp_posit_data.unity)) { + num = mp_posit_make_scaled(num, t->data.pval); + } + if (posit_ge(posit_div(num, mp_posit_data.four), denom)) { + ret->data.pval = mp_posit_data.fraction_four; + } else { + ret->data.pval = mp_posit_make_fraction(num, denom); + } +} + +int mp_posit_ab_vs_cd (mp_number *a_orig, mp_number *b_orig, mp_number *c_orig, mp_number *d_orig) +{ + posit_t ab = posit_mul(a_orig->data.pval, b_orig->data.pval); + posit_t cd = posit_mul(c_orig->data.pval, d_orig->data.pval); + if (posit_eq(ab,cd)) { + return 0; + } else if 
(posit_lt(ab,cd)) { + return -1; + } else { + return 1; + } +} + +static void mp_posit_crossing_point (MP mp, mp_number *ret, mp_number *aa, mp_number *bb, mp_number *cc) +{ + posit_t d; + posit_t xx, x0, x1, x2; + posit_t a = aa->data.pval; + posit_t b = bb->data.pval; + posit_t c = cc->data.pval; + (void) mp; + if (posit_lt(a, mp_posit_data.zero)) { + ret->data.pval = mp_posit_data.zero_crossing; + return; + } + if (posit_ge(c, mp_posit_data.zero)) { + if (posit_ge(b, mp_posit_data.zero)) { + if (posit_gt(c, mp_posit_data.zero)) { + ret->data.pval = mp_posit_data.no_crossing; + } else if (posit_eq_zero(a) && posit_eq_zero(b)) { + ret->data.pval = mp_posit_data.no_crossing; + } else { + ret->data.pval = mp_posit_data.one_crossing; + } + return; + } + if (posit_eq_zero(a)) { + ret->data.pval = mp_posit_data.zero_crossing; + return; + } + } else if (posit_eq_zero(a) && posit_le(b, mp_posit_data.zero)) { + ret->data.pval = mp_posit_data.zero_crossing; + return; + } + d = mp_posit_data.epsilon; + x0 = a; + x1 = posit_sub(a, b); + x2 = posit_sub(b, c); + do { + posit_t x = posit_add(posit_div(posit_add(x1, x2), mp_posit_data.two), mp_posit_data.error_correction); + if (posit_gt(posit_sub(x1, x0), x0)) { + x2 = x; + x0 = posit_add(x0, x0); + d = posit_add(d, d); + } else { + xx = posit_sub(posit_add(x1, x), x0); + if (posit_gt(xx, x0)) { + x2 = x; + x0 = posit_add(x0, x0); + d = posit_add(d, d); + } else { + x0 = posit_sub(x0, xx); + if (posit_le(x, x0) && posit_le(posit_add(x, x2), x0)) { + ret->data.pval = mp_posit_data.no_crossing; + return; + } + x1 = x; + d = posit_add(posit_add(d, d), mp_posit_data.epsilon); + } + } + } while (posit_lt(d, mp_posit_data.fraction_one)); + ret->data.pval = posit_sub(d, mp_posit_data.fraction_one); +} + +int mp_round_unscaled(mp_number *x_orig) +{ + return posit_i_round(x_orig->data.pval); +} + +void mp_number_floor(mp_number *i) +{ + i->data.pval = posit_floor(i->data.pval); +} + +void mp_posit_fraction_to_round_scaled(mp_number 
*x_orig) +{ + x_orig->type = mp_scaled_type; + x_orig->data.pval = posit_div(x_orig->data.pval, mp_posit_data.fraction_multiplier); +} + +void mp_posit_square_rt (MP mp, mp_number *ret, mp_number *x_orig) +{ + if (posit_gt(x_orig->data.pval, mp_posit_data.zero)) { + ret->data.pval = posit_sqrt(x_orig->data.pval); + } else { + if (posit_lt(x_orig->data.pval, mp_posit_data.zero)) { + char msg[256]; + char *xstr = mp_posit_number_tostring(mp, x_orig); + mp_snprintf(msg, 256, "Square root of %s has been replaced by 0", xstr); + mp_memory_free(xstr); + mp_error( + mp, + msg, + "Since I don't take square roots of negative numbers, I'm zeroing this one.\n" + "Proceed, with fingers crossed." + ); + } + ret->data.pval = mp_posit_data.zero; + } +} + +void mp_posit_pyth_add (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b_orig) +{ + ret->data.pval = posit_sqrt( + posit_add( + posit_mul( + a_orig->data.pval, + a_orig->data.pval + ), + posit_mul( + b_orig->data.pval, + b_orig->data.pval + ) + ) + ); +} + +void mp_posit_pyth_sub (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b_orig) +{ + if (posit_gt(a_orig->data.pval,b_orig->data.pval)) { + a_orig->data.pval = posit_sqrt( + posit_sub( + posit_mul( + a_orig->data.pval, + a_orig->data.pval + ), + posit_mul( + b_orig->data.pval, + b_orig->data.pval + ) + ) + ); + } else { + if (posit_lt(a_orig->data.pval,b_orig->data.pval)) { + char msg[256]; + char *astr = mp_posit_number_tostring(mp, a_orig); + char *bstr = mp_posit_number_tostring(mp, b_orig); + mp_snprintf(msg, 256, "Pythagorean subtraction %s+-+%s has been replaced by 0", astr, bstr); + mp_memory_free(astr); + mp_memory_free(bstr); + mp_error( + mp, + msg, + "Since I don't take square roots of negative numbers, Im zeroing this one.\n" + "Proceed, with fingers crossed." 
+ ); + } + a_orig->data.pval = mp_posit_data.zero; + } + ret->data.pval = a_orig->data.pval; +} + +void mp_posit_power_of (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b_orig) +{ + errno = 0; + ret->data.pval = posit_pow(a_orig->data.pval, b_orig->data.pval); + if (errno) { + mp->arith_error = 1; + ret->data.pval = mp_posit_data.EL_GORDO; + } +} + +void mp_posit_m_log (MP mp, mp_number *ret, mp_number *x_orig) +{ + if (posit_gt(x_orig->data.pval,mp_posit_data.zero)) { + ret->data.pval = posit_mul(posit_log(x_orig->data.pval),mp_posit_data.d256); + } else { + char msg[256]; + char *xstr = mp_posit_number_tostring(mp, x_orig); + mp_snprintf(msg, 256, "Logarithm of %s has been replaced by 0", xstr); + mp_memory_free(xstr); + mp_error( + mp, + msg, + "Since I don't take logs of non-positive numbers, I'm zeroing this one.\n" + "Proceed, with fingers crossed." + ); + ret->data.pval = mp_posit_data.zero; + } +} + +void mp_posit_m_exp (MP mp, mp_number *ret, mp_number *x_orig) +{ + errno = 0; + ret->data.pval = posit_exp(posit_div(x_orig->data.pval,mp_posit_data.d256)); + if (errno) { + if (posit_gt(x_orig->data.pval,mp_posit_data.zero)) { + mp->arith_error = 1; + ret->data.pval = mp_posit_data.EL_GORDO; + } else { + ret->data.pval = mp_posit_data.zero; + } + } +} + +void mp_posit_n_arg (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_orig) +{ + if (posit_eq_zero(x_orig->data.pval) && posit_eq_zero(y_orig->data.pval)) { + mp_error( + mp, + "angle(0,0) is taken as zero", + "The 'angle' between two identical points is undefined. I'm zeroing this one.\n" + "Proceed, with fingers crossed." 
+ ); + ret->data.pval = mp_posit_data.zero; + } else { + ret->type = mp_angle_type; + ret->data.pval = posit_mul( + posit_atan2( + y_orig->data.pval, + x_orig->data.pval + ), + mp_posit_data.d180_divided_by_pi_mul_angle + ); + } +} + +void mp_posit_sin_cos (MP mp, mp_number *z_orig, mp_number *n_cos, mp_number *n_sin) +{ + posit_t rad = posit_div(z_orig->data.pval, mp_posit_data.angle_multiplier); + (void) mp; + if (posit_eq(rad, mp_posit_data.dp90) || posit_eq(rad, mp_posit_data.dm270)) { + n_cos->data.pval = mp_posit_data.zero; + n_sin->data.pval = mp_posit_data.fraction_multiplier; + } else if (posit_eq(rad, mp_posit_data.dm90) || posit_eq(rad, mp_posit_data.dp270)) { + n_cos->data.pval = mp_posit_data.zero; + n_sin->data.pval = mp_posit_data.negative_fraction_multiplier; + } else if (posit_eq(rad, mp_posit_data.dp180) || posit_eq(rad, mp_posit_data.dm180)) { + n_cos->data.pval = mp_posit_data.negative_fraction_multiplier; + n_sin->data.pval = mp_posit_data.zero; + } else { + rad = posit_mul(rad,mp_posit_data.pi_divided_by_180); + n_cos->data.pval = posit_mul(posit_cos(rad),mp_posit_data.fraction_multiplier); + n_sin->data.pval = posit_mul(posit_sin(rad),mp_posit_data.fraction_multiplier); + } +} + +# define KK 100 +# define LL 37 +# define MM (1L<<30) +# define mod_diff(x,y) (((x)-(y))&(MM-1)) +# define TT 70 +# define is_odd(x) ((x)&1) +# define QUALITY 1009 + + +typedef struct mp_posit_random_info { + long x[KK]; + long buf[QUALITY]; + long dummy; + long started; + long *ptr; +} mp_posit_random_info; + +static mp_posit_random_info mp_posit_random_data = { + .dummy = -1, + .started = -1, + .ptr = &mp_posit_random_data.dummy +}; + +static void mp_posit_aux_ran_array(long aa[], int n) +{ + int i, j; + for (j = 0; j < KK; j++) { + aa[j] = mp_posit_random_data.x[j]; + } + for (; j < n; j++) { + aa[j] = mod_diff(aa[j - KK], aa[j - LL]); + } + for (i = 0; i < LL; i++, j++) { + mp_posit_random_data.x[i] = mod_diff(aa[j - KK], aa[j - LL]); + } + for (; i < KK; i++, 
j++) { + mp_posit_random_data.x[i] = mod_diff(aa[j - KK], mp_posit_random_data.x[i - LL]); + } +} + + +static void mp_posit_aux_ran_start(long seed) +{ + int t, j; + long x[KK + KK - 1]; + long ss = (seed+2) & (MM - 2); + for (j = 0; j < KK; j++) { + x[j] = ss; + ss <<= 1; + if (ss >= MM) { + ss -= MM - 2; + } + } + x[1]++; + for (ss = seed & (MM - 1), t = TT - 1; t;) { + for (j = KK - 1; j > 0; j--) { + x[j + j] = x[j]; + x[j + j - 1] = 0; + } + for (j = KK + KK - 2; j >= KK; j--) { + x[j - (KK -LL)] = mod_diff(x[j - (KK - LL)], x[j]); + x[j - KK] = mod_diff(x[j - KK], x[j]); + } + if (is_odd(ss)) { + for (j = KK; j>0; j--) { + x[j] = x[j-1]; + } + x[0] = x[KK]; + x[LL] = mod_diff(x[LL], x[KK]); + } + if (ss) { + ss >>= 1; + } else { + t--; + } + } + for (j = 0; j < LL; j++) { + mp_posit_random_data.x[j + KK - LL] = x[j]; + } + for (;j < KK; j++) { + mp_posit_random_data.x[j - LL] = x[j]; + } + for (j = 0; j < 10; j++) { + mp_posit_aux_ran_array(x, KK + KK - 1); + } + mp_posit_random_data.ptr = &mp_posit_random_data.started; +} + +static long mp_posit_aux_ran_arr_cycle(void) +{ + if (mp_posit_random_data.ptr == &mp_posit_random_data.dummy) { + mp_posit_aux_ran_start(314159L); + } + mp_posit_aux_ran_array(mp_posit_random_data.buf, QUALITY); + mp_posit_random_data.buf[KK] = -1; + mp_posit_random_data.ptr = mp_posit_random_data.buf + 1; + return mp_posit_random_data.buf[0]; +} + +void mp_init_randoms (MP mp, int seed) +{ + int k = 1; + int j = abs(seed); + int f = (int) mp_fraction_multiplier; + while (j >= f) { + j = j/2; + } + for (int i = 0; i <= 54; i++) { + int jj = k; + k = j - k; + j = jj; + if (k < 0) { + k += f; + } + mp->randoms[(i * 21) % 55].data.pval = integer_to_posit(j); + } + mp_new_randoms(mp); + mp_new_randoms(mp); + mp_new_randoms(mp); + mp_posit_aux_ran_start((unsigned long) seed); +} + +void mp_number_modulo(mp_number *a, mp_number *b) +{ + a->data.pval = posit_mul(posit_modf(posit_div(a->data.pval, b->data.pval)), b->data.pval); +} + +static 
void mp_next_unif_random (MP mp, mp_number *ret)
{
    long next;
    unsigned long int op;
    (void) mp;
    /* a negative entry marks the end of the buffer, then a new batch is made */
    if (*mp_posit_random_data.ptr >= 0) {
        next = *mp_posit_random_data.ptr++;
    } else {
        next = mp_posit_aux_ran_arr_cycle();
    }
    op = (unsigned) next;
    /* divide by the modulus of the generator */
    ret->data.pval = double_to_posit(op / (MM * 1.0));
}

/*
    Copy the next entry of |mp->randoms| into |ret|; when the entries are used
    up |mp_new_randoms| is called first.
*/

static void mp_next_random (MP mp, mp_number *ret)
{
    if (mp->j_random != 0) {
        mp->j_random = mp->j_random-1;
    } else {
        mp_new_randoms(mp);
    }
    mp_number_clone(ret, &(mp->randoms[mp->j_random]));
}

/*
    Store in |ret| a uniformly distributed random number with the sign of
    |x_orig| and a magnitude of |x_orig| times the next uniform random value.
    When the product equals the magnitude of |x_orig| itself the result is
    zero.
*/

static void mp_posit_m_unif_rand (MP mp, mp_number *ret, mp_number *x_orig)
{
    math_data *math = (math_data *) mp->math;
    mp_number bound, magnitude, sample, scaled;
    mp_allocate_number(mp, &scaled, mp_fraction_type);
    mp_allocate_clone(mp, &bound, mp_scaled_type, x_orig);
    mp_allocate_abs(mp, &magnitude, mp_scaled_type, &bound);
    mp_allocate_number(mp, &sample, mp_scaled_type);
    mp_next_unif_random(mp, &sample);
    scaled.data.pval = posit_mul(magnitude.data.pval, sample.data.pval);
    mp_free_number(mp, &sample);
    if (mp_number_equal(&scaled, &magnitude)) {
        mp_number_clone(ret, &math->md_zero_t);
    } else if (mp_number_greater(&bound, &math->md_zero_t)) {
        mp_number_clone(ret, &scaled);
    } else {
        mp_number_negated_clone(ret, &scaled);
    }
    mp_free_number(mp, &magnitude);
    mp_free_number(mp, &bound);
    mp_free_number(mp, &scaled);
}

/*
    Store a normally distributed random number in |ret|. Candidates are drawn
    with |mp_next_random| until one passes both loop tests.
*/

static void mp_posit_m_norm_rand (MP mp, mp_number *ret)
{
    mp_number abs_x, u, r, la, xa;
    mp_allocate_number(mp, &la, mp_scaled_type);
    mp_allocate_number(mp, &xa, mp_scaled_type);
    mp_allocate_number(mp, &abs_x, mp_scaled_type);
    mp_allocate_number(mp, &u, mp_scaled_type);
    mp_allocate_number(mp, &r, mp_scaled_type);
    do {
        do {
            mp_number v;
            mp_allocate_number(mp, &v, mp_scaled_type);
            mp_next_random(mp, &v);
            mp_number_subtract(&v, &((math_data *)mp->math)->md_fraction_half_t);
            mp_posit_number_take_fraction(mp, &xa, &((math_data *)mp->math)->md_sqrt_8_e_k, &v);
            mp_free_number(mp, &v);
            mp_next_random(mp, &u);
            mp_number_clone(&abs_x, &xa);
            mp_posit_abs(&abs_x);
        } while (!
mp_number_less(&abs_x, &u)); + mp_posit_number_make_fraction(mp, &r, &xa, &u); + mp_number_clone(&xa, &r); + mp_posit_m_log(mp, &la, &u); + mp_set_posit_from_subtraction(&la, &((math_data *)mp->math)->md_twelve_ln_2_k, &la); + } while (mp_posit_ab_vs_cd(&((math_data *)mp->math)->md_one_k, &la, &xa, &xa) < 0); + mp_number_clone(ret, &xa); + mp_free_number(mp, &r); + mp_free_number(mp, &abs_x); + mp_free_number(mp, &la); + mp_free_number(mp, &xa); + mp_free_number(mp, &u); +} + diff --git a/source/luametatex/source/mp/mpc/mpmathposit.h b/source/luametatex/source/mp/mpc/mpmathposit.h new file mode 100644 index 000000000..da4dfeee1 --- /dev/null +++ b/source/luametatex/source/mp/mpc/mpmathposit.h @@ -0,0 +1,13 @@ +/* This file is generated by "mtxrun --script "mtx-wtoc.lua" from the metapost cweb files. */ + + +# ifndef MPMATHPOSIT_H +# define MPMATHPOSIT_H 1 + +# include "mp.h" +# include "softposit.h" + +math_data *mp_initialize_posit_math (MP mp); + +# endif + diff --git a/source/luametatex/source/mp/mpw/mp.w b/source/luametatex/source/mp/mpw/mp.w index f4bdb58ca..7390181ea 100644 --- a/source/luametatex/source/mp/mpw/mp.w +++ b/source/luametatex/source/mp/mpw/mp.w @@ -200,6 +200,7 @@ abstraction. 
# include "avl.h" # include "auxmemory.h" +# include "auxposit.h" # include <string.h> # include <setjmp.h> @@ -231,6 +232,7 @@ typedef struct MP_instance { # include "mpmathdouble.h" # include "mpmathbinary.h" # include "mpmathdecimal.h" +# include "mpmathposit.h" # include "mpstrings.h" @h @<Declarations@> @@ -293,13 +295,15 @@ typedef enum mp_number_type { mp_angle_type, mp_double_type, mp_binary_type, - mp_decimal_type + mp_decimal_type, + mp_posit_type } mp_number_type; typedef union mp_number_store { void *num; double dval; int val; + posit_t pval; } mp_number_store; typedef struct mp_number_data { @@ -568,6 +572,9 @@ MP mp_initialize (MP_options * opt) case mp_math_binary_mode: mp->math = mp_initialize_binary_math(mp); break; + case mp_math_posit_mode: + mp->math = mp_initialize_posit_math(mp); + break; default: mp->math = mp_initialize_double_math(mp); break; @@ -586,6 +593,9 @@ MP mp_initialize (MP_options * opt) case mp_math_decimal_mode: set_internal_string(mp_number_system_internal, mp_intern(mp, "decimal")); break; + case mp_math_posit_mode: + set_internal_string(mp_number_system_internal, mp_intern(mp, "posit")); + break; case mp_math_binary_mode: set_internal_string(mp_number_system_internal, mp_intern(mp, "binary")); break; @@ -1868,7 +1878,8 @@ typedef enum mp_math_mode { mp_math_scaled_mode, mp_math_double_mode, mp_math_binary_mode, - mp_math_decimal_mode + mp_math_decimal_mode, + mp_math_posit_mode } mp_math_mode; @ @<Option variables@>= @@ -22558,7 +22569,7 @@ static void mp_do_unary (MP mp, int c) case mp_cos_d_operation: /* This is rather inefficient, esp decimal, to calculate both each time. We could - pass NULL as signal to do only one. + pass NULL as signal to do only one, or just have n_sin and n_cos. 
*/ if (mp->cur_exp.type != mp_known_type) { mp_bad_unary(mp, c); @@ -22569,7 +22580,7 @@ static void mp_do_unary (MP mp, int c) new_fraction(n_sin); new_fraction(n_cos); number_clone(arg1, cur_exp_value_number); - number_clone(arg2, unity_t); + number_clone(arg2, unity_t); /* maybe dp360 */ number_multiply_int(arg2, 360); number_modulo(arg1, arg2); convert_scaled_to_angle(arg1); diff --git a/source/luametatex/source/mp/mpw/mpmathdecimal.w b/source/luametatex/source/mp/mpw/mpmathdecimal.w index 124c77ecb..359223271 100644 --- a/source/luametatex/source/mp/mpw/mpmathdecimal.w +++ b/source/luametatex/source/mp/mpw/mpmathdecimal.w @@ -227,6 +227,8 @@ mp_decimal_info mp_decimal_data = { .initialized = 0, }; +@ See mpmathdouble for documentation. @c + static void checkZero(decNumber *ret) { if (decNumberIsZero(ret) && decNumberIsNegative(ret)) { @@ -277,21 +279,26 @@ static double decNumberToDouble(decNumber *A) } } -@ Borrowed code from libdfp: +/*tex -$$ \arctan(x) = x - \frac {x^3}{3} + \frac {x^5{5} - \frac {x^7}{7} + \ldots$$ + \startformula + \arctan(x) = x - \frac {x^3}{3} + \frac {x^5}{5} - \frac {x^7}{7} + \ldots + \stopformula -This power series works well, if $x$ is close to zero ($|x|<0.5$). If x is -larger, the series converges too slowly, so in order to get a smaller x, we apply -the identity + This power series works well, if $x$ is close to zero ($|x|<0.5$). If x is + larger, the series converges too slowly, so in order to get a smaller x, we apply + the identity -$$ \arctan(x) = 2 \arctan \left (\frac {\sqrt{1 + x^2}-1} {x} \right) $$ + \startformula + \arctan(x) = 2 \arctan \left (\frac {\sqrt{1 + x^2}-1} {x} \right) + \stopformula -twice. The first application gives us a new $x$ with $x < 1$. The second -application gives us a new x with $x < 0.4142136$. For that $x$, we use the power -series and multiply the result by four. + twice. The first application gives us a new $x$ with $x < 1$. The second + application gives us a new x with $x < 0.4142136$. 
For that $x$, we use the power + series and multiply the result by four. + +*/ -@c static void decNumberAtan(decNumber *result, decNumber *x_orig, decContext *localset) { decNumber x; @@ -373,7 +380,6 @@ static void decNumberAtan2(decNumber *result, decNumber *y, decNumber *x, decCon } } -@ @c math_data *mp_initialize_decimal_math (MP mp) { math_data *math = (math_data *) mp_memory_allocate(sizeof(math_data)); @@ -613,10 +619,6 @@ void mp_free_decimal_math (MP mp) mp_memory_free(mp->math); } -@ Creating and destruction of |mp_number| objects. Let's hope that mimalloc keeps -a pool for these. - -@ @c void mp_allocate_number (MP mp, mp_number *n, mp_number_type t) { (void) mp; @@ -625,7 +627,6 @@ void mp_allocate_number (MP mp, mp_number *n, mp_number_type t) decNumberZero(n->data.num); } -@ @c void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v) { (void) mp; @@ -635,7 +636,6 @@ void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v) decNumberCopy(n->data.num, v->data.num); } -@ @c void mp_allocate_abs (MP mp, mp_number *n, mp_number_type t, mp_number *v) { (void) mp; @@ -654,7 +654,6 @@ void mp_allocate_double (MP mp, mp_number *n, double v) decNumberFromDouble(n->data.num, v); } -@ @c void mp_free_number (MP mp, mp_number *n) { (void) mp; @@ -857,13 +856,6 @@ void mp_number_scaled_to_angle(mp_number *A) decNumberMultiply(A->data.num, A->data.num, &mp_decimal_data.angle_multiplier_decNumber, &mp_decimal_data.set); } -@* Query functions. - -@ Convert a number to a scaled value. |decNumberToInt32| is not able to make this -conversion properly, so instead we are using |decNumberToDouble| and a typecast. -Bad! - -@c int mp_number_to_scaled(mp_number *A) { int result; @@ -958,26 +950,6 @@ int mp_number_nonequalabs(mp_number *A, mp_number *B) return ! decNumberIsZero(&res); } -@ Fixed-point arithmetic is done on {\sl scaled integers} that are multiples of -$2^{-16}$. 
In other words, a binary point is assumed to be sixteen bit positions -from the right end of a binary computer word. - -@ One of \MP's most common operations is the calculation of -$\lfloor{a+b\over2}\rfloor$, the midpoint of two given integers |a| and~|b|. The -most decent way to do this is to write |(a+b)/2|; but on many machines it is -more efficient to calculate |(a+b)>>1|. - -Therefore the midpoint operation will always be denoted by |half(a+b)| in this -program. If \MP\ is being implemented with languages that permit binary shifting, -the |half| macro should be changed to make this operation as efficient as -possible. Since some systems have shift operators that can only be trusted to -work on positive numbers, there is also a macro |halfp| that is used only when -the quantity being halved is known to be positive or zero. - -@ Here is a procedure analogous to |print_int|. The current version is fairly -stupid, and it is not round-trip safe, but this is good enough for a beta test. - -@c char *mp_decnumber_tostring(decNumber *n) { decNumber corrected; @@ -994,7 +966,6 @@ char *mp_decimal_number_tostring (MP mp, mp_number *n) return mp_decnumber_tostring(n->data.num); } -@ @c void mp_decimal_print_number (MP mp, mp_number *n) { char *str = mp_decnumber_tostring(n->data.num); @@ -1002,51 +973,12 @@ void mp_decimal_print_number (MP mp, mp_number *n) mp_memory_free(str); } -@ Addition is not always checked to make sure that it doesn't overflow, but in -places where overflow isn't too unlikely the |slow_add| routine is used. - -@c void mp_decimal_slow_add (MP mp, mp_number *ret, mp_number *A, mp_number *B) { (void) mp; decNumberAdd(ret->data.num, A->data.num, B->data.num, &mp_decimal_data.set); } -@ The |make_fraction| routine produces the |fraction| equivalent of |p/q|, given -integers |p| and~|q|; it computes the integer -$f=\lfloor2^{28}p/q+{1\over2}\rfloor$, when $p$ and $q$ are positive. 
If |p| and -|q| are both of the same scaled type |t|, the \quote {type relation} -|make_fraction(t,t)=fraction| is valid; and it's also possible to use the -subroutine \quote {backwards,} using the relation |make_fraction(t,fraction)=t| -between scaled types. - -If the result would have magnitude $2^{31}$ or more, |make_fraction| sets -|arith_error:=true|. Most of \MP's internal computations have been designed to -avoid this sort of error. - -If this subroutine were programmed in assembly language on a typical machine, we -could simply compute |(@t$2^{28}$@>*p)div q|, since a double-precision product -can often be input to a fixed-point division instruction. But when we are -restricted to int-eger arithmetic it is necessary either to resort to -multiple-precision maneuvering or to use a simple but slow iteration. The -multiple-precision technique would be about three times faster than the code -adopted here, but it would be comparatively long and tricky, involving about -sixteen additional multiplications and divisions. - -This operation is part of \MP's \quote {inner loop}; indeed, it will consume nearly -10\pct! of the running time (exclusive of input and output) if the code below is -left unchanged. A machine-dependent recoding will therefore make \MP\ run faster. -The present implementation is highly portable, but slow; it avoids multiplication -and division except in the initial stage. System wizards should be careful to -replace it with a routine that is guaranteed to produce identical results in all -cases. @^system dependencies@> - -As noted below, a few more routines should also be replaced by machine-dependent -code, for efficiency. But when a procedure is not part of the \quote {inner loop,} -such changes aren't advisable; simplicity and robustness are preferable to -trickery, unless the cost is too high. 
@^inner loop@> - -@c void mp_decimal_make_fraction (MP mp, decNumber *ret, decNumber *p, decNumber *q) { decNumberDivide(ret, p, q, &mp_decimal_data.set); @@ -1059,16 +991,6 @@ void mp_decimal_number_make_fraction (MP mp, mp_number *ret, mp_number *p, mp_nu mp_decimal_make_fraction(mp, ret->data.num, p->data.num, q->data.num); } -@ The dual of |make_fraction| is |take_fraction|, which multiplies a given -integer~|q| by a fraction~|f|. When the operands are positive, it computes -$p=\lfloor qf/2^{28}+{1\over2}\rfloor$, a symmetric function of |q| and~|f|. - -This routine is even more \quote {inner loopy} than |make_fraction|; the present -implementation consumes almost 20\pct! of \MP's computation time during typical -jobs, so a machine-language substitute is advisable. @^inner loop@> @^system -dependencies@> - -@c void mp_decimal_take_fraction (MP mp, decNumber *ret, decNumber *p, decNumber *q) { (void) mp; @@ -1081,39 +1003,18 @@ void mp_decimal_number_take_fraction (MP mp, mp_number *ret, mp_number *p, mp_nu mp_decimal_take_fraction(mp, ret->data.num, p->data.num, q->data.num); } -@ When we want to multiply something by a |scaled| quantity, we use a scheme -analogous to |take_fraction| but with a different scaling. Given positive -operands, |take_scaled| computes the quantity $p=\lfloor -qf/2^{16}+{1\over2}\rfloor$. - -Once again it is a good idea to use a machine-language replacement if possible; -otherwise |take_scaled| will use more than 2\pct! of the running time when the -Computer Modern fonts are being generated. @^inner loop@> - -@c void mp_decimal_number_take_scaled (MP mp, mp_number *ret, mp_number *p_orig, mp_number *q_orig) { (void) mp; decNumberMultiply(ret->data.num, p_orig->data.num, q_orig->data.num, &mp_decimal_data.set); } -@ For completeness, there's also |make_scaled|, which computes a quotient as a -|scaled| number instead of as a |fraction|. In other words, the result is -$\lfloor2^{16}p/q+{1\over2}\rfloor$, if the operands are positive. 
\ (This -procedure is not used especially often, so it is not part of \MP's inner loop.) - -@c void mp_decimal_number_make_scaled (MP mp, mp_number *ret, mp_number *p_orig, mp_number *q_orig) { decNumberDivide(ret->data.num, p_orig->data.num, q_orig->data.num, &mp_decimal_data.set); mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ @* Scanning numbers in the input. - -The definitions below are temporarily here - -@ @c void mp_wrapup_numeric_token (MP mp, unsigned char *start, unsigned char *stop) { decNumber result; @@ -1161,7 +1062,6 @@ void mp_wrapup_numeric_token (MP mp, unsigned char *start, unsigned char *stop) set_cur_cmd((mp_variable_type) mp_numeric_command); } -@ @c static void find_exponent (MP mp) { if (mp->buffer[mp->cur_input.loc_field] == 'e' @@ -1196,9 +1096,6 @@ void mp_decimal_scan_fractional_token (MP mp, int n) mp_wrapup_numeric_token(mp, start, stop); } -@ We just have to collect bytes. - -@c void mp_decimal_scan_numeric_token (MP mp, int n) { unsigned char *start = &mp->buffer[mp->cur_input.loc_field -1]; @@ -1218,32 +1115,6 @@ void mp_decimal_scan_numeric_token (MP mp, int n) mp_wrapup_numeric_token(mp, start, stop); } -@ The |scaled| quantities in \MP\ programs are generally supposed to be less than -$2^{12}$ in absolute value, so \MP\ does much of its internal arithmetic with -28~significant bits of precision. A |fraction| denotes a scaled integer whose -binary point is assumed to be 28 bit positions from the right. - -@ Here is a typical example of how the routines above can be used. It computes -the function $${1\over3\tau}f(\theta,\phi)= -{\tau^{-1}\bigl(2+\sqrt2\,(\sin\theta-{1\over16}\sin\phi) -(\sin\phi-{1\over16}\sin\theta)(\cos\theta-\cos\phi)\bigr)\over -3\,\bigl(1+{1\over2}(\sqrt5-1)\cos\theta+{1\over2}(3-\sqrt5\,)\cos\phi\bigr)},$$ -where $\tau$ is a |scaled| \quote {tension} parameter. 
This is \MP's magic fudge -factor for placing the first control point of a curve that starts at an angle -$\theta$ and ends at an angle $\phi$ from the straight path. (Actually, if the -stated quantity exceeds 4, \MP\ reduces it to~4.) - -The trigonometric quantity to be multiplied by $\sqrt2$ is less than $\sqrt2$. -(It's a sum of eight terms whose absolute values can be bounded using relations -such as $\sin\theta\cos\theta|1\over2|$.) Thus the numerator is positive; and -since the tension $\tau$ is constrained to be at least $3\over4$, the numerator -is less than $16\over3$. The denominator is nonnegative and at most~6. - -The angles $\theta$ and $\phi$ are given implicitly in terms of |fraction| -arguments |st|, |ct|, |sf|, and |cf|, representing $\sin\theta$, $\cos\theta$, -$\sin\phi$, and $\cos\phi$, respectively. - -@c void mp_decimal_velocity (MP mp, mp_number *ret, mp_number *st, mp_number *ct, mp_number *sf, mp_number *cf, mp_number *t) { decNumber acc, num, denom; /* registers for intermediate calculations */ @@ -1300,12 +1171,6 @@ void mp_decimal_velocity (MP mp, mp_number *ret, mp_number *st, mp_number *ct, m mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ The following somewhat different subroutine tests rigorously if $ab$ is greater -than, equal to, or less than~$cd$, given integers $(a,b,c,d)$. In most cases a -quick decision is reached. The result is $+1$, 0, or~$-1$ in the three respective -cases. 
- -@c int mp_ab_vs_cd (mp_number *a_orig, mp_number *b_orig, mp_number *c_orig, mp_number *d_orig) { decNumber a, b, c, d; @@ -1325,33 +1190,6 @@ int mp_ab_vs_cd (mp_number *a_orig, mp_number *b_orig, mp_number *c_orig, mp_num } } -@ Now here's a subroutine that's handy for all sorts of path computations: Given -a quadratic polynomial $B(a,b,c;t)$, the |crossing_point| function returns the -unique |fraction| value |t| between 0 and~1 at which $B(a,b,c;t)$ changes from -positive to negative, or returns |t=fraction_one+1| if no such value exists. If -|a<0| (so that $B(a,b,c;t)$ is already negative at |t=0|), |crossing_point| -returns the value zero. - -The general bisection method is quite simple when $n=2$, hence |crossing_point| -does not take much time. At each stage in the recursion we have a subinterval -defined by |l| and~|j| such that $B(a,b,c;2^{-l}(j+t))=B(x_0,x_1,x_2;t)$, and we -want to \quote {zero in} on the subinterval where $x_0\G0$ and $\min(x_1,x_2)<0$. - -It is convenient for purposes of calculation to combine the values of |l| and~|j| -in a single variable $d=2^l+j$, because the operation of bisection then -corresponds simply to doubling $d$ and possibly adding~1. Furthermore it proves -to be convenient to modify our previous conventions for bisection slightly, -maintaining the variables $X_0=2^lx_0$, $X_1=2^l(x_0-x_1)$, and -$X_2=2^l(x_1-x_2)$. With these variables the conditions $x_0\ge0$ and -$\min(x_1,x_2)<0$ are equivalent to $\max(X_1,X_1+X_2)>X_0\ge0$. - -The following code maintains the invariant relations -$0\L|x0|<\max(|x1|,|x1|+|x2|)$, $\vert|x1|\vert<2^{30}$, $\vert|x2|\vert<2^{30}$; -it has been constructed in such a way that no arithmetic overflow will occur if -the inputs satisfy $a<2^{30}$, $\vert a-b\vert<2^{30}$, and $\vert -b-c\vert<2^{30}$. 
- -@c static void mp_decimal_crossing_point (MP mp, mp_number *ret, mp_number *aa, mp_number *bb, mp_number *cc) { decNumber a, b, c; @@ -1426,20 +1264,11 @@ static void mp_decimal_crossing_point (MP mp, mp_number *ret, mp_number *aa, mp_ mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ We conclude this set of elementary routines with some simple rounding and -truncation operations. - -@ |round_unscaled| rounds a |scaled| and converts it to |int| - -@c int mp_round_unscaled(mp_number *x_orig) { return (int) lround(mp_number_to_double(x_orig)); } -@ |number_floor| floors a number - -@c void mp_number_floor(mp_number *i) { int round = mp_decimal_data.set.round; @@ -1448,20 +1277,12 @@ void mp_number_floor(mp_number *i) mp_decimal_data.set.round = round; } -@ |fraction_to_scaled| rounds a |fraction| and converts it to |scaled| - -@c void mp_decimal_fraction_to_round_scaled(mp_number *x_orig) { x_orig->type = mp_scaled_type; decNumberDivide(x_orig->data.num, x_orig->data.num, &mp_decimal_data.fraction_multiplier_decNumber, &mp_decimal_data.set); } -@* Algebraic and transcendental functions. \MP\ computes all of the necessary -special functions from scratch, without relying on |real| arithmetic or system -subroutines for sines, cosines, etc. - -@ @c void mp_decimal_square_rt (MP mp, mp_number *ret, mp_number *x_orig) { decNumber x; @@ -1487,9 +1308,6 @@ void mp_decimal_square_rt (MP mp, mp_number *ret, mp_number *x_orig) mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ Pythagorean addition $\psqrt{a^2+b^2}$ is implemented by a quick hack - -@c void mp_decimal_pyth_add (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b_orig) { decNumber a, b; @@ -1509,9 +1327,6 @@ void mp_decimal_pyth_add (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ Here is a similar algorithm for $\psqrt{a^2-b^2}$. Same quick hack, also. 
- -@c void mp_decimal_pyth_sub (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b_orig) { decNumber a, b; @@ -1545,19 +1360,12 @@ void mp_decimal_pyth_sub (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ Power $a^b}$: - -@c void mp_decimal_power_of (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b_orig) { decNumberPower(ret->data.num, a_orig->data.num, b_orig->data.num, &mp_decimal_data.set); mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ Here is the routine that calculates $2^8$ times the natural logarithm of a -|scaled| quantity; - -@c void mp_decimal_m_log (MP mp, mp_number *ret, mp_number *x_orig) { if (! decNumberIsPositive((decNumber *) x_orig->data.num)) { @@ -1583,10 +1391,6 @@ void mp_decimal_m_log (MP mp, mp_number *ret, mp_number *x_orig) mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ Conversely, the exponential routine calculates $\exp(x/2^8)$, when |x| is -|scaled|. - -@c void mp_decimal_m_exp (MP mp, mp_number *ret, mp_number *x_orig) { decNumber temp, twofivesix; @@ -1606,10 +1410,6 @@ void mp_decimal_m_exp (MP mp, mp_number *ret, mp_number *x_orig) mp_decimal_data.limitedset.status = 0; } -@ Given integers |x| and |y|, not both zero, the |n_arg| function returns the -|angle| whose tangent points in the direction $(x,y)$. - -@c void mp_decimal_n_arg (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_orig) { if (decNumberIsZero((decNumber *) x_orig->data.num) && decNumberIsZero((decNumber *) y_orig->data.num)) { @@ -1635,14 +1435,6 @@ void mp_decimal_n_arg (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_or mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ Conversely, the |n_sin_cos| routine takes an |angle| and produces the sine and -cosine of that angle. The results of this routine are stored in global integer -variables |n_sin| and |n_cos|. 
- -First, we need a decNumber function that calculates sines and cosines using the -Taylor series. This function is fairly optimized. - -@c static void sinecosine(decNumber *theangle, decNumber *c, decNumber *s) { int prec = mp_decimal_data.set.digits/2; @@ -1688,9 +1480,6 @@ static void sinecosine(decNumber *theangle, decNumber *c, decNumber *s) } } -@ Calculate sines and cosines. - -@c void mp_decimal_sin_cos (MP mp, mp_number *z_orig, mp_number *n_cos, mp_number *n_sin) { decNumber rad; @@ -1717,9 +1506,6 @@ void mp_decimal_sin_cos (MP mp, mp_number *z_orig, mp_number *n_cos, mp_number * mp_decnumber_check(mp, n_sin->data.num, &mp_decimal_data.set); } -@ This is the {\tt http://www-cs-faculty.stanford.edu/~uno/programs/rng.c} -with small cosmetic modifications. - @c # define KK 100 /* the long lag */ # define LL 37 /* the short lag */ @@ -1743,10 +1529,6 @@ static mp_decimal_random_info mp_decimal_random_data = { .ptr = &mp_decimal_random_data.dummy }; -/* put n new random numbers in aa */ -/* long aa[] destination */ -/* int n array length (must be at least KK) */ - static void ran_array(long aa[],int n) { int i, j; @@ -1764,14 +1546,6 @@ static void ran_array(long aa[],int n) } } -/* - the following routines are from exercise 3.6--15L after calling |ran_start|, - get new randoms by, e.g., "|x=ran_arr_next()|" - - Do this before using |ran_array|, |long seed| selector for different - streams. -*/ - static void ran_start(long seed) { int t, j; @@ -1826,8 +1600,6 @@ static void ran_start(long seed) mp_decimal_random_data.ptr = &mp_decimal_random_data.started; } -# define ran_arr_next() (*mp_decimal_random_data.ptr>=0? *mp_decimal_random_data.ptr++: ran_arr_cycle()) - static long ran_arr_cycle(void) { if (mp_decimal_random_data.ptr == &mp_decimal_random_data.dummy) { @@ -1840,9 +1612,6 @@ static long ran_arr_cycle(void) return mp_decimal_random_data.buf[0]; } -@ To initialize the |randoms| table, we call the following routine. 
- -@c void mp_init_randoms (MP mp, int seed) { int k = 1; /* more or less random integers */ @@ -1866,21 +1635,16 @@ void mp_init_randoms (MP mp, int seed) ran_start((unsigned long) seed); } -@ @c void mp_decimal_number_modulo(mp_number *a, mp_number *b) { decNumberRemainder(a->data.num, a->data.num, b->data.num, &mp_decimal_data.set); } -@ To consume a random integer for the uniform generator, the program below will -say |next_unif_random|. - -@c static void mp_next_unif_random (MP mp, mp_number *ret) { decNumber a; decNumber b; - unsigned long int op = (unsigned)ran_arr_next(); + unsigned long int op = (unsigned) (*mp_decimal_random_data.ptr>=0? *mp_decimal_random_data.ptr++: ran_arr_cycle()); (void) mp; decNumberFromInt32(&a, op); decNumberFromInt32(&b, MM); @@ -1889,9 +1653,6 @@ static void mp_next_unif_random (MP mp, mp_number *ret) mp_decnumber_check(mp, ret->data.num, &mp_decimal_data.set); } -@ To consume a random fraction, the program below will say |next_random|. - -@c static void mp_next_random (MP mp, mp_number *ret) { if (mp->j_random == 0) { @@ -1902,14 +1663,6 @@ static void mp_next_random (MP mp, mp_number *ret) mp_number_clone(ret, &(mp->randoms[mp->j_random])); } -@ To produce a uniform random number in the range |0<=u<x| or |0>=u>x| or -|0=u=x|, given a |scaled| value~|x|, we proceed as shown here. - -Note that the call of |take_fraction| will produce the values 0 and~|x| with -about half the probability that it will produce any other particular values -between 0 and~|x|, because it rounds its answers. - -@c static void mp_decimal_m_unif_rand (MP mp, mp_number *ret, mp_number *x_orig) { mp_number x, abs_x, u, y; /* |y| is trial value */ @@ -1932,11 +1685,6 @@ static void mp_decimal_m_unif_rand (MP mp, mp_number *ret, mp_number *x_orig) mp_free_number(mp, &u); } -@ Finally, a normal deviate with mean zero and unit standard deviation can -readily be obtained with the ratio method (Algorithm 3.4.1R in {\sl The Art of -Computer Programming}). 
- -@c static void mp_decimal_m_norm_rand (MP mp, mp_number *ret) { mp_number abs_x, u, r, la, xa; diff --git a/source/luametatex/source/mp/mpw/mpmathdouble.w b/source/luametatex/source/mp/mpw/mpmathdouble.w index 6f0f8df99..f5a91df75 100644 --- a/source/luametatex/source/mp/mpw/mpmathdouble.w +++ b/source/luametatex/source/mp/mpw/mpmathdouble.w @@ -71,7 +71,6 @@ First, here are some very important constants. @ Here are the functions that are static as they are not used elsewhere. @<Declarations@>= -static int mp_ab_vs_cd (mp_number *a, mp_number *b, mp_number *c, mp_number *d); static void mp_allocate_abs (MP mp, mp_number *n, mp_number_type t, mp_number *v); static void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v); static void mp_allocate_double (MP mp, mp_number *n, double v); @@ -306,7 +305,7 @@ math_data *mp_initialize_double_math(MP mp) math->md_print = mp_double_print_number; math->md_tostring = mp_double_number_tostring; math->md_modulo = mp_number_modulo; - math->md_ab_vs_cd = mp_ab_vs_cd; + math->md_ab_vs_cd = mp_double_ab_vs_cd; math->md_crossing_point = mp_double_crossing_point; math->md_scan_numeric = mp_double_scan_numeric_token; math->md_scan_fractional = mp_double_scan_fractional_token; @@ -353,7 +352,7 @@ void mp_free_double_math (MP mp) @ Creating and destroying |mp_number| objects -@ @c +@c void mp_allocate_number (MP mp, mp_number *n, mp_number_type t) { (void) mp; @@ -361,7 +360,6 @@ void mp_allocate_number (MP mp, mp_number *n, mp_number_type t) n->type = t; } -@ @c void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v) { (void) mp; @@ -369,7 +367,6 @@ void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v) n->data.dval = v->data.dval; } -@ @c void mp_allocate_abs (MP mp, mp_number *n, mp_number_type t, mp_number *v) { (void) mp; @@ -377,7 +374,6 @@ void mp_allocate_abs (MP mp, mp_number *n, mp_number_type t, mp_number *v) n->data.dval = fabs(v->data.dval); } -@ @c void 
mp_allocate_double (MP mp, mp_number *n, double v) { (void) mp; @@ -385,7 +381,6 @@ void mp_allocate_double (MP mp, mp_number *n, double v) n->data.dval = v; } -@ @c void mp_free_number (MP mp, mp_number *n) { (void) mp; @@ -901,46 +896,16 @@ quick decision is reached. The result is $+1$, 0, or~$-1$ in the three respectiv cases. @c -int mp_ab_vs_cd (mp_number *a_orig, mp_number *b_orig, mp_number *c_orig, mp_number *d_orig) +int mp_double_ab_vs_cd (mp_number *a_orig, mp_number *b_orig, mp_number *c_orig, mp_number *d_orig) { - return mp_double_ab_vs_cd(a_orig, b_orig, c_orig, d_orig); -} - -@ @<Reduce to the case that |a...@>= -if (a < 0) { - a = -a; - b = -b; -} -if (c < 0) { - c = -c; - d = -d; -} -if (d <= 0) { - if (b >= 0) { - if ((a == 0 || b == 0) && (c == 0 || d == 0)) { - ret->data.dval = 0; - } else { - ret->data.dval = 1; - } - goto RETURN; - } if (d == 0) { - ret->data.dval = (a == 0 ? 0 : -1); - goto RETURN; - } else - q = a; - a = c; - c = q; - q = -b; - b = -d; - d = q; - } -} else if (b <= 0) { - if (b < 0 && a > 0) { - ret->data.dval = -1; - return; - } else - ret->data.dval = (c == 0 ? 0 : -1); - goto RETURN; + double ab = a_orig->data.dval * b_orig->data.dval; + double cd = c_orig->data.dval * d_orig->data.dval; + if (ab > cd) { + return 1; + } else if (ab < cd) { + return -1; + } else { + return 0; } } @@ -1210,8 +1175,9 @@ void mp_double_n_arg (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_ori } else { ret->type = mp_angle_type; ret->data.dval = atan2(y_orig->data.dval, x_orig->data.dval) * (180.0 / PI) * angle_multiplier; - if (ret->data.dval == -0.0) - ret->data.dval = 0.0; + if (ret->data.dval == -0.0) { + ret->data.dval = 0.0; + } } } @@ -1249,7 +1215,8 @@ void mp_double_sin_cos (MP mp, mp_number *z_orig, mp_number *n_cos, mp_number *n } @ This is the http://www-cs-faculty.stanford.edu/~uno/programs/rng.c with small -cosmetic modifications. +cosmetic modifications. 
The code is only documented here as the other non scaled +number models use the same method. @c # define KK 100 /* the long lag */ @@ -1352,8 +1319,6 @@ static void mp_double_aux_ran_start(long seed) mp_double_random_data.ptr = &mp_double_random_data.started; } -# define mp_double_aux_ran_arr_next() (*mp_double_random_data.ptr>=0? *mp_double_random_data.ptr++: mp_double_aux_ran_arr_cycle()) - static long mp_double_aux_ran_arr_cycle(void) { if (mp_double_random_data.ptr == &mp_double_random_data.dummy) { @@ -1394,16 +1359,6 @@ void mp_init_randoms (MP mp, int seed) } @ Here |frac| contains what's beyond the |.|. @c -/* -static double modulus(double left, double right) -{ - double quota = left / right; - double tmp; - double frac = modf(quota, &tmp); - frac *= right; - return frac; -} -*/ void mp_number_modulo(mp_number *a, mp_number *b) { @@ -1417,7 +1372,7 @@ say |next_unif_random|. @c static void mp_next_unif_random (MP mp, mp_number *ret) { - unsigned long int op = (unsigned) mp_double_aux_ran_arr_next(); + unsigned long int op = (unsigned) (*mp_double_random_data.ptr>=0? *mp_double_random_data.ptr++: mp_double_aux_ran_arr_cycle()); double a = op / (MM * 1.0); (void) mp; ret->data.dval = a; @@ -1437,11 +1392,10 @@ static void mp_next_random (MP mp, mp_number *ret) } @ To produce a uniform random number in the range |0<=u<x| or |0>=u>x| or -|0=u=x|, given a |scaled| value~|x|, we proceed as shown here. - -Note that the call of |take_fraction| will produce the values 0 and~|x| with -about half the probability that it will produce any other particular values -between 0 and~|x|, because it rounds its answers. +|0=u=x|, given a |scaled| value~|x|, we proceed as shown here. Note that the +call of |take_fraction| will produce the values 0 and~|x| with about half +the probability that it will produce any other particular values between 0 +and~|x|, because it rounds its answers. 
@c static void mp_double_m_unif_rand (MP mp, mp_number *ret, mp_number *x_orig) @@ -1504,20 +1458,40 @@ static void mp_double_m_norm_rand (MP mp, mp_number *ret) mp_free_number(mp, &u); } -@ The following subroutine is used only in |norm_rand| and tests if $ab$ is -greater than, equal to, or less than~$cd$. The result is $+1$, 0, or~$-1$ in the -three respective cases. - -@c -int mp_double_ab_vs_cd (mp_number *a_orig, mp_number *b_orig, mp_number *c_orig, mp_number *d_orig) -{ - double ab = a_orig->data.dval * b_orig->data.dval; - double cd = c_orig->data.dval * d_orig->data.dval; - if (ab > cd) { - return 1; - } else if (ab < cd) { - return -1; - } else { - return 0; +@ @<Reduce to the case that |a...@>= +if (a < 0) { + a = -a; + b = -b; +} +if (c < 0) { + c = -c; + d = -d; +} +if (d <= 0) { + if (b >= 0) { + if ((a == 0 || b == 0) && (c == 0 || d == 0)) { + ret->data.dval = 0; + } else { + ret->data.dval = 1; + } + goto RETURN; + } if (d == 0) { + ret->data.dval = (a == 0 ? 0 : -1); + goto RETURN; + } else + q = a; + a = c; + c = q; + q = -b; + b = -d; + d = q; + } +} else if (b <= 0) { + if (b < 0 && a > 0) { + ret->data.dval = -1; + return; + } else + ret->data.dval = (c == 0 ? 0 : -1); + goto RETURN; } } diff --git a/source/luametatex/source/mp/mpw/mpmathposit.w b/source/luametatex/source/mp/mpw/mpmathposit.w new file mode 100644 index 000000000..8477ef27b --- /dev/null +++ b/source/luametatex/source/mp/mpw/mpmathposit.w @@ -0,0 +1,1428 @@ +% This file is part of MetaPost. The MetaPost program is in the public domain. + +@ Introduction. + +TODO: collect constants like decimal +TODO: share scanners and random + +@c +# include "mpconfig.h" +# include "mpmathposit.h" + +@h + +@ @c +@<Declarations@> + +@ @(mpmathposit.h@>= +# ifndef MPMATHPOSIT_H +# define MPMATHPOSIT_H 1 + +# include "mp.h" +# include "softposit.h" + +math_data *mp_initialize_posit_math (MP mp); + +# endif + +@* Math initialization. + +First, here are some very important constants. 
+ +@d mp_fraction_multiplier 4096 +@d mp_angle_multiplier 16 +@d mp_warning_limit pow(2.0,52) + +@d odd(A) (abs(A)%2==1) + +@d two_to_the(A) (1<<(unsigned)(A)) +@d set_cur_cmd(A) mp->cur_mod_->command = (A) +@d set_cur_mod(A) mp->cur_mod_->data.n.data.pval = (A) + +@ Here are the functions that are static as they are not used elsewhere. + +@<Declarations@>= +static void mp_allocate_abs (MP mp, mp_number *n, mp_number_type t, mp_number *v); +static void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v); +static void mp_allocate_double (MP mp, mp_number *n, double v); +static void mp_allocate_number (MP mp, mp_number *n, mp_number_type t); +static int mp_posit_ab_vs_cd (mp_number *a, mp_number *b, mp_number *c, mp_number *d); +static void mp_posit_abs (mp_number *A); +static void mp_posit_crossing_point (MP mp, mp_number *ret, mp_number *a, mp_number *b, mp_number *c); +static void mp_posit_fraction_to_round_scaled (mp_number *x); +static void mp_posit_m_exp (MP mp, mp_number *ret, mp_number *x_orig); +static void mp_posit_m_log (MP mp, mp_number *ret, mp_number *x_orig); +static void mp_posit_m_norm_rand (MP mp, mp_number *ret); +static void mp_posit_m_unif_rand (MP mp, mp_number *ret, mp_number *x_orig); +static void mp_posit_n_arg (MP mp, mp_number *ret, mp_number *x, mp_number *y); +static void mp_posit_number_make_fraction (MP mp, mp_number *r, mp_number *p, mp_number *q); +static void mp_posit_number_make_scaled (MP mp, mp_number *r, mp_number *p, mp_number *q); +static void mp_posit_number_take_fraction (MP mp, mp_number *r, mp_number *p, mp_number *q); +static void mp_posit_number_take_scaled (MP mp, mp_number *r, mp_number *p, mp_number *q); +static void mp_posit_power_of (MP mp, mp_number *r, mp_number *a, mp_number *b); +static void mp_posit_print_number (MP mp, mp_number *n); +static void mp_posit_pyth_add (MP mp, mp_number *r, mp_number *a, mp_number *b); +static void mp_posit_pyth_sub (MP mp, mp_number *r, mp_number *a, mp_number 
*b); +static void mp_posit_scan_fractional_token (MP mp, int n); +static void mp_posit_scan_numeric_token (MP mp, int n); +static void mp_posit_set_precision (MP mp); +static void mp_posit_sin_cos (MP mp, mp_number *z_orig, mp_number *n_cos, mp_number *n_sin); +static void mp_posit_slow_add (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_orig); +static void mp_posit_square_rt (MP mp, mp_number *ret, mp_number *x_orig); +static void mp_posit_velocity (MP mp, mp_number *ret, mp_number *st, mp_number *ct, mp_number *sf, mp_number *cf, mp_number *t); +static void mp_free_posit_math (MP mp); +static void mp_free_number (MP mp, mp_number *n); +static void mp_init_randoms (MP mp, int seed); +static void mp_number_abs_clone (mp_number *A, mp_number *B); +static void mp_number_add (mp_number *A, mp_number *B); +static void mp_number_add_scaled (mp_number *A, int B); /* also for negative B */ +static void mp_number_angle_to_scaled (mp_number *A); +static void mp_number_clone (mp_number *A, mp_number *B); +static void mp_number_divide_int (mp_number *A, int B); +static void mp_number_double (mp_number *A); +static int mp_number_equal (mp_number *A, mp_number *B); +static void mp_number_floor (mp_number *i); +static void mp_number_fraction_to_scaled (mp_number *A); +static int mp_number_greater (mp_number *A, mp_number *B); +static void mp_number_half (mp_number *A); +static int mp_number_less (mp_number *A, mp_number *B); +static void mp_number_modulo (mp_number *a, mp_number *b); +static void mp_number_multiply_int (mp_number *A, int B); +static void mp_number_negate (mp_number *A); +static void mp_number_negated_clone (mp_number *A, mp_number *B); +static int mp_number_nonequalabs (mp_number *A, mp_number *B); +static int mp_number_odd (mp_number *A); +static void mp_number_scaled_to_angle (mp_number *A); +static void mp_number_scaled_to_fraction (mp_number *A); +static void mp_number_subtract (mp_number *A, mp_number *B); +static void mp_number_swap (mp_number *A, 
mp_number *B); +static int mp_number_to_boolean (mp_number *A); +static double mp_number_to_double (mp_number *A); +static int mp_number_to_int (mp_number *A); +static int mp_number_to_scaled (mp_number *A); +static int mp_round_unscaled (mp_number *x_orig); +static void mp_set_posit_from_addition (mp_number *A, mp_number *B, mp_number *C); +static void mp_set_posit_from_boolean (mp_number *A, int B); +static void mp_set_posit_from_div (mp_number *A, mp_number *B, mp_number *C); +static void mp_set_posit_from_double (mp_number *A, double B); +static void mp_set_posit_from_int (mp_number *A, int B); +static void mp_set_posit_from_int_div (mp_number *A, mp_number *B, int C); +static void mp_set_posit_from_int_mul (mp_number *A, mp_number *B, int C); +static void mp_set_posit_from_mul (mp_number *A, mp_number *B, mp_number *C); +static void mp_set_posit_from_of_the_way (MP mp, mp_number *A, mp_number *t, mp_number *B, mp_number *C); +static void mp_set_posit_from_scaled (mp_number *A, int B); +static void mp_set_posit_from_subtraction (mp_number *A, mp_number *B, mp_number *C); +static void mp_set_posit_half_from_addition (mp_number *A, mp_number *B, mp_number *C); +static void mp_set_posit_half_from_subtraction (mp_number *A, mp_number *B, mp_number *C); +static void mp_wrapup_numeric_token (MP mp, unsigned char *start, unsigned char *stop); +static char *mp_posit_number_tostring (MP mp, mp_number *n); + +typedef struct mp_posit_info { + posit_t unity; + posit_t zero; + posit_t one; + posit_t two; + posit_t three; + posit_t four; + posit_t five; + posit_t eight; + posit_t seven; + posit_t sixteen; + posit_t half_unit; + posit_t minusone; + posit_t three_quarter_unit; + posit_t d16; + posit_t d64; + posit_t d256; + posit_t d4096; + posit_t d65536; + posit_t dp90; + posit_t dp180; + posit_t dp270; + posit_t dp360; + posit_t dm90; + posit_t dm180; + posit_t dm270; + posit_t dm360; + posit_t fraction_multiplier; + posit_t negative_fraction_multiplier; /* todo: also in 
decimal */ + posit_t angle_multiplier; + posit_t fraction_one; + posit_t fraction_two; + posit_t fraction_three; + posit_t fraction_four; + posit_t fraction_half; + posit_t fraction_one_and_half; + posit_t one_eighty_degrees; + posit_t negative_one_eighty_degrees; + posit_t three_sixty_degrees; + posit_t no_crossing; + posit_t one_crossing; + posit_t zero_crossing; + posit_t error_correction; + posit_t pi; + posit_t pi_divided_by_180; + posit_t epsilon; + posit_t EL_GORDO; + posit_t negative_EL_GORDO; + posit_t one_third_EL_GORDO; + posit_t coef; + posit_t coef_bound; + posit_t scaled_threshold; + posit_t fraction_threshold; + posit_t equation_threshold; + posit_t near_zero_angle; + posit_t p_over_v_threshold; + posit_t warning_limit; + posit_t sqrt_two_mul_fraction_one; + posit_t sqrt_five_minus_one_mul_fraction_one_and_half; + posit_t three_minus_sqrt_five_mul_fraction_one_and_half; + posit_t d180_divided_by_pi_mul_angle; + int initialized; +} mp_posit_info; + +mp_posit_info mp_posit_data = { + .initialized = 0, +}; + +inline static posit_t mp_posit_make_fraction (posit_t p, posit_t q) { return posit_mul(posit_div(p,q), mp_posit_data.fraction_multiplier); } +inline static posit_t mp_posit_take_fraction (posit_t p, posit_t q) { return posit_div(posit_mul(p,q), mp_posit_data.fraction_multiplier); } +inline static posit_t mp_posit_make_scaled (posit_t p, posit_t q) { return posit_div(p,q); } + +math_data *mp_initialize_posit_math(MP mp) +{ + math_data *math = (math_data *) mp_memory_allocate(sizeof(math_data)); + /* alloc */ + if (! 
mp_posit_data.initialized) { + mp_posit_data.initialized = 1; + mp_posit_data.unity = integer_to_posit(1); + mp_posit_data.zero = integer_to_posit(0); + mp_posit_data.one = integer_to_posit(1); + mp_posit_data.two = integer_to_posit(2); + mp_posit_data.three = integer_to_posit(3); + mp_posit_data.four = integer_to_posit(4); + mp_posit_data.five = integer_to_posit(5); + mp_posit_data.seven = integer_to_posit(7); + mp_posit_data.eight = integer_to_posit(8); + mp_posit_data.sixteen = integer_to_posit(16); + mp_posit_data.dp90 = integer_to_posit(90); + mp_posit_data.dp180 = integer_to_posit(180); + mp_posit_data.dp270 = integer_to_posit(270); + mp_posit_data.dp360 = integer_to_posit(360); + mp_posit_data.dm90 = integer_to_posit(-90); + mp_posit_data.dm180 = integer_to_posit(-180); + mp_posit_data.dm270 = integer_to_posit(-270); + mp_posit_data.dm360 = integer_to_posit(-360); + mp_posit_data.d16 = integer_to_posit(16); + mp_posit_data.d64 = integer_to_posit(64); + mp_posit_data.d256 = integer_to_posit(256); + mp_posit_data.d4096 = integer_to_posit(4096); + mp_posit_data.d65536 = integer_to_posit(65536); + mp_posit_data.minusone = posit_neg(mp_posit_data.one); + mp_posit_data.half_unit = posit_div(mp_posit_data.unity, mp_posit_data.two); + mp_posit_data.three_quarter_unit = posit_mul(mp_posit_data.three, posit_div(mp_posit_data.unity,mp_posit_data.four)); + mp_posit_data.fraction_multiplier = integer_to_posit(mp_fraction_multiplier); + mp_posit_data.negative_fraction_multiplier = posit_neg(mp_posit_data.fraction_multiplier); + mp_posit_data.angle_multiplier = integer_to_posit(mp_angle_multiplier); + mp_posit_data.fraction_one = mp_posit_data.fraction_multiplier; + mp_posit_data.fraction_two = posit_mul(mp_posit_data.fraction_multiplier, mp_posit_data.two); + mp_posit_data.fraction_three = posit_mul(mp_posit_data.fraction_multiplier, mp_posit_data.three); + mp_posit_data.fraction_four = posit_mul(mp_posit_data.fraction_multiplier, mp_posit_data.four); + 
mp_posit_data.fraction_half = posit_div(mp_posit_data.fraction_multiplier, mp_posit_data.two); + mp_posit_data.fraction_one_and_half = posit_add(mp_posit_data.fraction_multiplier, mp_posit_data.fraction_half); + mp_posit_data.one_eighty_degrees = posit_mul(mp_posit_data.angle_multiplier, mp_posit_data.dp180); + mp_posit_data.negative_one_eighty_degrees = posit_mul(mp_posit_data.angle_multiplier, mp_posit_data.dm180); + mp_posit_data.three_sixty_degrees = posit_mul(mp_posit_data.angle_multiplier, mp_posit_data.dp360); + mp_posit_data.no_crossing = posit_add(mp_posit_data.fraction_multiplier, mp_posit_data.one); + mp_posit_data.one_crossing = mp_posit_data.fraction_multiplier; + mp_posit_data.zero_crossing = mp_posit_data.zero; + mp_posit_data.error_correction = double_to_posit(1E-12); /* debatable */ + mp_posit_data.warning_limit = posit_pow(mp_posit_data.two, integer_to_posit(52)); /* this is a large value that can just be expressed without loss of precision */ + mp_posit_data.pi = double_to_posit(3.1415926535897932384626433832795028841971); + mp_posit_data.pi_divided_by_180 = posit_div(mp_posit_data.pi, mp_posit_data.dp180); + mp_posit_data.epsilon = posit_pow(mp_posit_data.two, integer_to_posit(-52.0)); + mp_posit_data.EL_GORDO = posit_sub(posit_div(double_to_posit(DBL_MAX),mp_posit_data.two), mp_posit_data.one); /* the largest value that \MP\ likes. 
*/ + mp_posit_data.negative_EL_GORDO = posit_neg(mp_posit_data.EL_GORDO); + mp_posit_data.one_third_EL_GORDO = posit_div(mp_posit_data.EL_GORDO, mp_posit_data.three); + mp_posit_data.coef = posit_div(mp_posit_data.seven, mp_posit_data.three); /* |fraction| approximation to 7/3 */ + mp_posit_data.coef_bound = posit_mul(mp_posit_data.coef, mp_posit_data.fraction_multiplier); + mp_posit_data.scaled_threshold = double_to_posit(0.000122); /* a |scaled| coefficient less than this is zeroed */ + mp_posit_data.near_zero_angle = posit_mul(double_to_posit(0.0256), mp_posit_data.angle_multiplier); /* an angle of about 0.0256 */ + mp_posit_data.p_over_v_threshold = integer_to_posit(0x80000); + mp_posit_data.equation_threshold = double_to_posit(0.001); + + mp_posit_data.sqrt_two_mul_fraction_one = + posit_mul( + posit_sqrt(mp_posit_data.two), + mp_posit_data.fraction_one + ); + + mp_posit_data.sqrt_five_minus_one_mul_fraction_one_and_half = + posit_mul( + posit_mul( + mp_posit_data.three, + mp_posit_data.fraction_half + ), + posit_sub( + posit_sqrt(mp_posit_data.five), + mp_posit_data.one + ) + ); + + mp_posit_data.three_minus_sqrt_five_mul_fraction_one_and_half = + posit_mul( + posit_mul( + mp_posit_data.three, + mp_posit_data.fraction_half + ), + posit_sub( + mp_posit_data.three, + posit_sqrt(mp_posit_data.five) + ) + ); + + mp_posit_data.d180_divided_by_pi_mul_angle = + posit_mul( + posit_div( + mp_posit_data.dp180, + mp_posit_data.pi + ), + mp_posit_data.angle_multiplier + ); + + } + /* alloc */ + math->md_allocate = mp_allocate_number; + math->md_free = mp_free_number; + math->md_allocate_clone = mp_allocate_clone; + math->md_allocate_abs = mp_allocate_abs; + math->md_allocate_double = mp_allocate_double; + /* precission */ + mp_allocate_number(mp, &math->md_precision_default, mp_scaled_type); + mp_allocate_number(mp, &math->md_precision_max, mp_scaled_type); + mp_allocate_number(mp, &math->md_precision_min, mp_scaled_type); + /* here are the constants for |scaled| objects 
*/ + mp_allocate_number(mp, &math->md_epsilon_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_inf_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_negative_inf_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_warning_limit_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_one_third_inf_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_unity_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_two_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_three_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_half_unit_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_three_quarter_unit_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_zero_t, mp_scaled_type); + /* |fractions| */ + mp_allocate_number(mp, &math->md_arc_tol_k, mp_fraction_type); + mp_allocate_number(mp, &math->md_fraction_one_t, mp_fraction_type); + mp_allocate_number(mp, &math->md_fraction_half_t, mp_fraction_type); + mp_allocate_number(mp, &math->md_fraction_three_t, mp_fraction_type); + mp_allocate_number(mp, &math->md_fraction_four_t, mp_fraction_type); + /* |angles| */ + mp_allocate_number(mp, &math->md_three_sixty_deg_t, mp_angle_type); + mp_allocate_number(mp, &math->md_one_eighty_deg_t, mp_angle_type); + mp_allocate_number(mp, &math->md_negative_one_eighty_deg_t, mp_angle_type); + /* various approximations */ + mp_allocate_number(mp, &math->md_one_k, mp_scaled_type); + mp_allocate_number(mp, &math->md_sqrt_8_e_k, mp_scaled_type); + mp_allocate_number(mp, &math->md_twelve_ln_2_k, mp_fraction_type); + mp_allocate_number(mp, &math->md_coef_bound_k, mp_fraction_type); + mp_allocate_number(mp, &math->md_coef_bound_minus_1, mp_fraction_type); + mp_allocate_number(mp, &math->md_twelvebits_3, mp_scaled_type); + mp_allocate_number(mp, &math->md_twentysixbits_sqrt2_t, mp_fraction_type); + mp_allocate_number(mp, &math->md_twentyeightbits_d_t, mp_fraction_type); + mp_allocate_number(mp, &math->md_twentysevenbits_sqrt2_d_t, mp_fraction_type); + /* thresholds */ + 
mp_allocate_number(mp, &math->md_fraction_threshold_t, mp_fraction_type); + mp_allocate_number(mp, &math->md_half_fraction_threshold_t, mp_fraction_type); + mp_allocate_number(mp, &math->md_scaled_threshold_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_half_scaled_threshold_t, mp_scaled_type); + mp_allocate_number(mp, &math->md_near_zero_angle_t, mp_angle_type); + mp_allocate_number(mp, &math->md_p_over_v_threshold_t, mp_fraction_type); + mp_allocate_number(mp, &math->md_equation_threshold_t, mp_scaled_type); + /* initializations */ + math->md_precision_default.data.pval = posit_mul(mp_posit_data.d16, mp_posit_data.unity); + math->md_precision_max.data.pval = posit_mul(mp_posit_data.d16, mp_posit_data.unity); + math->md_precision_min.data.pval = posit_mul(mp_posit_data.d16, mp_posit_data.unity); + math->md_epsilon_t.data.pval = mp_posit_data.epsilon; + math->md_inf_t.data.pval = mp_posit_data.EL_GORDO; + math->md_negative_inf_t.data.pval = mp_posit_data.negative_EL_GORDO; + math->md_one_third_inf_t.data.pval = mp_posit_data.one_third_EL_GORDO; + math->md_warning_limit_t.data.pval = mp_posit_data.warning_limit; + math->md_unity_t.data.pval = mp_posit_data.unity; + math->md_two_t.data.pval = mp_posit_data.two; + math->md_three_t.data.pval = mp_posit_data.three; + math->md_half_unit_t.data.pval = mp_posit_data.half_unit; + math->md_three_quarter_unit_t.data.pval = mp_posit_data.three_quarter_unit; + math->md_arc_tol_k.data.pval = posit_div(mp_posit_data.unity, mp_posit_data.d4096); /* quit when change in arc length estimate reaches this */ + math->md_fraction_one_t.data.pval = mp_posit_data.fraction_one; + math->md_fraction_half_t.data.pval = mp_posit_data.fraction_half; + math->md_fraction_three_t.data.pval = mp_posit_data.fraction_three; + math->md_fraction_four_t.data.pval = mp_posit_data.fraction_four; + math->md_three_sixty_deg_t.data.pval = mp_posit_data.three_sixty_degrees; + math->md_one_eighty_deg_t.data.pval = mp_posit_data.one_eighty_degrees; + 
math->md_negative_one_eighty_deg_t.data.pval = mp_posit_data.negative_one_eighty_degrees; + math->md_one_k.data.pval = posit_div(mp_posit_data.one, mp_posit_data.d64); + math->md_sqrt_8_e_k.data.pval = double_to_posit(1.71552776992141359295); /* $2^{16}\sqrt{8/e} \approx 112428.82793$ */ + math->md_twelve_ln_2_k.data.pval = posit_mul(double_to_posit(8.31776616671934371292), mp_posit_data.d256); /* $2^{24}\cdot12\ln2 \approx139548959.6165 $ */ + math->md_twelvebits_3.data.pval = posit_div(integer_to_posit(1365), mp_posit_data.unity); /* $1365 \approx 2^{12}/3 $ */ + math->md_twentysixbits_sqrt2_t.data.pval = posit_div(integer_to_posit(94906266), mp_posit_data.d65536); /* $2^{26}\sqrt2 \approx 94906265.62 $ */ + math->md_twentyeightbits_d_t.data.pval = posit_div(integer_to_posit(35596755), mp_posit_data.d65536); /* $2^{28}d \approx 35596754.69 $ */ + math->md_twentysevenbits_sqrt2_d_t.data.pval = posit_div(integer_to_posit(25170707), mp_posit_data.d65536); /* $2^{27}\sqrt2\,d \approx 25170706.63 $ */ + math->md_coef_bound_k.data.pval = mp_posit_data.coef_bound; + math->md_coef_bound_minus_1.data.pval = posit_sub(mp_posit_data.coef_bound, posit_div(mp_posit_data.one, mp_posit_data.d65536)); + math->md_fraction_threshold_t.data.pval = double_to_posit(0.04096); /* a |fraction| coefficient less than this is zeroed */ + math->md_half_fraction_threshold_t.data.pval = posit_div(mp_posit_data.fraction_threshold, mp_posit_data.two); + math->md_scaled_threshold_t.data.pval = mp_posit_data.scaled_threshold; + math->md_half_scaled_threshold_t.data.pval = posit_div(mp_posit_data.scaled_threshold,mp_posit_data.two); + math->md_near_zero_angle_t.data.pval = mp_posit_data.near_zero_angle; + math->md_p_over_v_threshold_t.data.pval = mp_posit_data.p_over_v_threshold; + math->md_equation_threshold_t.data.pval = mp_posit_data.equation_threshold; + + /* functions */ + math->md_from_int = mp_set_posit_from_int; + math->md_from_boolean = mp_set_posit_from_boolean; + math->md_from_scaled = 
mp_set_posit_from_scaled; + math->md_from_double = mp_set_posit_from_double; + math->md_from_addition = mp_set_posit_from_addition; + math->md_half_from_addition = mp_set_posit_half_from_addition; + math->md_from_subtraction = mp_set_posit_from_subtraction; + math->md_half_from_subtraction = mp_set_posit_half_from_subtraction; + math->md_from_oftheway = mp_set_posit_from_of_the_way; + math->md_from_div = mp_set_posit_from_div; + math->md_from_mul = mp_set_posit_from_mul; + math->md_from_int_div = mp_set_posit_from_int_div; + math->md_from_int_mul = mp_set_posit_from_int_mul; + math->md_negate = mp_number_negate; + math->md_add = mp_number_add; + math->md_subtract = mp_number_subtract; + math->md_half = mp_number_half; + math->md_do_double = mp_number_double; + math->md_abs = mp_posit_abs; + math->md_clone = mp_number_clone; + math->md_negated_clone = mp_number_negated_clone; + math->md_abs_clone = mp_number_abs_clone; + math->md_swap = mp_number_swap; + math->md_add_scaled = mp_number_add_scaled; + math->md_multiply_int = mp_number_multiply_int; + math->md_divide_int = mp_number_divide_int; + math->md_to_boolean = mp_number_to_boolean; + math->md_to_scaled = mp_number_to_scaled; + math->md_to_double = mp_number_to_double; + math->md_to_int = mp_number_to_int; + math->md_odd = mp_number_odd; + math->md_equal = mp_number_equal; + math->md_less = mp_number_less; + math->md_greater = mp_number_greater; + math->md_nonequalabs = mp_number_nonequalabs; + math->md_round_unscaled = mp_round_unscaled; + math->md_floor_scaled = mp_number_floor; + math->md_fraction_to_round_scaled = mp_posit_fraction_to_round_scaled; + math->md_make_scaled = mp_posit_number_make_scaled; + math->md_make_fraction = mp_posit_number_make_fraction; + math->md_take_fraction = mp_posit_number_take_fraction; + math->md_take_scaled = mp_posit_number_take_scaled; + math->md_velocity = mp_posit_velocity; + math->md_n_arg = mp_posit_n_arg; + math->md_m_log = mp_posit_m_log; + math->md_m_exp = 
mp_posit_m_exp; + math->md_m_unif_rand = mp_posit_m_unif_rand; + math->md_m_norm_rand = mp_posit_m_norm_rand; + math->md_pyth_add = mp_posit_pyth_add; + math->md_pyth_sub = mp_posit_pyth_sub; + math->md_power_of = mp_posit_power_of; + math->md_fraction_to_scaled = mp_number_fraction_to_scaled; + math->md_scaled_to_fraction = mp_number_scaled_to_fraction; + math->md_scaled_to_angle = mp_number_scaled_to_angle; + math->md_angle_to_scaled = mp_number_angle_to_scaled; + math->md_init_randoms = mp_init_randoms; + math->md_sin_cos = mp_posit_sin_cos; + math->md_slow_add = mp_posit_slow_add; + math->md_sqrt = mp_posit_square_rt; + math->md_print = mp_posit_print_number; + math->md_tostring = mp_posit_number_tostring; + math->md_modulo = mp_number_modulo; + math->md_ab_vs_cd = mp_posit_ab_vs_cd; + math->md_crossing_point = mp_posit_crossing_point; + math->md_scan_numeric = mp_posit_scan_numeric_token; + math->md_scan_fractional = mp_posit_scan_fractional_token; + math->md_free_math = mp_free_posit_math; + math->md_set_precision = mp_posit_set_precision; + return math; +} + +void mp_posit_set_precision (MP mp) +{ + (void) mp; +} + +void mp_free_posit_math (MP mp) +{ + /* Is this list up to date? Also check elewhere. 
*/ + mp_free_number(mp, &(mp->math->md_three_sixty_deg_t)); + mp_free_number(mp, &(mp->math->md_one_eighty_deg_t)); + mp_free_number(mp, &(mp->math->md_negative_one_eighty_deg_t)); + mp_free_number(mp, &(mp->math->md_fraction_one_t)); + mp_free_number(mp, &(mp->math->md_zero_t)); + mp_free_number(mp, &(mp->math->md_half_unit_t)); + mp_free_number(mp, &(mp->math->md_three_quarter_unit_t)); + mp_free_number(mp, &(mp->math->md_unity_t)); + mp_free_number(mp, &(mp->math->md_two_t)); + mp_free_number(mp, &(mp->math->md_three_t)); + mp_free_number(mp, &(mp->math->md_one_third_inf_t)); + mp_free_number(mp, &(mp->math->md_inf_t)); + mp_free_number(mp, &(mp->math->md_negative_inf_t)); + mp_free_number(mp, &(mp->math->md_warning_limit_t)); + mp_free_number(mp, &(mp->math->md_one_k)); + mp_free_number(mp, &(mp->math->md_sqrt_8_e_k)); + mp_free_number(mp, &(mp->math->md_twelve_ln_2_k)); + mp_free_number(mp, &(mp->math->md_coef_bound_k)); + mp_free_number(mp, &(mp->math->md_coef_bound_minus_1)); + mp_free_number(mp, &(mp->math->md_fraction_threshold_t)); + mp_free_number(mp, &(mp->math->md_half_fraction_threshold_t)); + mp_free_number(mp, &(mp->math->md_scaled_threshold_t)); + mp_free_number(mp, &(mp->math->md_half_scaled_threshold_t)); + mp_free_number(mp, &(mp->math->md_near_zero_angle_t)); + mp_free_number(mp, &(mp->math->md_p_over_v_threshold_t)); + mp_free_number(mp, &(mp->math->md_equation_threshold_t)); + mp_memory_free(mp->math); +} + +@ See mpmathdouble for documentation. 
@c + +void mp_allocate_number (MP mp, mp_number *n, mp_number_type t) +{ + (void) mp; + n->data.pval = mp_posit_data.zero; + n->type = t; +} + +void mp_allocate_clone (MP mp, mp_number *n, mp_number_type t, mp_number *v) +{ + (void) mp; + n->type = t; + n->data.pval = v->data.pval; +} + +void mp_allocate_abs (MP mp, mp_number *n, mp_number_type t, mp_number *v) +{ + (void) mp; + n->type = t; + n->data.pval = posit_fabs(v->data.pval); +} + +void mp_allocate_double (MP mp, mp_number *n, double v) +{ + (void) mp; + n->type = mp_scaled_type; + n->data.pval = double_to_posit(v); +} + +void mp_free_number (MP mp, mp_number *n) +{ + (void) mp; + n->type = mp_nan_type; +} + +void mp_set_posit_from_int(mp_number *A, int B) +{ + A->data.pval = integer_to_posit(B); +} + +void mp_set_posit_from_boolean(mp_number *A, int B) +{ + A->data.pval = integer_to_posit(B); +} + +void mp_set_posit_from_scaled(mp_number *A, int B) +{ + A->data.pval = posit_div(integer_to_posit(B), mp_posit_data.d65536); +} + +void mp_set_posit_from_double(mp_number *A, double B) +{ + A->data.pval = double_to_posit(B); +} + +void mp_set_posit_from_addition(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_add(B->data.pval, C->data.pval); +} + +void mp_set_posit_half_from_addition(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_div(posit_add(B->data.pval,C->data.pval), mp_posit_data.two); +} + +void mp_set_posit_from_subtraction(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_sub(B->data.pval, C->data.pval); +} + +void mp_set_posit_half_from_subtraction(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_div(posit_sub(B->data.pval, C->data.pval), mp_posit_data.two); +} + +void mp_set_posit_from_div(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_div(B->data.pval, C->data.pval); +} + +void mp_set_posit_from_mul(mp_number *A, mp_number *B, mp_number *C) +{ + A->data.pval = posit_mul(B->data.pval, C->data.pval); +} + 
+void mp_set_posit_from_int_div(mp_number *A, mp_number *B, int C) +{ + A->data.pval = posit_div(B->data.pval, integer_to_posit(C)); +} + +void mp_set_posit_from_int_mul(mp_number *A, mp_number *B, int C) +{ + A->data.pval = posit_mul(A->data.pval, integer_to_posit(C)); +} + +void mp_set_posit_from_of_the_way (MP mp, mp_number *A, mp_number *t, mp_number *B, mp_number *C) +{ + (void) mp; + A->data.pval = posit_sub(B->data.pval, mp_posit_take_fraction(posit_sub(B->data.pval, C->data.pval), t->data.pval)); +} + +void mp_number_negate(mp_number *A) +{ + A->data.pval = posit_neg(A->data.pval); +} + +void mp_number_add(mp_number *A, mp_number *B) +{ + A->data.pval = posit_add(A->data.pval, B->data.pval); +} + +void mp_number_subtract(mp_number *A, mp_number *B) +{ + A->data.pval = posit_sub(A->data.pval, B->data.pval); +} + +void mp_number_half(mp_number *A) +{ + A->data.pval = posit_div(A->data.pval, mp_posit_data.two); +} + +void mp_number_double(mp_number *A) +{ + A->data.pval = posit_mul(A->data.pval, mp_posit_data.two); +} + +void mp_number_add_scaled(mp_number *A, int B) +{ + /* also for negative B */ + A->data.pval = posit_add(A->data.pval, posit_div(integer_to_posit(B), mp_posit_data.d65536)); +} + +void mp_number_multiply_int(mp_number *A, int B) +{ + A->data.pval = posit_mul(A->data.pval, integer_to_posit(B)); +} + +void mp_number_divide_int(mp_number *A, int B) +{ + A->data.pval = posit_div(A->data.pval, integer_to_posit(B)); +} + +void mp_posit_abs(mp_number *A) +{ + A->data.pval = posit_fabs(A->data.pval); +} + +void mp_number_clone(mp_number *A, mp_number *B) +{ + A->data.pval = B->data.pval; +} + +void mp_number_negated_clone(mp_number *A, mp_number *B) +{ + A->data.pval = posit_neg(B->data.pval); +} + +void mp_number_abs_clone(mp_number *A, mp_number *B) +{ + A->data.pval = posit_fabs(B->data.pval); +} + +void mp_number_swap(mp_number *A, mp_number *B) +{ + posit_t swap_tmp = A->data.pval; + A->data.pval = B->data.pval; + B->data.pval = swap_tmp; +} + 
+void mp_number_fraction_to_scaled(mp_number *A) +{ + A->type = mp_scaled_type; + A->data.pval = posit_div(A->data.pval, mp_posit_data.fraction_multiplier); +} + +void mp_number_angle_to_scaled(mp_number *A) +{ + A->type = mp_scaled_type; + A->data.pval = posit_div(A->data.pval, mp_posit_data.angle_multiplier); +} + +void mp_number_scaled_to_fraction(mp_number *A) +{ + A->type = mp_fraction_type; + A->data.pval = posit_mul(A->data.pval, mp_posit_data.fraction_multiplier); +} + +void mp_number_scaled_to_angle(mp_number *A) +{ + A->type = mp_angle_type; + A->data.pval = posit_mul(A->data.pval, mp_posit_data.angle_multiplier); +} + +int mp_number_to_scaled(mp_number *A) +{ + return posit_to_integer(posit_mul(A->data.pval, mp_posit_data.d65536)); +} + +int mp_number_to_int(mp_number *A) +{ + return posit_to_integer(A->data.pval); +} + +int mp_number_to_boolean(mp_number *A) +{ + return posit_eq_zero(A->data.pval) ? 0 : 1; +} + +double mp_number_to_double(mp_number *A) +{ + return posit_to_double(A->data.pval); +} + +int mp_number_odd(mp_number *A) +{ + return odd(posit_to_integer(A->data.pval)); +} + +int mp_number_equal(mp_number *A, mp_number *B) +{ + return posit_eq(A->data.pval, B->data.pval); +} + +int mp_number_greater(mp_number *A, mp_number *B) +{ + return posit_gt(A->data.pval, B->data.pval); +} + +int mp_number_less(mp_number *A, mp_number *B) +{ + return posit_lt(A->data.pval, B->data.pval); +} + +int mp_number_nonequalabs(mp_number *A, mp_number *B) +{ + return ! 
posit_eq(posit_fabs(A->data.pval), posit_fabs(B->data.pval)); +} + +char *mp_posit_number_tostring (MP mp, mp_number *n) +{ + static char set[64]; + int l = 0; + char *ret = mp_memory_allocate(64); + (void) mp; + snprintf(set, 64, "%.20g", posit_to_double(n->data.pval)); + while (set[l] == ' ') { + l++; + } + strcpy(ret, set+l); + return ret; +} + +void mp_posit_print_number (MP mp, mp_number *n) +{ + char *str = mp_posit_number_tostring(mp, n); + mp_print_e_str(mp, str); + mp_memory_free(str); +} + +/* Todo: it is hard to overflow posits. Also, we can check zero fast. */ + +void mp_posit_slow_add (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_orig) +{ + if (posit_gt(x_orig->data.pval, mp_posit_data.zero)) { + if (posit_le(y_orig->data.pval, posit_sub(mp_posit_data.EL_GORDO, x_orig->data.pval))) { + ret->data.pval = posit_add(x_orig->data.pval, y_orig->data.pval); + } else { + mp->arith_error = 1; + ret->data.pval = mp_posit_data.EL_GORDO; + } + } else if (posit_le(posit_neg(y_orig->data.pval), posit_add(mp_posit_data.EL_GORDO, x_orig->data.pval))) { + ret->data.pval = posit_add(x_orig->data.pval, y_orig->data.pval); + } else { + mp->arith_error = 1; + ret->data.pval = mp_posit_data.negative_EL_GORDO; + } +} + +void mp_posit_number_make_fraction (MP mp, mp_number *ret, mp_number *p, mp_number *q) { + (void) mp; + ret->data.pval = mp_posit_make_fraction(p->data.pval, q->data.pval); +} + +void mp_posit_number_take_fraction (MP mp, mp_number *ret, mp_number *p, mp_number *q) { + (void) mp; + ret->data.pval = mp_posit_take_fraction(p->data.pval, q->data.pval); +} + +void mp_posit_number_take_scaled (MP mp, mp_number *ret, mp_number *p_orig, mp_number *q_orig) +{ + (void) mp; + ret->data.pval = posit_mul(p_orig->data.pval, q_orig->data.pval); +} + +void mp_posit_number_make_scaled (MP mp, mp_number *ret, mp_number *p_orig, mp_number *q_orig) +{ + (void) mp; + ret->data.pval = posit_div(p_orig->data.pval, q_orig->data.pval); +} + +void mp_wrapup_numeric_token 
(MP mp, unsigned char *start, unsigned char *stop) +{ + double result; + char *end = (char *) stop; + errno = 0; + result = strtod((char *) start, &end); + if (errno == 0) { + set_cur_mod(double_to_posit(result)); + if (result >= mp_warning_limit) { + if (posit_gt(internal_value(mp_warning_check_internal).data.pval, mp_posit_data.zero) && (mp->scanner_status != mp_tex_flushing_state)) { + char msg[256]; + mp_snprintf(msg, 256, "Number is too large (%g)", result); + @.Number is too large@> + mp_error( + mp, + msg, + "Continue and I'll try to cope with that big value; but it might be dangerous." + "(Set warningcheck := 0 to suppress this message.)" + ); + } + } + } else if (mp->scanner_status != mp_tex_flushing_state) { + mp_error( + mp, + "Enormous number has been reduced.", + "I could not handle this number specification probably because it is out of" + "range." + ); + @.Enormous number...@> + set_cur_mod(mp_posit_data.EL_GORDO); + } + set_cur_cmd(mp_numeric_command); +} + +static void mp_posit_aux_find_exponent (MP mp) +{ + if (mp->buffer[mp->cur_input.loc_field] == 'e' || mp->buffer[mp->cur_input.loc_field] == 'E') { + mp->cur_input.loc_field++; + if (!(mp->buffer[mp->cur_input.loc_field] == '+' + || mp->buffer[mp->cur_input.loc_field] == '-' + || mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class)) { + mp->cur_input.loc_field--; + return; + } + if (mp->buffer[mp->cur_input.loc_field] == '+' + || mp->buffer[mp->cur_input.loc_field] == '-') { + mp->cur_input.loc_field++; + } + while (mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class) { + mp->cur_input.loc_field++; + } + } +} + +void mp_posit_scan_fractional_token (MP mp, int n) /* n is scaled */ +{ + unsigned char *start = &mp->buffer[mp->cur_input.loc_field -1]; + unsigned char *stop; + (void) n; + while (mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class) { + mp->cur_input.loc_field++; + } + mp_posit_aux_find_exponent(mp); + stop = 
&mp->buffer[mp->cur_input.loc_field-1]; + mp_wrapup_numeric_token(mp, start, stop); +} + +void mp_posit_scan_numeric_token (MP mp, int n) /* n is scaled */ +{ + unsigned char *start = &mp->buffer[mp->cur_input.loc_field -1]; + unsigned char *stop; + (void) n; + while (mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class) { + mp->cur_input.loc_field++; + } + if (mp->buffer[mp->cur_input.loc_field] == '.' && mp->buffer[mp->cur_input.loc_field+1] != '.') { + mp->cur_input.loc_field++; + while (mp->char_class[mp->buffer[mp->cur_input.loc_field]] == mp_digit_class) { + mp->cur_input.loc_field++; + } + } + mp_posit_aux_find_exponent(mp); + stop = &mp->buffer[mp->cur_input.loc_field-1]; + mp_wrapup_numeric_token(mp, start, stop); +} + +void mp_posit_velocity (MP mp, mp_number *ret, mp_number *st, mp_number *ct, mp_number *sf, mp_number *cf, mp_number *t) +{ + posit_t acc, num, denom; /* registers for intermediate calculations */ + (void) mp; + acc = mp_posit_take_fraction( + mp_posit_take_fraction( + posit_sub(st->data.pval, posit_div(sf->data.pval, mp_posit_data.sixteen)), + posit_sub(sf->data.pval, posit_div(st->data.pval, mp_posit_data.sixteen)) + ), + posit_sub(ct->data.pval,cf->data.pval) + ); + num = posit_add( + mp_posit_data.fraction_two, + mp_posit_take_fraction( + acc, + mp_posit_data.sqrt_two_mul_fraction_one + ) + ); + denom = posit_add( + mp_posit_data.fraction_three, + posit_add( + mp_posit_take_fraction( + ct->data.pval, + mp_posit_data.sqrt_five_minus_one_mul_fraction_one_and_half + ), + mp_posit_take_fraction( + cf->data.pval, + mp_posit_data.three_minus_sqrt_five_mul_fraction_one_and_half + ) + ) + ); + if (posit_ne(t->data.pval, mp_posit_data.unity)) { + num = mp_posit_make_scaled(num, t->data.pval); + } + if (posit_ge(posit_div(num, mp_posit_data.four), denom)) { + ret->data.pval = mp_posit_data.fraction_four; + } else { + ret->data.pval = mp_posit_make_fraction(num, denom); + } +} + +int mp_posit_ab_vs_cd (mp_number *a_orig, mp_number 
*b_orig, mp_number *c_orig, mp_number *d_orig) +{ + posit_t ab = posit_mul(a_orig->data.pval, b_orig->data.pval); + posit_t cd = posit_mul(c_orig->data.pval, d_orig->data.pval); + if (posit_eq(ab,cd)) { + return 0; + } else if (posit_lt(ab,cd)) { + return -1; + } else { + return 1; + } +} + +static void mp_posit_crossing_point (MP mp, mp_number *ret, mp_number *aa, mp_number *bb, mp_number *cc) +{ + posit_t d; + posit_t xx, x0, x1, x2; + posit_t a = aa->data.pval; + posit_t b = bb->data.pval; + posit_t c = cc->data.pval; + (void) mp; + if (posit_lt(a, mp_posit_data.zero)) { + ret->data.pval = mp_posit_data.zero_crossing; + return; + } + if (posit_ge(c, mp_posit_data.zero)) { + if (posit_ge(b, mp_posit_data.zero)) { + if (posit_gt(c, mp_posit_data.zero)) { + ret->data.pval = mp_posit_data.no_crossing; + } else if (posit_eq_zero(a) && posit_eq_zero(b)) { + ret->data.pval = mp_posit_data.no_crossing; + } else { + ret->data.pval = mp_posit_data.one_crossing; + } + return; + } + if (posit_eq_zero(a)) { + ret->data.pval = mp_posit_data.zero_crossing; + return; + } + } else if (posit_eq_zero(a) && posit_le(b, mp_posit_data.zero)) { + ret->data.pval = mp_posit_data.zero_crossing; + return; + } + /* Use bisection to find the crossing point... 
*/ + d = mp_posit_data.epsilon; + x0 = a; + x1 = posit_sub(a, b); + x2 = posit_sub(b, c); + do { + /* not sure why the error correction has to be >= 1E-12 */ + posit_t x = posit_add(posit_div(posit_add(x1, x2), mp_posit_data.two), mp_posit_data.error_correction); + if (posit_gt(posit_sub(x1, x0), x0)) { + x2 = x; + x0 = posit_add(x0, x0); + d = posit_add(d, d); + } else { + xx = posit_sub(posit_add(x1, x), x0); + if (posit_gt(xx, x0)) { + x2 = x; + x0 = posit_add(x0, x0); + d = posit_add(d, d); + } else { + x0 = posit_sub(x0, xx); + if (posit_le(x, x0) && posit_le(posit_add(x, x2), x0)) { + ret->data.pval = mp_posit_data.no_crossing; + return; + } + x1 = x; + d = posit_add(posit_add(d, d), mp_posit_data.epsilon); + } + } + } while (posit_lt(d, mp_posit_data.fraction_one)); + ret->data.pval = posit_sub(d, mp_posit_data.fraction_one); +} + +@ See mpmathdouble for documentation. @c + +int mp_round_unscaled(mp_number *x_orig) +{ + return posit_i_round(x_orig->data.pval); +} + +void mp_number_floor(mp_number *i) +{ + i->data.pval = posit_floor(i->data.pval); +} + +void mp_posit_fraction_to_round_scaled(mp_number *x_orig) +{ + x_orig->type = mp_scaled_type; + x_orig->data.pval = posit_div(x_orig->data.pval, mp_posit_data.fraction_multiplier); +} + +void mp_posit_square_rt (MP mp, mp_number *ret, mp_number *x_orig) /* return, x: scaled */ +{ + if (posit_gt(x_orig->data.pval, mp_posit_data.zero)) { + ret->data.pval = posit_sqrt(x_orig->data.pval); + } else { + if (posit_lt(x_orig->data.pval, mp_posit_data.zero)) { + char msg[256]; + char *xstr = mp_posit_number_tostring(mp, x_orig); + mp_snprintf(msg, 256, "Square root of %s has been replaced by 0", xstr); + mp_memory_free(xstr); + @.Square root...replaced by 0@> + mp_error( + mp, + msg, + "Since I don't take square roots of negative numbers, I'm zeroing this one.\n" + "Proceed, with fingers crossed." 
+ ); + } + ret->data.pval = mp_posit_data.zero; + } +} + +void mp_posit_pyth_add (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b_orig) +{ + ret->data.pval = posit_sqrt( + posit_add( + posit_mul( + a_orig->data.pval, + a_orig->data.pval + ), + posit_mul( + b_orig->data.pval, + b_orig->data.pval + ) + ) + ); +} + +void mp_posit_pyth_sub (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b_orig) +{ + /* can be made nicer */ + if (posit_gt(a_orig->data.pval,b_orig->data.pval)) { + a_orig->data.pval = posit_sqrt( + posit_sub( + posit_mul( + a_orig->data.pval, + a_orig->data.pval + ), + posit_mul( + b_orig->data.pval, + b_orig->data.pval + ) + ) + ); + } else { + if (posit_lt(a_orig->data.pval,b_orig->data.pval)) { + char msg[256]; + char *astr = mp_posit_number_tostring(mp, a_orig); + char *bstr = mp_posit_number_tostring(mp, b_orig); + mp_snprintf(msg, 256, "Pythagorean subtraction %s+-+%s has been replaced by 0", astr, bstr); + mp_memory_free(astr); + mp_memory_free(bstr); + @.Pythagorean...@> + mp_error( + mp, + msg, + "Since I don't take square roots of negative numbers, Im zeroing this one.\n" + "Proceed, with fingers crossed." 
+ ); + } + a_orig->data.pval = mp_posit_data.zero; + } + ret->data.pval = a_orig->data.pval; +} + +void mp_posit_power_of (MP mp, mp_number *ret, mp_number *a_orig, mp_number *b_orig) +{ + errno = 0; + ret->data.pval = posit_pow(a_orig->data.pval, b_orig->data.pval); + if (errno) { + mp->arith_error = 1; + ret->data.pval = mp_posit_data.EL_GORDO; + } +} + +void mp_posit_m_log (MP mp, mp_number *ret, mp_number *x_orig) +{ + /* TODO: int mult */ + if (posit_gt(x_orig->data.pval,mp_posit_data.zero)) { + ret->data.pval = posit_mul(posit_log(x_orig->data.pval),mp_posit_data.d256); + } else { + char msg[256]; + char *xstr = mp_posit_number_tostring(mp, x_orig); + mp_snprintf(msg, 256, "Logarithm of %s has been replaced by 0", xstr); + mp_memory_free(xstr); + mp_error( + mp, + msg, + "Since I don't take logs of non-positive numbers, I'm zeroing this one.\n" + "Proceed, with fingers crossed." + ); + ret->data.pval = mp_posit_data.zero; + } +} + +void mp_posit_m_exp (MP mp, mp_number *ret, mp_number *x_orig) +{ + errno = 0; + ret->data.pval = posit_exp(posit_div(x_orig->data.pval,mp_posit_data.d256)); + if (errno) { + if (posit_gt(x_orig->data.pval,mp_posit_data.zero)) { + mp->arith_error = 1; + ret->data.pval = mp_posit_data.EL_GORDO; + } else { + ret->data.pval = mp_posit_data.zero; + } + } +} + +void mp_posit_n_arg (MP mp, mp_number *ret, mp_number *x_orig, mp_number *y_orig) +{ + if (posit_eq_zero(x_orig->data.pval) && posit_eq_zero(y_orig->data.pval)) { + mp_error( + mp, + "angle(0,0) is taken as zero", + "The 'angle' between two identical points is undefined. I'm zeroing this one.\n" + "Proceed, with fingers crossed." 
+ ); + ret->data.pval = mp_posit_data.zero; + } else { + ret->type = mp_angle_type; + /* TODO */ + ret->data.pval = posit_mul( + posit_atan2( + y_orig->data.pval, + x_orig->data.pval + ), + mp_posit_data.d180_divided_by_pi_mul_angle + ); + } +} + +void mp_posit_sin_cos (MP mp, mp_number *z_orig, mp_number *n_cos, mp_number *n_sin) +{ + posit_t rad = posit_div(z_orig->data.pval, mp_posit_data.angle_multiplier); + (void) mp; + if (posit_eq(rad, mp_posit_data.dp90) || posit_eq(rad, mp_posit_data.dm270)) { + n_cos->data.pval = mp_posit_data.zero; + n_sin->data.pval = mp_posit_data.fraction_multiplier; + } else if (posit_eq(rad, mp_posit_data.dm90) || posit_eq(rad, mp_posit_data.dp270)) { + n_cos->data.pval = mp_posit_data.zero; + n_sin->data.pval = mp_posit_data.negative_fraction_multiplier; + } else if (posit_eq(rad, mp_posit_data.dp180) || posit_eq(rad, mp_posit_data.dm180)) { + n_cos->data.pval = mp_posit_data.negative_fraction_multiplier; + n_sin->data.pval = mp_posit_data.zero; + } else { + rad = posit_mul(rad,mp_posit_data.pi_divided_by_180); + n_cos->data.pval = posit_mul(posit_cos(rad),mp_posit_data.fraction_multiplier); + n_sin->data.pval = posit_mul(posit_sin(rad),mp_posit_data.fraction_multiplier); + } +} + +@ See mpmathdouble for documentation. 
@c + +# define KK 100 /* the long lag */ +# define LL 37 /* the short lag */ +# define MM (1L<<30) /* the modulus */ +# define mod_diff(x,y) (((x)-(y))&(MM-1)) /* subtraction mod MM */ +# define TT 70 /* guaranteed separation between streams */ +# define is_odd(x) ((x)&1) /* units bit of x */ +# define QUALITY 1009 /* recommended quality level for high-res use */ + +/* destination, array length (must be at least KK) */ + +typedef struct mp_posit_random_info { + long x[KK]; + long buf[QUALITY]; + long dummy; + long started; + long *ptr; +} mp_posit_random_info; + +static mp_posit_random_info mp_posit_random_data = { + .dummy = -1, + .started = -1, + .ptr = &mp_posit_random_data.dummy +}; + +/* the following routines are from exercise 3.6--15 */ +/* after calling |mp_aux_ran_start|, get new randoms by, e.g., |x=mp_aux_ran_arr_next()| */ + +static void mp_posit_aux_ran_array(long aa[], int n) +{ + int i, j; + for (j = 0; j < KK; j++) { + aa[j] = mp_posit_random_data.x[j]; + } + for (; j < n; j++) { + aa[j] = mod_diff(aa[j - KK], aa[j - LL]); + } + for (i = 0; i < LL; i++, j++) { + mp_posit_random_data.x[i] = mod_diff(aa[j - KK], aa[j - LL]); + } + for (; i < KK; i++, j++) { + mp_posit_random_data.x[i] = mod_diff(aa[j - KK], mp_posit_random_data.x[i - LL]); + } +} + +/* Do this before using |mp_aux_ran_array|, long seed selector for different streams. 
*/ + +static void mp_posit_aux_ran_start(long seed) +{ + int t, j; + long x[KK + KK - 1]; /* the preparation buffer */ + long ss = (seed+2) & (MM - 2); + for (j = 0; j < KK; j++) { + /* bootstrap the buffer */ + x[j] = ss; + /* cyclic shift 29 bits */ + ss <<= 1; + if (ss >= MM) { + ss -= MM - 2; + } + } + /* make x[1] (and only x[1]) odd */ + x[1]++; + for (ss = seed & (MM - 1), t = TT - 1; t;) { + for (j = KK - 1; j > 0; j--) { + /* "square" */ + x[j + j] = x[j]; + x[j + j - 1] = 0; + } + for (j = KK + KK - 2; j >= KK; j--) { + x[j - (KK -LL)] = mod_diff(x[j - (KK - LL)], x[j]); + x[j - KK] = mod_diff(x[j - KK], x[j]); + } + if (is_odd(ss)) { + /* "multiply by z" */ + for (j = KK; j>0; j--) { + x[j] = x[j-1]; + } + x[0] = x[KK]; + /* shift the buffer cyclically */ + x[LL] = mod_diff(x[LL], x[KK]); + } + if (ss) { + ss >>= 1; + } else { + t--; + } + } + for (j = 0; j < LL; j++) { + mp_posit_random_data.x[j + KK - LL] = x[j]; + } + for (;j < KK; j++) { + mp_posit_random_data.x[j - LL] = x[j]; + } + for (j = 0; j < 10; j++) { + /* warm things up */ + mp_posit_aux_ran_array(x, KK + KK - 1); + } + mp_posit_random_data.ptr = &mp_posit_random_data.started; +} + +static long mp_posit_aux_ran_arr_cycle(void) +{ + if (mp_posit_random_data.ptr == &mp_posit_random_data.dummy) { + /* the user forgot to initialize */ + mp_posit_aux_ran_start(314159L); + } + mp_posit_aux_ran_array(mp_posit_random_data.buf, QUALITY); + mp_posit_random_data.buf[KK] = -1; + mp_posit_random_data.ptr = mp_posit_random_data.buf + 1; + return mp_posit_random_data.buf[0]; +} + +void mp_init_randoms (MP mp, int seed) +{ + int k = 1; + int j = abs(seed); + int f = (int) mp_fraction_multiplier; /* avoid warnings */ + while (j >= f) { + j = j/2; + } + for (int i = 0; i <= 54; i++) { + int jj = k; + k = j - k; + j = jj; + if (k < 0) { + k += f; + } + mp->randoms[(i * 21) % 55].data.pval = integer_to_posit(j); + } + mp_new_randoms(mp); + mp_new_randoms(mp); + mp_new_randoms(mp); + /* warm up the array */ + 
mp_posit_aux_ran_start((unsigned long) seed); +} + +void mp_number_modulo(mp_number *a, mp_number *b) +{ + a->data.pval = posit_mul(posit_modf(posit_div(a->data.pval, b->data.pval)), b->data.pval); +} + +static void mp_next_unif_random (MP mp, mp_number *ret) +{ + unsigned long int op = (unsigned) (*mp_posit_random_data.ptr >=0 ? *mp_posit_random_data.ptr++: mp_posit_aux_ran_arr_cycle()); + double a = op / (MM * 1.0); + (void) mp; + ret->data.pval = double_to_posit(a); +} + +static void mp_next_random (MP mp, mp_number *ret) +{ + if ( mp->j_random==0) { + mp_new_randoms(mp); + } else { + mp->j_random = mp->j_random-1; + } + mp_number_clone(ret, &(mp->randoms[mp->j_random])); +} + +static void mp_posit_m_unif_rand (MP mp, mp_number *ret, mp_number *x_orig) +{ + mp_number x, abs_x, u, y; + mp_allocate_number(mp, &y, mp_fraction_type); + mp_allocate_clone(mp, &x, mp_scaled_type, x_orig); + mp_allocate_abs(mp, &abs_x, mp_scaled_type, &x); + mp_allocate_number(mp, &u, mp_scaled_type); + mp_next_unif_random(mp, &u); + y.data.pval = posit_mul(abs_x.data.pval, u.data.pval); + mp_free_number(mp, &u); + if (mp_number_equal(&y, &abs_x)) { + mp_number_clone(ret, &((math_data *)mp->math)->md_zero_t); + } else if (mp_number_greater(&x, &((math_data *)mp->math)->md_zero_t)) { + mp_number_clone(ret, &y); + } else { + mp_number_negated_clone(ret, &y); + } + mp_free_number(mp, &abs_x); + mp_free_number(mp, &x); + mp_free_number(mp, &y); +} + +static void mp_posit_m_norm_rand (MP mp, mp_number *ret) +{ + mp_number abs_x, u, r, la, xa; + mp_allocate_number(mp, &la, mp_scaled_type); + mp_allocate_number(mp, &xa, mp_scaled_type); + mp_allocate_number(mp, &abs_x, mp_scaled_type); + mp_allocate_number(mp, &u, mp_scaled_type); + mp_allocate_number(mp, &r, mp_scaled_type); + do { + do { + mp_number v; + mp_allocate_number(mp, &v, mp_scaled_type); + mp_next_random(mp, &v); + mp_number_subtract(&v, &((math_data *)mp->math)->md_fraction_half_t); + mp_posit_number_take_fraction(mp, &xa, 
&((math_data *)mp->math)->md_sqrt_8_e_k, &v); + mp_free_number(mp, &v); + mp_next_random(mp, &u); + mp_number_clone(&abs_x, &xa); + mp_posit_abs(&abs_x); + } while (! mp_number_less(&abs_x, &u)); + mp_posit_number_make_fraction(mp, &r, &xa, &u); + mp_number_clone(&xa, &r); + mp_posit_m_log(mp, &la, &u); + mp_set_posit_from_subtraction(&la, &((math_data *)mp->math)->md_twelve_ln_2_k, &la); + } while (mp_posit_ab_vs_cd(&((math_data *)mp->math)->md_one_k, &la, &xa, &xa) < 0); + mp_number_clone(ret, &xa); + mp_free_number(mp, &r); + mp_free_number(mp, &abs_x); + mp_free_number(mp, &la); + mp_free_number(mp, &xa); + mp_free_number(mp, &u); +} + +@ @<Reduce to the case that |a...@>= +if (posit_lt(a, mp_posit_data.zero)) { + a = posit_neg(a); + b = posit_neg(b); +} +if (posit_lt(c, mp_posit_data.zero)) { + c = posit_neg(c); + d = posit_neg(d); +} +if ((posit_le(d, mp_posit_data.zero)) { + if ((posit_ge(b, mp_posit_data.zero)) { + if ((posit_eq_zero(a) || posit_eq_zero(b) && (posit_eq_zero(c) || posit_eq_zero(d))) { + ret->data.pval = mp_posit_data.zero; + } else { + ret->data.pval = mp_posit_data.one; + } + goto RETURN; + } if (posit_eq_zero(d)) { + ret->data.pval = posit_eq_zero(a) ? mp_posit_data.zero : mp_posit_data.minusone; + goto RETURN; + } else + q = a; + a = c; + c = q; + q = -b; + b = -d; + d = q; + } +} else if (posit_le(b, mp_posit_data.zero) { + if (posit_lt(b, mp_posit_data.zero) && posit_gt(a, mp_posit_data.zero)) { + ret->data.pval = mp_posit_data.minusone; + return; + } else + ret->data.pval = posit_eq_zero(c) ? 
mp_posit_data.zero : mp_posit_data.minusone; + goto RETURN; + } +} diff --git a/source/luametatex/source/tex/texcommands.c b/source/luametatex/source/tex/texcommands.c index 0eb084296..73f509a96 100644 --- a/source/luametatex/source/tex/texcommands.c +++ b/source/luametatex/source/tex/texcommands.c @@ -509,6 +509,7 @@ void tex_initialize_commands(void) tex_primitive(tex_command, "skip", register_cmd, glue_val_level, 0); tex_primitive(tex_command, "muskip", register_cmd, mu_val_level, 0); tex_primitive(tex_command, "toks", register_cmd, tok_val_level, 0); + tex_primitive(luatex_command, "float", register_cmd, posit_val_level, 0); tex_primitive(tex_command, "spacefactor", set_auxiliary_cmd, space_factor_code, 0); tex_primitive(tex_command, "prevdepth", set_auxiliary_cmd, prev_depth_code, 0); @@ -590,6 +591,7 @@ void tex_initialize_commands(void) tex_primitive(etex_command, "fontchardp", some_item_cmd, font_char_dp_code, 0); tex_primitive(etex_command, "fontcharic", some_item_cmd, font_char_ic_code, 0); tex_primitive(luatex_command, "fontcharta", some_item_cmd, font_char_ta_code, 0); + tex_primitive(luatex_command, "fontcharba", some_item_cmd, font_char_ba_code, 0); tex_primitive(luatex_command, "fontspecid", some_item_cmd, font_spec_id_code, 0); tex_primitive(luatex_command, "fontspecscale", some_item_cmd, font_spec_scale_code, 0); tex_primitive(luatex_command, "fontspecxscale", some_item_cmd, font_spec_xscale_code, 0); @@ -619,6 +621,7 @@ void tex_initialize_commands(void) tex_primitive(etex_command, "mutoglue", some_item_cmd, mu_to_glue_code, 0); tex_primitive(etex_command, "gluetomu", some_item_cmd, glue_to_mu_code, 0); tex_primitive(etex_command, "numexpr", some_item_cmd, numexpr_code, 0); + tex_primitive(luatex_command, "posexpr", some_item_cmd, posexpr_code, 0); tex_primitive(etex_command, "dimexpr", some_item_cmd, dimexpr_code, 0); tex_primitive(etex_command, "glueexpr", some_item_cmd, glueexpr_code, 0); tex_primitive(etex_command, "muexpr", some_item_cmd, 
muexpr_code, 0); @@ -651,9 +654,11 @@ void tex_initialize_commands(void) tex_primitive(luatex_command, "fontspecifiedname", convert_cmd, font_specification_code, 0); tex_primitive(tex_command, "jobname", convert_cmd, job_name_code, 0); tex_primitive(tex_command, "meaning", convert_cmd, meaning_code, 0); - tex_primitive(luatex_command, "meaningfull", convert_cmd, meaning_full_code, 0); - tex_primitive(luatex_command, "meaningless", convert_cmd, meaning_less_code, 0); + tex_primitive(luatex_command, "meaningfull", convert_cmd, meaning_full_code, 0); /* full as in fill, maybe some day meaninfulll */ + tex_primitive(luatex_command, "meaningless", convert_cmd, meaning_less_code, 0); /* less as in fill */ tex_primitive(luatex_command, "meaningasis", convert_cmd, meaning_asis_code, 0); /* for manuals and articles */ + tex_primitive(luatex_command, "meaningful", convert_cmd, meaning_ful_code, 0); /* full as in fil */ + tex_primitive(luatex_command, "meaningles", convert_cmd, meaning_les_code, 0); /* less as in fil, can't be less than this */ /*tex Maybe some day also |meaningonly| (no macro: in front). */ tex_primitive(tex_command, "number", convert_cmd, number_code, 0); tex_primitive(luatex_command, "tointeger", convert_cmd, to_integer_code, 0); @@ -708,8 +713,11 @@ void tex_initialize_commands(void) tex_primitive(luatex_command, "ifincsname", if_test_cmd, if_in_csname_code, 0); /* This is obsolete and might be dropped. 
*/ tex_primitive(luatex_command, "ifabsnum", if_test_cmd, if_abs_int_code, 0); tex_primitive(luatex_command, "ifabsdim", if_test_cmd, if_abs_dim_code, 0); + tex_primitive(luatex_command, "iffloat", if_test_cmd, if_posit_code, 0); + tex_primitive(luatex_command, "ifabsfloat", if_test_cmd, if_abs_posit_code, 0); tex_primitive(luatex_command, "ifzeronum", if_test_cmd, if_zero_int_code, 0); tex_primitive(luatex_command, "ifzerodim", if_test_cmd, if_zero_dim_code, 0); + tex_primitive(luatex_command, "ifzerofloat", if_test_cmd, if_zero_posit_code, 0); tex_primitive(luatex_command, "ifchknum", if_test_cmd, if_chk_int_code, 0); tex_primitive(luatex_command, "ifchknumber", if_test_cmd, if_chk_integer_code, 0); tex_primitive(luatex_command, "ifchkdim", if_test_cmd, if_chk_dim_code, 0); @@ -1013,6 +1021,7 @@ void tex_initialize_commands(void) tex_primitive(tex_command, "chardef", shorthand_def_cmd, char_def_code, 0); tex_primitive(tex_command, "countdef", shorthand_def_cmd, count_def_code, 0); tex_primitive(tex_command, "dimendef", shorthand_def_cmd, dimen_def_code, 0); + tex_primitive(luatex_command, "floatdef", shorthand_def_cmd, float_def_code, 0); tex_primitive(tex_command, "mathchardef", shorthand_def_cmd, math_char_def_code, 0); tex_primitive(tex_command, "muskipdef", shorthand_def_cmd, mu_skip_def_code, 0); tex_primitive(tex_command, "skipdef", shorthand_def_cmd, skip_def_code, 0); @@ -1023,6 +1032,7 @@ void tex_initialize_commands(void) tex_primitive(luatex_command, "attributedef", shorthand_def_cmd, attribute_def_code, 0); tex_primitive(luatex_command, "luadef", shorthand_def_cmd, lua_def_code, 0); tex_primitive(luatex_command, "integerdef", shorthand_def_cmd, integer_def_code, 0); + tex_primitive(luatex_command, "positdef", shorthand_def_cmd, posit_def_code, 0); tex_primitive(luatex_command, "dimensiondef", shorthand_def_cmd, dimension_def_code, 0); tex_primitive(luatex_command, "gluespecdef", shorthand_def_cmd, gluespec_def_code, 0); tex_primitive(luatex_command, 
"mugluespecdef", shorthand_def_cmd, mugluespec_def_code, 0); diff --git a/source/luametatex/source/tex/texcommands.h b/source/luametatex/source/tex/texcommands.h index faeb38247..d90456f25 100644 --- a/source/luametatex/source/tex/texcommands.h +++ b/source/luametatex/source/tex/texcommands.h @@ -202,6 +202,8 @@ typedef enum tex_command_code { register_int_cmd, /*tex user-defined integers */ internal_attribute_cmd, /*tex */ register_attribute_cmd, /*tex user-defined attributes */ + internal_posit_cmd, + register_posit_cmd, internal_dimen_cmd, /*tex length (|\hsize|, etc.) */ register_dimen_cmd, /*tex user-defined dimensions */ internal_glue_cmd, /*tex glue (|\baselineskip|, etc.) */ @@ -221,6 +223,7 @@ typedef enum tex_command_code { set_font_cmd, /*tex set current font (font identifiers) */ define_font_cmd, /*tex define a font file (|\font|) */ integer_cmd, /*tex the equivalent is a halfword number */ + posit_cmd, dimension_cmd, /*tex the equivalent is a halfword number representing a dimension */ gluespec_cmd, /*tex the equivalent is a halfword reference to glue */ mugluespec_cmd, /*tex the equivalent is a halfword reference to glue with math units */ @@ -305,6 +308,8 @@ typedef enum tex_command_code { register_int_reference_cmd, internal_attribute_reference_cmd, register_attribute_reference_cmd, + internal_posit_reference_cmd, + register_posit_reference_cmd, internal_dimen_reference_cmd, register_dimen_reference_cmd, /*tex @@ -456,9 +461,11 @@ typedef enum convert_codes { detokenized_code, /*tex command code for |\detokenized| */ roman_numeral_code, /*tex command code for |\romannumeral| */ meaning_code, /*tex command code for |\meaning| */ - meaning_full_code, /*tex command code for |\meaningfull| */ + meaning_full_code, /*tex command code for |\meaningfull| */ meaning_less_code, /*tex command code for |\meaningless| */ meaning_asis_code, /*tex command code for |\meaningasis| */ + meaning_ful_code, /*tex command code for |\meaningful| */ + meaning_les_code, 
/*tex command code for |\meaningles| */ uchar_code, /*tex command code for |\Uchar| */ lua_escape_string_code, /*tex command code for |\luaescapestring| */ /* lua_token_string_code, */ /*tex command code for |\luatokenstring| */ @@ -517,6 +524,7 @@ typedef enum some_item_codes { font_char_dp_code, /*tex |\fontchardp| */ font_char_ic_code, /*tex |\fontcharic| */ font_char_ta_code, /*tex |\fontcharta| */ + font_char_ba_code, /*tex |\fontcharba| */ font_spec_id_code, /*tex |\fontspecid| */ font_spec_scale_code, /*tex |\fontspecscale| */ font_spec_xscale_code, /*tex |\fontspecxscale| */ @@ -553,6 +561,7 @@ typedef enum some_item_codes { mu_to_glue_code, /*tex |\mutoglue| */ glue_to_mu_code, /*tex |\gluetomu| */ numexpr_code, /*tex |\numexpr| */ + posexpr_code, /* attrexpr_code, */ /*tex not used */ dimexpr_code, /*tex |\dimexpr| */ glueexpr_code, /*tex |\glueexpr| */ @@ -686,6 +695,7 @@ typedef enum shorthand_def_codes { math_char_def_code, /*tex |\mathchardef| */ math_xchar_def_code, /*tex |\Umathchardef| */ math_dchar_def_code, /*tex |\Umathdictdef| */ + float_def_code, count_def_code, /*tex |\countdef| */ attribute_def_code, /*tex |\attributedef| */ dimen_def_code, /*tex |\dimendef| */ @@ -695,6 +705,7 @@ typedef enum shorthand_def_codes { /* string_def_code, */ lua_def_code, /*tex |\luadef| */ integer_def_code, + posit_def_code, dimension_def_code, gluespec_def_code, mugluespec_def_code, diff --git a/source/luametatex/source/tex/texconditional.c b/source/luametatex/source/tex/texconditional.c index 40c704492..b2219e2ab 100644 --- a/source/luametatex/source/tex/texconditional.c +++ b/source/luametatex/source/tex/texconditional.c @@ -578,6 +578,36 @@ void tex_conditional_if(halfword code, int unless) case if_zero_int_code: result = tex_scan_int(0, NULL) == 0; goto RESULT; + case if_abs_posit_code: + case if_posit_code: + { + halfword n1 = tex_scan_posit(0); + halfword cp = tex_aux_scan_comparison(code); + halfword n2 = tex_scan_posit(0); + if (code == 
if_abs_posit_code) { + tex_posit zero = tex_integer_to_posit(0); + if (tex_posit_lt(n1,zero.v)) { + n1 = tex_posit_neg(n1); + } + if (tex_posit_lt(n2,zero.v)) { + n2 = tex_posit_neg(n2); + } + } + switch (cp) { + case 0: result = tex_posit_eq(n1,n2); break; + case 1: result = tex_posit_lt(n1,n2); break; + case 2: result = tex_posit_gt(n1,n2); break; + case 3: result = tex_posit_ne(n1,n2); break; + case 4: result = tex_posit_gt(n1,n2); break; + case 5: result = tex_posit_lt(n1,n2); break; + case 6: result = tex_posit_eq(tex_integer_to_posit(tex_posit_to_integer(n1) & tex_posit_to_integer(n2)).v,n1); break; + case 7: result = tex_posit_ne(tex_integer_to_posit(tex_posit_to_integer(n1) & tex_posit_to_integer(n2)).v,n1); break; + } + } + goto RESULT; + case if_zero_posit_code: + result = tex_posit_eq_zero(tex_scan_posit(0)); + goto RESULT; case if_abs_dim_code: case if_dim_code: { diff --git a/source/luametatex/source/tex/texconditional.h b/source/luametatex/source/tex/texconditional.h index 36f86f6a5..41b33dc36 100644 --- a/source/luametatex/source/tex/texconditional.h +++ b/source/luametatex/source/tex/texconditional.h @@ -55,6 +55,9 @@ typedef enum if_test_codes { if_int_code, /*tex |\ifnum| */ if_abs_int_code, /*tex |\ifabsnum| */ if_zero_int_code, /*tex |\ifzeronum|*/ + if_posit_code, + if_abs_posit_code, + if_zero_posit_code, if_dim_code, /*tex |\ifdim| */ if_abs_dim_code, /*tex |\ifabsdim| */ if_zero_dim_code, /*tex |\ifzerodim| */ diff --git a/source/luametatex/source/tex/texdumpdata.h b/source/luametatex/source/tex/texdumpdata.h index a7b51e924..e21177713 100644 --- a/source/luametatex/source/tex/texdumpdata.h +++ b/source/luametatex/source/tex/texdumpdata.h @@ -55,7 +55,7 @@ */ -# define luametatex_format_fingerprint 687 +# define luametatex_format_fingerprint 689 /* These end up in the string pool. 
*/ diff --git a/source/luametatex/source/tex/texequivalents.c b/source/luametatex/source/tex/texequivalents.c index 56d14a54f..c3cbf087d 100644 --- a/source/luametatex/source/tex/texequivalents.c +++ b/source/luametatex/source/tex/texequivalents.c @@ -1017,6 +1017,7 @@ inline static int tex_aux_equal_eq(halfword p, singleword cmd, singleword flag, } case dimension_cmd: case integer_cmd: + case posit_cmd: if (eq_type(p) == cmd && eq_value(p) == chr) { // if (eq_type(p) == cmd && eq_value(p) == chr && eq_level(p) == cur_level) { return 1; @@ -1307,6 +1308,7 @@ void tex_define_swapped(int g, halfword p1, halfword p2, int force) return; } else { switch (t1) { + case register_posit_cmd: case register_int_cmd: case register_attribute_cmd: case register_dimen_cmd: @@ -1315,6 +1317,7 @@ void tex_define_swapped(int g, halfword p1, halfword p2, int force) case internal_mu_glue_cmd: /* unchecked */ case integer_cmd: case dimension_cmd: + case posit_cmd: tex_aux_just_define(g, p1, v2); tex_aux_just_define(g, p2, v1); return; @@ -1335,6 +1338,10 @@ void tex_define_swapped(int g, halfword p1, halfword p2, int force) tex_assign_internal_attribute_value(g, p1, v2); tex_assign_internal_attribute_value(g, p2, v1); return; + case internal_posit_cmd: + tex_assign_internal_posit_value(g, p1, v2); + tex_assign_internal_posit_value(g, p2, v1); + return; case internal_dimen_cmd: tex_assign_internal_dimen_value(g, p1, v2); tex_assign_internal_dimen_value(g, p2, v1); @@ -1900,6 +1907,16 @@ void tex_aux_show_eqtb(halfword n) tex_print_char('='); tex_print_int(eq_value(n)); break; + case internal_posit_reference_cmd: + tex_print_cmd_chr(internal_posit_cmd, n); + goto POSIT; + case register_posit_reference_cmd: + tex_print_str_esc("posit"); + tex_print_int(register_posit_number(n)); + POSIT: + tex_print_char('='); + tex_print_posit(eq_value(n)); + break; case internal_dimen_reference_cmd: tex_print_cmd_chr(internal_dimen_cmd, n); goto DIMEN; @@ -1991,6 +2008,8 @@ void 
tex_initialize_equivalents(void) tex_aux_set_eq(register_int_base, level_one, register_int_reference_cmd, 0, max_int_register_index); tex_aux_set_eq(internal_attribute_base, level_one, internal_attribute_reference_cmd, unused_attribute_value, number_attribute_pars); tex_aux_set_eq(register_attribute_base, level_one, register_attribute_reference_cmd, unused_attribute_value, max_attribute_register_index); + tex_aux_set_eq(internal_posit_base, level_one, internal_posit_reference_cmd, 0, number_posit_pars); + tex_aux_set_eq(register_posit_base, level_one, register_posit_reference_cmd, 0, max_posit_register_index); tex_aux_set_eq(internal_dimen_base, level_one, internal_dimen_reference_cmd, 0, number_dimen_pars); tex_aux_set_eq(register_dimen_base, level_one, register_dimen_reference_cmd, 0, max_dimen_register_index); tex_aux_set_eq(internal_specification_base, level_one, specification_reference_cmd, null, number_specification_pars); diff --git a/source/luametatex/source/tex/texequivalents.h b/source/luametatex/source/tex/texequivalents.h index c93c1dd04..a2ea8762d 100644 --- a/source/luametatex/source/tex/texequivalents.h +++ b/source/luametatex/source/tex/texequivalents.h @@ -650,6 +650,11 @@ typedef enum attribute_codes { number_attribute_pars, } attribute_codes; +typedef enum posit_codes { + /*tex total number of posit parameters */ + number_posit_pars, +} posit_codes; + // typedef enum special_sequence_codes { // // current_font_sequence_code, // undefined_control_sequence_code, @@ -717,7 +722,14 @@ typedef enum attribute_codes { # define internal_dimen_number(a) ((a) - internal_dimen_base) # define register_dimen_number(a) ((a) - register_dimen_base) -# define internal_specification_base (register_dimen_base + max_n_of_dimen_registers) +# define internal_posit_base (register_dimen_base + max_n_of_dimen_registers) +# define register_posit_base (internal_posit_base + number_posit_pars + 1) +# define internal_posit_location(a) (internal_posit_base + (a)) +# define 
register_posit_location(a) (register_posit_base + (a)) +# define internal_posit_number(a) ((a) - internal_posit_base) +# define register_posit_number(a) ((a) - register_posit_base) + +# define internal_specification_base (register_posit_base + max_n_of_posit_registers) # define internal_specification_location(a) (internal_specification_base + (a)) # define internal_specification_number(a) ((a) - internal_specification_base) @@ -940,6 +952,7 @@ typedef enum save_types { # define int_parameter(A) eq_value(internal_int_location(A)) # define count_parameter(A) eq_value(internal_int_location(A)) +# define posit_parameter(A) eq_value(internal_posit_location(A)) # define attribute_parameter(A) eq_value(internal_attribute_location(A)) # define dimen_parameter(A) eq_value(internal_dimen_location(A)) # define toks_parameter(A) eq_value(internal_toks_location(A)) @@ -1671,6 +1684,7 @@ typedef enum auto_migration_mode_bits { # define auto_migrating_mode_permitted(what,flag) ((what & flag) == flag) # define attribute_register(j) eq_value(register_attribute_location(j)) +# define posit_register(j) eq_value(register_posit_location(j)) # define box_register(j) eq_value(register_box_location(j)) # define count_register(j) eq_value(register_int_location(j)) # define dimen_register(j) eq_value(register_dimen_location(j)) diff --git a/source/luametatex/source/tex/texfont.h b/source/luametatex/source/tex/texfont.h index 2adadf45d..2656d2b59 100644 --- a/source/luametatex/source/tex/texfont.h +++ b/source/luametatex/source/tex/texfont.h @@ -482,6 +482,7 @@ typedef enum char_tag_codes { n_ary_tag = 0x04000, radical_tag = 0x08000, punctuation_tag = 0x10000, + keep_base_tag = 0x20000, } char_tag_codes; /*tex diff --git a/source/luametatex/source/tex/texmaincontrol.c b/source/luametatex/source/tex/texmaincontrol.c index c93011ad9..d043209e5 100644 --- a/source/luametatex/source/tex/texmaincontrol.c +++ b/source/luametatex/source/tex/texmaincontrol.c @@ -3443,6 +3443,11 @@ inline static 
halfword tex_aux_get_register_index(int level) halfword index = tex_scan_attribute_register_number(); return register_attribute_location(index); } + case posit_val_level: + { + halfword index = tex_scan_posit_register_number(); + return register_posit_location(index); + } case glue_val_level: { halfword index = tex_scan_glue_register_number(); @@ -3469,6 +3474,8 @@ inline static halfword tex_aux_get_register_value(int level, int optionalequal) case int_val_level: case attr_val_level: return tex_scan_int(optionalequal, NULL); + case posit_val_level: + return tex_scan_posit(optionalequal); case dimen_val_level: return tex_scan_dimen(0, 0, 0, optionalequal, NULL); default: @@ -3495,6 +3502,12 @@ static int tex_aux_valid_arithmic(int cmd, int *index, int *level, int *varcmd, *level = attr_val_level; *original = eq_value(*index); return 1; + case register_posit_cmd: + case internal_posit_cmd: + *index = cur_chr; + *level = posit_val_level; + *original = eq_value(*index); + return 1; case register_dimen_cmd: case internal_dimen_cmd: *index = cur_chr; @@ -3530,6 +3543,12 @@ static int tex_aux_valid_arithmic(int cmd, int *index, int *level, int *varcmd, *original = cur_chr; *simple = dimension_cmd; return 1; + case posit_cmd: + *index = cur_cs; + *level = posit_val_level; + *original = cur_chr; + *simple = posit_cmd; + return 1; case gluespec_cmd: *index = cur_cs; *level = glue_val_level; @@ -3582,6 +3601,12 @@ inline static void tex_aux_update_register(int a, int level, halfword index, hal tex_change_attribute_register(a, index, value); tex_word_define(a, index, value); break; + case posit_val_level: + tex_word_define(a, index, value); + if (is_frozen(a) && cmd == internal_posit_cmd && cur_mode == hmode) { + tex_update_par_par(internal_posit_cmd, index - lmt_primitive_state.prim_data[cmd].offset); + } + break; case dimen_val_level: tex_word_define(a, index, value); if (is_frozen(a) && cmd == internal_dimen_cmd && cur_mode == hmode) { @@ -3647,6 +3672,13 @@ static void 
tex_aux_arithmic_register(int a, int code) } else { return; } + case posit_val_level: + if (tex_posit_eq_zero(amount)) { + return; + } else { + value = tex_posit_add(original, amount); + break; + } case glue_val_level: case mu_val_level: if (tex_glue_is_zero(amount)) { @@ -3704,6 +3736,9 @@ static void tex_aux_arithmic_register(int a, int code) case attr_val_level: value = tex_multiply_integers(original, amount); break; + case posit_val_level: + value = tex_posit_mul(original, amount); + break; case dimen_val_level: value = tex_nx_plus_y(original, amount, 0); break; @@ -3746,6 +3781,9 @@ static void tex_aux_arithmic_register(int a, int code) case dimen_val_level: value = tex_x_over_n(original, amount); break; + case posit_val_level: + value = tex_posit_div(original, amount); + break; case glue_val_level: case mu_val_level: { @@ -4256,6 +4294,12 @@ static void tex_aux_set_shorthand_def(int a, int force) tex_define_again(a, p, register_attribute_cmd, register_attribute_location(n)); break; } + case float_def_code: + { + scaled n = tex_scan_posit_register_number(); + tex_define_again(a, p, register_posit_cmd, register_posit_location(n)); + break; + } case dimen_def_code: { scaled n = tex_scan_dimen_register_number(); @@ -4300,6 +4344,13 @@ static void tex_aux_set_shorthand_def(int a, int force) tex_define_again(a, p, dimension_cmd, v); } break; + case posit_def_code: + /* case posit_def_csname_code: */ + { + scaled v = tex_scan_posit(1); + tex_define_again(a, p, posit_cmd, v); + } + break; case gluespec_def_code: { halfword v = tex_scan_glue(glue_val_level, 1); @@ -5318,6 +5369,20 @@ static void tex_aux_set_register_int(int a) tex_word_define(a, p, v); } +
+static void tex_aux_set_internal_posit(int a)
+{
+    halfword p = cur_chr;          /*tex the target of the assignment, taken from |cur_chr| */
+    scaled v = tex_scan_posit(1);  /*tex the new value, scanned with the optional equal flag set */
+    tex_assign_internal_posit_value(a, p, v); /*tex the posit helper (as |tex_define_swapped| uses for |internal_posit_cmd|), not the int one */
+}
+
+static void tex_aux_set_register_posit(int a) +{ + halfword p = cur_chr; + scaled v = tex_scan_posit(1); + tex_word_define(a, p, v); +} + static void 
tex_aux_set_internal_attr(int a) { halfword p = cur_chr; @@ -5450,6 +5515,9 @@ static void tex_aux_set_constant_register(halfword cmd, halfword cs, halfword fl case dimension_cmd: v = tex_scan_dimen(0, 0, 0, 1, NULL); break; + case posit_cmd: + v = tex_scan_posit(1); + break; case gluespec_cmd: v = tex_scan_glue(glue_val_level, 1); break; @@ -5542,6 +5610,12 @@ static void tex_run_prefixed_command(void) case register_attribute_cmd: tex_aux_set_register_attr(flags); break; + case internal_posit_cmd: + tex_aux_set_internal_posit(flags); + break; + case register_posit_cmd: + tex_aux_set_register_posit(flags); + break; case internal_dimen_cmd: tex_aux_set_internal_dimen(flags); break; @@ -5619,6 +5693,7 @@ static void tex_run_prefixed_command(void) break; case integer_cmd: case dimension_cmd: + case posit_cmd: case gluespec_cmd: case mugluespec_cmd: tex_aux_set_constant_register(cur_cmd, cur_cs, flags); @@ -5964,6 +6039,14 @@ void tex_assign_internal_attribute_value(int a, halfword p, int val) tex_word_define(a, p, val); } +void tex_assign_internal_posit_value(int a, halfword p, int val) +{ + tex_word_define(a, p, val); + // if (is_frozen(a) && cur_mode == hmode) { + // tex_update_par_par(internal_posit_cmd, internal_posit_number(p)); + // } +} + void tex_assign_internal_dimen_value(int a, halfword p, int val) { tex_word_define(a, p, val); @@ -6316,23 +6399,25 @@ static void tex_aux_run_show_whatever(void) inline static void tex_aux_big_switch(int mode, int cmd) { - + /* todo: order */ switch (cmd) { case arithmic_cmd: - case register_attribute_cmd: + case internal_int_cmd : + case register_int_cmd : case internal_attribute_cmd: - case register_dimen_cmd: + case register_attribute_cmd: + case internal_posit_cmd: + case register_posit_cmd: case internal_dimen_cmd: + case register_dimen_cmd: case set_font_property_cmd : - case register_glue_cmd: case internal_glue_cmd: - case register_int_cmd : - case internal_int_cmd : - case register_mu_glue_cmd: + case 
register_glue_cmd: case internal_mu_glue_cmd: - case register_toks_cmd: + case register_mu_glue_cmd: case internal_toks_cmd: + case register_toks_cmd: case define_char_code_cmd: case def_cmd: case define_family_cmd: @@ -6353,6 +6438,7 @@ inline static void tex_aux_big_switch(int mode, int cmd) case lua_value_cmd: case integer_cmd: case dimension_cmd: + case posit_cmd: case gluespec_cmd: case mugluespec_cmd: case combine_toks_cmd: diff --git a/source/luametatex/source/tex/texmaincontrol.h b/source/luametatex/source/tex/texmaincontrol.h index d46eddc5c..558db148f 100644 --- a/source/luametatex/source/tex/texmaincontrol.h +++ b/source/luametatex/source/tex/texmaincontrol.h @@ -68,6 +68,7 @@ extern void tex_handle_assignments (void); /*tex Used in math. extern void tex_assign_internal_int_value (int a, halfword p, int val); extern void tex_assign_internal_attribute_value (int a, halfword p, int val); +extern void tex_assign_internal_posit_value (int a, halfword p, int val); extern void tex_assign_internal_dimen_value (int a, halfword p, int val); extern void tex_assign_internal_skip_value (int a, halfword p, int val); diff --git a/source/luametatex/source/tex/texmath.c b/source/luametatex/source/tex/texmath.c index f4d962d68..00e67942c 100644 --- a/source/luametatex/source/tex/texmath.c +++ b/source/luametatex/source/tex/texmath.c @@ -506,6 +506,11 @@ scaled tex_get_math_parameter(int style, int param, halfword *type) *type = dimen_val_level; } return eq_value(value); + } else if (eq_type(value) == posit_cmd) { + if (type) { + *type = dimen_val_level; + } + return tex_posit_to_dimension(eq_value(value)); } else { goto MISMATCH; } @@ -2940,7 +2945,7 @@ void tex_run_math_accent(void) case math_uaccent_code: /*tex |\Umathaccent| */ while (1) { - switch (tex_scan_character("abcnsftoABCNSFTO", 0, 1, 0)) { + switch (tex_scan_character("abcnsftokABCNSFTOK", 0, 1, 0)) { case 'a': case 'A': if (tex_scan_mandate_keyword("attr", 1)) { attrlist = tex_scan_attribute(attrlist); @@ 
-2991,6 +2996,11 @@ void tex_run_math_accent(void) tex_aux_show_keyword_error("fraction|fixed"); goto DONE; } + case 'k': case 'K': + if (tex_scan_mandate_keyword("keepbase", 1)) { + noad_options(accent) |= noad_option_keep_base; + } + break; case 'n': case 'N': if (tex_scan_mandate_keyword("nooverflow", 1)) { /*tex diff --git a/source/luametatex/source/tex/texmlist.c b/source/luametatex/source/tex/texmlist.c index a3154df62..c5613d90a 100644 --- a/source/luametatex/source/tex/texmlist.c +++ b/source/luametatex/source/tex/texmlist.c @@ -897,7 +897,7 @@ static halfword tex_aux_underbar(halfword box, scaled gap, scaled height, scaled */ -static halfword tex_aux_char_box(halfword fnt, int chr, halfword att, scaled *ic, quarterword subtype, scaled target, int style, int shrink, int stretch) +static halfword tex_aux_char_box(halfword fnt, int chr, halfword att, scaled *ic, quarterword subtype, scaled target, int style, int shrink, int stretch, int *isscaled) { /*tex The new box and its character node. 
*/ halfword glyph = tex_aux_new_math_glyph(fnt, chr, subtype); @@ -909,6 +909,9 @@ static halfword tex_aux_char_box(halfword fnt, int chr, halfword att, scaled *ic box_height(box) = whd.ht; box_depth(box) = whd.dp; box_list(box) = glyph; + if (isscaled) { + *isscaled = 0; + } if (tex_has_glyph_option(glyph, glyph_option_no_italic_correction)) { whd.ic = 0; } @@ -932,15 +935,21 @@ static halfword tex_aux_char_box(halfword fnt, int chr, halfword att, scaled *ic if (amount > 0) { glyph_x_scale(glyph) = lround((double) glyph_x_scale(glyph) * amount/whd.wd); glyph_x_offset(glyph) = (whd.wd - amount)/2; + if (isscaled) { + *isscaled = 1; + } } return box; } - if ((shrink && (whd.wd > target)) || (stretch && (whd.wd < target))) { // we need to keep an eye on it - glyph_x_scale(glyph) = lround((double) glyph_x_scale(glyph) * target/whd.wd); - // glyph_x_offset(glyph) = (whd.wd - target)/2; - whd = tex_char_whd_from_glyph(glyph); - box_width(box) = whd.wd; - } + if ((shrink && (whd.wd > target)) || (stretch && (whd.wd < target))) { // we need to keep an eye on it + glyph_x_scale(glyph) = lround((double) glyph_x_scale(glyph) * target/whd.wd); + // glyph_x_offset(glyph) = (whd.wd - target)/2; + whd = tex_char_whd_from_glyph(glyph); + box_width(box) = whd.wd; + if (isscaled) { + *isscaled = 1; + } + } } return box; } @@ -1429,6 +1438,7 @@ static halfword tex_aux_make_delimiter(halfword target, halfword delimiter, int /*tex are we trying the large variant? */ int large_attempt = 0; int do_parts = 0; + int isscaled = 0; int shrink = flat && has_noad_option_shrink(target); int stretch = flat && has_noad_option_stretch(target); /*tex to save the current attribute list */ @@ -1581,7 +1591,7 @@ static halfword tex_aux_make_delimiter(halfword target, halfword delimiter, int added. See (**). */ HERE: - result = tex_aux_char_box(fnt, chr, att, delta, glyph_math_delimiter_subtype, flat ? 
targetsize : 0, style, shrink, stretch); + result = tex_aux_char_box(fnt, chr, att, delta, glyph_math_delimiter_subtype, flat ? targetsize : 0, style, shrink, stretch, &isscaled); if (flat) { /* This will be done when we have a reasonable example. */ } else { @@ -2393,20 +2403,16 @@ static void tex_aux_set_radical_kerns(delimiterextremes *extremes, kernset *kern { if (kerns && extremes->tfont) { if (tex_math_has_class_option(radical_noad_subtype, carry_over_left_top_kern_class_option)) { -// kerns->topleft = tex_char_top_left_kern_from_font(extremes->tfont, extremes->tchar); -kerns->topleft = tex_aux_math_x_size_scaled(extremes->tfont, tex_char_top_left_kern_from_font(extremes->tfont, extremes->tchar), size); + kerns->topleft = tex_aux_math_x_size_scaled(extremes->tfont, tex_char_top_left_kern_from_font(extremes->tfont, extremes->tchar), size); } if (tex_math_has_class_option(radical_noad_subtype, carry_over_left_bottom_kern_class_option)) { -// kerns->bottomleft = tex_char_bottom_left_kern_from_font(extremes->bfont, extremes->bchar); -kerns->bottomleft = tex_aux_math_x_size_scaled(extremes->bfont, tex_char_bottom_left_kern_from_font(extremes->bfont, extremes->bchar), size); + kerns->bottomleft = tex_aux_math_x_size_scaled(extremes->bfont, tex_char_bottom_left_kern_from_font(extremes->bfont, extremes->bchar), size); } if (tex_math_has_class_option(radical_noad_subtype, carry_over_right_top_kern_class_option)) { -// kerns->topright = tex_char_top_right_kern_from_font(extremes->tfont, extremes->tchar); -kerns->topright = tex_aux_math_x_size_scaled(extremes->tfont, tex_char_top_right_kern_from_font(extremes->tfont, extremes->tchar), size); + kerns->topright = tex_aux_math_x_size_scaled(extremes->tfont, tex_char_top_right_kern_from_font(extremes->tfont, extremes->tchar), size); } if (tex_math_has_class_option(radical_noad_subtype, carry_over_right_bottom_kern_class_option)) { -// kerns->bottomright = tex_char_bottom_right_kern_from_font(extremes->bfont, 
extremes->bchar); -kerns->bottomright = tex_aux_math_x_size_scaled(extremes->bfont, tex_char_bottom_right_kern_from_font(extremes->bfont, extremes->bchar), size); + kerns->bottomright = tex_aux_math_x_size_scaled(extremes->bfont, tex_char_bottom_right_kern_from_font(extremes->bfont, extremes->bchar), size); } if (tex_math_has_class_option(radical_noad_subtype, prefer_delimiter_dimensions_class_option)) { kerns->height = extremes->height; @@ -2973,10 +2979,14 @@ static void tex_aux_do_make_math_accent(halfword target, halfword accentfnt, hal halfword stretch = (flags & stretch_accent_code) == stretch_accent_code; halfword basefnt = null_font; halfword basechr = 0; + halfword accentbasefnt = accentfnt; + halfword accentbasechr = accentchr; int found = 0; + int isscaled = 0; + int keep = 0; /*tex Compute the amount of skew, or set |skew| to an alignment point. This will be true if a - top-accent has been determined. + top-accent has been determined. This concerns the base! */ int absolute = tex_aux_compute_accent_skew(target, flags, &skew, size); { @@ -2995,6 +3005,7 @@ static void tex_aux_do_make_math_accent(halfword target, halfword accentfnt, hal // basedepth = box_depth(base); } if (base) { + /*tex We always have a base anyway. */ halfword list = box_list(base); if (list && node_type(list) == glyph_node) { basefnt = glyph_font(list); @@ -3073,25 +3084,36 @@ static void tex_aux_do_make_math_accent(halfword target, halfword accentfnt, hal accentchr = next; } } + + } + keep = (accentfnt == accentbasefnt) && (accentchr == accentbasechr) && (has_noad_option_keep_base(target) || tex_char_has_tag_from_font(accentfnt, accentchr, keep_base_tag)); + if (accent) { /*tex - So here we then need to package the offsets. + We have an extensible that already has been boxed */ - } - if (! 
accent) { - /*tex Italic gets added to width for traditional fonts (no italic anyway): */ - accent = tex_aux_char_box(accentfnt, accentchr, attrlist, NULL, glyph_math_accent_subtype, usedwidth, style, has_noad_option_stretch(target), has_noad_option_shrink(target)); // basewidth + } else { + /*tex + We have a base char or a variant. For traditional fonts the italic correction gets + added to width (not that we have these in \CONTEXT). + */ + accent = tex_aux_char_box(accentfnt, accentchr, attrlist, NULL, glyph_math_accent_subtype, usedwidth, style, keep ? 0 : has_noad_option_shrink(target), keep ? 0 : has_noad_option_stretch(target), &isscaled); // basewidth found = 1; } if (flags & top_accent_code) { scaled b = tex_get_math_y_parameter(style, math_parameter_accent_base_height); - scaled u = found ? tex_get_math_y_parameter(style, stretch ? math_parameter_flattened_accent_top_shift_up : math_parameter_accent_top_shift_up) : undefined_math_parameter; + scaled u = tex_get_math_y_parameter(style, math_parameter_accent_top_shift_up); if (found && ! 
tex_aux_math_engine_control(accentfnt, math_control_ignore_flat_accents)) { scaled f = tex_get_math_y_parameter(style, math_parameter_flattened_accent_base_height); if (f != undefined_math_parameter && baseheight > f) { + int keep = (accentfnt == accentbasefnt) && (accentchr == accentbasechr) && (has_noad_option_keep_base(target) || tex_char_has_tag_from_font(accentfnt, accentchr, keep_base_tag)); halfword flatchr = tex_char_flat_accent_from_font(accentfnt, accentchr); - if (flatchr != INT_MIN && flatchr != accentchr) { + if (flatchr && flatchr != INT_MIN && flatchr != accentchr) { + scaled uf = tex_get_math_y_parameter(style, math_parameter_flattened_accent_top_shift_up); + if (uf != undefined_math_parameter) { + u = uf; + } tex_flush_node(accent); - accent = tex_aux_char_box(accentfnt, flatchr, attrlist, NULL, glyph_math_accent_subtype, usedwidth, style, 0, 0); + accent = tex_aux_char_box(accentfnt, flatchr, attrlist, NULL, glyph_math_accent_subtype, usedwidth, style, keep ? 0 : has_noad_option_shrink(target), keep ? 0 : has_noad_option_stretch(target), &isscaled); if (tracing_math_par >= 2) { tex_begin_diagnostic(); tex_print_format("[math: flattening accent, old %x, new %x]", accentchr, flatchr); @@ -3155,45 +3177,63 @@ static void tex_aux_do_make_math_accent(halfword target, halfword accentfnt, hal /*tex The top accents of both characters are aligned. */ { halfword accentwidth = box_width(accent); - if (absolute) { - scaled anchor = 0; - if (extended) { - /*tex If the accent is extensible just take the center. 
*/ - anchor = tex_half_scaled(accentwidth); - } else { - if (flags & top_accent_code) { - anchor = tex_char_unchecked_top_anchor_from_font(accentfnt, accentchr); /* no bot accent key */ - } else if (flags & bot_accent_code) { - anchor = tex_char_unchecked_bottom_anchor_from_font(accentfnt, accentchr); /* no bot accent key */ - } else { - anchor = INT_MIN; - } - if (anchor == INT_MIN || has_noad_option_center(target)) { - /*tex just take the center */ + if (accentwidth > basewidth && has_noad_option_nooverflow(target)) { + /*tex + This likely only happens with (too wide base) rules so centering is quite okay then and a + bit like scaling. But it could be an accent option (weren't it that we ran out of bits). + In that case a topaccent is also unlikely. + */ + scaled leftkern = tex_half_scaled(accentwidth - basewidth); + if (leftkern > 0) { + halfword kern = tex_new_kern_node(leftkern, horizontal_math_kern_subtype); + tex_attach_attribute_list_copy(kern, target); + tex_try_couple_nodes(kern, base); + base = tex_hpack(kern, 0, packing_additional, direction_unknown, holding_none_option); + basewidth = accentwidth; + box_width(base) = accentwidth; + } + } else { + if (absolute) { + scaled anchor = 0; /* maybe: INT_MIN */ + if (extended || isscaled) { + /*tex If the accent is extensible just take the center. */ anchor = tex_half_scaled(accentwidth); } else { - anchor = tex_aux_math_x_size_scaled(accentfnt, anchor, size); + /*tex When we scale we center. 
*/ + if (flags & top_accent_code) { + anchor = tex_char_unchecked_top_anchor_from_font(accentfnt, accentchr); /* no bot accent key */ + } else if (flags & bot_accent_code) { + anchor = tex_char_unchecked_bottom_anchor_from_font(accentfnt, accentchr); /* no bot accent key */ + } else { + anchor = INT_MIN; + } + if (anchor == INT_MIN || has_noad_option_center(target)) { + /*tex just take the center */ + anchor = tex_half_scaled(accentwidth); + } else { + anchor = tex_aux_math_x_size_scaled(accentfnt, anchor, size); + } + } + if (math_direction_par == dir_righttoleft) { + skew += anchor - accentwidth; + } else { + skew -= anchor; } - } - if (math_direction_par == dir_righttoleft) { - skew += anchor - accentwidth; + } else if (accentwidth == 0) { + skew += basewidth; + } else if (math_direction_par == dir_righttoleft) { + skew += accentwidth; /* ok? */ } else { - skew -= anchor; + skew += tex_half_scaled(basewidth - accentwidth); + } + box_shift_amount(accent) = skew; + box_width(accent) = 0; /* in gyre zero anyway */ + if (accentwidth) { + overshoot = accentwidth + skew - basewidth; + } + if (overshoot < 0) { + overshoot = 0; } - } else if (accentwidth == 0) { - skew += basewidth; - } else if (math_direction_par == dir_righttoleft) { - skew += accentwidth; /* ok? 
*/ - } else { - skew += tex_half_scaled(basewidth - accentwidth); - } - box_shift_amount(accent) = skew; - box_width(accent) = 0; /* in gyre zero anyway */ - if (accentwidth) { - overshoot = accentwidth + skew - basewidth; - } - if (overshoot < 0) { - overshoot = 0; } } if (flags & (top_accent_code)) { @@ -3354,20 +3394,16 @@ static void tex_aux_wrap_fraction_result(halfword target, int style, int size, h right = tex_aux_make_delimiter(target, right_delimiter, size, delta, 0, style, 1, NULL, NULL, 0, has_noad_option_nooverflow(target), &extremes, 0); if (kerns && extremes.tfont) { if (tex_math_has_class_option(fraction_noad_subtype, carry_over_left_top_kern_class_option)) { -// kerns->topleft = tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar); -kerns->topleft = tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar), size); + kerns->topleft = tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar), size); } if (tex_math_has_class_option(fraction_noad_subtype, carry_over_left_bottom_kern_class_option)) { -// kerns->bottomleft = tex_char_bottom_left_kern_from_font(extremes.bfont, extremes.bchar); -kerns->bottomleft = tex_aux_math_x_size_scaled(extremes.bfont, tex_char_bottom_left_kern_from_font(extremes.bfont, extremes.bchar), size); + kerns->bottomleft = tex_aux_math_x_size_scaled(extremes.bfont, tex_char_bottom_left_kern_from_font(extremes.bfont, extremes.bchar), size); } if (tex_math_has_class_option(fraction_noad_subtype, carry_over_right_top_kern_class_option)) { -// kerns->topright = tex_char_top_right_kern_from_font(extremes.tfont, extremes.tchar); -kerns->topright = tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_right_kern_from_font(extremes.tfont, extremes.tchar), size); + kerns->topright = tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_right_kern_from_font(extremes.tfont, extremes.tchar), size); } if 
(tex_math_has_class_option(fraction_noad_subtype, carry_over_right_bottom_kern_class_option)) { -// kerns->bottomright = tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar); -kerns->bottomright = tex_aux_math_x_size_scaled(extremes.bfont, tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar), size); + kerns->bottomright = tex_aux_math_x_size_scaled(extremes.bfont, tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar), size); } if (tex_math_has_class_option(fraction_noad_subtype, prefer_delimiter_dimensions_class_option)) { kerns->height = extremes.height; @@ -3600,14 +3636,15 @@ static halfword tex_aux_make_skewed_fraction(halfword target, int style, int siz box_depth(fraction) = box_depth(middle) > maxdepth ? box_depth(middle) : maxdepth; ngap = hgap; dgap = hgap; - if (tex_math_has_class_option(fraction_noad_subtype, carry_over_left_top_kern_class_option)) { -// ngap += tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar); -ngap += tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar), size); - } - if (tex_math_has_class_option(fraction_noad_subtype, carry_over_right_bottom_kern_class_option)) { -// dgap += tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar); -dgap += tex_aux_math_x_size_scaled(extremes.bfont, tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar), size); - } + /*tex Better not do this, as we now have factors to control it and can fix the parameters. 
*/ + /* + if (tex_math_has_class_option(fraction_noad_subtype, carry_over_left_top_kern_class_option)) { + ngap += tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar), size); + } + if (tex_math_has_class_option(fraction_noad_subtype, carry_over_right_bottom_kern_class_option)) { + dgap += tex_aux_math_x_size_scaled(extremes.bfont, tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar), size); + } + */ if (ngap || dgap) { // todo: only add when non zero halfword nkern = tex_new_kern_node(ngap, horizontal_math_kern_subtype); @@ -5646,12 +5683,10 @@ static halfword tex_aux_make_left_right(halfword target, int style, scaled max_d /* maybe elsewhere as the above case */ if (extremes && extremes->tfont) { if (tex_math_has_class_option(fenced_noad_subtype, carry_over_right_top_kern_class_option)) { -// kerns.topright = tex_char_top_right_kern_from_font(extremes->tfont, extremes->tchar); -kerns.topright = tex_aux_math_x_size_scaled(extremes->tfont, tex_char_top_right_kern_from_font(extremes->tfont, extremes->tchar), size); + kerns.topright = tex_aux_math_x_size_scaled(extremes->tfont, tex_char_top_right_kern_from_font(extremes->tfont, extremes->tchar), size); } if (tex_math_has_class_option(fenced_noad_subtype, carry_over_right_bottom_kern_class_option)) { -// kerns.bottomright = tex_char_bottom_right_kern_from_font(extremes->bfont, extremes->bchar); -kerns.bottomright = tex_aux_math_x_size_scaled(extremes->bfont, tex_char_bottom_right_kern_from_font(extremes->bfont, extremes->bchar), size); + kerns.bottomright = tex_aux_math_x_size_scaled(extremes->bfont, tex_char_bottom_right_kern_from_font(extremes->bfont, extremes->bchar), size); } if (tex_math_has_class_option(fenced_noad_subtype, prefer_delimiter_dimensions_class_option)) { kerns.height = extremes->height; @@ -6217,12 +6252,10 @@ static void tex_aux_finish_fenced(halfword current, halfword main_style, scaled case left_fence_side: case 
extended_left_fence_side: if (tex_math_has_class_option(fenced_noad_subtype, carry_over_left_top_kern_class_option)) { - // kerns->topleft = tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar); - kerns->topleft = tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar), main_style); + kerns->topleft = tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_left_kern_from_font(extremes.tfont, extremes.tchar), main_style); } if (tex_math_has_class_option(fenced_noad_subtype, carry_over_left_bottom_kern_class_option)) { - // kerns->bottomleft = tex_char_bottom_left_kern_from_font(extremes.bfont, extremes.bchar); - kerns->bottomleft = tex_aux_math_x_size_scaled(extremes.bfont, tex_char_bottom_left_kern_from_font(extremes.bfont, extremes.bchar), main_style); + kerns->bottomleft = tex_aux_math_x_size_scaled(extremes.bfont, tex_char_bottom_left_kern_from_font(extremes.bfont, extremes.bchar), main_style); } if (tex_math_has_class_option(fenced_noad_subtype, prefer_delimiter_dimensions_class_option)) { kerns->height = extremes.height; @@ -6236,12 +6269,10 @@ static void tex_aux_finish_fenced(halfword current, halfword main_style, scaled case left_operator_side: case no_fence_side: if (tex_math_has_class_option(fenced_noad_subtype, carry_over_right_top_kern_class_option)) { -// kerns->topright = tex_char_top_right_kern_from_font(extremes.tfont, extremes.tchar); -kerns->topright = tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_right_kern_from_font(extremes.tfont, extremes.tchar), main_style); + kerns->topright = tex_aux_math_x_size_scaled(extremes.tfont, tex_char_top_right_kern_from_font(extremes.tfont, extremes.tchar), main_style); } if (tex_math_has_class_option(fenced_noad_subtype, carry_over_right_bottom_kern_class_option)) { -// kerns->bottomright = tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar); -kerns->bottomright = tex_aux_math_x_size_scaled(extremes.bfont, 
tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar), main_style); + kerns->bottomright = tex_aux_math_x_size_scaled(extremes.bfont, tex_char_bottom_right_kern_from_font(extremes.bfont, extremes.bchar), main_style); } if (tex_math_has_class_option(fenced_noad_subtype, prefer_delimiter_dimensions_class_option)) { kerns->height = extremes.height; diff --git a/source/luametatex/source/tex/texnodes.h b/source/luametatex/source/tex/texnodes.h index f8766c430..7fa050428 100644 --- a/source/luametatex/source/tex/texnodes.h +++ b/source/luametatex/source/tex/texnodes.h @@ -1784,21 +1784,22 @@ typedef enum noad_options { /*tex The Microsoft compiler truncates to int, so: */ -# define noad_option_source_on_nucleus 0x0100000000 -# define noad_option_fixed_super_or_sub_script 0x0200000000 -# define noad_option_fixed_super_and_sub_script 0x0400000000 -# define noad_option_auto_base 0x0800000000 -# define noad_option_stretch 0x1000000000 -# define noad_option_shrink 0x2000000000 -# define noad_option_center 0x4000000000 -# define noad_option_scale 0x8000000000 +# define noad_option_source_on_nucleus (uint64_t) 0x00100000000 +# define noad_option_fixed_super_or_sub_script (uint64_t) 0x00200000000 +# define noad_option_fixed_super_and_sub_script (uint64_t) 0x00400000000 +# define noad_option_auto_base (uint64_t) 0x00800000000 +# define noad_option_stretch (uint64_t) 0x01000000000 +# define noad_option_shrink (uint64_t) 0x02000000000 +# define noad_option_center (uint64_t) 0x04000000000 +# define noad_option_scale (uint64_t) 0x08000000000 +# define noad_option_keep_base (uint64_t) 0x10000000000 # define has_option(a,b) (((a) & (b)) == (b)) # define unset_option(a,b) ((a) & ~(b)) -inline static void tex_add_noad_option (halfword a, long long r) { noad_options(a) |= r; } -inline static void tex_remove_noad_option (halfword a, long long r) { noad_options(a) &= ~(r | noad_options(a)); } -inline static int tex_has_noad_option (halfword a, long long r) { return 
(noad_options(a) & r) == r; } +inline static void tex_add_noad_option (halfword a, uint64_t r) { noad_options(a) |= r; } +inline static void tex_remove_noad_option (halfword a, uint64_t r) { noad_options(a) &= ~r; } /*tex Only the bits in |r| are cleared. */ +inline static int tex_has_noad_option (halfword a, uint64_t r) { return (noad_options(a) & r) == r; } inline static int has_noad_no_script_option(halfword n, halfword option) { @@ -1853,8 +1854,8 @@ inline static int has_noad_no_script_option(halfword n, halfword option) # define has_noad_option_stretch(a) (has_option(noad_options(a), noad_option_stretch)) # define has_noad_option_shrink(a) (has_option(noad_options(a), noad_option_shrink)) # define has_noad_option_auto_base(a) (has_option(noad_options(a), noad_option_auto_base)) -# define has_noad_option_center(a) (has_option(noad_options(a), noad_option_center)) # define has_noad_option_scale(a) (has_option(noad_options(a), noad_option_scale)) +# define has_noad_option_keep_base(a) (has_option(noad_options(a), noad_option_keep_base)) /*tex In the meantime the codes and subtypes are in sync.
The variable component does not really diff --git a/source/luametatex/source/tex/texpackaging.c b/source/luametatex/source/tex/texpackaging.c index c4af153fc..ad1db455c 100644 --- a/source/luametatex/source/tex/texpackaging.c +++ b/source/luametatex/source/tex/texpackaging.c @@ -1454,7 +1454,7 @@ halfword tex_hpack(halfword p, scaled w, int m, singleword pack_direction, int r tex_current_input_file_name(), &rule ); - if (rule) { + if (rule && rule != r) { tex_aux_append_diagnostic_rule(r, rule); } } @@ -1511,7 +1511,7 @@ halfword tex_hpack(halfword p, scaled w, int m, singleword pack_direction, int r rule = tex_new_rule_node(normal_rule_subtype); rule_width(rule) = overfull_rule_par; } - if (rule) { + if (rule && rule != r) { tex_aux_append_diagnostic_rule(r, rule); } if (callback_id == 0) { @@ -1536,7 +1536,7 @@ halfword tex_hpack(halfword p, scaled w, int m, singleword pack_direction, int r lmt_input_state.input_line, tex_current_input_file_name(), &rule); - if (rule) { + if (rule && rule != r) { tex_aux_append_diagnostic_rule(r, rule); } } else { diff --git a/source/luametatex/source/tex/texprimitive.c b/source/luametatex/source/tex/texprimitive.c index cab68f841..027f62d23 100644 --- a/source/luametatex/source/tex/texprimitive.c +++ b/source/luametatex/source/tex/texprimitive.c @@ -818,6 +818,13 @@ void tex_print_cmd_chr(singleword cmd, halfword chr) tex_print_str_esc("attribute"); tex_print_int(register_attribute_number(chr)); break; + case register_posit_cmd: + tex_print_str_esc("posit"); + tex_print_int(register_posit_number(chr)); + break; + case internal_posit_cmd: + tex_aux_prim_cmd_chr(cmd, chr); + break; case internal_dimen_cmd: tex_aux_prim_cmd_chr(cmd, chr); break; @@ -851,6 +858,10 @@ void tex_print_cmd_chr(singleword cmd, halfword chr) tex_print_str("dimension "); tex_print_dimension(chr, pt_unit); break; + case posit_cmd: + tex_print_str("posit "); + tex_print_posit(chr); + break; case gluespec_cmd: tex_print_str("gluespec "); tex_print_spec(chr, 
pt_unit); diff --git a/source/luametatex/source/tex/texprinting.c b/source/luametatex/source/tex/texprinting.c index 1e384a9cd..9e502fbdf 100644 --- a/source/luametatex/source/tex/texprinting.c +++ b/source/luametatex/source/tex/texprinting.c @@ -585,6 +585,17 @@ void tex_print_sparse_dimension(scaled s, int unit) } } +/*tex + A posit is printed via |tex_posit_to_double| with at most 20 significant digits, which is good enough. The write is bounded by the buffer size. +*/ + +void tex_print_posit(halfword s) +{ + char b[32]; + snprintf(b, sizeof(b), "%.20g", tex_posit_to_double(s)); + tex_print_str(b); +} + /*tex Hexadecimal printing of nonnegative integers is accomplished by |print_hex|. We have a few diff --git a/source/luametatex/source/tex/texprinting.h b/source/luametatex/source/tex/texprinting.h index 745b8eeb0..cfa50fbf5 100644 --- a/source/luametatex/source/tex/texprinting.h +++ b/source/luametatex/source/tex/texprinting.h @@ -69,6 +69,7 @@ extern void tex_print_cs (halfword p); / extern void tex_print_cs_name (halfword p); /*tex Only prints known ones. */ extern void tex_print_str (const char *s); extern void tex_print_str_esc (const char *s); +extern void tex_print_posit (halfword d); extern void tex_print_dimension (scaled d, int unit); /*tex prints a dimension with pt */ extern void tex_print_sparse_dimension (scaled d, int unit); /*tex prints a dimension with pt */ extern void tex_print_unit (int unit); /*tex prints a glue component */ diff --git a/source/luametatex/source/tex/texscanning.c b/source/luametatex/source/tex/texscanning.c index aae30c6f0..5b60c15c0 100644 --- a/source/luametatex/source/tex/texscanning.c +++ b/source/luametatex/source/tex/texscanning.c @@ -155,6 +155,62 @@ scanner_state_info lmt_scanner_state = { */ +// inline static void tex_aux_downgrade_cur_val(int level, int succeeded, int negative) +// { +// switch (cur_val_level) { +// case tok_val_level: +// case font_val_level: +// case mathspec_val_level: +// case fontspec_val_level: +// /*tex +// This test pays back as this actually happens, but we also need it for the +// |none_lua_function| handling.
We end up here in |ident_val_level| and |tok_val_level| +// and they don't downgrade, nor negate which saves a little testing. +// */ +// break; +// case int_val_level: +// case attr_val_level: +// case dimen_val_level: +// case posit_val_level: +// while (cur_val_level > level) { +// --cur_val_level; +// } +// if (negative) { +// cur_val = -cur_val; +// } +// break; +// default: +// /*tex There is no real need for it being a loop, a test would do. */ +// while (cur_val_level > level) { +// /*tex Convert |cur_val| to a lower level. */ +// switch (cur_val_level) { +// case glue_val_level: +// case mu_val_level : +// cur_val = glue_amount(cur_val); +// break; +// // case mu_val_level : +// // tex_aux_mu_error(1); +// // break; +// } +// --cur_val_level; +// } +// if (cur_val_level == glue_val_level || cur_val_level == mu_val_level) { +// if (succeeded == 1) { +// cur_val = tex_new_glue_spec_node(cur_val); +// } +// if (negative) { +// glue_amount(cur_val) = -glue_amount(cur_val); +// glue_stretch(cur_val) = -glue_stretch(cur_val); +// glue_shrink(cur_val) = -glue_shrink(cur_val); +// } +// } else if (negative) { +// cur_val = -cur_val; +// } +// break; +// } +// } + + inline static void tex_aux_downgrade_cur_val(int level, int succeeded, int negative) { switch (cur_val_level) { @@ -168,45 +224,97 @@ inline static void tex_aux_downgrade_cur_val(int level, int succeeded, int negat and they don't downgrade, nor negate which saves a little testing. */ break; - case int_val_level: - case attr_val_level: - case dimen_val_level: - while (cur_val_level > level) { - --cur_val_level; - } - if (negative) { - cur_val = -cur_val; - } - break; - default: - /*tex There is no real need for it being a loop, a test would do. */ - while (cur_val_level > level) { - /*tex Convert |cur_val| to a lower level. 
*/ - switch (cur_val_level) { - case glue_val_level: - case mu_val_level : - cur_val = glue_amount(cur_val); - break; - // case mu_val_level : - // tex_aux_mu_error(1); - // break; - } - --cur_val_level; - } - if (cur_val_level == glue_val_level || cur_val_level == mu_val_level) { - if (succeeded == 1) { - cur_val = tex_new_glue_spec_node(cur_val); - } - if (negative) { - glue_amount(cur_val) = -glue_amount(cur_val); - glue_stretch(cur_val) = -glue_stretch(cur_val); - glue_shrink(cur_val) = -glue_shrink(cur_val); - } - } else if (negative) { - cur_val = -cur_val; - } - break; - } + case int_val_level: +// while (cur_val_level > level) { +// --cur_val_level; +// } +while (cur_val_level > level) { + cur_val_level = level; +} + if (negative) { + cur_val = -cur_val; + } + if (level == posit_val_level) { + cur_val = tex_integer_to_posit(cur_val).v; + } + break; + case attr_val_level: +// while (cur_val_level > level) { +// --cur_val_level; +// } +while (cur_val_level > level) { + cur_val_level = level; +} + if (negative) { + cur_val = -cur_val; + } + if (level == posit_val_level) { + cur_val = tex_integer_to_posit(cur_val).v; + } + break; + case dimen_val_level: +// while (cur_val_level > level) { +// --cur_val_level; +// } +while (cur_val_level > level) { + cur_val_level = level; +} + if (negative) { + cur_val = -cur_val; + } + if (level == posit_val_level) { + cur_val = tex_dimension_to_posit(cur_val).v; + } + break; + case posit_val_level: + switch (cur_val_level) { + case dimen_val_level: + cur_val = tex_dimension_to_posit(cur_val).v; + break; + case int_val_level: + case attr_val_level: + cur_val = tex_integer_to_posit(cur_val).v; + break; + } +// while (cur_val_level > level) { +// --cur_val_level; +// } +while (cur_val_level > level) { + cur_val_level = level; +} + if (negative) { + cur_val = tex_posit_neg(cur_val); + } + break; + default: + /*tex There is no real need for it being a loop, a test would do. 
*/ + while (cur_val_level > level) { + /*tex Convert |cur_val| to a lower level. */ + switch (cur_val_level) { + case glue_val_level: + case mu_val_level : + cur_val = glue_amount(cur_val); + break; + // case mu_val_level : + // tex_aux_mu_error(1); + // break; + } + --cur_val_level; + } + if (cur_val_level == glue_val_level || cur_val_level == mu_val_level) { + if (succeeded == 1) { + cur_val = tex_new_glue_spec_node(cur_val); + } + if (negative) { + glue_amount(cur_val) = -glue_amount(cur_val); + glue_stretch(cur_val) = -glue_stretch(cur_val); + glue_shrink(cur_val) = -glue_shrink(cur_val); + } + } else if (negative) { + cur_val = -cur_val; + } + break; + } } /*tex @@ -214,10 +322,6 @@ inline static void tex_aux_downgrade_cur_val(int level, int succeeded, int negat Some of the internal items can be fetched both routines, and these have been split off into the next routine, that returns true if the command code was understood. -*/ - -/*tex - The |last_item_cmd| branch has been flattened a bit because we don't need to treat \ETEX\ specific thingies special any longer. 
@@ -571,6 +675,7 @@ static int tex_aux_set_cur_val_by_some_cmd(int code) case font_char_dp_code: case font_char_ic_code: case font_char_ta_code: + case font_char_ba_code: { halfword fnt = tex_scan_font_identifier(NULL); halfword chr = tex_scan_char_number(0); @@ -591,6 +696,9 @@ static int tex_aux_set_cur_val_by_some_cmd(int code) case font_char_ta_code: cur_val = tex_char_top_anchor_from_font(fnt, chr); break; + case font_char_ba_code: + cur_val = tex_char_bottom_anchor_from_font(fnt, chr); + break; } } else { cur_val = 0; @@ -803,6 +911,9 @@ static int tex_aux_set_cur_val_by_some_cmd(int code) /* case attrexpr_code: */ tex_aux_scan_expr(int_val_level); return 1; + case posexpr_code: + tex_aux_scan_expr(posit_val_level); + return 1; case dimexpr_code: tex_aux_scan_expr(dimen_val_level); return 1; @@ -1148,6 +1259,11 @@ void tex_scan_something_simple(halfword cmd, halfword chr) cur_val = eq_value(chr); cur_val_level = int_val_level; break; + case internal_posit_cmd: + case register_posit_cmd: + cur_val = eq_value(chr); + cur_val_level = posit_val_level; + break; case internal_dimen_cmd: case register_dimen_cmd: cur_val = eq_value(chr); @@ -1300,11 +1416,22 @@ static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int case register_attribute_cmd: cur_val = eq_value(chr); cur_val_level = int_val_level; + if (level == posit_val_level) { + cur_val = tex_posit_to_integer(cur_val); + } + break; + case internal_posit_cmd: + case register_posit_cmd: + cur_val = eq_value(chr); + cur_val_level = posit_val_level; break; case internal_dimen_cmd: case register_dimen_cmd: cur_val = eq_value(chr); cur_val_level = dimen_val_level; + if (level == posit_val_level) { + cur_val = tex_posit_to_dimension(cur_val); + } break; case internal_glue_cmd: case register_glue_cmd: @@ -1656,6 +1783,12 @@ static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int cur_val = attribute_register(n); break; } + case posit_val_level: + { + halfword n = 
tex_scan_posit_register_number(); + cur_val = posit_register(n); + break; + } case dimen_val_level: { scaled n = tex_scan_dimen_register_number(); @@ -1701,6 +1834,10 @@ static halfword tex_aux_scan_something_internal(halfword cmd, halfword chr, int cur_val = chr; cur_val_level = dimen_val_level; break; + case posit_cmd: + cur_val = chr; + cur_val_level = posit_val_level; + break; case gluespec_cmd: cur_val = chr; cur_val_level = glue_val_level; @@ -1829,6 +1966,7 @@ inline static halfword tex_aux_scan_limited_int(int optional_equal, int min, int halfword tex_scan_int_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_int_register_index, "Integer register index"); } halfword tex_scan_dimen_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_dimen_register_index, "Dimension register index"); } halfword tex_scan_attribute_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_attribute_register_index, "Attribute register index"); } +halfword tex_scan_posit_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_posit_register_index, "Posit register index"); } halfword tex_scan_glue_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_glue_register_index, "Glue register index"); } halfword tex_scan_mu_glue_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_mu_glue_register_index, "Mu glue register index"); } halfword tex_scan_toks_register_number (void) { return tex_aux_scan_limited_int(0, 0, max_toks_register_index, "Toks register index"); } @@ -2120,12 +2258,22 @@ halfword tex_scan_int(int optional_equal, int *radix) return cur_val; } -int tex_scan_cardinal(unsigned *value, int dontbark) +int tex_scan_cardinal(int optional_equal, unsigned *value, int dontbark) { long long result = 0; - do { + // do { + // tex_get_x_token(); + // } while (cur_cmd == spacer_cmd); + while (1) { tex_get_x_token(); - } while (cur_cmd == spacer_cmd); + if (cur_cmd != spacer_cmd) { + if (optional_equal && 
(cur_tok == equal_token)) { + optional_equal = 0; + } else { + break; + } + } + } if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) { result = tex_aux_scan_something_internal(cur_cmd, cur_chr, int_val_level, 0, 0); } else { @@ -2711,6 +2859,9 @@ halfword tex_scan_dimen(int mu, int inf, int shortcut, int optional_equal, halfw } } else if (cur_val_level == dimen_val_level) { goto ATTACH_SIGN; + } else if (cur_val_level == posit_val_level) { + cur_val = tex_posit_to_dimension(cur_val); + goto ATTACH_SIGN; } } else { int has_fraction = tex_token_is_seperator(cur_tok); @@ -3031,6 +3182,14 @@ halfword tex_the_value_toks(int code, halfword *tail, halfword property) /* mayb pop_selector; return tex_cur_str_toks(tail); } + case posit_val_level: + { + int saved_selector; + push_selector; + tex_print_posit(cur_val); + pop_selector; + return tex_cur_str_toks(tail); + } case dimen_val_level: { int saved_selector; @@ -3166,6 +3325,9 @@ strnumber tex_the_scanned_result(void) case attr_val_level: tex_print_int(cur_val); break; + case posit_val_level: + tex_print_posit(cur_val); + break; case dimen_val_level: tex_print_dimension(cur_val, pt_unit); break; @@ -3360,10 +3522,10 @@ halfword tex_scan_font_identifier(halfword *spec) } default: { - /*tex We abuse |scan_cardinal| here btu we have to push back. */ + /*tex We abuse |scan_cardinal| here but we have to push back. 
*/ unsigned fnt = null_font; tex_back_input(cur_tok); - if (tex_scan_cardinal(&fnt, 1)) { + if (tex_scan_cardinal(0, &fnt, 1)) { if (tex_is_valid_font((halfword) fnt)) { return (halfword) fnt; } @@ -3731,7 +3893,7 @@ static int tex_aux_valid_macro_preamble(halfword *p, int *counter, halfword *has } } else if (cur_tok >= A_token_l && cur_tok <= F_token_l) { ++*counter; - if ((cur_tok - letter_token - 'A' - gap_match_count) == *counter) { + if ((cur_tok - A_token_l + 10) == *counter) { cur_tok += match_token - letter_token; break; } @@ -4227,7 +4389,8 @@ static void tex_aux_scan_expr(halfword level) int error_b = 0; /*tex top of expression stack */ halfword top = null; -int braced = 0; + int braced = 0; + int nonelevel = level == posit_val_level ? posit_val_level : int_val_level; /*tex Scan and evaluate an expression |e| of type |l|. */ cur_val_level = level; /* for now */ lmt_scanner_state.expression_depth++; @@ -4241,7 +4404,8 @@ int braced = 0; term = 0; numerator = 0; CONTINUE: - operation = state == expression_none ? level : int_val_level; /* we abuse operation */ +// operation = state == expression_none ? level : int_val_level; /* we abuse operation */ + operation = state == expression_none ? level : nonelevel; /* we abuse operation */ /*tex Scan a factor |f| of type |o| or start a subexpression. Get the next non-blank non-call token. 
@@ -4283,6 +4447,9 @@ int braced = 0; case attr_val_level: factor = tex_scan_int(0, NULL); break; + case posit_val_level: + factor = tex_scan_posit(0); + break; case dimen_val_level: factor = tex_scan_dimen(0, 0, 0, 0, NULL); break; @@ -4348,6 +4515,12 @@ int braced = 0; factor = 0; } break; + case posit_val_level: + if ((factor > max_cardinal) || (factor < min_cardinal)) { + lmt_scanner_state.arithmic_error = 1; + factor = 0; + } + break; case dimen_val_level: if (abs(factor) > max_dimen) { lmt_scanner_state.arithmic_error = 1; @@ -4398,6 +4571,9 @@ int braced = 0; case attr_val_level: term = tex_multiply_integers(term, factor); break; + case posit_val_level: + term = tex_posit_mul(term, factor); + break; case dimen_val_level: term = tex_nx_plus_y(term, factor, 0); break; @@ -4426,6 +4602,9 @@ int braced = 0; case attr_val_level: term = tex_fract(term, numerator, factor, max_integer); break; + case posit_val_level: + term = tex_posit_div(tex_posit_mul(term, factor), numerator); + break; case dimen_val_level: term = tex_fract(term, numerator, factor, max_dimen); break; @@ -4463,6 +4642,16 @@ int braced = 0; case attr_val_level: expression = tex_aux_add_or_sub(expression, term, max_integer, result); break; + case posit_val_level: + switch (result) { + case expression_subtract: + expression = tex_posit_sub(expression, term); + break; + case expression_add: + expression = tex_posit_add(expression, term); + break; + } + break; case dimen_val_level: expression = tex_aux_add_or_sub(expression, term, max_dimen, result); break; @@ -5174,7 +5363,7 @@ static void tex_aux_trace_expression(stack_info stack, halfword level, halfword tex_end_diagnostic(); } -/* This one is not yet okay ... work in progress. */ +/* This one is not yet okay ... work in progress. We might go for posits here. 
*/ static void tex_aux_scan_expression(int level) { @@ -5803,6 +5992,202 @@ halfword tex_scan_scale(int optional_equal) return cur_val; }
+/* todo: share with lmttokenlib.scan_float */
+
+# define max_posit_size 60
+/*tex Scan an optionally signed posit: either an internal quantity or a decimal/hexadecimal float literal; the result also ends up in |cur_val|. */
+halfword tex_scan_posit(int optional_equal)
+{
+    int hexadecimal = 1;
+    int exponent = 1;
+    int negative = 0;
+    int b = 0;
+    char buffer[max_posit_size+4] = { 0 }; /*tex zero filled, so the collected text is always terminated; a little slack for sign and exponent */
+    do {
+        while (1) {
+            tex_get_x_token();
+            if (cur_cmd != spacer_cmd) {
+                if (optional_equal && (cur_tok == equal_token)) {
+                    optional_equal = 0;
+                } else {
+                    break;
+                }
+            }
+        }
+        if (cur_tok == minus_token) {
+            negative = ! negative;
+            cur_tok = plus_token;
+        }
+    } while (cur_tok == plus_token);
+    if (cur_cmd >= min_internal_cmd && cur_cmd <= max_internal_cmd) {
+        cur_val = tex_aux_scan_something_internal(cur_cmd, cur_chr, posit_val_level, 0, 0);
+        if (negative) { /*tex the literal branch puts the sign in the buffer, here we have to attach it ourselves */
+            cur_val = tex_posit_neg(cur_val);
+        }
+    } else {
+        if (negative) {
+            buffer[b++] = '-';
+        }
+        /*tex we accept |[.,]digits| */
+        if (hexadecimal && (cur_tok == zero_token)) {
+            buffer[b++] = '0';
+            tex_get_x_token();
+            if (tex_token_is_hexadecimal(cur_tok)) {
+                buffer[b++] = 'x';
+                goto SCANHEXADECIMAL;
+            } else {
+                goto PICKUPDECIMAL;
+            }
+        }
+        if (tex_token_is_seperator(cur_tok)) {
+            buffer[b++] = '.';
+            while (1) {
+                tex_get_x_token();
+                if (tex_token_is_digit(cur_tok)) {
+                    buffer[b++] = (unsigned char) cur_chr;
+                } else if (exponent) {
+                    goto DECIMALEXPONENT;
+                } else {
+                    tex_back_input(cur_tok);
+                    goto DONE;
+                }
+                if (b >= max_posit_size) {
+                    goto TOOBIG;
+                }
+            }
+        } else {
+            goto PICKUPDECIMAL;
+        }
+        while (1) {
+            tex_get_x_token();
+          PICKUPDECIMAL:
+            if (tex_token_is_digit(cur_tok)) {
+                buffer[b++] = (unsigned char) cur_chr;
+            } else if (tex_token_is_seperator(cur_tok)) {
+                buffer[b++] = '.';
+                while (1) {
+                    tex_get_x_token();
+                    if (tex_token_is_digit(cur_tok) && b < max_posit_size) { /*tex stop storing when full, the size check below then bails out */
+                        buffer[b++] = (unsigned char) cur_chr;
+                    } else {
+                        tex_back_input(cur_tok);
+                        break;
+                    }
+                }
+            } else if (exponent) {
+                goto DECIMALEXPONENT;
+            } else {
+                tex_back_input(cur_tok);
+                goto DONE;
+            }
+            if (b >= max_posit_size) {
+                goto TOOBIG;
+            }
+        }
+      DECIMALEXPONENT:
+        if (tex_token_is_exponent(cur_tok)) {
+            buffer[b++] = (unsigned char) cur_chr;
+            tex_get_x_token();
+            if (tex_token_is_sign(cur_tok)) {
+                buffer[b++] = (unsigned char) cur_chr;
+            } else if (tex_token_is_digit(cur_tok)) {
+                buffer[b++] = (unsigned char) cur_chr;
+            }
+            while (1) {
+                tex_get_x_token();
+                if (tex_token_is_digit(cur_tok)) {
+                    buffer[b++] = (unsigned char) cur_chr;
+                } else {
+                    break;
+                }
+                if (b >= max_posit_size) {
+                    goto TOOBIG;
+                }
+            }
+        }
+        tex_back_input(cur_tok);
+        goto DONE;
+      SCANHEXADECIMAL:
+        tex_get_x_token();
+        if (tex_token_is_seperator(cur_tok)) {
+            buffer[b++] = '.';
+            while (1) {
+                tex_get_x_token();
+                if (tex_token_is_xdigit(cur_tok)) {
+                    buffer[b++] = (unsigned char) cur_chr;
+                } else if (exponent) {
+                    goto HEXADECIMALEXPONENT;
+                } else {
+                    tex_back_input(cur_tok);
+                    goto DONE;
+                }
+                if (b >= max_posit_size) {
+                    goto TOOBIG;
+                }
+            }
+        } else {
+            /* hm, we could avoid this pushback */
+            tex_back_input(cur_tok);
+            while (1) {
+                tex_get_x_token();
+                if (tex_token_is_xdigit(cur_tok)) {
+                    buffer[b++] = (unsigned char) cur_chr;
+                } else if (tex_token_is_seperator(cur_tok)) {
+                    buffer[b++] = '.';
+                    while (1) {
+                        tex_get_x_token();
+                        if (tex_token_is_xdigit(cur_tok) && b < max_posit_size) { /*tex same guard as in the decimal fraction loop */
+                            buffer[b++] = (unsigned char) cur_chr;
+                        } else {
+                            tex_back_input(cur_tok);
+                            break;
+                        }
+                    }
+                } else if (exponent) {
+                    goto HEXADECIMALEXPONENT;
+                } else {
+                    tex_back_input(cur_tok);
+                    goto DONE;
+                }
+                if (b >= max_posit_size) {
+                    goto TOOBIG;
+                }
+            }
+        }
+      HEXADECIMALEXPONENT:
+        if (tex_token_is_xexponent(cur_tok)) {
+            buffer[b++] = (unsigned char) cur_chr;
+            tex_get_x_token();
+            if (tex_token_is_sign(cur_tok)) {
+                buffer[b++] = (unsigned char) cur_chr;
+            } else if (tex_token_is_xdigit(cur_tok)) {
+                buffer[b++] = (unsigned char) cur_chr;
+            }
+            while (1) {
+                tex_get_x_token();
+                if (tex_token_is_xdigit(cur_tok)) {
+                    buffer[b++] = (unsigned char) cur_chr;
+                } else {
+                    break;
+                }
+                if (b >= max_posit_size) {
+                    goto TOOBIG;
+                }
+            }
+        }
+        tex_back_input(cur_tok);
+      DONE:
+        {
+            double d = strtod(buffer, NULL); /*tex |strtof| would first round to a float, which has fewer significant bits than a 32 bit posit can carry */
+            cur_val = tex_double_to_posit(d).v;
+            return cur_val;
+        }
+      TOOBIG:
+        cur_val = tex_integer_to_posit(0).v; /*tex too many characters: we silently return zero */
+    }
+    return cur_val;
+}
+
 int tex_scan_tex_value(halfword level, halfword *value) { tex_aux_scan_expr(level); diff --git a/source/luametatex/source/tex/texscanning.h b/source/luametatex/source/tex/texscanning.h index 3415c27dd..34e118d93 100644 --- a/source/luametatex/source/tex/texscanning.h +++ b/source/luametatex/source/tex/texscanning.h @@ -6,6 +6,7 @@ # define LMT_SCANNING_H typedef enum value_level_code { + posit_val_level, int_val_level, /*tex integer values */ attr_val_level, /*tex integer values */ dimen_val_level, /*tex dimension values */ @@ -77,9 +78,10 @@ inline static void tex_unsave_full_scanner_status(full_scanner_status a) extern void tex_scan_something_simple (halfword cmd, halfword code); extern void tex_scan_left_brace (void); extern void tex_scan_optional_equals (void); -extern int tex_scan_cardinal (unsigned *value, int dontbark); +extern int tex_scan_cardinal (int optional_equal, unsigned *value, int dontbark); extern halfword tex_scan_int (int optional_equal, int *radix); extern halfword tex_scan_scale (int optional_equal); +extern halfword tex_scan_posit (int optional_equal); extern halfword tex_scan_dimen (int mu, int inf, int shortcut, int optional_equal, halfword *order); extern halfword tex_scan_glue (int level, int optional_equal); extern halfword tex_scan_font (int optional_equal); @@ -107,6 +109,7 @@ extern int tex_scanned_expression (int level); extern halfword tex_scan_int_register_number (void); extern halfword tex_scan_dimen_register_number (void); extern halfword tex_scan_attribute_register_number (void); +extern halfword tex_scan_posit_register_number (void); extern halfword tex_scan_glue_register_number (void); extern halfword tex_scan_mu_glue_register_number (void); extern halfword
tex_scan_toks_register_number (void); diff --git a/source/luametatex/source/tex/textoken.c b/source/luametatex/source/tex/textoken.c index b07bf02f1..e3aa90c0f 100644 --- a/source/luametatex/source/tex/textoken.c +++ b/source/luametatex/source/tex/textoken.c @@ -453,8 +453,9 @@ void tex_print_meaning(halfword code) switch (code) { case meaning_code: case meaning_full_code: + case meaning_ful_code: case meaning_asis_code: - tex_print_cmd_flags(cur_cs, cur_cmd, (code == meaning_full_code || code == meaning_asis_code), code == meaning_asis_code); + tex_print_cmd_flags(cur_cs, cur_cmd, (code != meaning_code), code == meaning_asis_code); break; } } @@ -473,11 +474,16 @@ void tex_print_meaning(halfword code) switch (code) { case meaning_code: case meaning_full_code: + case meaning_ful_code: if (constant) { tex_print_str("constant "); } tex_print_str("macro"); - goto FOLLOWUP; + if (code == meaning_ful_code) { + return; + } else { + goto FOLLOWUP; + } case meaning_asis_code: if (constant) { tex_print_str_esc("constant "); @@ -491,6 +497,11 @@ void tex_print_meaning(halfword code) tex_show_token_list(token_link(cur_chr), get_token_preamble(cur_chr)); } return; + case meaning_les_code: + if (cur_chr && token_link(cur_chr)) { + tex_show_token_list(token_link(cur_chr), 2); + } + return; } goto DETAILS; } @@ -644,10 +655,17 @@ void tex_show_token_list(halfword p, int asis) break; } case end_match_cmd: - if (asis) { - tex_print_char('{'); - } else if (chr == 0) { - tex_print_str("->"); + switch (asis) { + case 1: + tex_print_char('{'); + break; + case 2: + return; + default: + if (chr == 0) { + tex_print_str("->"); + } + break; } break; case ignore_something_cmd: @@ -668,7 +686,7 @@ void tex_show_token_list(halfword p, int asis) } p = token_link(p); } - if (asis) { + if (asis == 1) { tex_print_char('}'); } } @@ -2950,6 +2968,8 @@ void tex_run_convert_tokens(halfword code) case meaning_code: case meaning_full_code: case meaning_less_code: + case meaning_ful_code: + case 
meaning_les_code: case meaning_asis_code: { int saved_selector; @@ -3274,7 +3294,7 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb lmt_token_state.bufmax = default_buffer_size; } lmt_token_state.bufloc = 0; - if (skippreamble) { + if (skippreamble == 1) { skip = get_token_preamble(pp); } while (p) { @@ -3338,19 +3358,21 @@ char *tex_tokenlist_to_tstring(int pp, int inhibit_par, int *siz, int skippreamb ++n; } if (! skip) { - // tex_aux_append_char_to_buffer(chr ? chr : '0'); - if (chr <= 9) { - tex_aux_append_char_to_buffer(chr + '0'); - } else if (chr <= max_match_count) { - tex_aux_append_char_to_buffer(chr + '0' + gap_match_count); - } + tex_aux_append_char_to_buffer(chr ? chr : '0'); + // if (chr <= 9) { + // tex_aux_append_char_to_buffer(chr + '0'); + // } else if (chr <= max_match_count) { + // tex_aux_append_char_to_buffer(chr + '0' + gap_match_count); + // } } if (n > max_match_count) { goto EXIT; } break; case end_match_cmd: - if (chr == 0) { + if (skippreamble ==2) { + goto EXIT; + } else if (chr == 0) { if (! skip) { tex_aux_append_char_to_buffer('-'); tex_aux_append_char_to_buffer('>'); @@ -3468,6 +3490,7 @@ halfword tex_get_tex_dimen_register (int j, int internal) { return internal halfword tex_get_tex_skip_register (int j, int internal) { return internal ? glue_parameter(j) : skip_register(j) ; } halfword tex_get_tex_mu_skip_register (int j, int internal) { return internal ? mu_glue_parameter(j) : mu_skip_register(j); } halfword tex_get_tex_count_register (int j, int internal) { return internal ? count_parameter(j) : count_register(j) ; } +halfword tex_get_tex_posit_register (int j, int internal) { return internal ? posit_parameter(j) : posit_register(j) ; } halfword tex_get_tex_attribute_register (int j, int internal) { return internal ? attribute_parameter(j) : attribute_register(j) ; } halfword tex_get_tex_box_register (int j, int internal) { return internal ? 
box_parameter(j) : box_register(j) ; } @@ -3514,6 +3537,18 @@ void tex_set_tex_count_register(int j, halfword v, int flags, int internal) tex_word_define(flags, register_int_location(j), v); } } +void tex_set_tex_posit_register(int j, halfword v, int flags, int internal) +{ + if (global_defs_par) { + flags = add_global_flag(flags); + } + if (internal) { + tex_assign_internal_posit_value(flags, internal_posit_location(j), v); + } else { + tex_word_define(flags, register_posit_location(j), v); + } +} + void tex_set_tex_attribute_register(int j, halfword v, int flags, int internal) { diff --git a/source/luametatex/source/tex/textoken.h b/source/luametatex/source/tex/textoken.h index 20885ee66..51c61dc4f 100644 --- a/source/luametatex/source/tex/textoken.h +++ b/source/luametatex/source/tex/textoken.h @@ -387,6 +387,7 @@ extern halfword tex_get_tex_dimen_register (int j, int internal); extern halfword tex_get_tex_skip_register (int j, int internal); extern halfword tex_get_tex_mu_skip_register (int j, int internal); extern halfword tex_get_tex_count_register (int j, int internal); +extern halfword tex_get_tex_posit_register (int j, int internal); extern halfword tex_get_tex_attribute_register (int j, int internal); extern halfword tex_get_tex_box_register (int j, int internal); extern halfword tex_get_tex_toks_register (int j, int internal); @@ -395,6 +396,7 @@ extern void tex_set_tex_dimen_register (int j, halfword v, int flags, extern void tex_set_tex_skip_register (int j, halfword v, int flags, int internal); extern void tex_set_tex_mu_skip_register (int j, halfword v, int flags, int internal); extern void tex_set_tex_count_register (int j, halfword v, int flags, int internal); +extern void tex_set_tex_posit_register (int j, halfword v, int flags, int internal); extern void tex_set_tex_attribute_register (int j, halfword v, int flags, int internal); extern void tex_set_tex_box_register (int j, halfword v, int flags, int internal); diff --git 
a/source/luametatex/source/tex/textypes.h b/source/luametatex/source/tex/textypes.h index c4f917d4f..d2bb77972 100644 --- a/source/luametatex/source/tex/textypes.h +++ b/source/luametatex/source/tex/textypes.h @@ -147,6 +147,8 @@ extern halfword tex_badness( # define min_cardinal 0 # define max_integer 0x7FFFFFFF /*tex aka |infinity| */ # define min_integer -0x7FFFFFFF /*tex aka |min_infinity| */ +# define max_posit max_cardinal +# define min_posit min_cardinal # define max_dimen 0x3FFFFFFF # define min_dimen -0x3FFFFFFF # define min_data_value 0 @@ -236,6 +238,7 @@ extern halfword tex_badness( # define max_box_register_index 0xFFFF /* 0xFFFF 0xFFFF 0x7FFF */ /* 64 64 32 */ # define max_int_register_index 0xFFFF /* 0xFFFF 0xFFFF 0x3FFF */ /* 64 64 16 */ # define max_dimen_register_index 0xFFFF /* 0xFFFF 0xFFFF 0x3FFF */ /* 64 64 16 */ + # define max_posit_register_index 0xFFFF /* 0xFFFF 0x7FFF 0x1FFF */ /* 64 32 8 */ # define max_attribute_register_index 0xFFFF /* 0xFFFF 0x7FFF 0x1FFF */ /* 64 32 8 */ # define max_glue_register_index 0xFFFF /* 0xFFFF 0x7FFF 0x1FFF */ /* 64 32 8 */ # define max_mu_glue_register_index 0xFFFF /* 0xFFFF 0x3FFF 0x1FFF */ /* 64 16 8 */ @@ -246,6 +249,7 @@ extern halfword tex_badness( # define max_box_register_index 0x7FFF # define max_int_register_index 0x1FFF # define max_dimen_register_index 0x1FFF + # define max_posit_register_index 0x1FFF # define max_attribute_register_index 0x1FFF # define max_glue_register_index 0x1FFF # define max_mu_glue_register_index 0x1FFF @@ -257,6 +261,7 @@ extern halfword tex_badness( # define max_n_of_int_registers (max_int_register_index + 1) # define max_n_of_dimen_registers (max_dimen_register_index + 1) # define max_n_of_attribute_registers (max_attribute_register_index + 1) +# define max_n_of_posit_registers (max_posit_register_index + 1) # define max_n_of_glue_registers (max_glue_register_index + 1) # define max_n_of_mu_glue_registers (max_mu_glue_register_index + 1) diff --git 
a/source/luametatex/source/utilities/auxposit.c b/source/luametatex/source/utilities/auxposit.c new file mode 100644 index 000000000..4cc89fbfc --- /dev/null +++ b/source/luametatex/source/utilities/auxposit.c @@ -0,0 +1,5 @@ +/* + See license.txt in the root of this project. +*/ + +# include "auxposit.h" diff --git a/source/luametatex/source/utilities/auxposit.h b/source/luametatex/source/utilities/auxposit.h new file mode 100644 index 000000000..cc18dae2a --- /dev/null +++ b/source/luametatex/source/utilities/auxposit.h @@ -0,0 +1,147 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+# ifndef LMT_UTILITIES_POSIT_H
+# define LMT_UTILITIES_POSIT_H
+
+# include "libraries/softposit/source/include/softposit.h"
+# include <math.h>
+
+typedef posit32_t  posit_t;
+typedef posit32_t *posit;
+
+/*tex
+
+    Below is the abstraction of posits for \METAPOST\ and \LUA. Currently we have only 32 bit
+    posits, but for \TEX\ that is okay. It's why we have extra aliases for \TEX\ so that we
+    can update \LUA\ and \METAPOST\ with 64 bit without changes.
+
+*/
+
+# define posit_bits             32
+
+# define i64_to_posit           i64_to_p32
+# define posit_to_i64           p32_to_i64
+
+# define double_to_posit        convertDoubleToP32
+# define posit_to_double        convertP32ToDouble
+# define integer_to_posit       i64_to_p32
+# define posit_to_integer       p32_to_i64
+
+# define posit_round_to_integer p32_roundToInt
+
+# define posit_eq               p32_eq
+# define posit_le               p32_le
+# define posit_lt               p32_lt
+# define posit_gt(a,b)          (! p32_le(a,b))
+# define posit_ge(a,b)          (! p32_lt(a,b))
+# define posit_ne(a,b)          (! p32_eq(a,b))
+
+# define posit_add              p32_add
+# define posit_sub              p32_sub
+# define posit_mul              p32_mul
+# define posit_div              p32_div
+# define posit_sqrt             p32_sqrt
+
+# define posit_is_NaR           isNaRP32UI
+
+# define posit_eq_zero(a)       ((a).v == 0)
+
+inline static posit_t posit_neg(posit_t a) { posit_t p ; p.v = -a.v & 0xFFFFFFFF; return p; }
+inline static posit_t posit_abs(posit_t a) { posit_t p ; p.v = (a.v & 0x80000000) ? ((0 - a.v) & 0xFFFFFFFF) : a.v; return p; } /*tex sign test, not a shift mask: if |v| is unsigned (the casts below suggest so) |v >> 31| is 0 or 1, never all ones */
+
+// static posit_t posit_neg   (posit_t v)            { return posit_mul(v, integer_to_posit(-1)) ; }
+inline static posit_t posit_fabs  (posit_t v)            { return double_to_posit(fabs (posit_to_double(v))); }
+inline static posit_t posit_exp   (posit_t v)            { return double_to_posit(exp  (posit_to_double(v))); }
+inline static posit_t posit_log   (posit_t v)            { return double_to_posit(log  (posit_to_double(v))); }
+inline static posit_t posit_sin   (posit_t v)            { return double_to_posit(sin  (posit_to_double(v))); }
+inline static posit_t posit_cos   (posit_t v)            { return double_to_posit(cos  (posit_to_double(v))); }
+inline static posit_t posit_tan   (posit_t v)            { return double_to_posit(tan  (posit_to_double(v))); }
+inline static posit_t posit_asin  (posit_t v)            { return double_to_posit(asin (posit_to_double(v))); }
+inline static posit_t posit_acos  (posit_t v)            { return double_to_posit(acos (posit_to_double(v))); }
+inline static posit_t posit_atan  (posit_t v)            { return double_to_posit(atan (posit_to_double(v))); }
+inline static posit_t posit_atan2 (posit_t v, posit_t w) { return double_to_posit(atan2(posit_to_double(v),posit_to_double(w))); }
+inline static posit_t posit_pow   (posit_t v, posit_t w) { return double_to_posit(pow  (posit_to_double(v),posit_to_double(w))); }
+inline static posit_t posit_round (posit_t v)            { return posit_round_to_integer(v); }
+inline static posit_t posit_floor (posit_t v)            { return double_to_posit(floor(posit_to_double(v))); }
+inline static posit_t posit_modf  (posit_t v)            { double d; return double_to_posit(modf(posit_to_double(v), &d)); } /*tex only the fractional part is returned */
+
+inline static posit_t posit_d_log   (double v)           { return double_to_posit(log  (v)); }
+inline static posit_t posit_d_sin   (double v)           { return double_to_posit(sin  (v)); }
+inline static posit_t posit_d_cos   (double v)           { return double_to_posit(cos  (v)); }
+inline static posit_t posit_d_asin  (double v)           { return double_to_posit(asin (v)); }
+inline static posit_t posit_d_acos  (double v)           { return double_to_posit(acos (v)); }
+inline static posit_t posit_d_atan  (double v)           { return double_to_posit(atan (v)); }
+inline static posit_t posit_d_atan2 (double v, double w) { return double_to_posit(atan2(v,w)); }
+
+inline static int     posit_i_round (posit_t v)          { return posit_to_integer(v); }
+
+/*tex
+
+    The next code is used at the \TEX\ end where we are always 32 bit, while at some
+    point \METAPOST\ and \LUA\ will go 64 bit. There a posit travels as the bit pattern
+
+    of its |v| field stored in a |halfword|, so the macros below wrap and unwrap it.
+    The posit lib code is somewhat over the top wrt comparisons, so I might eventually replace that.
+
+    Not all relevant parts are done yet (I need to check where dimensions and posits
+    get cast).
+
+*/
+
+typedef int       halfword;
+typedef posit32_t tex_posit;
+
+# define tex_double_to_posit(p)        double_to_posit(p)
+# define tex_posit_to_double(p)        posit_to_double((tex_posit) { .v = (uint32_t) (p) })
+
+# define tex_integer_to_posit(p)       integer_to_posit((int32_t) (p))
+# define tex_posit_to_integer(p)       posit_to_integer((tex_posit) { .v = (uint32_t) (p) })
+
+# define tex_posit_round_to_integer(p) posit_round_to_integer((tex_posit) { .v = (uint32_t) (p) })
+
+# define tex_posit_eq(p,q)             posit_eq((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) })
+# define tex_posit_le(p,q)             posit_le((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) })
+# define tex_posit_lt(p,q)             posit_lt((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) })
+# define tex_posit_gt(p,q)             posit_gt((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) })
+# define tex_posit_ge(p,q)             posit_ge((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) })
+# define tex_posit_ne(p,q)             posit_ne((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) })
+
+# define tex_posit_add(p,q)            ((halfword) posit_add((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) }).v)
+# define tex_posit_sub(p,q)            ((halfword) posit_sub((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) }).v)
+# define tex_posit_mul(p,q)            ((halfword) posit_mul((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) }).v)
+# define tex_posit_div(p,q)            ((halfword) posit_div((tex_posit) { .v = (uint32_t) (p) }, (tex_posit) { .v = (uint32_t) (q) }).v)
+# define tex_posit_sqrt(p)             ((halfword) posit_sqrt((tex_posit) { .v = (uint32_t) (p) }).v)
+
+# define tex_posit_is_NaR(p)           posit_is_NaR((tex_posit) { .v = (uint32_t) (p) })
+
+# define tex_posit_eq_zero(p)          posit_eq_zero((tex_posit) { .v = (uint32_t) (p) })
+
+inline static halfword tex_posit_neg(halfword a)
+{
+    posit32_t p ;
+    p.v = (0 - (uint32_t) a) & 0xFFFFFFFF; /*tex negate on the unsigned pattern, so the most negative |halfword| cannot overflow */
+    return p.v;
+}
+
+inline static halfword tex_posit_abs(halfword a) {
+    posit32_t p ;
+    uint32_t u = (uint32_t) a; /*tex work on the unsigned pattern: no signed shift and no signed overflow */
+    p.v = (a < 0) ? ((0 - u) & 0xFFFFFFFF) : u;
+    return p.v;
+}
+
+inline static tex_posit tex_dimension_to_posit(halfword p)
+{
+    return p32_div(i32_to_p32(p), i32_to_p32(65536)); /*tex dimensions are signed scaled points, so we need the signed conversion */
+}
+
+inline static halfword tex_posit_to_dimension(halfword p)
+{
+    posit32_t x;
+    x.v = (uint32_t) p;
+    return posit_to_integer(p32_mul(x, i32_to_p32(65536))); /*tex back to scaled points, 65536 per point */
+}
+
+# endif |