7 files changed, 550 insertions, 288 deletions
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h
deleted file mode 100644
index f60d7acd0..000000000
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/* ----------------------------------------------------------------------------
-Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
-This is free software; you can redistribute it and/or modify it under the
-terms of the MIT license. A copy of the license can be found in the file
-"LICENSE" at the root of this distribution.
------------------------------------------------------------------------------*/
-#pragma once
-#ifndef MIMALLOC_TRACK_H
-#define MIMALLOC_TRACK_H
-
-// ------------------------------------------------------
-// Track memory ranges with macros for tools like Valgrind
-// address sanitizer, or other memory checkers.
-// ------------------------------------------------------
-
-#if MI_VALGRIND
-
-#define MI_TRACK_ENABLED 1
-#define MI_TRACK_TOOL    "valgrind"
-
-#include <valgrind/valgrind.h>
-#include <valgrind/memcheck.h>
-
-#define mi_track_malloc(p,size,zero)        VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
-#define mi_track_resize(p,oldsize,newsize)  VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
-#define mi_track_free(p)                    VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
-#define mi_track_free_size(p,_size)         mi_track_free(p)
-#define mi_track_mem_defined(p,size)        VALGRIND_MAKE_MEM_DEFINED(p,size)
-#define mi_track_mem_undefined(p,size)      VALGRIND_MAKE_MEM_UNDEFINED(p,size)
-#define mi_track_mem_noaccess(p,size)       VALGRIND_MAKE_MEM_NOACCESS(p,size)
-
-#elif MI_ASAN
-
-#define MI_TRACK_ENABLED 1
-#define MI_TRACK_TOOL    "asan"
-
-#include <sanitizer/asan_interface.h>
-
-#define mi_track_malloc(p,size,zero)        ASAN_UNPOISON_MEMORY_REGION(p,size)
-#define mi_track_resize(p,oldsize,newsize)  ASAN_POISON_MEMORY_REGION(p,oldsize); ASAN_UNPOISON_MEMORY_REGION(p,newsize)
-#define mi_track_free(p)                    ASAN_POISON_MEMORY_REGION(p,mi_usable_size(p))
-#define mi_track_free_size(p,size)          ASAN_POISON_MEMORY_REGION(p,size)
-#define mi_track_mem_defined(p,size)        ASAN_UNPOISON_MEMORY_REGION(p,size)
-#define mi_track_mem_undefined(p,size)      ASAN_UNPOISON_MEMORY_REGION(p,size)
-#define mi_track_mem_noaccess(p,size)       ASAN_POISON_MEMORY_REGION(p,size)
-
-#else
-
-#define MI_TRACK_ENABLED 0
-#define MI_TRACK_TOOL    "none"
-
-#define mi_track_malloc(p,size,zero)
-#define mi_track_resize(p,oldsize,newsize)
-#define mi_track_free(p)
-#define mi_track_free_size(p,_size)
-#define mi_track_mem_defined(p,size)
-#define mi_track_mem_undefined(p,size)
-#define mi_track_mem_noaccess(p,size)
-
-#endif
-
-#endif
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc.h
index 9b72fbfda..1372cb3f2 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc.h
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MIMALLOC_H
 #define MIMALLOC_H
 
-#define MI_MALLOC_VERSION 209   // major + 2 digits minor
+#define MI_MALLOC_VERSION 210   // major + 2 digits minor
 
 // ------------------------------------------------------
 // Compiler specific attributes
@@ -477,11 +477,13 @@ template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const
 template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; }
 
 
-#if (__cplusplus >= 201103L) || (_MSC_VER > 1900)  // C++11
+#if (__cplusplus >= 201103L) || (_MSC_VER >= 1900)  // C++11
+#define MI_HAS_HEAP_STL_ALLOCATOR 1
+
 #include <memory>      // std::shared_ptr
 
 // Common base class for STL allocators in a specific heap
-template<class T, bool destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> {
+template<class T, bool _mi_destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> {
   using typename _mi_stl_allocator_common<T>::size_type;
   using typename _mi_stl_allocator_common<T>::value_type;
   using typename _mi_stl_allocator_common<T>::pointer;
@@ -500,7 +502,7 @@ template<class T, bool destroy> struct _mi_heap_stl_allocator_common : public _m
   #endif
 
   void collect(bool force) { mi_heap_collect(this->heap.get(), force); }
-  template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, destroy>& x) const { return (this->heap == x.heap); }
+  template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) const { return (this->heap == x.heap); }
 
 protected:
   std::shared_ptr<mi_heap_t> heap;
@@ -508,10 +510,10 @@ protected:
   
   _mi_heap_stl_allocator_common() {
     mi_heap_t* hp = mi_heap_new();
-    this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete));  /* calls heap_delete/destroy when the refcount drops to zero */
+    this->heap.reset(hp, (_mi_destroy ? &heap_destroy : &heap_delete));  /* calls heap_delete/destroy when the refcount drops to zero */
   }
   _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { }
-  template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, destroy>& x) mi_attr_noexcept : heap(x.heap) { }
+  template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, _mi_destroy>& x) mi_attr_noexcept : heap(x.heap) { }
 
 private:
   static void heap_delete(mi_heap_t* hp)  { if (hp != NULL) { mi_heap_delete(hp); } }
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/atomic.h
index c66f80493..fe79fbcaf 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/atomic.h
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2021 Microsoft Research, Daan Leijen
+Copyright (c) 2018-2023 Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -275,6 +275,15 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
   return (intptr_t)mi_atomic_addi(p, -sub);
 }
 
+typedef _Atomic(uintptr_t) mi_atomic_once_t;
+
+// Returns true only on the first invocation
+static inline bool mi_atomic_once( mi_atomic_once_t* once ) {
+  if (mi_atomic_load_relaxed(once) != 0) return false;     // quick test 
+  uintptr_t expected = 0;
+  return mi_atomic_cas_strong_acq_rel(once, &expected, 1); // try to set to 1
+}
+
 // Yield
 #if defined(__cplusplus)
 #include <thread>
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/internal.h
index a68e69662..a4495c161 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/internal.h
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2022, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -8,8 +8,14 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MIMALLOC_INTERNAL_H
 #define MIMALLOC_INTERNAL_H
 
-#include "mimalloc-types.h"
-#include "mimalloc-track.h"
+
+// --------------------------------------------------------------------------
+// This file contains the interal API's of mimalloc and various utility
+// functions and macros.
+// --------------------------------------------------------------------------
+
+#include "mimalloc/types.h"
+#include "mimalloc/track.h"
 
 #if (MI_DEBUG>0)
 #define mi_trace_message(...)  _mi_trace_message(__VA_ARGS__)
@@ -44,6 +50,7 @@ terms of the MIT license. A copy of the license can be found in the file
 #define mi_decl_externc
 #endif
 
+// pthreads
 #if !defined(_WIN32) && !defined(__wasi__)
 #define  MI_USE_PTHREADS
 #include <pthread.h>
@@ -73,36 +80,46 @@ extern mi_decl_cache_align mi_stats_t       _mi_stats_main;
 extern mi_decl_cache_align const mi_page_t  _mi_page_empty;
 bool       _mi_is_main_thread(void);
 size_t     _mi_current_thread_count(void);
-bool       _mi_preloading(void);  // true while the C runtime is not ready
+bool       _mi_preloading(void);        // true while the C runtime is not ready
+mi_threadid_t _mi_thread_id(void) mi_attr_noexcept;
+mi_heap_t* _mi_heap_main_get(void);     // statically allocated main backing heap
+void       _mi_thread_done(mi_heap_t* heap);
 
 // os.c
-size_t     _mi_os_page_size(void);
 void       _mi_os_init(void);                                      // called from process init
 void*      _mi_os_alloc(size_t size, mi_stats_t* stats);           // to allocate thread local data
 void       _mi_os_free(void* p, size_t size, mi_stats_t* stats);   // to free thread local data
-
-bool       _mi_os_protect(void* addr, size_t size);
-bool       _mi_os_unprotect(void* addr, size_t size);
-bool       _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* stats);
-bool       _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
-bool       _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
-// bool       _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+size_t     _mi_os_page_size(void);
 size_t     _mi_os_good_alloc_size(size_t size);
 bool       _mi_os_has_overcommit(void);
+
 bool       _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
+bool       _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+bool       _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
+bool       _mi_os_protect(void* addr, size_t size);
+bool       _mi_os_unprotect(void* addr, size_t size);
 
+void*      _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats);
 void*      _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
 void       _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
+void*      _mi_os_get_aligned_hint(size_t try_alignment, size_t size);
+bool       _mi_os_use_large_page(size_t size, size_t alignment);
+size_t     _mi_os_large_page_size(void);
+
+void       _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats);
+void*      _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize);
+void       _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats);
 
 // arena.c
-void*      _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
-void*      _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
-void       _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
 mi_arena_id_t _mi_arena_id_none(void);
-bool       _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id);
+void       _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
+void*      _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
+void*      _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
+bool       _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id);
+bool       _mi_arena_is_os_allocated(size_t arena_memid);
 
 // "segment-cache.c"
-void*      _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
+void*      _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
 bool       _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
 void       _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
 void       _mi_segment_cache_free_all(mi_os_tld_t* tld);
@@ -127,8 +144,6 @@ void       _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
 void       _mi_abandoned_await_readers(void);
 void       _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld);
 
-
-
 // "page.c"
 void*      _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment)  mi_attr_noexcept mi_attr_malloc;
 
@@ -160,7 +175,6 @@ void       _mi_heap_destroy_all(void);
 
 // "stats.c"
 void       _mi_stats_done(mi_stats_t* stats);
-
 mi_msecs_t  _mi_clock_now(void);
 mi_msecs_t  _mi_clock_end(mi_msecs_t start);
 mi_msecs_t  _mi_clock_start(void);
@@ -173,6 +187,16 @@ void*       _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
 bool        _mi_free_delayed_block(mi_block_t* block);
 void        _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept;  // for runtime integration
+void        _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size);
+
+// option.c, c primitives
+char        _mi_toupper(char c);
+int         _mi_strnicmp(const char* s, const char* t, size_t n);
+void        _mi_strlcpy(char* dest, const char* src, size_t dest_size);
+void        _mi_strlcat(char* dest, const char* src, size_t dest_size);
+size_t      _mi_strlen(const char* s);
+size_t      _mi_strnlen(const char* s, size_t max_len);
+
 
 #if MI_DEBUG>1
 bool        _mi_page_is_valid(mi_page_t* page);
@@ -340,93 +364,11 @@ static inline bool mi_count_size_overflow(size_t count, size_t size, size_t* tot
 }
 
 
-/* ----------------------------------------------------------------------------------------
-The thread local default heap: `_mi_get_default_heap` returns the thread local heap.
-On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
-__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
-that the storage will always be available (allocated on the thread stacks).
-On some platforms though we cannot use that when overriding `malloc` since the underlying
-TLS implementation (or the loader) will call itself `malloc` on a first access and recurse.
-We try to circumvent this in an efficient way:
-- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the
-           loader itself calls `malloc` even before the modules are initialized.
-- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
-- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323)
+/*----------------------------------------------------------------------------------------
+  Heap functions
 ------------------------------------------------------------------------------------------- */
 
 extern const mi_heap_t _mi_heap_empty;  // read-only empty heap, initial value of the thread local default heap
-extern bool _mi_process_is_initialized;
-mi_heap_t*  _mi_heap_main_get(void);    // statically allocated main backing heap
-
-#if defined(MI_MALLOC_OVERRIDE)
-#if defined(__APPLE__) // macOS
-#define MI_TLS_SLOT               89  // seems unused?
-// #define MI_TLS_RECURSE_GUARD 1
-// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
-// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
-#elif defined(__OpenBSD__)
-// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
-// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
-#define MI_TLS_PTHREAD_SLOT_OFS   (6*sizeof(int) + 4*sizeof(void*) + 24)
-// #elif defined(__DragonFly__)
-// #warning "mimalloc is not working correctly on DragonFly yet."
-// #define MI_TLS_PTHREAD_SLOT_OFS   (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
-#elif defined(__ANDROID__)
-// See issue #381
-#define MI_TLS_PTHREAD
-#endif
-#endif
-
-#if defined(MI_TLS_SLOT)
-static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept;   // forward declaration
-#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
-static inline mi_heap_t** mi_tls_pthread_heap_slot(void) {
-  pthread_t self = pthread_self();
-  #if defined(__DragonFly__)
-  if (self==NULL) {
-    mi_heap_t* pheap_main = _mi_heap_main_get();
-    return &pheap_main;
-  }
-  #endif
-  return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
-}
-#elif defined(MI_TLS_PTHREAD)
-extern pthread_key_t _mi_heap_default_key;
-#endif
-
-// Default heap to allocate from (if not using TLS- or pthread slots).
-// Do not use this directly but use through `mi_heap_get_default()` (or the unchecked `mi_get_default_heap`).
-// This thread local variable is only used when neither MI_TLS_SLOT, MI_TLS_PTHREAD, or MI_TLS_PTHREAD_SLOT_OFS are defined.
-// However, on the Apple M1 we do use the address of this variable as the unique thread-id (issue #356).
-extern mi_decl_thread mi_heap_t* _mi_heap_default;  // default heap to allocate from
-
-static inline mi_heap_t* mi_get_default_heap(void) {
-#if defined(MI_TLS_SLOT)
-  mi_heap_t* heap = (mi_heap_t*)mi_tls_slot(MI_TLS_SLOT);
-  if mi_unlikely(heap == NULL) {
-    #ifdef __GNUC__
-    __asm(""); // prevent conditional load of the address of _mi_heap_empty
-    #endif
-    heap = (mi_heap_t*)&_mi_heap_empty;
-  }
-  return heap;
-#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
-  mi_heap_t* heap = *mi_tls_pthread_heap_slot();
-  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
-#elif defined(MI_TLS_PTHREAD)
-  mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
-  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
-#else
-  #if defined(MI_TLS_RECURSE_GUARD)
-  if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
-  #endif
-  return _mi_heap_default;
-#endif
-}
-
-static inline bool mi_heap_is_default(const mi_heap_t* heap) {
-  return (heap == mi_get_default_heap());
-}
 
 static inline bool mi_heap_is_backing(const mi_heap_t* heap) {
   return (heap->tld->heap_backing == heap);
@@ -454,11 +396,6 @@ static inline mi_page_t* _mi_heap_get_free_small_page(mi_heap_t* heap, size_t si
   return heap->pages_free_direct[idx];
 }
 
-// Get the page belonging to a certain size class
-static inline mi_page_t* _mi_get_free_small_page(size_t size) {
-  return _mi_heap_get_free_small_page(mi_get_default_heap(), size);
-}
-
 // Segment that contains the pointer
 // Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
 // and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
@@ -834,107 +771,6 @@ static inline size_t _mi_os_numa_node_count(void) {
 }
 
 
-// -------------------------------------------------------------------
-// Getting the thread id should be performant as it is called in the
-// fast path of `_mi_free` and we specialize for various platforms.
-// We only require _mi_threadid() to return a unique id for each thread.
-// -------------------------------------------------------------------
-#if defined(_WIN32)
-
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
-  // Windows: works on Intel and ARM in both 32- and 64-bit
-  return (uintptr_t)NtCurrentTeb();
-}
-
-// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
-// both the OS and libc implementation so we use specific tests for each main platform.
-// If you test on another platform and it works please send a PR :-)
-// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
-#elif defined(__GNUC__) && ( \
-           (defined(__GLIBC__)   && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
-        || (defined(__APPLE__)   && (defined(__x86_64__) || defined(__aarch64__))) \
-        || (defined(__BIONIC__)  && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
-        || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
-        || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
-      )
-
-static inline void* mi_tls_slot(size_t slot) mi_attr_noexcept {
-  void* res;
-  const size_t ofs = (slot*sizeof(void*));
-  #if defined(__i386__)
-    __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86 32-bit always uses GS
-  #elif defined(__APPLE__) && defined(__x86_64__)
-    __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 macOSX uses GS
-  #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
-    __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x32 ABI
-  #elif defined(__x86_64__)
-    __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 Linux, BSD uses FS
-  #elif defined(__arm__)
-    void** tcb; MI_UNUSED(ofs);
-    __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
-    res = tcb[slot];
-  #elif defined(__aarch64__)
-    void** tcb; MI_UNUSED(ofs);
-    #if defined(__APPLE__) // M1, issue #343
-    __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
-    #else
-    __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
-    #endif
-    res = tcb[slot];
-  #endif
-  return res;
-}
-
-// setting a tls slot is only used on macOS for now
-static inline void mi_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
-  const size_t ofs = (slot*sizeof(void*));
-  #if defined(__i386__)
-    __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // 32-bit always uses GS
-  #elif defined(__APPLE__) && defined(__x86_64__)
-    __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 macOS uses GS
-  #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
-    __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x32 ABI
-  #elif defined(__x86_64__)
-    __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
-  #elif defined(__arm__)
-    void** tcb; MI_UNUSED(ofs);
-    __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
-    tcb[slot] = value;
-  #elif defined(__aarch64__)
-    void** tcb; MI_UNUSED(ofs);
-    #if defined(__APPLE__) // M1, issue #343
-    __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
-    #else
-    __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
-    #endif
-    tcb[slot] = value;
-  #endif
-}
-
-static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
-  #if defined(__BIONIC__)
-    // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
-    // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
-    return (uintptr_t)mi_tls_slot(1);
-  #else
-    // in all our other targets, slot 0 is the thread id
-    // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
-    // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
-    return (uintptr_t)mi_tls_slot(0);
-  #endif
-}
-
-#else
-
-// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
-static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
-  return (uintptr_t)&_mi_heap_default;
-}
-
-#endif
-
 
 // -----------------------------------------------------------------------
 // Count bits: trailing or leading zeros (with MI_INTPTR_BITS on all zero)
@@ -964,6 +800,7 @@ static inline size_t mi_ctz(uintptr_t x) {
 #elif defined(_MSC_VER)
 
 #include <limits.h>       // LONG_MAX
+#include <intrin.h>       // BitScanReverse64
 #define MI_HAVE_FAST_BITSCAN
 static inline size_t mi_clz(uintptr_t x) {
   if (x==0) return MI_INTPTR_BITS;
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc/prim.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/prim.h
new file mode 100644
index 000000000..68f0871e8
--- /dev/null
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/prim.h
@@ -0,0 +1,311 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+#pragma once
+#ifndef MIMALLOC_PRIM_H
+#define MIMALLOC_PRIM_H
+
+
+// --------------------------------------------------------------------------
+// This file specifies the primitive portability API.
+// Each OS/host needs to implement these primitives, see `src/prim`
+// for implementations on Window, macOS, WASI, and Linux/Unix.
+//
+// note: on all primitive functions, we always get:
+//  addr != NULL and page aligned
+//  size > 0     and page aligned
+//  return value is an error code an int where 0 is success.
+// --------------------------------------------------------------------------
+
+// OS memory configuration
+typedef struct mi_os_mem_config_s {
+  size_t  page_size;          // 4KiB
+  size_t  large_page_size;    // 2MiB
+  size_t  alloc_granularity;  // smallest allocation size (on Windows 64KiB)
+  bool    has_overcommit;     // can we reserve more memory than can be actually committed?
+  bool    must_free_whole;    // must allocated blocks free as a whole (false for mmap, true for VirtualAlloc)
+} mi_os_mem_config_t;
+
+// Initialize
+void _mi_prim_mem_init( mi_os_mem_config_t* config );
+
+// Free OS memory
+int _mi_prim_free(void* addr, size_t size );
+  
+// Allocate OS memory. Return NULL on error.
+// The `try_alignment` is just a hint and the returned pointer does not have to be aligned.
+// pre: !commit => !allow_large
+//      try_alignment >= _mi_os_page_size() and a power of 2
+int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr);
+
+// Commit memory. Returns error code or 0 on success.
+int _mi_prim_commit(void* addr, size_t size, bool commit);
+
+// Reset memory. The range keeps being accessible but the content might be reset.
+// Returns error code or 0 on success.
+int _mi_prim_reset(void* addr, size_t size);
+
+// Protect memory. Returns error code or 0 on success.
+int _mi_prim_protect(void* addr, size_t size, bool protect);
+
+// Allocate huge (1GiB) pages possibly associated with a NUMA node.
+// pre: size > 0  and a multiple of 1GiB.
+//      addr is either NULL or an address hint.
+//      numa_node is either negative (don't care), or a numa node number.
+int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr);
+
+// Return the current NUMA node
+size_t _mi_prim_numa_node(void);
+
+// Return the number of logical NUMA nodes
+size_t _mi_prim_numa_node_count(void);
+
+// Clock ticks
+mi_msecs_t _mi_prim_clock_now(void);
+
+// Return process information (only for statistics)
+typedef struct mi_process_info_s {
+  mi_msecs_t  elapsed;
+  mi_msecs_t  utime;
+  mi_msecs_t  stime; 
+  size_t      current_rss; 
+  size_t      peak_rss;  
+  size_t      current_commit;
+  size_t      peak_commit; 
+  size_t      page_faults;
+} mi_process_info_t;
+
+void _mi_prim_process_info(mi_process_info_t* pinfo);
+
+// Default stderr output. (only for warnings etc. with verbose enabled)
+// msg != NULL && _mi_strlen(msg) > 0
+void _mi_prim_out_stderr( const char* msg );
+
+// Get an environment variable. (only for options)
+// name != NULL, result != NULL, result_size >= 64
+bool _mi_prim_getenv(const char* name, char* result, size_t result_size);
+
+
+// Fill a buffer with strong randomness; return `false` on error or if
+// there is no strong randomization available.
+bool _mi_prim_random_buf(void* buf, size_t buf_len);
+
+// Called on the first thread start, and should ensure `_mi_thread_done` is called on thread termination.
+void _mi_prim_thread_init_auto_done(void);
+
+// Called on process exit and may take action to clean up resources associated with the thread auto done.
+void _mi_prim_thread_done_auto_done(void);
+
+// Called when the default heap for a thread changes
+void _mi_prim_thread_associate_default_heap(mi_heap_t* heap);
+
+
+//-------------------------------------------------------------------
+// Thread id: `_mi_prim_thread_id()`
+// 
+// Getting the thread id should be performant as it is called in the
+// fast path of `_mi_free` and we specialize for various platforms as
+// inlined definitions. Regular code should call `init.c:_mi_thread_id()`.
+// We only require _mi_prim_thread_id() to return a unique id
+// for each thread (unequal to zero).
+//-------------------------------------------------------------------
+
+static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept;
+
+#if defined(_WIN32)
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
+  // Windows: works on Intel and ARM in both 32- and 64-bit
+  return (uintptr_t)NtCurrentTeb();
+}
+
+// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
+// both the OS and libc implementation so we use specific tests for each main platform.
+// If you test on another platform and it works please send a PR :-)
+// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
+#elif defined(__GNUC__) && ( \
+           (defined(__GLIBC__)   && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
+        || (defined(__APPLE__)   && (defined(__x86_64__) || defined(__aarch64__))) \
+        || (defined(__BIONIC__)  && (defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__))) \
+        || (defined(__FreeBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
+        || (defined(__OpenBSD__) && (defined(__x86_64__) || defined(__i386__) || defined(__aarch64__))) \
+      )
+
+static inline void* mi_prim_tls_slot(size_t slot) mi_attr_noexcept {
+  void* res;
+  const size_t ofs = (slot*sizeof(void*));
+  #if defined(__i386__)
+    __asm__("movl %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86 32-bit always uses GS
+  #elif defined(__APPLE__) && defined(__x86_64__)
+    __asm__("movq %%gs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 macOSX uses GS
+  #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
+    __asm__("movl %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x32 ABI
+  #elif defined(__x86_64__)
+    __asm__("movq %%fs:%1, %0" : "=r" (res) : "m" (*((void**)ofs)) : );  // x86_64 Linux, BSD uses FS
+  #elif defined(__arm__)
+    void** tcb; MI_UNUSED(ofs);
+    __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
+    res = tcb[slot];
+  #elif defined(__aarch64__)
+    void** tcb; MI_UNUSED(ofs);
+    #if defined(__APPLE__) // M1, issue #343
+    __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
+    #else
+    __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
+    #endif
+    res = tcb[slot];
+  #endif
+  return res;
+}
+
+// setting a tls slot is only used on macOS for now
+static inline void mi_prim_tls_slot_set(size_t slot, void* value) mi_attr_noexcept {
+  const size_t ofs = (slot*sizeof(void*));
+  #if defined(__i386__)
+    __asm__("movl %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // 32-bit always uses GS
+  #elif defined(__APPLE__) && defined(__x86_64__)
+    __asm__("movq %1,%%gs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 macOS uses GS
+  #elif defined(__x86_64__) && (MI_INTPTR_SIZE==4)
+    __asm__("movl %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x32 ABI
+  #elif defined(__x86_64__)
+    __asm__("movq %1,%%fs:%0" : "=m" (*((void**)ofs)) : "rn" (value) : );  // x86_64 Linux, BSD uses FS
+  #elif defined(__arm__)
+    void** tcb; MI_UNUSED(ofs);
+    __asm__ volatile ("mrc p15, 0, %0, c13, c0, 3\nbic %0, %0, #3" : "=r" (tcb));
+    tcb[slot] = value;
+  #elif defined(__aarch64__)
+    void** tcb; MI_UNUSED(ofs);
+    #if defined(__APPLE__) // M1, issue #343
+    __asm__ volatile ("mrs %0, tpidrro_el0\nbic %0, %0, #7" : "=r" (tcb));
+    #else
+    __asm__ volatile ("mrs %0, tpidr_el0" : "=r" (tcb));
+    #endif
+    tcb[slot] = value;
+  #endif
+}
+
+static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
+  #if defined(__BIONIC__)
+    // issue #384, #495: on the Bionic libc (Android), slot 1 is the thread id
+    // see: https://github.com/aosp-mirror/platform_bionic/blob/c44b1d0676ded732df4b3b21c5f798eacae93228/libc/platform/bionic/tls_defines.h#L86
+    return (uintptr_t)mi_prim_tls_slot(1);
+  #else
+    // in all our other targets, slot 0 is the thread id
+    // glibc: https://sourceware.org/git/?p=glibc.git;a=blob_plain;f=sysdeps/x86_64/nptl/tls.h
+    // apple: https://github.com/apple/darwin-xnu/blob/main/libsyscall/os/tsd.h#L36
+    return (uintptr_t)mi_prim_tls_slot(0);
+  #endif
+}
+
+#else
+
+// otherwise use portable C, taking the address of a thread local variable (this is still very fast on most platforms).
+static inline mi_threadid_t _mi_prim_thread_id(void) mi_attr_noexcept {
+  return (uintptr_t)&_mi_heap_default;
+}
+
+#endif
+
+
+
+/* ----------------------------------------------------------------------------------------
+The thread local default heap: `_mi_prim_get_default_heap()`
+This is inlined here as it is on the fast path for allocation functions.
+
+On most platforms (Windows, Linux, FreeBSD, NetBSD, etc), this just returns a
+__thread local variable (`_mi_heap_default`). With the initial-exec TLS model this ensures
+that the storage will always be available (allocated on the thread stacks).
+
+On some platforms though we cannot use that when overriding `malloc` since the underlying
+TLS implementation (or the loader) will call itself `malloc` on a first access and recurse.
+We try to circumvent this in an efficient way:
+- macOSX : we use an unused TLS slot from the OS allocated slots (MI_TLS_SLOT). On OSX, the
+           loader itself calls `malloc` even before the modules are initialized.
+- OpenBSD: we use an unused slot from the pthread block (MI_TLS_PTHREAD_SLOT_OFS).
+- DragonFly: defaults are working but seem slow compared to freeBSD (see PR #323)
+------------------------------------------------------------------------------------------- */
+
+// defined in `init.c`; do not use these directly
+extern mi_decl_thread mi_heap_t* _mi_heap_default;  // default heap to allocate from
+extern bool _mi_process_is_initialized;             // has mi_process_init been called?
+
+static inline mi_heap_t* mi_prim_get_default_heap(void);
+
+#if defined(MI_MALLOC_OVERRIDE)
+#if defined(__APPLE__) // macOS
+  #define MI_TLS_SLOT               89  // seems unused?
+  // #define MI_TLS_RECURSE_GUARD 1
+  // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
+  // see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
+#elif defined(__OpenBSD__)
+  // use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
+  // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
+  #define MI_TLS_PTHREAD_SLOT_OFS   (6*sizeof(int) + 4*sizeof(void*) + 24)
+  // #elif defined(__DragonFly__)
+  // #warning "mimalloc is not working correctly on DragonFly yet."
+  // #define MI_TLS_PTHREAD_SLOT_OFS   (4 + 1*sizeof(void*))  // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
+#elif defined(__ANDROID__)
+  // See issue #381
+  #define MI_TLS_PTHREAD
+#endif
+#endif
+
+
+#if defined(MI_TLS_SLOT)
+
+static inline mi_heap_t* mi_prim_get_default_heap(void) {
+  mi_heap_t* heap = (mi_heap_t*)mi_prim_tls_slot(MI_TLS_SLOT);
+  if mi_unlikely(heap == NULL) {
+    #ifdef __GNUC__
+    __asm(""); // prevent conditional load of the address of _mi_heap_empty
+    #endif
+    heap = (mi_heap_t*)&_mi_heap_empty;
+  }
+  return heap;
+}
+
+#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
+
+static inline mi_heap_t** mi_prim_tls_pthread_heap_slot(void) {
+  pthread_t self = pthread_self();
+  #if defined(__DragonFly__)
+  if (self==NULL) return NULL;
+  #endif
+  return (mi_heap_t**)((uint8_t*)self + MI_TLS_PTHREAD_SLOT_OFS);
+}
+
+static inline mi_heap_t* mi_prim_get_default_heap(void) {
+  mi_heap_t** pheap = mi_prim_tls_pthread_heap_slot();
+  if mi_unlikely(pheap == NULL) return _mi_heap_main_get();
+  mi_heap_t* heap = *pheap;
+  if mi_unlikely(heap == NULL) return (mi_heap_t*)&_mi_heap_empty;
+  return heap;
+}
+
+#elif defined(MI_TLS_PTHREAD)
+
+extern pthread_key_t _mi_heap_default_key;
+static inline mi_heap_t* mi_prim_get_default_heap(void) {
+  mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
+  return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
+}
+
+#else // default using a thread local variable; used on most platforms.
+
+static inline mi_heap_t* mi_prim_get_default_heap(void) {
+  #if defined(MI_TLS_RECURSE_GUARD)
+  if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
+  #endif
+  return _mi_heap_default;
+}
+
+#endif  // mi_prim_get_default_heap()
+
+
+
+#endif  // MIMALLOC_PRIM_H
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc/track.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/track.h
new file mode 100644
index 000000000..f78e8daa7
--- /dev/null
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/track.h
@@ -0,0 +1,147 @@
+/* ----------------------------------------------------------------------------
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
+This is free software; you can redistribute it and/or modify it under the
+terms of the MIT license. A copy of the license can be found in the file
+"LICENSE" at the root of this distribution.
+-----------------------------------------------------------------------------*/
+#pragma once
+#ifndef MIMALLOC_TRACK_H
+#define MIMALLOC_TRACK_H
+
+/* ------------------------------------------------------------------------------------------------------
+Track memory ranges with macros for tools like Valgrind address sanitizer, or other memory checkers.
+These can be defined for tracking allocation:
+
+  #define mi_track_malloc_size(p,reqsize,size,zero)
+  #define mi_track_free_size(p,_size)
+
+The macros are set up such that the size passed to `mi_track_free_size`
+always matches the size of `mi_track_malloc_size`. (currently, `size == mi_usable_size(p)`).
+The `reqsize` is what the user requested, and `size >= reqsize`.
+The `size` is either byte precise (and `size==reqsize`) if `MI_PADDING` is enabled,
+or otherwise it is the usable block size which may be larger than the original request.
+Use `_mi_block_size_of(void* p)` to get the full block size that was allocated (including padding etc).
+The `zero` parameter is `true` if the allocated block is zero initialized.
+
+Optional:
+
+  #define mi_track_align(p,alignedp,offset,size)
+  #define mi_track_resize(p,oldsize,newsize)
+  #define mi_track_init()
+
+The `mi_track_align` is called right after a `mi_track_malloc` for aligned pointers in a block.
+The corresponding `mi_track_free` still uses the block start pointer and original size (corresponding to the `mi_track_malloc`).
+The `mi_track_resize` is currently unused but could be called on reallocations within a block.
+`mi_track_init` is called at program start.
+
+The following macros are for tools like asan and valgrind to track whether memory is 
+defined, undefined, or not accessible at all:
+
+  #define mi_track_mem_defined(p,size)
+  #define mi_track_mem_undefined(p,size)
+  #define mi_track_mem_noaccess(p,size)
+
+-------------------------------------------------------------------------------------------------------*/
+
+#if MI_TRACK_VALGRIND
+// valgrind tool
+
+#define MI_TRACK_ENABLED      1
+#define MI_TRACK_HEAP_DESTROY 1           // track free of individual blocks on heap_destroy
+#define MI_TRACK_TOOL         "valgrind"
+
+#include <valgrind/valgrind.h>
+#include <valgrind/memcheck.h>
+
+#define mi_track_malloc_size(p,reqsize,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
+#define mi_track_free_size(p,_size)               VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
+#define mi_track_resize(p,oldsize,newsize)        VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
+#define mi_track_mem_defined(p,size)              VALGRIND_MAKE_MEM_DEFINED(p,size)
+#define mi_track_mem_undefined(p,size)            VALGRIND_MAKE_MEM_UNDEFINED(p,size)
+#define mi_track_mem_noaccess(p,size)             VALGRIND_MAKE_MEM_NOACCESS(p,size)
+
+#elif MI_TRACK_ASAN
+// address sanitizer
+
+#define MI_TRACK_ENABLED      1
+#define MI_TRACK_HEAP_DESTROY 0
+#define MI_TRACK_TOOL         "asan"
+
+#include <sanitizer/asan_interface.h>
+
+#define mi_track_malloc_size(p,reqsize,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size)
+#define mi_track_free_size(p,size)                ASAN_POISON_MEMORY_REGION(p,size)
+#define mi_track_mem_defined(p,size)              ASAN_UNPOISON_MEMORY_REGION(p,size)
+#define mi_track_mem_undefined(p,size)            ASAN_UNPOISON_MEMORY_REGION(p,size)
+#define mi_track_mem_noaccess(p,size)             ASAN_POISON_MEMORY_REGION(p,size)
+
+#elif MI_TRACK_ETW
+// windows event tracing
+
+#define MI_TRACK_ENABLED      1
+#define MI_TRACK_HEAP_DESTROY 0
+#define MI_TRACK_TOOL         "ETW"
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+#include "../src/prim/windows/etw.h"
+
+#define mi_track_init()                           EventRegistermicrosoft_windows_mimalloc();
+#define mi_track_malloc_size(p,reqsize,size,zero) EventWriteETW_MI_ALLOC((UINT64)(p), size)
+#define mi_track_free_size(p,size)                EventWriteETW_MI_FREE((UINT64)(p), size)
+
+#else
+// no tracking
+
+#define MI_TRACK_ENABLED      0
+#define MI_TRACK_HEAP_DESTROY 0 
+#define MI_TRACK_TOOL         "none"
+
+#define mi_track_malloc_size(p,reqsize,size,zero)
+#define mi_track_free_size(p,_size)
+
+#endif
+
+// -------------------
+// Utility definitions
+
+#ifndef mi_track_resize
+#define mi_track_resize(p,oldsize,newsize)      mi_track_free_size(p,oldsize); mi_track_malloc(p,newsize,false)
+#endif
+
+#ifndef mi_track_align
+#define mi_track_align(p,alignedp,offset,size)  mi_track_mem_noaccess(p,offset)
+#endif
+
+#ifndef mi_track_init
+#define mi_track_init()
+#endif
+
+#ifndef mi_track_mem_defined
+#define mi_track_mem_defined(p,size)
+#endif
+
+#ifndef mi_track_mem_undefined
+#define mi_track_mem_undefined(p,size)
+#endif
+
+#ifndef mi_track_mem_noaccess
+#define mi_track_mem_noaccess(p,size)
+#endif
+
+
+#if MI_PADDING
+#define mi_track_malloc(p,reqsize,zero) \
+  if ((p)!=NULL) { \
+    mi_assert_internal(mi_usable_size(p)==(reqsize)); \
+    mi_track_malloc_size(p,reqsize,reqsize,zero); \
+  }
+#else
+#define mi_track_malloc(p,reqsize,zero) \
+  if ((p)!=NULL) { \
+    mi_assert_internal(mi_usable_size(p)>=(reqsize)); \
+    mi_track_malloc_size(p,reqsize,mi_usable_size(p),zero); \
+  }
+#endif
+
+#endif
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc/types.h
index f3af528e5..c7ddaaaef 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc/types.h
@@ -1,5 +1,5 @@
 /* ----------------------------------------------------------------------------
-Copyright (c) 2018-2021, Microsoft Research, Daan Leijen
+Copyright (c) 2018-2023, Microsoft Research, Daan Leijen
 This is free software; you can redistribute it and/or modify it under the
 terms of the MIT license. A copy of the license can be found in the file
 "LICENSE" at the root of this distribution.
@@ -8,9 +8,20 @@ terms of the MIT license. A copy of the license can be found in the file
 #ifndef MIMALLOC_TYPES_H
 #define MIMALLOC_TYPES_H
 
+// --------------------------------------------------------------------------
+// This file contains the main type definitions for mimalloc:
+// mi_heap_t      : all data for a thread-local heap, contains
+//                  lists of all managed heap pages.
+// mi_segment_t   : a larger chunk of memory (32GiB) from where pages
+//                  are allocated.
+// mi_page_t      : a mimalloc page (usually 64KiB or 512KiB) from
+//                  where objects are allocated.
+// --------------------------------------------------------------------------
+
+
 #include <stddef.h>   // ptrdiff_t
 #include <stdint.h>   // uintptr_t, uint16_t, etc
-#include "mimalloc-atomic.h"  // _Atomic
+#include "mimalloc/atomic.h"  // _Atomic
 
 #ifdef _MSC_VER
 #pragma warning(disable:4214) // bitfield is not int
@@ -29,8 +40,10 @@ terms of the MIT license. A copy of the license can be found in the file
 // Define NDEBUG in the release version to disable assertions.
 // #define NDEBUG
 
-// Define MI_VALGRIND to enable valgrind support
-// #define MI_VALGRIND 1
+// Define MI_TRACK_<tool> to enable tracking support
+// #define MI_TRACK_VALGRIND 1
+// #define MI_TRACK_ASAN     1
+// #define MI_TRACK_ETW      1
 
 // Define MI_STAT as 1 to maintain statistics; set it to 2 to have detailed statistics (but costs some performance).
 // #define MI_STAT 1
@@ -58,11 +71,16 @@ terms of the MIT license. A copy of the license can be found in the file
 #endif
 
 // Reserve extra padding at the end of each block to be more resilient against heap block overflows.
-// The padding can detect byte-precise buffer overflow on free.
-#if !defined(MI_PADDING) && (MI_DEBUG>=1 || MI_VALGRIND)
+// The padding can detect buffer overflow on free.
+#if !defined(MI_PADDING) && (MI_SECURE>=3 || MI_DEBUG>=1 || (MI_TRACK_VALGRIND || MI_TRACK_ASAN || MI_TRACK_ETW))
 #define MI_PADDING  1
 #endif
 
+// Check padding bytes; allows byte-precise buffer overflow detection
+#if !defined(MI_PADDING_CHECK) && MI_PADDING && (MI_SECURE>=3 || MI_DEBUG>=1)
+#define MI_PADDING_CHECK 1
+#endif
+
 
 // Encoded free lists allow detection of corrupted free lists
 // and can detect buffer overflows, modify after free, and double `free`s.
@@ -290,8 +308,8 @@ typedef struct mi_page_s {
   uint32_t              xblock_size;       // size available in each block (always `>0`)
   mi_block_t*           local_free;        // list of deferred free blocks by this thread (migrates to `free`)
 
-  #ifdef MI_ENCODE_FREELIST
-  uintptr_t             keys[2];           // two random keys to encode the free lists (see `_mi_block_next`)
+  #if (MI_ENCODE_FREELIST || MI_PADDING)
+  uintptr_t             keys[2];           // two random keys to encode the free lists (see `_mi_block_next`) or padding canary
   #endif
 
   _Atomic(mi_thread_free_t) xthread_free;  // list of deferred free blocks freed by other threads