Diffstat (limited to 'source/luametatex/source/libraries/mimalloc/src')
18 files changed, 478 insertions, 264 deletions
diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c b/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c index fce0fd749..9614aa092 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c @@ -31,7 +31,8 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* } // otherwise over-allocate - void* p = _mi_heap_malloc_zero(heap, size + alignment - 1, zero); + const size_t oversize = size + alignment - 1; + void* p = _mi_heap_malloc_zero(heap, oversize, zero); if (p == NULL) return NULL; // .. and align within the allocation @@ -41,6 +42,16 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); + + #if MI_TRACK_ENABLED + if (p != aligned_p) { + mi_track_free(p); + mi_track_malloc(aligned_p,size,zero); + } + else { + mi_track_resize(aligned_p,oversize,size); + } + #endif return aligned_p; } @@ -49,19 +60,19 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t { // note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size. mi_assert(alignment > 0); - if (mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment))) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>) + if mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } - if (mi_unlikely(alignment > MI_ALIGNMENT_MAX)) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) + if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment); #endif return NULL; } - if (mi_unlikely(size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) + if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment); #endif @@ -71,18 +82,18 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check // try first if there happens to be a small block available with just the right alignment - if (mi_likely(padsize <= MI_SMALL_SIZE_MAX)) { + if mi_likely(padsize <= MI_SMALL_SIZE_MAX) { mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize); const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0; - if (mi_likely(page->free != NULL && is_aligned)) + if mi_likely(page->free != NULL && is_aligned) { #if MI_STAT>1 
mi_heap_stat_increase(heap, malloc, size); #endif - void* p = _mi_page_malloc(heap, page, padsize); // TODO: inline _mi_page_malloc + void* p = _mi_page_malloc(heap, page, padsize, zero); // TODO: inline _mi_page_malloc mi_assert_internal(p != NULL); mi_assert_internal(((uintptr_t)p + offset) % alignment == 0); - if (zero) { _mi_block_zero_init(page, p, size); } + mi_track_malloc(p,size,zero); return p; } } @@ -95,19 +106,19 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t // Optimized mi_heap_malloc_aligned / mi_malloc_aligned // ------------------------------------------------------ -mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, false); } -mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { #if !MI_PADDING // without padding, any small sized allocation is naturally aligned (see also `_mi_segment_page_start`) if (!_mi_is_power_of_two(alignment)) return NULL; - if (mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX)) + if mi_likely(_mi_is_power_of_two(size) && size >= alignment && size <= MI_SMALL_SIZE_MAX) #else // with padding, we can only guarantee this for fixed alignments - if (mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))) - && size <= MI_SMALL_SIZE_MAX)) + if mi_likely((alignment == sizeof(void*) || (alignment == MI_MAX_ALIGN_SIZE && size > (MI_MAX_ALIGN_SIZE/2))) + && size <= MI_SMALL_SIZE_MAX) #endif { // fast path for common alignment and size @@ -122,45 +133,45 @@ mi_decl_restrict void* mi_heap_malloc_aligned(mi_heap_t* heap, size_t size, size // Aligned Allocation // ------------------------------------------------------ -mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned_at(mi_heap_t* heap, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_zero_aligned_at(heap, size, alignment, offset, true); } -mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_zalloc_aligned(mi_heap_t* heap, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(heap, size, alignment, 0); } -mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned_at(mi_heap_t* heap, size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_zalloc_aligned_at(heap, total, alignment, offset); } -mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, size_t count, size_t size, size_t 
alignment) mi_attr_noexcept { return mi_heap_calloc_aligned_at(heap,count,size,alignment,0); } -mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); } -mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); } -mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); } -mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); } -mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); } -mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); } @@ -207,55 +218,55 @@ static void* mi_heap_realloc_zero_aligned(mi_heap_t* heap, void* p, size_t newsi return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,zero); } -void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_realloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap,p,newsize,alignment,offset,false); } -void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_realloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap,p,newsize,alignment,false); } -void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_rezalloc_aligned_at(mi_heap_t* heap, void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_zero_aligned_at(heap, p, newsize, alignment, offset, true); } -void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_rezalloc_aligned(mi_heap_t* heap, void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_zero_aligned(heap, p, newsize, alignment, true); } -void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t 
newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_recalloc_aligned_at(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned_at(heap, p, total, alignment, offset); } -void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(newcount, size, &total)) return NULL; return mi_heap_rezalloc_aligned(heap, p, total, alignment); } -void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); } -void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); } -void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); } -void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); } -void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { +mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); } -void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); } diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c b/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c index 41d0a386e..ba2313a2a 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c @@ -254,7 +254,7 @@ static malloc_zone_t mi_malloc_zone = { static inline malloc_zone_t* mi_get_default_zone(void) { static bool init; - if (mi_unlikely(!init)) { + if mi_unlikely(!init) { init = true; malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see <http://eatmyrandom.blogspot.com/2010/03/mallocfree-interception-on-mac-os-x.html>) } diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-override.c b/source/luametatex/source/libraries/mimalloc/src/alloc-override.c index 
e29cb4b23..6b9845d39 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-override.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-override.c @@ -29,7 +29,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; // Override system malloc // ------------------------------------------------------ -#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) && !defined(MI_VALGRIND) +#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) && !MI_TRACK_ENABLED // gcc, clang: use aliasing to alias the exported function to one of our `mi_` functions #if (defined(__GNUC__) && __GNUC__ >= 9) #pragma GCC diagnostic ignored "-Wattributes" // or we get warnings that nodiscard is ignored on a forward @@ -123,10 +123,10 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; // we just override new/delete which does work in a static library. #else // On all other systems forward to our API - void* malloc(size_t size) MI_FORWARD1(mi_malloc, size) - void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n) - void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) - void free(void* p) MI_FORWARD0(mi_free, p) + mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size) + mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n) + mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize) + mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p) #endif #if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__) diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c b/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c index 176e7ec30..214a97345 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c @@ -83,13 +83,16 @@ mi_decl_nodiscard mi_decl_restrict void* mi_pvalloc(size_t size) mi_attr_noexcep } mi_decl_nodiscard mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size_t size) mi_attr_noexcept { - if (mi_unlikely((size&(alignment-1)) != 0)) { // C11 requires alignment>0 && integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc> + // C11 requires the size to be an integral multiple of the alignment, see <https://en.cppreference.com/w/c/memory/aligned_alloc>. + // unfortunately, it turns out quite some programs pass a size that is not an integral multiple so skip this check.. 
+ /* if mi_unlikely((size & (alignment - 1)) != 0) { // C11 requires alignment>0 && integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc> #if MI_DEBUG > 0 _mi_error_message(EOVERFLOW, "(mi_)aligned_alloc requires the size to be an integral multiple of the alignment (size %zu, alignment %zu)\n", size, alignment); #endif return NULL; } - // C11 also requires alignment to be a power-of-two which is checked in mi_malloc_aligned + */ + // C11 also requires alignment to be a power-of-two (and > 0) which is checked in mi_malloc_aligned void* p = mi_malloc_aligned(size, alignment); mi_assert_internal(((uintptr_t)p % alignment) == 0); return p; @@ -109,7 +112,7 @@ mi_decl_nodiscard int mi_reallocarr( void* p, size_t count, size_t size ) mi_att } void** op = (void**)p; void* newp = mi_reallocarray(*op, count, size); - if (mi_unlikely(newp == NULL)) return errno; + if mi_unlikely(newp == NULL) { return errno; } *op = newp; return 0; } diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc.c b/source/luametatex/source/libraries/mimalloc/src/alloc.c index 1a36b5da8..348218246 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc.c @@ -12,6 +12,7 @@ terms of the MIT license. A copy of the license can be found in the file #include "mimalloc-internal.h" #include "mimalloc-atomic.h" + #include <string.h> // memset, strlen #include <stdlib.h> // malloc, exit @@ -25,11 +26,11 @@ terms of the MIT license. A copy of the license can be found in the file // Fast allocation in a page: just pop from the free list. // Fall back to generic allocation only if the list is empty. -extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size) mi_attr_noexcept { +extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept { mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size); mi_block_t* const block = page->free; - if (mi_unlikely(block == NULL)) { - return _mi_malloc_generic(heap, size); + if mi_unlikely(block == NULL) { + return _mi_malloc_generic(heap, size, zero); } mi_assert_internal(block != NULL && _mi_ptr_page(block) == page); // pop from the free list @@ -37,10 +38,22 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz page->free = mi_block_next(page, block); mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page); -#if (MI_DEBUG>0) - if (!page->is_zero) { memset(block, MI_DEBUG_UNINIT, size); } + // allow use of the block internally + // note: when tracking we need to avoid ever touching the MI_PADDING since + // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc-track.h`) + mi_track_mem_undefined(block, mi_page_usable_block_size(page)); + + // zero the block? note: we need to zero the full block size (issue #63) + if mi_unlikely(zero) { + mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic) + const size_t zsize = (page->is_zero ? 
sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size); + _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); + } + +#if (MI_DEBUG>0) && !MI_TRACK_ENABLED + if (!page->is_zero && !zero) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } #elif (MI_SECURE!=0) - block->next = 0; // don't leak internal data + if (!zero) { block->next = 0; } // don't leak internal data #endif #if (MI_STAT>0) @@ -55,10 +68,13 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } #endif -#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) +#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); + #if (MI_DEBUG>1) mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess + #endif padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); padding->delta = (uint32_t)(delta); uint8_t* fill = (uint8_t*)padding - delta; @@ -69,8 +85,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz return block; } -// allocate a small block -extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { +static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local mi_assert(size <= MI_SMALL_SIZE_MAX); @@ -80,7 +95,7 @@ extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_ } #endif mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE); - void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE); + void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); mi_assert_internal(p==NULL || mi_usable_size(p) >= size); #if MI_STAT>1 if (p != NULL) { @@ -88,22 +103,28 @@ extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_ mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif + mi_track_malloc(p,size,zero); return p; } -extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { +// allocate a small block +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return mi_heap_malloc_small_zero(heap, size, false); +} + +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { return mi_heap_malloc_small(mi_get_default_heap(), size); } // The main allocation function -extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { - if (mi_likely(size <= MI_SMALL_SIZE_MAX)) { - return mi_heap_malloc_small(heap, size); +extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { + if mi_likely(size <= MI_SMALL_SIZE_MAX) { + return mi_heap_malloc_small_zero(heap, size, zero); } else { mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local - void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE); // note: size can overflow but it is detected in malloc_generic + void* const p = 
_mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero); // note: size can overflow but it is detected in malloc_generic mi_assert_internal(p == NULL || mi_usable_size(p) >= size); #if MI_STAT>1 if (p != NULL) { @@ -111,55 +132,29 @@ extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif + mi_track_malloc(p,size,zero); return p; } } -extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { - return mi_heap_malloc(mi_get_default_heap(), size); +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { + return _mi_heap_malloc_zero(heap, size, false); } - -void _mi_block_zero_init(const mi_page_t* page, void* p, size_t size) { - // note: we need to initialize the whole usable block size to zero, not just the requested size, - // or the recalloc/rezalloc functions cannot safely expand in place (see issue #63) - MI_UNUSED(size); - mi_assert_internal(p != NULL); - mi_assert_internal(mi_usable_size(p) >= size); // size can be zero - mi_assert_internal(_mi_ptr_page(p)==page); - if (page->is_zero && size > sizeof(mi_block_t)) { - // already zero initialized memory - ((mi_block_t*)p)->next = 0; // clear the free list pointer - mi_assert_expensive(mi_mem_is_zero(p, mi_usable_size(p))); - } - else { - // otherwise memset - memset(p, 0, mi_usable_size(p)); - } +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { + return mi_heap_malloc(mi_get_default_heap(), size); } // zero initialized small block -mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { - void* p = mi_malloc_small(size); - if (p != NULL) { - _mi_block_zero_init(_mi_ptr_page(p), p, size); // todo: can we avoid getting the page again? - } - return p; -} - -void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { - void* p = mi_heap_malloc(heap,size); - if (zero && p != NULL) { - _mi_block_zero_init(_mi_ptr_page(p),p,size); // todo: can we avoid getting the page again? - } - return p; +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { + return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true); } -extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { return _mi_heap_malloc_zero(heap, size, true); } -mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { return mi_heap_zalloc(mi_get_default_heap(),size); } @@ -192,16 +187,19 @@ static mi_decl_noinline bool mi_check_is_double_freex(const mi_page_t* page, con return false; } +#define mi_track_page(page,access) { size_t psize; void* pstart = _mi_page_start(_mi_page_segment(page),page,&psize); mi_track_mem_##access( pstart, psize); } + static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { + bool is_double_free = false; mi_block_t* n = mi_block_nextx(page, block, page->keys); // pretend it is freed, and get the decoded first field if (((uintptr_t)n & (MI_INTPTR_SIZE-1))==0 && // quick check: aligned pointer? (n==NULL || mi_is_in_same_page(block, n))) // quick check: in same page or NULL? { // Suspicous: decoded value a in block is in the same page (or NULL) -- maybe a double free? 
// (continue in separate function to improve code generation) - return mi_check_is_double_freex(page, block); + is_double_free = mi_check_is_double_freex(page, block); } - return false; + return is_double_free; } #else static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block_t* block) { @@ -215,12 +213,19 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // Check for heap block overflow by setting up padding at the end of the block // --------------------------------------------------------------------------- -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) +#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { *bsize = mi_page_usable_block_size(page); const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); *delta = padding->delta; - return ((uint32_t)mi_ptr_encode(page,block,page->keys) == padding->canary && *delta <= *bsize); + uint32_t canary = padding->canary; + uintptr_t keys[2]; + keys[0] = page->keys[0]; + keys[1] = page->keys[1]; + bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize); + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); + return ok; } // Return the exact usable size of a block. @@ -242,13 +247,16 @@ static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, si *size = bsize - delta; uint8_t* fill = (uint8_t*)block + bsize - delta; const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes + mi_track_mem_defined(fill,maxpad); for (size_t i = 0; i < maxpad; i++) { if (fill[i] != MI_DEBUG_PADDING) { *wrong = bsize - delta + i; - return false; + ok = false; + break; } } - return true; + mi_track_mem_noaccess(fill,maxpad); + return ok; } static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { @@ -347,14 +355,14 @@ static void mi_stat_huge_free(const mi_page_t* page) { // Free // ------------------------------------------------------ -// multi-threaded free +// multi-threaded free (or free in huge block) static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block) { // The padding check may access the non-thread-owned page for the key values. // that is safe as these are constant and the page won't be freed (as the block is not freed yet). 
mi_check_padding(page, block); mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection - #if (MI_DEBUG!=0) + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading memset(block, MI_DEBUG_FREED, mi_usable_size(block)); #endif @@ -372,7 +380,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc mi_thread_free_t tfree = mi_atomic_load_relaxed(&page->xthread_free); do { use_delayed = (mi_tf_delayed(tfree) == MI_USE_DELAYED_FREE); - if (mi_unlikely(use_delayed)) { + if mi_unlikely(use_delayed) { // unlikely: this only happens on the first concurrent free in a page that is in the full list tfreex = mi_tf_set_delayed(tfree,MI_DELAYED_FREEING); } @@ -383,7 +391,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc } } while (!mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); - if (mi_unlikely(use_delayed)) { + if mi_unlikely(use_delayed) { // racy read on `heap`, but ok because MI_DELAYED_FREEING is set (see `mi_heap_delete` and `mi_heap_collect_abandon`) mi_heap_t* const heap = (mi_heap_t*)(mi_atomic_load_acquire(&page->xheap)); //mi_page_heap(page); mi_assert_internal(heap != NULL); @@ -409,20 +417,21 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block) { // and push it on the free list - if (mi_likely(local)) { + //const size_t bsize = mi_page_block_size(page); + if mi_likely(local) { // owning thread can free a block directly - if (mi_unlikely(mi_check_is_double_free(page, block))) return; + if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); - #if (MI_DEBUG!=0) + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif mi_block_set_next(page, block, page->local_free); page->local_free = block; page->used--; - if (mi_unlikely(mi_page_all_free(page))) { + if mi_unlikely(mi_page_all_free(page)) { _mi_page_retire(page); } - else if (mi_unlikely(mi_page_is_in_full(page))) { + else if mi_unlikely(mi_page_is_in_full(page)) { _mi_page_unfull(page); } } @@ -444,7 +453,8 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) mi_attr_noexcept { mi_page_t* const page = _mi_segment_page_of(segment, p); mi_block_t* const block = (mi_page_has_aligned(page) ? 
_mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); - mi_stat_free(page, block); + mi_stat_free(page, block); // stat_free may access the padding + mi_track_free(p); _mi_free_block(page, local, block); } @@ -455,26 +465,26 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms { MI_UNUSED(msg); #if (MI_DEBUG>0) - if (mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0)) { + if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) { _mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p); return NULL; } #endif mi_segment_t* const segment = _mi_ptr_segment(p); - if (mi_unlikely(segment == NULL)) return NULL; // checks also for (p==NULL) + if mi_unlikely(segment == NULL) return NULL; // checks also for (p==NULL) #if (MI_DEBUG>0) - if (mi_unlikely(!mi_is_in_heap_region(p))) { + if mi_unlikely(!mi_is_in_heap_region(p)) { _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n" "(this may still be a valid very large allocation (over 64MiB))\n", msg, p); - if (mi_likely(_mi_ptr_cookie(segment) == segment->cookie)) { + if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) { _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p); } } #endif #if (MI_DEBUG>0 || MI_SECURE>=4) - if (mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie)) { + if mi_unlikely(_mi_ptr_cookie(segment) != segment->cookie) { _mi_error_message(EINVAL, "%s: pointer does not point to a valid heap space: %p\n", msg, p); return NULL; } @@ -486,23 +496,24 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms void mi_free(void* p) mi_attr_noexcept { mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - if (mi_unlikely(segment == NULL)) return; + if mi_unlikely(segment == NULL) return; mi_threadid_t tid = _mi_thread_id(); mi_page_t* const page = _mi_segment_page_of(segment, p); - if (mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0)) { // the thread id matches and it is not a full page, nor has aligned blocks + if mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0) { // the thread id matches and it is not a full page, nor has aligned blocks // local, and not full or aligned mi_block_t* block = (mi_block_t*)(p); - if (mi_unlikely(mi_check_is_double_free(page,block))) return; + if mi_unlikely(mi_check_is_double_free(page,block)) return; mi_check_padding(page, block); mi_stat_free(page, block); - #if (MI_DEBUG!=0) + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif + mi_track_free(p); mi_block_set_next(page, block, page->local_free); page->local_free = block; - if (mi_unlikely(--page->used == 0)) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) + if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) _mi_page_retire(page); } } @@ -513,6 +524,7 @@ void mi_free(void* p) mi_attr_noexcept } } +// return true if successful bool _mi_free_delayed_block(mi_block_t* block) { // get segment and page const mi_segment_t* const segment = _mi_ptr_segment(block); @@ -525,7 +537,9 @@ bool _mi_free_delayed_block(mi_block_t* block) { // some blocks may end up in the page `thread_free` list with no blocks in the // heap `thread_delayed_free` list which may cause the page to be never freed! 
// (it would only be freed if we happen to scan it in `mi_page_queue_find_free_ex`) - _mi_page_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */); + if (!_mi_page_try_use_delayed_free(page, MI_USE_DELAYED_FREE, false /* dont overwrite never delayed */)) { + return false; + } // collect all other non-local frees to ensure up-to-date `used` count _mi_page_free_collect(page, false); @@ -548,7 +562,7 @@ static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noe const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg); if (segment==NULL) return 0; // also returns 0 if `p == NULL` const mi_page_t* const page = _mi_segment_page_of(segment, p); - if (mi_likely(!mi_page_has_aligned(page))) { + if mi_likely(!mi_page_has_aligned(page)) { const mi_block_t* block = (const mi_block_t*)p; return mi_page_usable_size_of(page, block); } @@ -558,7 +572,7 @@ static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noe } } -size_t mi_usable_size(const void* p) mi_attr_noexcept { +mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept { return _mi_usable_size(p, "mi_usable_size"); } @@ -570,6 +584,7 @@ size_t mi_usable_size(const void* p) mi_attr_noexcept { #ifdef __cplusplus void* _mi_externs[] = { (void*)&_mi_page_malloc, + (void*)&_mi_heap_malloc_zero, (void*)&mi_malloc, (void*)&mi_malloc_small, (void*)&mi_zalloc_small, @@ -602,24 +617,24 @@ void mi_free_aligned(void* p, size_t alignment) mi_attr_noexcept { mi_free(p); } -extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count,size,&total)) return NULL; return mi_heap_zalloc(heap,total); } -mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { return mi_heap_calloc(mi_get_default_heap(),count,size); } // Uninitialized `calloc` -extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_malloc(heap, total); } -mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { return mi_heap_mallocn(mi_get_default_heap(),count,size); } @@ -638,31 +653,40 @@ void* mi_expand(void* p, size_t newsize) mi_attr_noexcept { } void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept { - const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL - if (mi_unlikely(newsize <= size && newsize >= (size / 2))) { + // if p == NULL then behave as malloc. + // else if size == 0 then reallocate to a zero-sized block (and don't return NULL, just as mi_malloc(0)). + // (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.) 
+ const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0) + if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0) // todo: adjust potential padding to reflect the new size? + mi_track_free(p); + mi_track_malloc(p,newsize,true); return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_malloc(heap,newsize); - if (mi_likely(newp != NULL)) { + if mi_likely(newp != NULL) { if (zero && newsize > size) { // also set last word in the previous allocation to zero to ensure any padding is zero-initialized const size_t start = (size >= sizeof(intptr_t) ? size - sizeof(intptr_t) : 0); memset((uint8_t*)newp + start, 0, newsize - start); } - if (mi_likely(p != NULL)) { - _mi_memcpy_aligned(newp, p, (newsize > size ? size : newsize)); + if mi_likely(p != NULL) { + if mi_likely(_mi_is_aligned(p, sizeof(uintptr_t))) { // a client may pass in an arbitrary pointer `p`.. + const size_t copysize = (newsize > size ? size : newsize); + mi_track_mem_defined(p,copysize); // _mi_useable_size may be too large for byte precise memory tracking.. + _mi_memcpy_aligned(newp, p, copysize); + } mi_free(p); // only free the original pointer if successful } } return newp; } -void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, false); } -void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_realloc(heap, p, total); @@ -670,41 +694,41 @@ void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_a // Reallocate but free `p` on errors -void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_reallocf(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { void* newp = mi_heap_realloc(heap, p, newsize); if (newp==NULL && p!=NULL) mi_free(p); return newp; } -void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_rezalloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept { return _mi_heap_realloc_zero(heap, p, newsize, true); } -void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept { size_t total; if (mi_count_size_overflow(count, size, &total)) return NULL; return mi_heap_rezalloc(heap, p, total); } -void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_realloc(mi_get_default_heap(),p,newsize); } -void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_reallocn(mi_get_default_heap(),p,count,size); } // Reallocate but free `p` on errors -void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { return 
mi_heap_reallocf(mi_get_default_heap(),p,newsize); } -void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { +mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); } -void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { +mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { return mi_heap_recalloc(mi_get_default_heap(), p, count, size); } @@ -715,7 +739,7 @@ void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { // ------------------------------------------------------ // `strdup` using mi_malloc -mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_noexcept { if (s == NULL) return NULL; size_t n = strlen(s); char* t = (char*)mi_heap_malloc(heap,n+1); @@ -723,12 +747,12 @@ mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const char* s) mi_attr_no return t; } -mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { return mi_heap_strdup(mi_get_default_heap(), s); } // `strndup` using mi_malloc -mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) mi_attr_noexcept { if (s == NULL) return NULL; const char* end = (const char*)memchr(s, 0, n); // find end of string in the first `n` characters (returns NULL if not found) const size_t m = (end != NULL ? (size_t)(end - s) : n); // `m` is the minimum of `n` or the end-of-string @@ -740,7 +764,7 @@ mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const char* s, size_t n) return t; } -mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { return mi_heap_strndup(mi_get_default_heap(),s,n); } @@ -751,7 +775,7 @@ mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { #define PATH_MAX MAX_PATH #endif #include <windows.h> -mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept { // todo: use GetFullPathNameW to allow longer file names char buf[PATH_MAX]; DWORD res = GetFullPathNameA(fname, PATH_MAX, (resolved_name == NULL ? 
buf : resolved_name), NULL); @@ -797,7 +821,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) } #endif -mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); } #endif @@ -835,8 +859,8 @@ static bool mi_try_new_handler(bool nothrow) { #else typedef void (*std_new_handler_t)(void); -#if (defined(__GNUC__) || defined(__clang__)) -std_new_handler_t __attribute((weak)) _ZSt15get_new_handlerv(void) { +#if (defined(__GNUC__) || (defined(__clang__) && !defined(_MSC_VER))) // exclude clang-cl, see issue #631 +std_new_handler_t __attribute__((weak)) _ZSt15get_new_handlerv(void) { return NULL; } static std_new_handler_t mi_get_new_handler(void) { @@ -873,19 +897,19 @@ static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) { return p; } -mi_decl_restrict void* mi_new(size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { void* p = mi_malloc(size); - if (mi_unlikely(p == NULL)) return mi_try_new(size,false); + if mi_unlikely(p == NULL) return mi_try_new(size,false); return p; } -mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept { void* p = mi_malloc(size); - if (mi_unlikely(p == NULL)) return mi_try_new(size, true); + if mi_unlikely(p == NULL) return mi_try_new(size, true); return p; } -mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { +mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { void* p; do { p = mi_malloc_aligned(size, alignment); @@ -894,7 +918,7 @@ mi_decl_restrict void* mi_new_aligned(size_t size, size_t alignment) { return p; } -mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { +mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_attr_noexcept { void* p; do { p = mi_malloc_aligned(size, alignment); @@ -903,9 +927,9 @@ mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, size_t alignment) mi_ return p; } -mi_decl_restrict void* mi_new_n(size_t count, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { size_t total; - if (mi_unlikely(mi_count_size_overflow(count, size, &total))) { + if mi_unlikely(mi_count_size_overflow(count, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc return NULL; } @@ -914,7 +938,7 @@ mi_decl_restrict void* mi_new_n(size_t count, size_t size) { } } -void* mi_new_realloc(void* p, size_t newsize) { +mi_decl_nodiscard void* mi_new_realloc(void* p, size_t newsize) { void* q; do { q = mi_realloc(p, newsize); @@ -922,9 +946,9 @@ void* mi_new_realloc(void* p, size_t newsize) { return q; } -void* mi_new_reallocn(void* p, size_t newcount, size_t size) { +mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) { size_t total; - if (mi_unlikely(mi_count_size_overflow(newcount, size, &total))) { + if mi_unlikely(mi_count_size_overflow(newcount, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc return NULL; } diff --git a/source/luametatex/source/libraries/mimalloc/src/arena.c 
b/source/luametatex/source/libraries/mimalloc/src/arena.c index 6b1e951f3..56b87d083 100644 --- a/source/luametatex/source/libraries/mimalloc/src/arena.c +++ b/source/luametatex/source/libraries/mimalloc/src/arena.c @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2021, Microsoft Research, Daan Leijen +Copyright (c) 2019-2022, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. @@ -45,16 +45,17 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); Arena allocation ----------------------------------------------------------- */ - // Block info: bit 0 contains the `in_use` bit, the upper bits the // size in count of arena blocks. typedef uintptr_t mi_block_info_t; #define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 8MiB (must be at least MI_SEGMENT_ALIGN) #define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB -#define MI_MAX_ARENAS (64) // not more than 256 (since we use 8 bits in the memid) +#define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1) // A memory arena descriptor typedef struct mi_arena_s { + mi_arena_id_t id; // arena id; 0 for non-specific + bool exclusive; // only allow allocations if specifically for this arena _Atomic(uint8_t*) start; // the start of the memory area size_t block_count; // size of the area in arena blocks (of `MI_ARENA_BLOCK_SIZE`) size_t field_count; // number of bitmap fields (where `field_count * MI_BITMAP_FIELD_BITS >= block_count`) @@ -75,23 +76,58 @@ static mi_decl_cache_align _Atomic(size_t) mi_arena_count; // = 0 /* ----------------------------------------------------------- + Arena id's + 0 is used for non-arena's (like OS memory) + id = arena_index + 1 +----------------------------------------------------------- */ + +static size_t mi_arena_id_index(mi_arena_id_t id) { + return (size_t)(id <= 0 ? MI_MAX_ARENAS : id - 1); +} + +static mi_arena_id_t mi_arena_id_create(size_t arena_index) { + mi_assert_internal(arena_index < MI_MAX_ARENAS); + mi_assert_internal(MI_MAX_ARENAS <= 126); + int id = (int)arena_index + 1; + mi_assert_internal(id >= 1 && id <= 127); + return id; +} + +mi_arena_id_t _mi_arena_id_none(void) { + return 0; +} + +static bool mi_arena_id_is_suitable(mi_arena_id_t arena_id, bool arena_is_exclusive, mi_arena_id_t req_arena_id) { + return ((!arena_is_exclusive && req_arena_id == _mi_arena_id_none()) || + (arena_id == req_arena_id)); +} + + +/* ----------------------------------------------------------- Arena allocations get a memory id where the lower 8 bits are - the arena index +1, and the upper bits the block index. + the arena id, and the upper bits the block index. ----------------------------------------------------------- */ // Use `0` as a special id for direct OS allocated memory. #define MI_MEMID_OS 0 -static size_t mi_arena_id_create(size_t arena_index, mi_bitmap_index_t bitmap_index) { - mi_assert_internal(arena_index < 0xFE); +static size_t mi_arena_memid_create(mi_arena_id_t id, bool exclusive, mi_bitmap_index_t bitmap_index) { mi_assert_internal(((bitmap_index << 8) >> 8) == bitmap_index); // no overflow? - return ((bitmap_index << 8) | ((arena_index+1) & 0xFF)); + mi_assert_internal(id >= 0 && id <= 0x7F); + return ((bitmap_index << 8) | ((uint8_t)id & 0x7F) | (exclusive ? 
0x80 : 0)); +} + +static bool mi_arena_memid_indices(size_t arena_memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { + *bitmap_index = (arena_memid >> 8); + mi_arena_id_t id = (int)(arena_memid & 0x7F); + *arena_index = mi_arena_id_index(id); + return ((arena_memid & 0x80) != 0); } -static void mi_arena_id_indices(size_t memid, size_t* arena_index, mi_bitmap_index_t* bitmap_index) { - mi_assert_internal(memid != MI_MEMID_OS); - *arena_index = (memid & 0xFF) - 1; - *bitmap_index = (memid >> 8); +bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena_id) { + mi_arena_id_t id = (int)(arena_memid & 0x7F); + bool exclusive = ((arena_memid & 0x80) != 0); + return mi_arena_id_is_suitable(id, exclusive, request_arena_id); } static size_t mi_block_count_of_size(size_t size) { @@ -117,14 +153,19 @@ static bool mi_arena_alloc(mi_arena_t* arena, size_t blocks, mi_bitmap_index_t* ----------------------------------------------------------- */ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t arena_index, size_t needed_bcount, - bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) + bool* commit, bool* large, bool* is_pinned, bool* is_zero, + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { + MI_UNUSED(arena_index); + mi_assert_internal(mi_arena_id_index(arena->id) == arena_index); + if (!mi_arena_id_is_suitable(arena->id, arena->exclusive, req_arena_id)) return NULL; + mi_bitmap_index_t bitmap_index; if (!mi_arena_alloc(arena, needed_bcount, &bitmap_index)) return NULL; // claimed it! set the dirty bits (todo: no need for an atomic op here?) void* p = arena->start + (mi_bitmap_index_bit(bitmap_index)*MI_ARENA_BLOCK_SIZE); - *memid = mi_arena_id_create(arena_index, bitmap_index); + *memid = mi_arena_memid_create(arena->id, arena->exclusive, bitmap_index); *is_zero = _mi_bitmap_claim_across(arena->blocks_dirty, arena->field_count, needed_bcount, bitmap_index, NULL); *large = arena->is_large; *is_pinned = (arena->is_large || !arena->allow_decommit); @@ -149,15 +190,31 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren return p; } -static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large, + bool* is_pinned, bool* is_zero, + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { MI_UNUSED_RELEASE(alignment); mi_assert_internal(alignment <= MI_SEGMENT_ALIGN); const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count); const size_t bcount = mi_block_count_of_size(size); - if (mi_likely(max_arena == 0)) return NULL; + if mi_likely(max_arena == 0) return NULL; mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE); + size_t arena_index = mi_arena_id_index(req_arena_id); + if (arena_index < MI_MAX_ARENAS) { + // try a specific arena if requested + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + if (arena != NULL && + (arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local? 
+ (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages + { + void* p = mi_arena_alloc_from(arena, arena_index, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); + mi_assert_internal((uintptr_t)p % alignment == 0); + if (p != NULL) return p; + } + } + else { // try numa affine allocation for (size_t i = 0; i < max_arena; i++) { mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]); @@ -165,11 +222,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local? (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - return p; - } + if (p != NULL) return p; } } @@ -180,10 +235,9 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local! (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages { - void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, memid, tld); + void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); mi_assert_internal((uintptr_t)p % alignment == 0); - if (p != NULL) { - return p; + if (p != NULL) return p; } } } @@ -192,7 +246,7 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, - size_t* memid, mi_os_tld_t* tld) + mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL); mi_assert_internal(size > 0); @@ -206,12 +260,12 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* // try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data) if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) { - void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, memid, tld); + void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); if (p != NULL) return p; } // finally, fall back to the OS - if (mi_option_is_enabled(mi_option_limit_os_alloc)) { + if (mi_option_is_enabled(mi_option_limit_os_alloc) || req_arena_id != _mi_arena_id_none()) { errno = ENOMEM; return NULL; } @@ -222,9 +276,19 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* return p; } -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld) { - return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, memid, tld); + return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); +} + +void* mi_arena_area(mi_arena_id_t arena_id, size_t* 
size) { + if (size != NULL) *size = 0; + size_t arena_index = mi_arena_id_index(arena_id); + if (arena_index >= MI_MAX_ARENAS) return NULL; + mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]); + if (arena == NULL) return NULL; + if (size != NULL) *size = arena->block_count * MI_ARENA_BLOCK_SIZE; + return arena->start; } /* ----------------------------------------------------------- @@ -244,7 +308,7 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_o // allocated in an arena size_t arena_idx; size_t bitmap_idx; - mi_arena_id_indices(memid, &arena_idx, &bitmap_idx); + mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx); mi_assert_internal(arena_idx < MI_MAX_ARENAS); mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t,&mi_arenas[arena_idx]); mi_assert_internal(arena != NULL); @@ -281,10 +345,11 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_o Add an arena. ----------------------------------------------------------- */ -static bool mi_arena_add(mi_arena_t* arena) { +static bool mi_arena_add(mi_arena_t* arena, mi_arena_id_t* arena_id) { mi_assert_internal(arena != NULL); mi_assert_internal((uintptr_t)mi_atomic_load_ptr_relaxed(uint8_t,&arena->start) % MI_SEGMENT_ALIGN == 0); mi_assert_internal(arena->block_count > 0); + if (arena_id != NULL) *arena_id = -1; size_t i = mi_atomic_increment_acq_rel(&mi_arena_count); if (i >= MI_MAX_ARENAS) { @@ -292,11 +357,14 @@ static bool mi_arena_add(mi_arena_t* arena) { return false; } mi_atomic_store_ptr_release(mi_arena_t,&mi_arenas[i], arena); + arena->id = mi_arena_id_create(i); + if (arena_id != NULL) *arena_id = arena->id; return true; } -bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept +bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); if (size < MI_ARENA_BLOCK_SIZE) return false; if (is_large) { @@ -311,6 +379,8 @@ bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_la mi_arena_t* arena = (mi_arena_t*)_mi_os_alloc(asize, &_mi_stats_main); // TODO: can we avoid allocating from the OS? 
if (arena == NULL) return false; + arena->id = _mi_arena_id_none(); + arena->exclusive = exclusive; arena->block_count = bcount; arena->field_count = fields; arena->start = (uint8_t*)start; @@ -335,18 +405,19 @@ bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_la _mi_bitmap_claim(arena->blocks_inuse, fields, post, postidx, NULL); } - mi_arena_add(arena); - return true; + return mi_arena_add(arena, arena_id); + } // Reserve a range of regular OS memory -int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept +int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = _mi_arena_id_none(); size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block bool large = allow_large; void* start = _mi_os_alloc_aligned(size, MI_SEGMENT_ALIGN, commit, &large, &_mi_stats_main); if (start==NULL) return ENOMEM; - if (!mi_manage_os_memory(start, size, (large || commit), large, true, -1)) { + if (!mi_manage_os_memory_ex(start, size, (large || commit), large, true, -1, exclusive, arena_id)) { _mi_os_free_ex(start, size, commit, &_mi_stats_main); _mi_verbose_message("failed to reserve %zu k memory\n", _mi_divide_up(size,1024)); return ENOMEM; @@ -355,6 +426,19 @@ int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noe return 0; } +bool mi_manage_os_memory(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node) mi_attr_noexcept { + return mi_manage_os_memory_ex(start, size, is_committed, is_large, is_zero, numa_node, false, NULL); +} + +int mi_reserve_os_memory(size_t size, bool commit, bool allow_large) mi_attr_noexcept { + return mi_reserve_os_memory_ex(size, commit, allow_large, false, NULL); +} + + +/* ----------------------------------------------------------- + Debugging +----------------------------------------------------------- */ + static size_t mi_debug_show_bitmap(const char* prefix, mi_bitmap_field_t* fields, size_t field_count ) { size_t inuse_count = 0; for (size_t i = 0; i < field_count; i++) { @@ -383,11 +467,13 @@ void mi_debug_show_arenas(void) mi_attr_noexcept { } } + /* ----------------------------------------------------------- Reserve a huge page arena. 
----------------------------------------------------------- */ // reserve at a specific numa node -int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { +int mi_reserve_huge_os_pages_at_ex(size_t pages, int numa_node, size_t timeout_msecs, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept { + if (arena_id != NULL) *arena_id = -1; if (pages==0) return 0; if (numa_node < -1) numa_node = -1; if (numa_node >= 0) numa_node = numa_node % _mi_os_numa_node_count(); @@ -400,13 +486,16 @@ int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msec } _mi_verbose_message("numa node %i: reserved %zu GiB huge pages (of the %zu GiB requested)\n", numa_node, pages_reserved, pages); - if (!mi_manage_os_memory(p, hsize, true, true, true, numa_node)) { + if (!mi_manage_os_memory_ex(p, hsize, true, true, true, numa_node, exclusive, arena_id)) { _mi_os_free_huge_pages(p, hsize, &_mi_stats_main); return ENOMEM; } return 0; } +int mi_reserve_huge_os_pages_at(size_t pages, int numa_node, size_t timeout_msecs) mi_attr_noexcept { + return mi_reserve_huge_os_pages_at_ex(pages, numa_node, timeout_msecs, false, NULL); +} // reserve huge pages evenly among the given number of numa nodes (or use the available ones as detected) int mi_reserve_huge_os_pages_interleave(size_t pages, size_t numa_nodes, size_t timeout_msecs) mi_attr_noexcept { diff --git a/source/luametatex/source/libraries/mimalloc/src/bitmap.c b/source/luametatex/source/libraries/mimalloc/src/bitmap.c index af6de0a12..4fc7a1f3d 100644 --- a/source/luametatex/source/libraries/mimalloc/src/bitmap.c +++ b/source/luametatex/source/libraries/mimalloc/src/bitmap.c @@ -108,6 +108,25 @@ bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fiel return false; } +// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled +bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, + const size_t start_field_idx, const size_t count, + mi_bitmap_pred_fun_t pred_fun, void* pred_arg, + mi_bitmap_index_t* bitmap_idx) { + size_t idx = start_field_idx; + for (size_t visited = 0; visited < bitmap_fields; visited++, idx++) { + if (idx >= bitmap_fields) idx = 0; // wrap + if (_mi_bitmap_try_find_claim_field(bitmap, idx, count, bitmap_idx)) { + if (pred_fun == NULL || pred_fun(*bitmap_idx, pred_arg)) { + return true; + } + // predicate returned false, unclaim and look further + _mi_bitmap_unclaim(bitmap, bitmap_fields, count, *bitmap_idx); + } + } + return false; +} + /* // Find `count` bits of 0 and set them to 1 atomically; returns `true` on success. // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never span fields. 
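The new _mi_bitmap_try_find_from_claim_pred keeps the claim protocol of _mi_bitmap_try_find_from_claim but lets the caller veto a slot after it has been claimed: when the predicate returns false the bits are unclaimed again and the scan moves on. A minimal caller-side sketch; the predicate and variable names are illustrative only and not part of this patch:

    // hypothetical filter: accept only slots whose absolute bit index is even
    static bool mi_cdecl example_pred(mi_bitmap_index_t bitmap_idx, void* pred_arg) {
      (void)pred_arg;  // unused in this sketch
      return (mi_bitmap_index_bit(bitmap_idx) % 2) == 0;
    }

    // claim a single bit somewhere in `bitmap` (with `bitmap_fields` fields) that passes the filter
    mi_bitmap_index_t idx;
    if (_mi_bitmap_try_find_from_claim_pred(bitmap, bitmap_fields, 0, 1, &example_pred, NULL, &idx)) {
      // the bit at `idx` is now claimed and satisfies example_pred
    }

segment-cache.c below uses this hook so that only cached segments whose memid is suitable for the requesting arena are popped.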
@@ -283,7 +302,7 @@ bool _mi_bitmap_try_find_from_claim_across(mi_bitmap_t bitmap, const size_t bitm static size_t mi_bitmap_mask_across(mi_bitmap_index_t bitmap_idx, size_t bitmap_fields, size_t count, size_t* pre_mask, size_t* mid_mask, size_t* post_mask) { MI_UNUSED_RELEASE(bitmap_fields); const size_t bitidx = mi_bitmap_index_bit_in_field(bitmap_idx); - if (mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS)) { + if mi_likely(bitidx + count <= MI_BITMAP_FIELD_BITS) { *pre_mask = mi_bitmap_mask_(count, bitidx); *mid_mask = 0; *post_mask = 0; diff --git a/source/luametatex/source/libraries/mimalloc/src/bitmap.h b/source/luametatex/source/libraries/mimalloc/src/bitmap.h index 7bd3106c9..0c501ec1f 100644 --- a/source/luametatex/source/libraries/mimalloc/src/bitmap.h +++ b/source/luametatex/source/libraries/mimalloc/src/bitmap.h @@ -72,6 +72,10 @@ bool _mi_bitmap_try_find_claim_field(mi_bitmap_t bitmap, size_t idx, const size_ // For now, `count` can be at most MI_BITMAP_FIELD_BITS and will never cross fields. bool _mi_bitmap_try_find_from_claim(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_index_t* bitmap_idx); +// Like _mi_bitmap_try_find_from_claim but with an extra predicate that must be fullfilled +typedef bool (mi_cdecl *mi_bitmap_pred_fun_t)(mi_bitmap_index_t bitmap_idx, void* pred_arg); +bool _mi_bitmap_try_find_from_claim_pred(mi_bitmap_t bitmap, const size_t bitmap_fields, const size_t start_field_idx, const size_t count, mi_bitmap_pred_fun_t pred_fun, void* pred_arg, mi_bitmap_index_t* bitmap_idx); + // Set `count` bits at `bitmap_idx` to 0 atomically // Returns `true` if all `count` bits were 1 previously. bool _mi_bitmap_unclaim(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx); diff --git a/source/luametatex/source/libraries/mimalloc/src/heap.c b/source/luametatex/source/libraries/mimalloc/src/heap.c index 816d961ae..15ca36031 100644 --- a/source/luametatex/source/libraries/mimalloc/src/heap.c +++ b/source/luametatex/source/libraries/mimalloc/src/heap.c @@ -139,9 +139,9 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect) mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL); } - // free thread delayed blocks. + // free all current thread delayed blocks. // (if abandoning, after this there are no more thread-delayed references into the pages.) - _mi_heap_delayed_free(heap); + _mi_heap_delayed_free_all(heap); // collect retired pages _mi_heap_collect_retired(heap, force); @@ -200,13 +200,14 @@ mi_heap_t* mi_heap_get_backing(void) { return bheap; } -mi_heap_t* mi_heap_new(void) { +mi_decl_nodiscard mi_heap_t* mi_heap_new_in_arena( mi_arena_id_t arena_id ) { mi_heap_t* bheap = mi_heap_get_backing(); mi_heap_t* heap = mi_heap_malloc_tp(bheap, mi_heap_t); // todo: OS allocate in secure mode? 
if (heap==NULL) return NULL; _mi_memcpy_aligned(heap, &_mi_heap_empty, sizeof(mi_heap_t)); heap->tld = bheap->tld; heap->thread_id = _mi_thread_id(); + heap->arena_id = arena_id; _mi_random_split(&bheap->random, &heap->random); heap->cookie = _mi_heap_random_next(heap) | 1; heap->keys[0] = _mi_heap_random_next(heap); @@ -218,6 +219,14 @@ mi_heap_t* mi_heap_new(void) { return heap; } +mi_decl_nodiscard mi_heap_t* mi_heap_new(void) { + return mi_heap_new_in_arena(_mi_arena_id_none()); +} + +bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid) { + return _mi_arena_memid_is_suitable(memid, heap->arena_id); +} + uintptr_t _mi_heap_random_next(mi_heap_t* heap) { return _mi_random_next(&heap->random); } @@ -350,7 +359,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { if (from==NULL || from->page_count == 0) return; // reduce the size of the delayed frees - _mi_heap_delayed_free(from); + _mi_heap_delayed_free_partial(from); // transfer all pages by appending the queues; this will set a new heap field // so threads may do delayed frees in either heap for a while. @@ -369,7 +378,7 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) { // note: be careful here as the `heap` field in all those pages no longer point to `from`, // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a // the regular `_mi_free_delayed_block` which is safe. - _mi_heap_delayed_free(from); + _mi_heap_delayed_free_all(from); #if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353 mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL); #endif @@ -421,7 +430,7 @@ static mi_heap_t* mi_heap_of_block(const void* p) { mi_segment_t* segment = _mi_ptr_segment(p); bool valid = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(valid); - if (mi_unlikely(!valid)) return NULL; + if mi_unlikely(!valid) return NULL; return mi_page_heap(_mi_segment_page_of(segment,p)); } @@ -543,7 +552,7 @@ static bool mi_heap_visit_areas_page(mi_heap_t* heap, mi_page_queue_t* pq, mi_pa xarea.area.reserved = page->reserved * bsize; xarea.area.committed = page->capacity * bsize; xarea.area.blocks = _mi_page_start(_mi_page_segment(page), page, NULL); - xarea.area.used = page->used * bsize; + xarea.area.used = page->used; // number of blocks in use (#553) xarea.area.block_size = ubsize; xarea.area.full_block_size = bsize; return fun(heap, &xarea, arg); diff --git a/source/luametatex/source/libraries/mimalloc/src/init.c b/source/luametatex/source/libraries/mimalloc/src/init.c index 19124afef..4f37b7176 100644 --- a/source/luametatex/source/libraries/mimalloc/src/init.c +++ b/source/luametatex/source/libraries/mimalloc/src/init.c @@ -109,6 +109,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = { MI_ATOMIC_VAR_INIT(NULL), 0, // tid 0, // cookie + 0, // arena id { 0, 0 }, // keys { {0}, {0}, 0 }, 0, // page count @@ -149,6 +150,7 @@ mi_heap_t _mi_heap_main = { MI_ATOMIC_VAR_INIT(NULL), 0, // thread id 0, // initial cookie + 0, // arena id { 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!) 
{ {0x846ca68b}, {0}, 0 }, // random 0, // page count @@ -475,7 +477,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { // -------------------------------------------------------- // Run functions on process init/done, and thread init/done // -------------------------------------------------------- -static void mi_process_done(void); +static void mi_cdecl mi_process_done(void); static bool os_preloading = true; // true until this module is initialized static bool mi_redirected = false; // true if malloc redirects to mi_malloc @@ -490,7 +492,7 @@ mi_decl_nodiscard bool mi_is_redirected(void) mi_attr_noexcept { } // Communicate with the redirection module on Windows -#if defined(_WIN32) && defined(MI_SHARED_LIB) +#if defined(_WIN32) && defined(MI_SHARED_LIB) && !defined(MI_WIN_NOREDIRECT) #ifdef __cplusplus extern "C" { #endif @@ -506,8 +508,8 @@ mi_decl_export void _mi_redirect_entry(DWORD reason) { mi_thread_done(); } } -__declspec(dllimport) bool mi_allocator_init(const char** message); -__declspec(dllimport) void mi_allocator_done(void); +__declspec(dllimport) bool mi_cdecl mi_allocator_init(const char** message); +__declspec(dllimport) void mi_cdecl mi_allocator_done(void); #ifdef __cplusplus } #endif @@ -606,7 +608,7 @@ void mi_process_init(void) mi_attr_noexcept { } // Called when the process is done (through `at_exit`) -static void mi_process_done(void) { +static void mi_cdecl mi_process_done(void) { // only shutdown if we were initialized if (!_mi_process_is_initialized) return; // ensure we are called once diff --git a/source/luametatex/source/libraries/mimalloc/src/options.c b/source/luametatex/source/libraries/mimalloc/src/options.c index 6b2379322..0182671ce 100644 --- a/source/luametatex/source/libraries/mimalloc/src/options.c +++ b/source/luametatex/source/libraries/mimalloc/src/options.c @@ -120,7 +120,7 @@ mi_decl_nodiscard long mi_option_get(mi_option_t option) { if (option < 0 || option >= _mi_option_last) return 0; mi_option_desc_t* desc = &options[option]; mi_assert(desc->option == option); // index should match the option - if (mi_unlikely(desc->init == UNINIT)) { + if mi_unlikely(desc->init == UNINIT) { mi_option_init(desc); } return desc->value; @@ -170,7 +170,7 @@ void mi_option_disable(mi_option_t option) { } -static void mi_out_stderr(const char* msg, void* arg) { +static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { MI_UNUSED(arg); if (msg == NULL) return; #ifdef _WIN32 @@ -203,7 +203,7 @@ static void mi_out_stderr(const char* msg, void* arg) { static char out_buf[MI_MAX_DELAY_OUTPUT+1]; static _Atomic(size_t) out_len; -static void mi_out_buf(const char* msg, void* arg) { +static void mi_cdecl mi_out_buf(const char* msg, void* arg) { MI_UNUSED(arg); if (msg==NULL) return; if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; @@ -235,7 +235,7 @@ static void mi_out_buf_flush(mi_output_fun* out, bool no_more_buf, void* arg) { // Once this module is loaded, switch to this routine // which outputs to stderr and the delayed output buffer. -static void mi_out_buf_stderr(const char* msg, void* arg) { +static void mi_cdecl mi_out_buf_stderr(const char* msg, void* arg) { mi_out_stderr(msg,arg); mi_out_buf(msg,arg); } @@ -346,7 +346,7 @@ void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... 
) { static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { if (prefix != NULL && strlen(prefix) <= 32 && !_mi_is_main_thread()) { char tprefix[64]; - snprintf(tprefix, sizeof(tprefix), "%sthread 0x%x: ", prefix, (unsigned) _mi_thread_id()); /* HH: %z is unknown */ +/* HH */ snprintf(tprefix, sizeof(tprefix), "%sthread 0x%x: ", prefix, (unsigned) _mi_thread_id()); /* HH: %z is unknown */ mi_vfprintf(out, arg, tprefix, fmt, args); } else { diff --git a/source/luametatex/source/libraries/mimalloc/src/os.c b/source/luametatex/source/libraries/mimalloc/src/os.c index 72959d818..6d7249873 100644 --- a/source/luametatex/source/libraries/mimalloc/src/os.c +++ b/source/luametatex/source/libraries/mimalloc/src/os.c @@ -122,7 +122,7 @@ size_t _mi_os_good_alloc_size(size_t size) { else if (size < 8*MI_MiB) align_size = 256*MI_KiB; else if (size < 32*MI_MiB) align_size = 1*MI_MiB; else align_size = 4*MI_MiB; - if (mi_unlikely(size >= (SIZE_MAX - align_size))) return size; // possible overflow? + if mi_unlikely(size >= (SIZE_MAX - align_size)) return size; // possible overflow? return _mi_align_up(size, align_size); } @@ -362,9 +362,9 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside // the memory region returned by VirtualAlloc; in that case we need to free using // the start of the region. - MEMORY_BASIC_INFORMATION info = { 0, 0 }; + MEMORY_BASIC_INFORMATION info = { 0 }; VirtualQuery(addr, &info, sizeof(info)); - if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < MI_SEGMENT_SIZE) { + if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { errcode = 0; err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); if (err) { errcode = GetLastError(); } @@ -986,7 +986,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! - #if (MI_DEBUG>1) + #if (MI_DEBUG>1) && !MI_TRACK_ENABLED if (MI_SECURE==0) { memset(start, 0, csize); // pretend it is eagerly reset } diff --git a/source/luametatex/source/libraries/mimalloc/src/page.c b/source/luametatex/source/libraries/mimalloc/src/page.c index fd6c5397d..4b321156c 100644 --- a/source/luametatex/source/libraries/mimalloc/src/page.c +++ b/source/luametatex/source/libraries/mimalloc/src/page.c @@ -124,14 +124,23 @@ bool _mi_page_is_valid(mi_page_t* page) { #endif void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { + while (!_mi_page_try_use_delayed_free(page, delay, override_never)) { + mi_atomic_yield(); + } +} + +bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) { mi_thread_free_t tfreex; mi_delayed_t old_delay; mi_thread_free_t tfree; + size_t yield_count = 0; do { tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS; tfreex = mi_tf_set_delayed(tfree, delay); old_delay = mi_tf_delayed(tfree); - if (mi_unlikely(old_delay == MI_DELAYED_FREEING)) { + if mi_unlikely(old_delay == MI_DELAYED_FREEING) { + if (yield_count >= 4) return false; // give up after 4 tries + yield_count++; mi_atomic_yield(); // delay until outstanding MI_DELAYED_FREEING are done. 
// tfree = mi_tf_set_delayed(tfree, MI_NO_DELAYED_FREE); // will cause CAS to busy fail } @@ -143,6 +152,8 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid } } while ((old_delay == MI_DELAYED_FREEING) || !mi_atomic_cas_weak_release(&page->xthread_free, &tfree, tfreex)); + + return true; // success } /* ----------------------------------------------------------- @@ -199,7 +210,7 @@ void _mi_page_free_collect(mi_page_t* page, bool force) { // and the local free list if (page->local_free != NULL) { - if (mi_likely(page->free == NULL)) { + if mi_likely(page->free == NULL) { // usual case page->free = page->local_free; page->local_free = NULL; @@ -272,10 +283,18 @@ static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) { Do any delayed frees (put there by other threads if they deallocated in a full page) ----------------------------------------------------------- */ -void _mi_heap_delayed_free(mi_heap_t* heap) { +void _mi_heap_delayed_free_all(mi_heap_t* heap) { + while (!_mi_heap_delayed_free_partial(heap)) { + mi_atomic_yield(); + } +} + +// returns true if all delayed frees were processed +bool _mi_heap_delayed_free_partial(mi_heap_t* heap) { // take over the list (note: no atomic exchange since it is often NULL) mi_block_t* block = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); while (block != NULL && !mi_atomic_cas_ptr_weak_acq_rel(mi_block_t, &heap->thread_delayed_free, &block, NULL)) { /* nothing */ }; + bool all_freed = true; // and free them all while(block != NULL) { @@ -283,7 +302,9 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { // use internal free instead of regular one to keep stats etc correct if (!_mi_free_delayed_block(block)) { // we might already start delayed freeing while another thread has not yet - // reset the delayed_freeing flag; in that case delay it further by reinserting. + // reset the delayed_freeing flag; in that case delay it further by reinserting the current block + // into the delayed free list + all_freed = false; mi_block_t* dfree = mi_atomic_load_ptr_relaxed(mi_block_t, &heap->thread_delayed_free); do { mi_block_set_nextx(heap, block, dfree, heap->keys); @@ -291,6 +312,7 @@ void _mi_heap_delayed_free(mi_heap_t* heap) { } block = next; } + return all_freed; } /* ----------------------------------------------------------- @@ -403,7 +425,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept { // how to check this efficiently though... // for now, we don't retire if it is the only page left of this size class. mi_page_queue_t* pq = mi_page_queue_of(page); - if (mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page))) { + if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page)) { if (pq->last==page && pq->first==page) { // the only page in the queue? mi_stat_counter_increase(_mi_stats_main.page_no_retire,1); page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4); @@ -619,7 +641,9 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_page_set_heap(page, heap); page->xblock_size = (block_size < MI_HUGE_BLOCK_SIZE ? 
(uint32_t)block_size : MI_HUGE_BLOCK_SIZE); // initialize before _mi_segment_page_start size_t page_size; - _mi_segment_page_start(segment, page, &page_size); + const void* page_start = _mi_segment_page_start(segment, page, &page_size); + MI_UNUSED(page_start); + mi_track_mem_noaccess(page_start,page_size); mi_assert_internal(mi_page_block_size(page) <= page_size); mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE); mi_assert_internal(page_size / block_size < (1L<<16)); @@ -812,8 +836,8 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) { static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { // huge allocation? const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` - if (mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) )) { - if (mi_unlikely(req_size > PTRDIFF_MAX)) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) + if mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE)) { + if mi_unlikely(req_size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>) _mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size); return NULL; } @@ -830,32 +854,32 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept { // Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed. // Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed. -void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept { mi_assert_internal(heap != NULL); // initialize if necessary - if (mi_unlikely(!mi_heap_is_initialized(heap))) { + if mi_unlikely(!mi_heap_is_initialized(heap)) { mi_thread_init(); // calls `_mi_heap_init` in turn heap = mi_get_default_heap(); - if (mi_unlikely(!mi_heap_is_initialized(heap))) { return NULL; } + if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; } } mi_assert_internal(mi_heap_is_initialized(heap)); // call potential deferred free routines _mi_deferred_free(heap, false); - // free delayed frees from other threads - _mi_heap_delayed_free(heap); + // free delayed frees from other threads (but skip contended ones) + _mi_heap_delayed_free_partial(heap); // find (or allocate) a page of the right size mi_page_t* page = mi_find_page(heap, size); - if (mi_unlikely(page == NULL)) { // first time out of memory, try to collect and retry the allocation once more + if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more mi_heap_collect(heap, true /* force */); page = mi_find_page(heap, size); } - if (mi_unlikely(page == NULL)) { // out of memory + if mi_unlikely(page == NULL) { // out of memory const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size` _mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size); return NULL; @@ -864,6 +888,15 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size) mi_attr_noexcept mi_assert_internal(mi_page_immediate_available(page)); mi_assert_internal(mi_page_block_size(page) >= size); - // and try again, this time succeeding! (i.e. this should never recurse) - return _mi_page_malloc(heap, page, size); + // and try again, this time succeeding! (i.e. 
this should never recurse through _mi_page_malloc) + if mi_unlikely(zero && page->xblock_size == 0) { + // note: we cannot call _mi_page_malloc with zeroing for huge blocks; we zero it afterwards in that case. + void* p = _mi_page_malloc(heap, page, size, false); + mi_assert_internal(p != NULL); + _mi_memzero_aligned(p, mi_page_usable_block_size(page)); + return p; + } + else { + return _mi_page_malloc(heap, page, size, zero); + } } diff --git a/source/luametatex/source/libraries/mimalloc/src/random.c b/source/luametatex/source/libraries/mimalloc/src/random.c index d474a53a0..a5f5e6b82 100644 --- a/source/luametatex/source/libraries/mimalloc/src/random.c +++ b/source/luametatex/source/libraries/mimalloc/src/random.c @@ -172,6 +172,8 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim // We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using // dynamic overriding, we observed it can raise an exception when compiled with C++, and // sometimes deadlocks when also running under the VS debugger. +// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. +// To be continued.. #pragma comment (lib,"advapi32.lib") #define RtlGenRandom SystemFunction036 #ifdef __cplusplus diff --git a/source/luametatex/source/libraries/mimalloc/src/region.c b/source/luametatex/source/libraries/mimalloc/src/region.c index 72ce84947..57d11fe8d 100644 --- a/source/luametatex/source/libraries/mimalloc/src/region.c +++ b/source/luametatex/source/libraries/mimalloc/src/region.c @@ -49,9 +49,10 @@ bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); // arena.c +mi_arena_id_t _mi_arena_id_none(void); void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); @@ -180,7 +181,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, bool is_zero = false; bool is_pinned = false; size_t arena_memid = 0; - void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_pinned, &is_zero, &arena_memid, tld); + void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, ®ion_commit, ®ion_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld); if (start == NULL) return false; mi_assert_internal(!(region_large && !allow_large)); mi_assert_internal(!region_large || region_commit); @@ -330,7 +331,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* } mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - #if (MI_DEBUG>=2) + #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED if (*commit) { ((uint8_t*)p)[0] = 0; } #endif @@ -370,13 +371,13 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l } if (p == NULL) { // and 
otherwise fall back to the OS - p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, &arena_memid, tld); + p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld); *memid = mi_memid_create_from_arena(arena_memid); } if (p != NULL) { mi_assert_internal((uintptr_t)p % alignment == 0); -#if (MI_DEBUG>=2) + #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed #endif } diff --git a/source/luametatex/source/libraries/mimalloc/src/segment-cache.c b/source/luametatex/source/libraries/mimalloc/src/segment-cache.c index aacdbc11d..da726716a 100644 --- a/source/luametatex/source/libraries/mimalloc/src/segment-cache.c +++ b/source/luametatex/source/libraries/mimalloc/src/segment-cache.c @@ -39,8 +39,13 @@ static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free +static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { + mi_arena_id_t req_arena_id = *((mi_arena_id_t*)arg); + mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; + return _mi_arena_memid_is_suitable(slot->memid, req_arena_id); +} -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE return NULL; @@ -60,12 +65,15 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm // find an available slot mi_bitmap_index_t bitidx = 0; bool claimed = false; + mi_arena_id_t req_arena_id = _req_arena_id; + mi_bitmap_pred_fun_t pred_fun = &mi_segment_cache_is_suitable; // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? + if (*large) { // large allowed? 
- claimed = _mi_bitmap_try_find_from_claim(cache_available_large, MI_CACHE_FIELDS, start_field, 1, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = true; } if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim(cache_available, MI_CACHE_FIELDS, start_field, 1, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = false; } @@ -283,7 +291,7 @@ static mi_segment_t* _mi_segment_of(const void* p) { size_t index = mi_segment_map_index_of(segment, &bitidx); // fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge const uintptr_t mask = mi_atomic_load_relaxed(&mi_segment_map[index]); - if (mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0)) { + if mi_likely((mask & ((uintptr_t)1 << bitidx)) != 0) { return segment; // yes, allocated by us } if (index==MI_SEGMENT_MAP_WSIZE) return NULL; @@ -324,7 +332,7 @@ static mi_segment_t* _mi_segment_of(const void* p) { mi_assert_internal((void*)segment < p); bool cookie_ok = (_mi_ptr_cookie(segment) == segment->cookie); mi_assert_internal(cookie_ok); - if (mi_unlikely(!cookie_ok)) return NULL; + if mi_unlikely(!cookie_ok) return NULL; if (((uint8_t*)segment + mi_segment_size(segment)) <= (uint8_t*)p) return NULL; // outside the range mi_assert_internal(p >= (void*)segment && (uint8_t*)p < (uint8_t*)segment + mi_segment_size(segment)); return segment; diff --git a/source/luametatex/source/libraries/mimalloc/src/segment.c b/source/luametatex/source/libraries/mimalloc/src/segment.c index 800d4fc31..c76c2259e 100644 --- a/source/luametatex/source/libraries/mimalloc/src/segment.c +++ b/source/luametatex/source/libraries/mimalloc/src/segment.c @@ -721,7 +721,7 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i return page; } -static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segments_tld_t* tld) { +static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) { mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX); // search from best fit up mi_span_queue_t* sq = mi_span_queue_for(slice_count, tld); @@ -730,8 +730,11 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm for (mi_slice_t* slice = sq->first; slice != NULL; slice = slice->next) { if (slice->slice_count >= slice_count) { // found one - mi_span_queue_delete(sq, slice); mi_segment_t* segment = _mi_ptr_segment(slice); + if (_mi_arena_memid_is_suitable(segment->memid, req_arena_id)) { + // found a suitable page span + mi_span_queue_delete(sq, slice); + if (slice->slice_count > slice_count) { mi_segment_slice_split(segment, slice, slice_count, tld); } @@ -745,6 +748,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm return page; } } + } sq++; } // could not find a page.. @@ -757,7 +761,7 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_segm ----------------------------------------------------------- */ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . 
-static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) +static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL)); mi_assert_internal((segment==NULL) || (segment!=NULL && required==0)); @@ -793,9 +797,9 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy bool is_pinned = false; size_t memid = 0; - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); if (segment==NULL) { - segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, &memid, os_tld); + segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld); if (segment == NULL) return NULL; // failed to allocate if (commit) { mi_commit_mask_create_full(&commit_mask); @@ -817,6 +821,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ if (!ok) return NULL; // failed to commit mi_commit_mask_set(&commit_mask, &commit_needed_mask); } + mi_track_mem_undefined(segment,commit_needed); segment->memid = memid; segment->mem_is_pinned = is_pinned; segment->mem_is_large = mem_large; @@ -907,8 +912,8 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_ // Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` . -static mi_segment_t* mi_segment_alloc(size_t required, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { - return mi_segment_init(NULL, required, tld, os_tld, huge_page); +static mi_segment_t* mi_segment_alloc(size_t required, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) { + return mi_segment_init(NULL, required, req_arena_id, tld, os_tld, huge_page); } @@ -1149,8 +1154,8 @@ static mi_segment_t* mi_abandoned_pop(void) { // Check efficiently if it is empty (or if the visited list needs to be moved) mi_tagged_segment_t ts = mi_atomic_load_relaxed(&abandoned); segment = mi_tagged_segment_ptr(ts); - if (mi_likely(segment == NULL)) { - if (mi_likely(!mi_abandoned_visited_revisit())) { // try to swap in the visited list on NULL + if mi_likely(segment == NULL) { + if mi_likely(!mi_abandoned_visited_revisit()) { // try to swap in the visited list on NULL return NULL; } } @@ -1367,6 +1372,9 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) { segment->abandoned_visits++; + // todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments + // and push them into the visited list and use many tries. Perhaps we can skip non-suitable ones in a better way? 
+ bool is_suitable = _mi_heap_memid_is_suitable(heap, segment->memid); bool has_page = mi_segment_check_free(segment,needed_slices,block_size,tld); // try to free up pages (due to concurrent frees) if (segment->used == 0) { // free the segment (by forced reclaim) to make it available to other threads. @@ -1376,13 +1384,13 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice // freeing but that would violate some invariants temporarily) mi_segment_reclaim(segment, heap, 0, NULL, tld); } - else if (has_page) { + else if (has_page && is_suitable) { // found a large enough free span, or a page of the right block_size with free space // we return the result of reclaim (which is usually `segment`) as it might free // the segment due to concurrent frees (in which case `NULL` is returned). return mi_segment_reclaim(segment, heap, block_size, reclaimed, tld); } - else if (segment->abandoned_visits > 3) { + else if (segment->abandoned_visits > 3 && is_suitable) { // always reclaim on 3rd visit to limit the abandoned queue length. mi_segment_reclaim(segment, heap, 0, NULL, tld); } @@ -1442,7 +1450,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_ return segment; } // 2. otherwise allocate a fresh segment - return mi_segment_alloc(0, tld, os_tld, NULL); + return mi_segment_alloc(0, heap->arena_id, tld, os_tld, NULL); } @@ -1458,7 +1466,7 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki size_t page_size = _mi_align_up(required, (required > MI_MEDIUM_PAGE_SIZE ? MI_MEDIUM_PAGE_SIZE : MI_SEGMENT_SLICE_SIZE)); size_t slices_needed = page_size / MI_SEGMENT_SLICE_SIZE; mi_assert_internal(slices_needed * MI_SEGMENT_SLICE_SIZE == page_size); - mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, tld); //(required <= MI_SMALL_SIZE_MAX ? 0 : slices_needed), tld); + mi_page_t* page = mi_segments_page_find_and_allocate(slices_needed, heap->arena_id, tld); //(required <= MI_SMALL_SIZE_MAX ? 
0 : slices_needed), tld); if (page==NULL) { // no free page, allocate a new segment and try again if (mi_segment_reclaim_or_alloc(heap, slices_needed, block_size, tld, os_tld) == NULL) { @@ -1482,10 +1490,10 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki Huge page allocation ----------------------------------------------------------- */ -static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) +static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) { mi_page_t* page = NULL; - mi_segment_t* segment = mi_segment_alloc(size,tld,os_tld,&page); + mi_segment_t* segment = mi_segment_alloc(size,req_arena_id,tld,os_tld,&page); if (segment == NULL || page==NULL) return NULL; mi_assert_internal(segment->used==1); mi_assert_internal(mi_page_block_size(page) >= size); @@ -1535,8 +1543,9 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld); } else { - page = mi_segment_huge_page_alloc(block_size,tld,os_tld); + page = mi_segment_huge_page_alloc(block_size,heap->arena_id,tld,os_tld); } + mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid)); mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld)); return page; } diff --git a/source/luametatex/source/libraries/mimalloc/src/stats.c b/source/luametatex/source/libraries/mimalloc/src/stats.c index 134a7bcb6..f82c7c67f 100644 --- a/source/luametatex/source/libraries/mimalloc/src/stats.c +++ b/source/luametatex/source/libraries/mimalloc/src/stats.c @@ -267,7 +267,7 @@ static void mi_buffered_flush(buffered_t* buf) { buf->used = 0; } -static void mi_buffered_out(const char* msg, void* arg) { +static void mi_cdecl mi_buffered_out(const char* msg, void* arg) { buffered_t* buf = (buffered_t*)arg; if (msg==NULL || buf==NULL) return; for (const char* src = msg; *src != 0; src++) { |
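Taken together, the arena changes above introduce exclusive arenas and per-heap arena binding. A usage sketch of the new public entry points, assuming mimalloc.h declares them with the signatures shown in this patch:

    #include <mimalloc.h>

    mi_arena_id_t arena_id;
    // reserve 64 MiB as an exclusive arena; per the fallback check added in
    // _mi_arena_alloc_aligned above, a heap bound to it does not fall back to
    // the OS but fails with ENOMEM once the arena is exhausted
    if (mi_reserve_os_memory_ex(64 * 1024 * 1024, true /* commit */, true /* allow_large */,
                                true /* exclusive */, &arena_id) == 0) {
      mi_heap_t* heap = mi_heap_new_in_arena(arena_id);
      void* p = mi_heap_malloc(heap, 1024);                       // served from the reserved arena
      size_t arena_size = 0;
      void* arena_start = mi_arena_area(arena_id, &arena_size);   // inspect the reserved range
      // ... use p ...
      mi_free(p);
      mi_heap_delete(heap);
    }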