Diffstat (limited to 'source/luametatex/source/libraries/mimalloc/src/region.c')
-rw-r--r--  source/luametatex/source/libraries/mimalloc/src/region.c  96
1 file changed, 53 insertions(+), 43 deletions(-)
diff --git a/source/luametatex/source/libraries/mimalloc/src/region.c b/source/luametatex/source/libraries/mimalloc/src/region.c
index 57d11fe8d..3571abb60 100644
--- a/source/luametatex/source/libraries/mimalloc/src/region.c
+++ b/source/luametatex/source/libraries/mimalloc/src/region.c
@@ -16,8 +16,8 @@ We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
- an OS allocation/free is still (much) too expensive relative to the accesses
- in that object :-( (`malloc-large` tests this). This means we need a cheaper
+ an OS allocation/free is still (much) too expensive relative to the accesses
+ in that object :-( (`malloc-large` tests this). This means we need a cheaper
way to reuse memory.
3. This layer allows for NUMA aware allocation.
@@ -47,35 +47,34 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats);
// arena.c
mi_arena_id_t _mi_arena_id_none(void);
-void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats);
+void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
-void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
// Constants
#if (MI_INTPTR_SIZE==8)
-#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map
+#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
-#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
-
#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits)
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits)
#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB
-#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
+#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
-// Region info
+// Region info
typedef union mi_region_info_u {
- size_t value;
+ size_t value;
struct {
bool valid; // initialized?
bool is_large:1; // allocated in fixed large/huge OS pages
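
The constants in this hunk compose as follows; the sketch below is standalone C, not part of the patch, and assumes the 64-bit values stated in the inline comments (a MI_SEGMENT_SIZE of 4 MiB and a 64-bit bitmap field).

// Standalone sketch: checks the arithmetic behind the 64-bit comments above.
#include <assert.h>

int main(void) {
  unsigned long long MiB = 1024ULL * 1024ULL;
  unsigned long long GiB = 1024ULL * MiB;
  unsigned long long segment_size = 4 * MiB;                    // MI_SEGMENT_SIZE (assumed)
  unsigned long long region_size  = segment_size * 64;          // MI_REGION_SIZE = segment * bitmap bits
  unsigned long long region_max   = (256 * GiB) / region_size;  // MI_REGION_MAX
  unsigned long long max_obj_size = (64 / 4) * segment_size;    // MI_REGION_MAX_OBJ_SIZE

  assert(region_size  == 256 * MiB);   // "256MiB" in the comment
  assert(region_max   == 1024);        // "1024" in the comment
  assert(max_obj_size == 64 * MiB);    // "64MiB" in the comment
  return 0;
}
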
@@ -89,7 +88,7 @@ typedef union mi_region_info_u {
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
_Atomic(size_t) info; // mi_region_info_t.value
- _Atomic(void*) start; // start of the memory area
+ _Atomic(void*) start; // start of the memory area
mi_bitmap_field_t in_use; // bit per in-use block
mi_bitmap_field_t dirty; // track if non-zero per block
mi_bitmap_field_t commit; // track if committed per block
@@ -102,7 +101,7 @@ typedef struct mem_region_s {
static mem_region_t regions[MI_REGION_MAX];
// Allocated regions
-static _Atomic(size_t) regions_count; // = 0;
+static _Atomic(size_t) regions_count; // = 0;
/* ----------------------------------------------------------------------------
@@ -137,7 +136,7 @@ mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start);
mi_assert_internal(start != NULL);
- return (start + (bit_idx * MI_SEGMENT_SIZE));
+ return (start + (bit_idx * MI_SEGMENT_SIZE));
}
static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
@@ -181,7 +180,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
bool is_zero = false;
bool is_pinned = false;
size_t arena_memid = 0;
- void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld);
+ void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, &region_commit, &region_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld);
if (start == NULL) return false;
mi_assert_internal(!(region_large && !allow_large));
mi_assert_internal(!region_large || region_commit);
@@ -190,7 +189,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
const size_t idx = mi_atomic_increment_acq_rel(&regions_count);
if (idx >= MI_REGION_MAX) {
mi_atomic_decrement_acq_rel(&regions_count);
- _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats);
+ _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats);
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB));
return false;
}
@@ -206,7 +205,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
_mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
mi_atomic_store_ptr_release(void,&r->start, start);
- // and share it
+ // and share it
mi_region_info_t info;
info.value = 0; // initialize the full union to zero
info.x.valid = true;
@@ -243,7 +242,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo
static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
- // try all regions for a free slot
+ // try all regions for a free slot
const size_t count = mi_atomic_load_relaxed(&regions_count); // monotonic, so ok to be relaxed
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
for (size_t visited = 0; visited < count; visited++, idx++) {
@@ -277,7 +276,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
return NULL;
}
}
-
+
// ------------------------------------------------
// found a region and claimed `blocks` at `bit_idx`, initialize them now
mi_assert_internal(region != NULL);
@@ -289,7 +288,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
mi_assert_internal(!(info.x.is_large && !*large));
mi_assert_internal(start != NULL);
- *is_zero = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL);
+ *is_zero = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL);
*large = info.x.is_large;
*is_pinned = info.x.is_pinned;
*memid = mi_memid_create(region, bit_idx);
@@ -308,20 +307,20 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
return NULL;
}
- if (commit_zero) *is_zero = true;
+ if (commit_zero) *is_zero = true;
}
}
else {
// no need to commit, but check if already fully committed
*commit = _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx);
- }
+ }
mi_assert_internal(!*commit || _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx));
// unreset reset blocks
if (_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
// some blocks are still reset
mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
- mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
+ mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
bool reset_zero = false;
@@ -330,13 +329,13 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
}
}
mi_assert_internal(!_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx));
-
+
#if (MI_DEBUG>=2) && !MI_TRACK_ENABLED
if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif
-
- // and return the allocation
- mi_assert_internal(p != NULL);
+
+ // and return the allocation
+ mi_assert_internal(p != NULL);
return p;
}
@@ -347,7 +346,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
-void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
@@ -355,7 +354,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
*is_zero = false;
*is_pinned = false;
bool default_large = false;
- if (large==NULL) large = &default_large; // ensure `large != NULL`
+ if (large==NULL) large = &default_large; // ensure `large != NULL`
if (size == 0) return NULL;
size = _mi_align_up(size, _mi_os_page_size());
@@ -363,23 +362,23 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
void* p = NULL;
size_t arena_memid;
const size_t blocks = mi_region_block_count(size);
- if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
- p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld);
+ if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
+ p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld);
if (p == NULL) {
_mi_warning_message("unable to allocate from region: size %zu\n", size);
}
}
if (p == NULL) {
// and otherwise fall back to the OS
- p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld);
+ p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
}
if (p != NULL) {
- mi_assert_internal((uintptr_t)p % alignment == 0);
+ mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0);
#if (MI_DEBUG>=2) && !MI_TRACK_ENABLED
if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed
-#endif
+ #endif
}
return p;
}
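
The updated assertion spells out what the new align_offset parameter means: the block is placed so that p + align_offset, not p itself, falls on the requested alignment boundary, which lets a header precede a naturally aligned payload. A standalone sketch of that invariant, with hypothetical helper names not taken from the patch:

#include <assert.h>
#include <stddef.h>
#include <stdint.h>

// Mirrors the assertion above: the payload starting align_offset bytes into
// the block must land on an `alignment` boundary.
static int is_aligned_at_offset(const void* p, size_t alignment, size_t align_offset) {
  return ((uintptr_t)p + align_offset) % alignment == 0;
}

int main(void) {
  unsigned char buffer[256];
  // Place p inside buffer so that p + 16 is 64-byte aligned: a 16-byte header
  // can then sit in front of a 64-byte-aligned payload.
  uintptr_t base = (uintptr_t)buffer;
  uintptr_t p = ((base + 16 + 63) & ~(uintptr_t)63) - 16;
  assert(p >= base && p + 16 + 64 <= base + sizeof(buffer));
  assert(is_aligned_at_offset((void*)p, 64, 16));
  return 0;
}
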
@@ -391,21 +390,22 @@ Free
-----------------------------------------------------------------------------*/
// Free previously allocated memory with a given id.
-void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
+void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
mi_assert_internal(size > 0 && tld != NULL);
if (p==NULL) return;
if (size==0) return;
size = _mi_align_up(size, _mi_os_page_size());
-
+
size_t arena_memid = 0;
mi_bitmap_index_t bit_idx;
mem_region_t* region;
if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) {
// was a direct arena allocation, pass through
- _mi_arena_free(p, size, arena_memid, full_commit, tld->stats);
+ _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats);
}
else {
// allocated in a region
+ mi_assert_internal(align_offset == 0);
mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
const size_t blocks = mi_region_block_count(size);
mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
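
The new parameters make the free path symmetric with allocation: the alignment and align_offset passed to _mi_mem_alloc_aligned must be handed back unchanged to _mi_mem_free, since an arena pass-through allocation needs them to release the block, while region-backed allocations always carry align_offset == 0 (the added assertion). A hedged caller sketch, hypothetical and assuming mimalloc's internal declarations are in scope:

// Hypothetical caller: allocate a segment-aligned block and free it with the
// same alignment/align_offset pair. Assumes the whole block stays committed.
static void example_alloc_and_free(mi_os_tld_t* tld) {
  bool commit = true, large = false, is_pinned = false, is_zero = false;
  size_t memid = 0;
  size_t size = 2 * MI_SEGMENT_SIZE;   // well under MI_REGION_MAX_OBJ_SIZE
  void* p = _mi_mem_alloc_aligned(size, MI_SEGMENT_ALIGN, 0 /*align_offset*/,
                                  &commit, &large, &is_pinned, &is_zero, &memid, tld);
  if (p == NULL) return;
  // ... use the block ...
  _mi_mem_free(p, size, MI_SEGMENT_ALIGN, 0 /*align_offset*/, memid,
               commit /*full_commit*/, false /*any_reset*/, tld);
}
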
@@ -428,9 +428,9 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
}
// reset the blocks to reduce the working set.
- if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset)
+ if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset)
&& (mi_option_is_enabled(mi_option_eager_commit) ||
- mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
+ mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
{
bool any_unreset;
_mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
@@ -438,7 +438,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
_mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit)
_mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
}
- }
+ }
// and unclaim
bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
@@ -467,9 +467,9 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
memset((void*)&regions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning
// and release the whole region
mi_atomic_store_release(&region->info, (size_t)0);
- if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
+ if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
_mi_abandoned_await_readers(); // ensure no pending reads
- _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats);
+ _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats);
}
}
}
@@ -482,11 +482,21 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
-----------------------------------------------------------------------------*/
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
- return _mi_os_reset(p, size, tld->stats);
+ if (mi_option_is_enabled(mi_option_reset_decommits)) {
+ return _mi_os_decommit(p, size, tld->stats);
+ }
+ else {
+ return _mi_os_reset(p, size, tld->stats);
+ }
}
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
- return _mi_os_unreset(p, size, is_zero, tld->stats);
+ if (mi_option_is_enabled(mi_option_reset_decommits)) {
+ return _mi_os_commit(p, size, is_zero, tld->stats);
+ }
+ else {
+ return _mi_os_unreset(p, size, is_zero, tld->stats);
+ }
}
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
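
This final hunk keeps _mi_mem_reset and _mi_mem_unreset symmetric with respect to mi_option_reset_decommits: when the option is enabled, reset decommits the range and unreset recommits it, so callers cannot rely on the contents surviving a reset/unreset cycle. A hedged caller sketch, hypothetical and assuming mimalloc's internal declarations plus <string.h> are in scope:

// Hypothetical caller: retire a block, then revive it before reuse. After the
// round trip the contents are undefined unless is_zero reports fresh zero pages.
static void example_retire_and_reuse(void* block, size_t size, mi_os_tld_t* tld) {
  _mi_mem_reset(block, size, tld);              // retire: decommit or OS reset
  // ... block sits idle for a while ...
  bool is_zero = false;
  _mi_mem_unreset(block, size, &is_zero, tld);  // revive before touching it again
  if (!is_zero) {
    memset(block, 0, size);                     // clear manually if not known zero
  }
}
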