From 059fc69b2c7853b937ddb4cfc9d36304dee07893 Mon Sep 17 00:00:00 2001 From: Hans Hagen Date: Sat, 1 Apr 2023 10:50:35 +0200 Subject: 2023-04-01 09:31:00 --- .../source/libraries/mimalloc/src/alloc-aligned.c | 61 +- .../libraries/mimalloc/src/alloc-override-osx.c | 458 --------- .../source/libraries/mimalloc/src/alloc-override.c | 2 +- .../source/libraries/mimalloc/src/alloc-posix.c | 4 +- .../source/libraries/mimalloc/src/alloc.c | 176 ++-- .../source/libraries/mimalloc/src/arena.c | 29 +- .../source/libraries/mimalloc/src/bitmap.c | 2 +- .../source/libraries/mimalloc/src/bitmap.h | 2 +- .../source/libraries/mimalloc/src/heap.c | 46 +- .../source/libraries/mimalloc/src/init.c | 122 +-- .../source/libraries/mimalloc/src/options.c | 185 +--- .../luametatex/source/libraries/mimalloc/src/os.c | 1077 ++------------------ .../source/libraries/mimalloc/src/page.c | 23 +- .../mimalloc/src/prim/osx/alloc-override-zone.c | 458 +++++++++ .../source/libraries/mimalloc/src/prim/osx/prim.c | 9 + .../source/libraries/mimalloc/src/prim/prim.c | 24 + .../source/libraries/mimalloc/src/prim/readme.md | 9 + .../source/libraries/mimalloc/src/prim/unix/prim.c | 838 +++++++++++++++ .../source/libraries/mimalloc/src/prim/wasi/prim.c | 265 +++++ .../mimalloc/src/prim/windows/etw-mimalloc.wprp | 61 ++ .../libraries/mimalloc/src/prim/windows/etw.h | 905 ++++++++++++++++ .../libraries/mimalloc/src/prim/windows/etw.man | Bin 0 -> 3926 bytes .../libraries/mimalloc/src/prim/windows/prim.c | 607 +++++++++++ .../libraries/mimalloc/src/prim/windows/readme.md | 17 + .../source/libraries/mimalloc/src/random.c | 162 +-- .../source/libraries/mimalloc/src/region.c | 27 +- .../source/libraries/mimalloc/src/segment-cache.c | 100 +- .../source/libraries/mimalloc/src/segment.c | 50 +- .../source/libraries/mimalloc/src/static.c | 29 +- .../source/libraries/mimalloc/src/stats.c | 228 +---- 30 files changed, 3739 insertions(+), 2237 deletions(-) delete mode 100644 source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/osx/prim.c create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/prim.c create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/readme.md create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/windows/etw-mimalloc.wprp create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.h create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.man create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c create mode 100644 source/luametatex/source/libraries/mimalloc/src/prim/windows/readme.md (limited to 'source/luametatex/source/libraries/mimalloc/src') diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c b/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c index 9fe82890f..e79a22208 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c @@ -6,9 +6,10 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // mi_prim_get_default_heap -#include // memset +#include // memset // ------------------------------------------------------ // Aligned Allocation @@ -46,7 +47,7 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size); p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block // zero afterwards as only the area from the aligned_p may be committed! - if (p == NULL) return NULL; + if (p == NULL) return NULL; } else { // otherwise over-allocate @@ -61,30 +62,30 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* mi_assert_internal(adjust < alignment); void* aligned_p = (void*)((uintptr_t)p + adjust); if (aligned_p != p) { - mi_page_set_has_aligned(_mi_ptr_page(p), true); + mi_page_t* page = _mi_ptr_page(p); + mi_page_set_has_aligned(page, true); + _mi_padding_shrink(page, (mi_block_t*)p, adjust + size); } + // todo: expand padding if overallocated ? mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p)); mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0); - mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size); - + mi_assert_internal(mi_usable_size(aligned_p)>=size); + mi_assert_internal(mi_usable_size(p) == mi_usable_size(aligned_p)+adjust); + // now zero the block if needed - if (zero && alignment > MI_ALIGNMENT_MAX) { - const ptrdiff_t diff = (uint8_t*)aligned_p - (uint8_t*)p; - const ptrdiff_t zsize = mi_page_usable_block_size(_mi_ptr_page(p)) - diff - MI_PADDING_SIZE; - if (zsize > 0) { _mi_memzero(aligned_p, zsize); } + if (alignment > MI_ALIGNMENT_MAX) { + // for the tracker, on huge aligned allocations only from the start of the large block is defined + mi_track_mem_undefined(aligned_p, size); + if (zero) { + _mi_memzero(aligned_p, mi_usable_size(aligned_p)); + } } - #if MI_TRACK_ENABLED if (p != aligned_p) { - mi_track_free_size(p, oversize); - mi_track_malloc(aligned_p, size, zero); - } - else { - mi_track_resize(aligned_p, oversize, size); - } - #endif + mi_track_align(p,aligned_p,adjust,mi_usable_size(aligned_p)); + } return aligned_p; } @@ -187,27 +188,27 @@ mi_decl_nodiscard mi_decl_restrict void* mi_heap_calloc_aligned(mi_heap_t* heap, } mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_malloc_aligned_at(mi_get_default_heap(), size, alignment, offset); + return mi_heap_malloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_malloc_aligned(mi_get_default_heap(), size, alignment); + return mi_heap_malloc_aligned(mi_prim_get_default_heap(), size, alignment); } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_zalloc_aligned_at(mi_get_default_heap(), size, alignment, offset); + return mi_heap_zalloc_aligned_at(mi_prim_get_default_heap(), size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_aligned(size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_zalloc_aligned(mi_get_default_heap(), size, alignment); + return mi_heap_zalloc_aligned(mi_prim_get_default_heap(), size, alignment); } mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned_at(size_t count, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_calloc_aligned_at(mi_get_default_heap(), count, size, alignment, offset); + return mi_heap_calloc_aligned_at(mi_prim_get_default_heap(), count, size, alignment, offset); } mi_decl_nodiscard mi_decl_restrict void* mi_calloc_aligned(size_t count, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_calloc_aligned(mi_get_default_heap(), count, size, alignment); + return mi_heap_calloc_aligned(mi_prim_get_default_heap(), count, size, alignment); } @@ -282,25 +283,25 @@ mi_decl_nodiscard void* mi_heap_recalloc_aligned(mi_heap_t* heap, void* p, size_ } mi_decl_nodiscard void* mi_realloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_realloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); + return mi_heap_realloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } mi_decl_nodiscard void* mi_realloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_realloc_aligned(mi_get_default_heap(), p, newsize, alignment); + return mi_heap_realloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } mi_decl_nodiscard void* mi_rezalloc_aligned_at(void* p, size_t newsize, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_rezalloc_aligned_at(mi_get_default_heap(), p, newsize, alignment, offset); + return mi_heap_rezalloc_aligned_at(mi_prim_get_default_heap(), p, newsize, alignment, offset); } mi_decl_nodiscard void* mi_rezalloc_aligned(void* p, size_t newsize, size_t alignment) mi_attr_noexcept { - return mi_heap_rezalloc_aligned(mi_get_default_heap(), p, newsize, alignment); + return mi_heap_rezalloc_aligned(mi_prim_get_default_heap(), p, newsize, alignment); } mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t size, size_t alignment, size_t offset) mi_attr_noexcept { - return mi_heap_recalloc_aligned_at(mi_get_default_heap(), p, newcount, size, alignment, offset); + return mi_heap_recalloc_aligned_at(mi_prim_get_default_heap(), p, newcount, size, alignment, offset); } mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept { - return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment); + return mi_heap_recalloc_aligned(mi_prim_get_default_heap(), p, newcount, size, alignment); } diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c b/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c deleted file mode 100644 index a2819a8bf..000000000 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c +++ /dev/null @@ -1,458 +0,0 @@ -/* ---------------------------------------------------------------------------- -Copyright (c) 2018-2022, Microsoft Research, Daan Leijen -This is free software; you can redistribute it and/or modify it under the -terms of the MIT license. A copy of the license can be found in the file -"LICENSE" at the root of this distribution. ------------------------------------------------------------------------------*/ - -#include "mimalloc.h" -#include "mimalloc-internal.h" - -#if defined(MI_MALLOC_OVERRIDE) - -#if !defined(__APPLE__) -#error "this file should only be included on macOS" -#endif - -/* ------------------------------------------------------ - Override system malloc on macOS - This is done through the malloc zone interface. - It seems to be most robust in combination with interposing - though or otherwise we may get zone errors as there are could - be allocations done by the time we take over the - zone. ------------------------------------------------------- */ - -#include -#include -#include // memset -#include - -#ifdef __cplusplus -extern "C" { -#endif - -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) -// only available from OSX 10.6 -extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import)); -#endif - -/* ------------------------------------------------------ - malloc zone members ------------------------------------------------------- */ - -static size_t zone_size(malloc_zone_t* zone, const void* p) { - MI_UNUSED(zone); - if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out - return mi_usable_size(p); -} - -static void* zone_malloc(malloc_zone_t* zone, size_t size) { - MI_UNUSED(zone); - return mi_malloc(size); -} - -static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) { - MI_UNUSED(zone); - return mi_calloc(count, size); -} - -static void* zone_valloc(malloc_zone_t* zone, size_t size) { - MI_UNUSED(zone); - return mi_malloc_aligned(size, _mi_os_page_size()); -} - -static void zone_free(malloc_zone_t* zone, void* p) { - MI_UNUSED(zone); - mi_cfree(p); -} - -static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { - MI_UNUSED(zone); - return mi_realloc(p, newsize); -} - -static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { - MI_UNUSED(zone); - return mi_malloc_aligned(size,alignment); -} - -static void zone_destroy(malloc_zone_t* zone) { - MI_UNUSED(zone); - // todo: ignore for now? -} - -static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { - size_t i; - for (i = 0; i < count; i++) { - ps[i] = zone_malloc(zone, size); - if (ps[i] == NULL) break; - } - return i; -} - -static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { - for(size_t i = 0; i < count; i++) { - zone_free(zone, ps[i]); - ps[i] = NULL; - } -} - -static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) { - MI_UNUSED(zone); MI_UNUSED(size); - mi_collect(false); - return 0; -} - -static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) { - MI_UNUSED(size); - zone_free(zone,p); -} - -static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) { - MI_UNUSED(zone); - return mi_is_in_heap_region(p); -} - - -/* ------------------------------------------------------ - Introspection members ------------------------------------------------------- */ - -static kern_return_t intro_enumerator(task_t task, void* p, - unsigned type_mask, vm_address_t zone_address, - memory_reader_t reader, - vm_range_recorder_t recorder) -{ - // todo: enumerate all memory - MI_UNUSED(task); MI_UNUSED(p); MI_UNUSED(type_mask); MI_UNUSED(zone_address); - MI_UNUSED(reader); MI_UNUSED(recorder); - return KERN_SUCCESS; -} - -static size_t intro_good_size(malloc_zone_t* zone, size_t size) { - MI_UNUSED(zone); - return mi_good_size(size); -} - -static boolean_t intro_check(malloc_zone_t* zone) { - MI_UNUSED(zone); - return true; -} - -static void intro_print(malloc_zone_t* zone, boolean_t verbose) { - MI_UNUSED(zone); MI_UNUSED(verbose); - mi_stats_print(NULL); -} - -static void intro_log(malloc_zone_t* zone, void* p) { - MI_UNUSED(zone); MI_UNUSED(p); - // todo? -} - -static void intro_force_lock(malloc_zone_t* zone) { - MI_UNUSED(zone); - // todo? -} - -static void intro_force_unlock(malloc_zone_t* zone) { - MI_UNUSED(zone); - // todo? -} - -static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { - MI_UNUSED(zone); - // todo... - stats->blocks_in_use = 0; - stats->size_in_use = 0; - stats->max_size_in_use = 0; - stats->size_allocated = 0; -} - -static boolean_t intro_zone_locked(malloc_zone_t* zone) { - MI_UNUSED(zone); - return false; -} - - -/* ------------------------------------------------------ - At process start, override the default allocator ------------------------------------------------------- */ - -#if defined(__GNUC__) && !defined(__clang__) -#pragma GCC diagnostic ignored "-Wmissing-field-initializers" -#endif - -#if defined(__clang__) -#pragma clang diagnostic ignored "-Wc99-extensions" -#endif - -static malloc_introspection_t mi_introspect = { - .enumerator = &intro_enumerator, - .good_size = &intro_good_size, - .check = &intro_check, - .print = &intro_print, - .log = &intro_log, - .force_lock = &intro_force_lock, - .force_unlock = &intro_force_unlock, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) - .statistics = &intro_statistics, - .zone_locked = &intro_zone_locked, -#endif -}; - -static malloc_zone_t mi_malloc_zone = { - // note: even with designators, the order is important for C++ compilation - //.reserved1 = NULL, - //.reserved2 = NULL, - .size = &zone_size, - .malloc = &zone_malloc, - .calloc = &zone_calloc, - .valloc = &zone_valloc, - .free = &zone_free, - .realloc = &zone_realloc, - .destroy = &zone_destroy, - .zone_name = "mimalloc", - .batch_malloc = &zone_batch_malloc, - .batch_free = &zone_batch_free, - .introspect = &mi_introspect, -#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) - #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) - .version = 10, - #else - .version = 9, - #endif - // switch to version 9+ on OSX 10.6 to support memalign. - .memalign = &zone_memalign, - .free_definite_size = &zone_free_definite_size, - .pressure_relief = &zone_pressure_relief, - #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) - .claimed_address = &zone_claimed_address, - #endif -#else - .version = 4, -#endif -}; - -#ifdef __cplusplus -} -#endif - - -#if defined(MI_OSX_INTERPOSE) && defined(MI_SHARED_LIB_EXPORT) - -// ------------------------------------------------------ -// Override malloc_xxx and malloc_zone_xxx api's to use only -// our mimalloc zone. Since even the loader uses malloc -// on macOS, this ensures that all allocations go through -// mimalloc (as all calls are interposed). -// The main `malloc`, `free`, etc calls are interposed in `alloc-override.c`, -// Here, we also override macOS specific API's like -// `malloc_zone_calloc` etc. see -// ------------------------------------------------------ - -static inline malloc_zone_t* mi_get_default_zone(void) -{ - static bool init; - if mi_unlikely(!init) { - init = true; - malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see ) - } - return &mi_malloc_zone; -} - -mi_decl_externc int malloc_jumpstart(uintptr_t cookie); -mi_decl_externc void _malloc_fork_prepare(void); -mi_decl_externc void _malloc_fork_parent(void); -mi_decl_externc void _malloc_fork_child(void); - - -static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) { - MI_UNUSED(size); MI_UNUSED(flags); - return mi_get_default_zone(); -} - -static malloc_zone_t* mi_malloc_default_zone (void) { - return mi_get_default_zone(); -} - -static malloc_zone_t* mi_malloc_default_purgeable_zone(void) { - return mi_get_default_zone(); -} - -static void mi_malloc_destroy_zone(malloc_zone_t* zone) { - MI_UNUSED(zone); - // nothing. -} - -static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) { - MI_UNUSED(task); MI_UNUSED(mr); - if (addresses != NULL) *addresses = NULL; - if (count != NULL) *count = 0; - return KERN_SUCCESS; -} - -static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) { - return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name); -} - -static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) { - MI_UNUSED(zone); MI_UNUSED(name); -} - -static int mi_malloc_jumpstart(uintptr_t cookie) { - MI_UNUSED(cookie); - return 1; // or 0 for no error? -} - -static void mi__malloc_fork_prepare(void) { - // nothing -} -static void mi__malloc_fork_parent(void) { - // nothing -} -static void mi__malloc_fork_child(void) { - // nothing -} - -static void mi_malloc_printf(const char* fmt, ...) { - MI_UNUSED(fmt); -} - -static bool zone_check(malloc_zone_t* zone) { - MI_UNUSED(zone); - return true; -} - -static malloc_zone_t* zone_from_ptr(const void* p) { - MI_UNUSED(p); - return mi_get_default_zone(); -} - -static void zone_log(malloc_zone_t* zone, void* p) { - MI_UNUSED(zone); MI_UNUSED(p); -} - -static void zone_print(malloc_zone_t* zone, bool b) { - MI_UNUSED(zone); MI_UNUSED(b); -} - -static void zone_print_ptr_info(void* p) { - MI_UNUSED(p); -} - -static void zone_register(malloc_zone_t* zone) { - MI_UNUSED(zone); -} - -static void zone_unregister(malloc_zone_t* zone) { - MI_UNUSED(zone); -} - -// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` -// See: -struct mi_interpose_s { - const void* replacement; - const void* target; -}; -#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } -#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) -#define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun) -__attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) = -{ - - MI_INTERPOSE_MI(malloc_create_zone), - MI_INTERPOSE_MI(malloc_default_purgeable_zone), - MI_INTERPOSE_MI(malloc_default_zone), - MI_INTERPOSE_MI(malloc_destroy_zone), - MI_INTERPOSE_MI(malloc_get_all_zones), - MI_INTERPOSE_MI(malloc_get_zone_name), - MI_INTERPOSE_MI(malloc_jumpstart), - MI_INTERPOSE_MI(malloc_printf), - MI_INTERPOSE_MI(malloc_set_zone_name), - MI_INTERPOSE_MI(_malloc_fork_child), - MI_INTERPOSE_MI(_malloc_fork_parent), - MI_INTERPOSE_MI(_malloc_fork_prepare), - - MI_INTERPOSE_ZONE(zone_batch_free), - MI_INTERPOSE_ZONE(zone_batch_malloc), - MI_INTERPOSE_ZONE(zone_calloc), - MI_INTERPOSE_ZONE(zone_check), - MI_INTERPOSE_ZONE(zone_free), - MI_INTERPOSE_ZONE(zone_from_ptr), - MI_INTERPOSE_ZONE(zone_log), - MI_INTERPOSE_ZONE(zone_malloc), - MI_INTERPOSE_ZONE(zone_memalign), - MI_INTERPOSE_ZONE(zone_print), - MI_INTERPOSE_ZONE(zone_print_ptr_info), - MI_INTERPOSE_ZONE(zone_realloc), - MI_INTERPOSE_ZONE(zone_register), - MI_INTERPOSE_ZONE(zone_unregister), - MI_INTERPOSE_ZONE(zone_valloc) -}; - - -#else - -// ------------------------------------------------------ -// hook into the zone api's without interposing -// This is the official way of adding an allocator but -// it seems less robust than using interpose. -// ------------------------------------------------------ - -static inline malloc_zone_t* mi_get_default_zone(void) -{ - // The first returned zone is the real default - malloc_zone_t** zones = NULL; - unsigned count = 0; - kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count); - if (ret == KERN_SUCCESS && count > 0) { - return zones[0]; - } - else { - // fallback - return malloc_default_zone(); - } -} - -#if defined(__clang__) -__attribute__((constructor(0))) -#else -__attribute__((constructor)) // seems not supported by g++-11 on the M1 -#endif -static void _mi_macos_override_malloc() { - malloc_zone_t* purgeable_zone = NULL; - - #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) - // force the purgeable zone to exist to avoid strange bugs - if (malloc_default_purgeable_zone) { - purgeable_zone = malloc_default_purgeable_zone(); - } - #endif - - // Register our zone. - // thomcc: I think this is still needed to put us in the zone list. - malloc_zone_register(&mi_malloc_zone); - // Unregister the default zone, this makes our zone the new default - // as that was the last registered. - malloc_zone_t *default_zone = mi_get_default_zone(); - // thomcc: Unsure if the next test is *always* false or just false in the - // cases I've tried. I'm also unsure if the code inside is needed. at all - if (default_zone != &mi_malloc_zone) { - malloc_zone_unregister(default_zone); - - // Reregister the default zone so free and realloc in that zone keep working. - malloc_zone_register(default_zone); - } - - // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs - // earlier than the default zone. - if (purgeable_zone != NULL) { - malloc_zone_unregister(purgeable_zone); - malloc_zone_register(purgeable_zone); - } - -} -#endif // MI_OSX_INTERPOSE - -#endif // MI_MALLOC_OVERRIDE diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-override.c b/source/luametatex/source/libraries/mimalloc/src/alloc-override.c index 84a0d19df..40098ac58 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-override.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-override.c @@ -57,7 +57,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t; // functions that are interposed (or the interposing does not work) #define MI_OSX_IS_INTERPOSED - mi_decl_externc static size_t mi_malloc_size_checked(void *p) { + mi_decl_externc size_t mi_malloc_size_checked(void *p) { if (!mi_is_in_heap_region(p)) return 0; return mi_usable_size(p); } diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c b/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c index e6505f290..b6f09d1a1 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c @@ -10,7 +10,7 @@ terms of the MIT license. A copy of the license can be found in the file // for convenience and used when overriding these functions. // ------------------------------------------------------------------------ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" // ------------------------------------------------------ // Posix & Unix functions definitions @@ -149,7 +149,7 @@ int mi_dupenv_s(char** buf, size_t* size, const char* name) mi_attr_noexcept { else { *buf = mi_strdup(p); if (*buf==NULL) return ENOMEM; - if (size != NULL) *size = strlen(p); + if (size != NULL) *size = _mi_strlen(p); } return 0; } diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc.c b/source/luametatex/source/libraries/mimalloc/src/alloc.c index 86453f152..24045162d 100644 --- a/source/luametatex/source/libraries/mimalloc/src/alloc.c +++ b/source/luametatex/source/libraries/mimalloc/src/alloc.c @@ -9,12 +9,12 @@ terms of the MIT license. A copy of the license can be found in the file #endif #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // _mi_prim_thread_id() - -#include // memset, strlen -#include // malloc, exit +#include // memset, strlen (for mi_strdup) +#include // malloc, abort #define MI_IN_ALLOC_C #include "alloc-override.c" @@ -40,7 +40,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz // allow use of the block internally // note: when tracking we need to avoid ever touching the MI_PADDING since - // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc-track.h`) + // that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc/track.h`) mi_track_mem_undefined(block, mi_page_usable_block_size(page)); // zero the block? note: we need to zero the full block size (issue #63) @@ -50,7 +50,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE); } -#if (MI_DEBUG>0) && !MI_TRACK_ENABLED +#if (MI_DEBUG>0) && !MI_TRACK_ENABLED && !MI_TSAN if (!page->is_zero && !zero && !mi_page_is_huge(page)) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); } @@ -70,20 +70,22 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz } #endif -#if (MI_PADDING > 0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED +#if MI_PADDING // && !MI_TRACK_ENABLED mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + mi_page_usable_block_size(page)); ptrdiff_t delta = ((uint8_t*)padding - (uint8_t*)block - (size - MI_PADDING_SIZE)); - #if (MI_DEBUG>1) + #if (MI_DEBUG>=2) mi_assert_internal(delta >= 0 && mi_page_usable_block_size(page) >= (size - MI_PADDING_SIZE + delta)); mi_track_mem_defined(padding,sizeof(mi_padding_t)); // note: re-enable since mi_page_usable_block_size may set noaccess #endif padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys)); padding->delta = (uint32_t)(delta); + #if MI_PADDING_CHECK if (!mi_page_is_huge(page)) { uint8_t* fill = (uint8_t*)padding - delta; const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; } } + #endif #endif return block; @@ -96,21 +98,18 @@ static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local #endif mi_assert(size <= MI_SMALL_SIZE_MAX); -#if (MI_PADDING) - if (size == 0) { - size = sizeof(void*); - } -#endif + #if (MI_PADDING) + if (size == 0) { size = sizeof(void*); } + #endif mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE); - void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); - mi_assert_internal(p == NULL || mi_usable_size(p) >= size); -#if MI_STAT>1 + void* const p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero); + mi_track_malloc(p,size,zero); + #if MI_STAT>1 if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } -#endif - mi_track_malloc(p,size,zero); + #endif return p; } @@ -120,7 +119,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc_small(mi_h } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t size) mi_attr_noexcept { - return mi_heap_malloc_small(mi_get_default_heap(), size); + return mi_heap_malloc_small(mi_prim_get_default_heap(), size); } // The main allocation function @@ -133,14 +132,13 @@ extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool z mi_assert(heap!=NULL); mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic - mi_assert_internal(p == NULL || mi_usable_size(p) >= size); + mi_track_malloc(p,size,zero); #if MI_STAT>1 if (p != NULL) { - if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); } + if (!mi_heap_is_initialized(heap)) { heap = mi_prim_get_default_heap(); } mi_heap_stat_increase(heap, malloc, mi_usable_size(p)); } #endif - mi_track_malloc(p,size,zero); return p; } } @@ -154,12 +152,12 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc(size_t size) mi_attr_noexcept { - return mi_heap_malloc(mi_get_default_heap(), size); + return mi_heap_malloc(mi_prim_get_default_heap(), size); } // zero initialized small block mi_decl_nodiscard mi_decl_restrict void* mi_zalloc_small(size_t size) mi_attr_noexcept { - return mi_heap_malloc_small_zero(mi_get_default_heap(), size, true); + return mi_heap_malloc_small_zero(mi_prim_get_default_heap(), size, true); } mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* heap, size_t size) mi_attr_noexcept { @@ -167,7 +165,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_zalloc(mi_heap_t* } mi_decl_nodiscard mi_decl_restrict void* mi_zalloc(size_t size) mi_attr_noexcept { - return mi_heap_zalloc(mi_get_default_heap(),size); + return mi_heap_zalloc(mi_prim_get_default_heap(),size); } @@ -225,7 +223,7 @@ static inline bool mi_check_is_double_free(const mi_page_t* page, const mi_block // Check for heap block overflow by setting up padding at the end of the block // --------------------------------------------------------------------------- -#if (MI_PADDING>0) && defined(MI_ENCODE_FREELIST) && !MI_TRACK_ENABLED +#if MI_PADDING // && !MI_TRACK_ENABLED static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* block, size_t* delta, size_t* bsize) { *bsize = mi_page_usable_block_size(page); const mi_padding_t* const padding = (mi_padding_t*)((uint8_t*)block + *bsize); @@ -249,6 +247,40 @@ static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* bl return (ok ? bsize - delta : 0); } +// When a non-thread-local block is freed, it becomes part of the thread delayed free +// list that is freed later by the owning heap. If the exact usable size is too small to +// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) +// so it will later not trigger an overflow error in `mi_free_block`. +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + size_t bsize; + size_t delta; + bool ok = mi_page_decode_padding(page, block, &delta, &bsize); + mi_assert_internal(ok); + if (!ok || (bsize - delta) >= min_size) return; // usually already enough space + mi_assert_internal(bsize >= min_size); + if (bsize < min_size) return; // should never happen + size_t new_delta = (bsize - min_size); + mi_assert_internal(new_delta < bsize); + mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); + mi_track_mem_defined(padding,sizeof(mi_padding_t)); + padding->delta = (uint32_t)new_delta; + mi_track_mem_noaccess(padding,sizeof(mi_padding_t)); +} +#else +static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { + MI_UNUSED(block); + return mi_page_usable_block_size(page); +} + +void _mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { + MI_UNUSED(page); + MI_UNUSED(block); + MI_UNUSED(min_size); +} +#endif + +#if MI_PADDING && MI_PADDING_CHECK + static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, size_t* size, size_t* wrong) { size_t bsize; size_t delta; @@ -281,39 +313,13 @@ static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { } } -// When a non-thread-local block is freed, it becomes part of the thread delayed free -// list that is freed later by the owning heap. If the exact usable size is too small to -// contain the pointer for the delayed list, then shrink the padding (by decreasing delta) -// so it will later not trigger an overflow error in `mi_free_block`. -static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - size_t bsize; - size_t delta; - bool ok = mi_page_decode_padding(page, block, &delta, &bsize); - mi_assert_internal(ok); - if (!ok || (bsize - delta) >= min_size) return; // usually already enough space - mi_assert_internal(bsize >= min_size); - if (bsize < min_size) return; // should never happen - size_t new_delta = (bsize - min_size); - mi_assert_internal(new_delta < bsize); - mi_padding_t* padding = (mi_padding_t*)((uint8_t*)block + bsize); - padding->delta = (uint32_t)new_delta; -} #else + static void mi_check_padding(const mi_page_t* page, const mi_block_t* block) { MI_UNUSED(page); MI_UNUSED(block); } -static size_t mi_page_usable_size_of(const mi_page_t* page, const mi_block_t* block) { - MI_UNUSED(block); - return mi_page_usable_block_size(page); -} - -static void mi_padding_shrink(const mi_page_t* page, const mi_block_t* block, const size_t min_size) { - MI_UNUSED(page); - MI_UNUSED(block); - MI_UNUSED(min_size); -} #endif // only maintain stats for smaller objects if requested @@ -377,7 +383,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc // The padding check may access the non-thread-owned page for the key values. // that is safe as these are constant and the page won't be freed (as the block is not freed yet). mi_check_padding(page, block); - mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection + _mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection // huge page segments are always abandoned and can be freed immediately mi_segment_t* segment = _mi_page_segment(page); @@ -395,7 +401,7 @@ static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* bloc #endif } - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED && !MI_TSAN // note: when tracking, cannot use mi_usable_size with multi-threading if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content memset(block, MI_DEBUG_FREED, mi_usable_size(block)); } @@ -449,7 +455,7 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block // owning thread can free a block directly if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED && !MI_TSAN if (!mi_page_is_huge(page)) { // huge page content may be already decommitted memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); } @@ -481,8 +487,8 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p); - mi_stat_free(page, block); // stat_free may access the padding - mi_track_free(p); + mi_stat_free(page, block); // stat_free may access the padding + mi_track_free_size(block, mi_page_usable_size_of(page,block)); _mi_free_block(page, is_local, block); } @@ -535,7 +541,7 @@ void mi_free(void* p) mi_attr_noexcept { if mi_unlikely(p == NULL) return; mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free"); - const bool is_local= (_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); + const bool is_local= (_mi_prim_thread_id() == mi_atomic_load_relaxed(&segment->thread_id)); mi_page_t* const page = _mi_segment_page_of(segment, p); if mi_likely(is_local) { // thread-local free? @@ -545,10 +551,10 @@ void mi_free(void* p) mi_attr_noexcept if mi_unlikely(mi_check_is_double_free(page, block)) return; mi_check_padding(page, block); mi_stat_free(page, block); - #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED + #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED && !MI_TSAN memset(block, MI_DEBUG_FREED, mi_page_block_size(page)); #endif - mi_track_free(p); + mi_track_free_size(p, mi_page_usable_size_of(page,block)); // faster then mi_usable_size as we already know the page and that p is unaligned mi_block_set_next(page, block, page->local_free); page->local_free = block; if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page)) @@ -648,7 +654,7 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_calloc(mi_heap_t* } mi_decl_nodiscard mi_decl_restrict void* mi_calloc(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_calloc(mi_get_default_heap(),count,size); + return mi_heap_calloc(mi_prim_get_default_heap(),count,size); } // Uninitialized `calloc` @@ -659,7 +665,7 @@ mi_decl_nodiscard extern mi_decl_restrict void* mi_heap_mallocn(mi_heap_t* heap, } mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) mi_attr_noexcept { - return mi_heap_mallocn(mi_get_default_heap(),count,size); + return mi_heap_mallocn(mi_prim_get_default_heap(),count,size); } // Expand (or shrink) in place (or fail) @@ -682,9 +688,9 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) // (this means that returning NULL always indicates an error, and `p` will not have been freed in that case.) const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0) if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0) - // todo: adjust potential padding to reflect the new size? - mi_track_free_size(p, size); - mi_track_malloc(p,newsize,true); + mi_assert_internal(p!=NULL); + // todo: do not track as the usable size is still the same in the free; adjust potential padding? + // mi_track_resize(p,size,newsize) return p; // reallocation still fits and not more than 50% waste } void* newp = mi_heap_malloc(heap,newsize); @@ -736,24 +742,24 @@ mi_decl_nodiscard void* mi_heap_recalloc(mi_heap_t* heap, void* p, size_t count, mi_decl_nodiscard void* mi_realloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_realloc(mi_get_default_heap(),p,newsize); + return mi_heap_realloc(mi_prim_get_default_heap(),p,newsize); } mi_decl_nodiscard void* mi_reallocn(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_reallocn(mi_get_default_heap(),p,count,size); + return mi_heap_reallocn(mi_prim_get_default_heap(),p,count,size); } // Reallocate but free `p` on errors mi_decl_nodiscard void* mi_reallocf(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_reallocf(mi_get_default_heap(),p,newsize); + return mi_heap_reallocf(mi_prim_get_default_heap(),p,newsize); } mi_decl_nodiscard void* mi_rezalloc(void* p, size_t newsize) mi_attr_noexcept { - return mi_heap_rezalloc(mi_get_default_heap(), p, newsize); + return mi_heap_rezalloc(mi_prim_get_default_heap(), p, newsize); } mi_decl_nodiscard void* mi_recalloc(void* p, size_t count, size_t size) mi_attr_noexcept { - return mi_heap_recalloc(mi_get_default_heap(), p, count, size); + return mi_heap_recalloc(mi_prim_get_default_heap(), p, count, size); } @@ -774,7 +780,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const c } mi_decl_nodiscard mi_decl_restrict char* mi_strdup(const char* s) mi_attr_noexcept { - return mi_heap_strdup(mi_get_default_heap(), s); + return mi_heap_strdup(mi_prim_get_default_heap(), s); } // `strndup` using mi_malloc @@ -791,7 +797,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_strndup(mi_heap_t* heap, const } mi_decl_nodiscard mi_decl_restrict char* mi_strndup(const char* s, size_t n) mi_attr_noexcept { - return mi_heap_strndup(mi_get_default_heap(),s,n); + return mi_heap_strndup(mi_prim_get_default_heap(),s,n); } #ifndef __wasi__ @@ -860,7 +866,7 @@ char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) #endif mi_decl_nodiscard mi_decl_restrict char* mi_realpath(const char* fname, char* resolved_name) mi_attr_noexcept { - return mi_heap_realpath(mi_get_default_heap(),fname,resolved_name); + return mi_heap_realpath(mi_prim_get_default_heap(),fname,resolved_name); } #endif @@ -927,7 +933,7 @@ static bool mi_try_new_handler(bool nothrow) { } #endif -static mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) { +mi_decl_export mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) { void* p = NULL; while(p == NULL && mi_try_new_handler(nothrow)) { p = mi_heap_malloc(heap,size); @@ -936,22 +942,22 @@ static mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool } static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) { - return mi_heap_try_new(mi_get_default_heap(), size, nothrow); + return mi_heap_try_new(mi_prim_get_default_heap(), size, nothrow); } -mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) { void* p = mi_heap_malloc(heap,size); if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); return p; } mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) { - return mi_heap_alloc_new(mi_get_default_heap(), size); + return mi_heap_alloc_new(mi_prim_get_default_heap(), size); } -mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { +mi_decl_nodiscard mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) { size_t total; if mi_unlikely(mi_count_size_overflow(count, size, &total)) { mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc @@ -963,7 +969,7 @@ mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new_n(mi_he } mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) { - return mi_heap_alloc_new_n(mi_get_default_heap(), size, count); + return mi_heap_alloc_new_n(mi_prim_get_default_heap(), size, count); } @@ -1024,8 +1030,8 @@ void* _mi_externs[] = { (void*)&mi_zalloc_small, (void*)&mi_heap_malloc, (void*)&mi_heap_zalloc, - (void*)&mi_heap_malloc_small, - (void*)&mi_heap_alloc_new, - (void*)&mi_heap_alloc_new_n + (void*)&mi_heap_malloc_small + // (void*)&mi_heap_alloc_new, + // (void*)&mi_heap_alloc_new_n }; #endif diff --git a/source/luametatex/source/libraries/mimalloc/src/arena.c b/source/luametatex/source/libraries/mimalloc/src/arena.c index 80dd47869..43defe009 100644 --- a/source/luametatex/source/libraries/mimalloc/src/arena.c +++ b/source/luametatex/source/libraries/mimalloc/src/arena.c @@ -11,18 +11,16 @@ large blocks (>= MI_ARENA_MIN_BLOCK_SIZE, 4MiB). In contrast to the rest of mimalloc, the arenas are shared between threads and need to be accessed using atomic operations. -Currently arenas are only used to for huge OS page (1GiB) reservations, -or direct OS memory reservations -- otherwise it delegates to direct allocation from the OS. -In the future, we can expose an API to manually add more kinds of arenas -which is sometimes needed for embedded devices or shared memory for example. -(We can also employ this with WASI or `sbrk` systems to reserve large arenas - on demand and be able to reuse them efficiently). +Arenas are used to for huge OS page (1GiB) reservations or for reserving +OS memory upfront which can be improve performance or is sometimes needed +on embedded devices. We can also employ this with WASI or `sbrk` systems +to reserve large arenas upfront and be able to reuse the memory more effectively. The arena allocation needs to be thread safe and we use an atomic bitmap to allocate. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include // memset #include // ENOMEM @@ -30,17 +28,6 @@ The arena allocation needs to be thread safe and we use an atomic bitmap to allo #include "bitmap.h" // atomic bitmap -// os.c -void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* stats); -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* stats); - -void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_secs, size_t* pages_reserved, size_t* psize); -void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats); - -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); - - /* ----------------------------------------------------------- Arena allocation ----------------------------------------------------------- */ @@ -130,6 +117,10 @@ bool _mi_arena_memid_is_suitable(size_t arena_memid, mi_arena_id_t request_arena return mi_arena_id_is_suitable(id, exclusive, request_arena_id); } +bool _mi_arena_is_os_allocated(size_t arena_memid) { + return (arena_memid == MI_MEMID_OS); +} + static size_t mi_block_count_of_size(size_t size) { return _mi_divide_up(size, MI_ARENA_BLOCK_SIZE); } diff --git a/source/luametatex/source/libraries/mimalloc/src/bitmap.c b/source/luametatex/source/libraries/mimalloc/src/bitmap.c index 4ea9f4afa..6fe745ac1 100644 --- a/source/luametatex/source/libraries/mimalloc/src/bitmap.c +++ b/source/luametatex/source/libraries/mimalloc/src/bitmap.c @@ -18,7 +18,7 @@ between the fields. (This is used in arena allocation) ---------------------------------------------------------------------------- */ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" #include "bitmap.h" /* ----------------------------------------------------------- diff --git a/source/luametatex/source/libraries/mimalloc/src/bitmap.h b/source/luametatex/source/libraries/mimalloc/src/bitmap.h index 0c501ec1f..3476ea46b 100644 --- a/source/luametatex/source/libraries/mimalloc/src/bitmap.h +++ b/source/luametatex/source/libraries/mimalloc/src/bitmap.h @@ -1,5 +1,5 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2019-2020 Microsoft Research, Daan Leijen +Copyright (c) 2019-2023 Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. diff --git a/source/luametatex/source/libraries/mimalloc/src/heap.c b/source/luametatex/source/libraries/mimalloc/src/heap.c index ac2d042bf..7103281f0 100644 --- a/source/luametatex/source/libraries/mimalloc/src/heap.c +++ b/source/luametatex/source/libraries/mimalloc/src/heap.c @@ -6,8 +6,9 @@ terms of the MIT license. A copy of the license can be found in the file -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // mi_prim_get_default_heap #include // memset, memcpy @@ -30,15 +31,18 @@ static bool mi_heap_visit_pages(mi_heap_t* heap, heap_page_visitor_fun* fn, void // visit all pages #if MI_DEBUG>1 size_t total = heap->page_count; - #endif size_t count = 0; + #endif + for (size_t i = 0; i <= MI_BIN_FULL; i++) { mi_page_queue_t* pq = &heap->pages[i]; mi_page_t* page = pq->first; while(page != NULL) { mi_page_t* next = page->next; // save next in case the page gets removed from the queue mi_assert_internal(mi_page_heap(page) == heap); + #if MI_DEBUG>1 count++; + #endif if (!fn(heap, pq, page, arg1, arg2)) return false; page = next; // and continue } @@ -178,7 +182,7 @@ void mi_heap_collect(mi_heap_t* heap, bool force) mi_attr_noexcept { } void mi_collect(bool force) mi_attr_noexcept { - mi_heap_collect(mi_get_default_heap(), force); + mi_heap_collect(mi_prim_get_default_heap(), force); } @@ -188,9 +192,14 @@ void mi_collect(bool force) mi_attr_noexcept { mi_heap_t* mi_heap_get_default(void) { mi_thread_init(); - return mi_get_default_heap(); + return mi_prim_get_default_heap(); +} + +static bool mi_heap_is_default(const mi_heap_t* heap) { + return (heap == mi_prim_get_default_heap()); } + mi_heap_t* mi_heap_get_backing(void) { mi_heap_t* heap = mi_heap_get_default(); mi_assert_internal(heap!=NULL); @@ -237,9 +246,6 @@ static void mi_heap_reset_pages(mi_heap_t* heap) { mi_assert_internal(mi_heap_is_initialized(heap)); // TODO: copy full empty heap instead? memset(&heap->pages_free_direct, 0, sizeof(heap->pages_free_direct)); -#ifdef MI_MEDIUM_DIRECT - memset(&heap->pages_free_medium, 0, sizeof(heap->pages_free_medium)); -#endif _mi_memcpy_aligned(&heap->pages, &_mi_heap_empty.pages, sizeof(heap->pages)); heap->thread_delayed_free = NULL; heap->page_count = 0; @@ -330,6 +336,14 @@ void _mi_heap_destroy_pages(mi_heap_t* heap) { mi_heap_reset_pages(heap); } +#if MI_TRACK_HEAP_DESTROY +static bool mi_cdecl mi_heap_track_block_free(const mi_heap_t* heap, const mi_heap_area_t* area, void* block, size_t block_size, void* arg) { + MI_UNUSED(heap); MI_UNUSED(area); MI_UNUSED(arg); MI_UNUSED(block_size); + mi_track_free_size(block,mi_usable_size(block)); + return true; +} +#endif + void mi_heap_destroy(mi_heap_t* heap) { mi_assert(heap != NULL); mi_assert(mi_heap_is_initialized(heap)); @@ -341,6 +355,10 @@ void mi_heap_destroy(mi_heap_t* heap) { mi_heap_delete(heap); } else { + // track all blocks as freed + #if MI_TRACK_HEAP_DESTROY + mi_heap_visit_blocks(heap, true, mi_heap_track_block_free, NULL); + #endif // free all pages _mi_heap_destroy_pages(heap); mi_heap_free(heap); @@ -425,7 +443,7 @@ mi_heap_t* mi_heap_set_default(mi_heap_t* heap) { mi_assert(mi_heap_is_initialized(heap)); if (heap==NULL || !mi_heap_is_initialized(heap)) return NULL; mi_assert_expensive(mi_heap_is_valid(heap)); - mi_heap_t* old = mi_get_default_heap(); + mi_heap_t* old = mi_prim_get_default_heap(); _mi_heap_set_default_direct(heap); return old; } @@ -475,7 +493,7 @@ bool mi_heap_check_owned(mi_heap_t* heap, const void* p) { } bool mi_check_owned(const void* p) { - return mi_heap_check_owned(mi_get_default_heap(), p); + return mi_heap_check_owned(mi_prim_get_default_heap(), p); } /* ----------------------------------------------------------- @@ -518,9 +536,13 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v uintptr_t free_map[MI_MAX_BLOCKS / sizeof(uintptr_t)]; memset(free_map, 0, sizeof(free_map)); + #if MI_DEBUG>1 size_t free_count = 0; + #endif for (mi_block_t* block = page->free; block != NULL; block = mi_block_next(page,block)) { + #if MI_DEBUG>1 free_count++; + #endif mi_assert_internal((uint8_t*)block >= pstart && (uint8_t*)block < (pstart + psize)); size_t offset = (uint8_t*)block - pstart; mi_assert_internal(offset % bsize == 0); @@ -533,7 +555,9 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v mi_assert_internal(page->capacity == (free_count + page->used)); // walk through all blocks skipping the free ones + #if MI_DEBUG>1 size_t used_count = 0; + #endif for (size_t i = 0; i < page->capacity; i++) { size_t bitidx = (i / sizeof(uintptr_t)); size_t bit = i - (bitidx * sizeof(uintptr_t)); @@ -542,7 +566,9 @@ static bool mi_heap_area_visit_blocks(const mi_heap_area_ex_t* xarea, mi_block_v i += (sizeof(uintptr_t) - 1); // skip a run of free blocks } else if ((m & ((uintptr_t)1 << bit)) == 0) { + #if MI_DEBUG>1 used_count++; + #endif uint8_t* block = pstart + (i * bsize); if (!visitor(mi_page_heap(page), area, block, ubsize, arg)) return false; } diff --git a/source/luametatex/source/libraries/mimalloc/src/init.c b/source/luametatex/source/libraries/mimalloc/src/init.c index c416208cf..51d42acd9 100644 --- a/source/luametatex/source/libraries/mimalloc/src/init.c +++ b/source/luametatex/source/libraries/mimalloc/src/init.c @@ -5,11 +5,13 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" #include // memcpy, memset #include // atexit + // Empty page used to initialize the small free pages array const mi_page_t _mi_page_empty = { 0, false, false, false, false, @@ -22,7 +24,7 @@ const mi_page_t _mi_page_empty = { 0, // used 0, // xblock_size NULL, // local_free - #if MI_ENCODE_FREELIST + #if (MI_PADDING || MI_ENCODE_FREELIST) { 0, 0 }, #endif MI_ATOMIC_VAR_INIT(0), // xthread_free @@ -130,6 +132,10 @@ mi_decl_cache_align static const mi_tld_t tld_empty = { { MI_STATS_NULL } // stats }; +mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { + return _mi_prim_thread_id(); +} + // the thread-local default heap for allocation mi_decl_thread mi_heap_t* _mi_heap_default = (mi_heap_t*)&_mi_heap_empty; @@ -259,13 +265,13 @@ static void mi_thread_data_collect(void) { // Initialize the thread local default heap, called from `mi_thread_init` static bool _mi_heap_init(void) { - if (mi_heap_is_initialized(mi_get_default_heap())) return true; + if (mi_heap_is_initialized(mi_prim_get_default_heap())) return true; if (_mi_is_main_thread()) { // mi_assert_internal(_mi_heap_main.thread_id != 0); // can happen on freeBSD where alloc is called before any initialization // the main heap is statically allocated mi_heap_main_init(); _mi_heap_set_default_direct(&_mi_heap_main); - //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_get_default_heap()); + //mi_assert_internal(_mi_heap_default->tld->heap_backing == mi_prim_get_default_heap()); } else { // use `_mi_os_alloc` to allocate directly from the OS @@ -363,54 +369,12 @@ static bool _mi_heap_done(mi_heap_t* heap) { // to set up the thread local keys. // -------------------------------------------------------- -static void _mi_thread_done(mi_heap_t* default_heap); - -#if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain -#elif defined(_WIN32) && !defined(MI_SHARED_LIB) - // use thread local storage keys to detect thread ending - #include - #include - #if (_WIN32_WINNT < 0x600) // before Windows Vista - WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); - WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); - WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); - WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); - #endif - static DWORD mi_fls_key = (DWORD)(-1); - static void NTAPI mi_fls_done(PVOID value) { - mi_heap_t* heap = (mi_heap_t*)value; - if (heap != NULL) { - _mi_thread_done(heap); - FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 - } - } -#elif defined(MI_USE_PTHREADS) - // use pthread local storage keys to detect thread ending - // (and used with MI_TLS_PTHREADS for the default heap) - pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); - static void mi_pthread_done(void* value) { - if (value!=NULL) _mi_thread_done((mi_heap_t*)value); - } -#elif defined(__wasi__) -// no pthreads in the WebAssembly Standard Interface -#else - #pragma message("define a way to call mi_thread_done when a thread is done") -#endif - // Set up handlers so `mi_thread_done` is called automatically static void mi_process_setup_auto_thread_done(void) { static bool tls_initialized = false; // fine if it races if (tls_initialized) return; tls_initialized = true; - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_fls_key = FlsAlloc(&mi_fls_done); - #elif defined(MI_USE_PTHREADS) - mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); - pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); - #endif + _mi_prim_thread_init_auto_done(); _mi_heap_set_default_direct(&_mi_heap_main); } @@ -442,13 +406,26 @@ void mi_thread_init(void) mi_attr_noexcept } void mi_thread_done(void) mi_attr_noexcept { - _mi_thread_done(mi_get_default_heap()); + _mi_thread_done(NULL); } -static void _mi_thread_done(mi_heap_t* heap) { +void _mi_thread_done(mi_heap_t* heap) +{ + // calling with NULL implies using the default heap + if (heap == NULL) { + heap = mi_prim_get_default_heap(); + if (heap == NULL) return; + } + + // prevent re-entrancy through heap_done/heap_set_default_direct (issue #699) + if (!mi_heap_is_initialized(heap)) { + return; + } + + // adjust stats mi_atomic_decrement_relaxed(&thread_count); _mi_stat_decrease(&_mi_stats_main.threads, 1); - + // check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps... if (heap->thread_id != _mi_thread_id()) return; @@ -459,7 +436,7 @@ static void _mi_thread_done(mi_heap_t* heap) { void _mi_heap_set_default_direct(mi_heap_t* heap) { mi_assert_internal(heap != NULL); #if defined(MI_TLS_SLOT) - mi_tls_slot_set(MI_TLS_SLOT,heap); + mi_prim_tls_slot_set(MI_TLS_SLOT,heap); #elif defined(MI_TLS_PTHREAD_SLOT_OFS) *mi_tls_pthread_heap_slot() = heap; #elif defined(MI_TLS_PTHREAD) @@ -470,16 +447,7 @@ void _mi_heap_set_default_direct(mi_heap_t* heap) { // ensure the default heap is passed to `_mi_thread_done` // setting to a non-NULL value also ensures `mi_thread_done` is called. - #if defined(_WIN32) && defined(MI_SHARED_LIB) - // nothing to do as it is done in DllMain - #elif defined(_WIN32) && !defined(MI_SHARED_LIB) - mi_assert_internal(mi_fls_key != 0); - FlsSetValue(mi_fls_key, heap); - #elif defined(MI_USE_PTHREADS) - if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD - pthread_setspecific(_mi_heap_default_key, heap); - } - #endif + _mi_prim_thread_associate_default_heap(heap); } @@ -492,7 +460,7 @@ static bool os_preloading = true; // true until this module is initialized static bool mi_redirected = false; // true if malloc redirects to mi_malloc // Returns true if this module has not been initialized; Don't use C runtime routines until it returns false. -bool _mi_preloading(void) { +bool mi_decl_noinline _mi_preloading(void) { return os_preloading; } @@ -535,9 +503,9 @@ static void mi_allocator_done(void) { // Called once by the process loader static void mi_process_load(void) { mi_heap_main_init(); - #if defined(MI_TLS_RECURSE_GUARD) + #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) volatile mi_heap_t* dummy = _mi_heap_default; // access TLS to allocate it before setting tls_initialized to true; - MI_UNUSED(dummy); + if (dummy == NULL) return; // use dummy or otherwise the access may get optimized away (issue #697) #endif os_preloading = false; mi_assert_internal(_mi_is_main_thread()); @@ -568,7 +536,7 @@ static void mi_detect_cpu_features(void) { // FSRM for fast rep movsb support (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)) int32_t cpu_info[4]; __cpuid(cpu_info, 7); - _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see + _mi_cpu_has_fsrm = ((cpu_info[3] & (1 << 4)) != 0); // bit 4 of EDX : see } #else static void mi_detect_cpu_features(void) { @@ -579,29 +547,34 @@ static void mi_detect_cpu_features(void) { // Initialize the process; called by thread_init or the process loader void mi_process_init(void) mi_attr_noexcept { // ensure we are called once - if (_mi_process_is_initialized) return; - _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); + static mi_atomic_once_t process_init; + if (!mi_atomic_once(&process_init)) return; _mi_process_is_initialized = true; + _mi_verbose_message("process init: 0x%zx\n", _mi_thread_id()); mi_process_setup_auto_thread_done(); mi_detect_cpu_features(); _mi_os_init(); mi_heap_main_init(); - #if (MI_DEBUG) + #if MI_DEBUG _mi_verbose_message("debug level : %d\n", MI_DEBUG); #endif _mi_verbose_message("secure level: %d\n", MI_SECURE); _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL); + #if MI_TSAN + _mi_verbose_message("thread santizer enabled\n"); + #endif mi_thread_init(); - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - // When building as a static lib the FLS cleanup happens to early for the main thread. + #if defined(_WIN32) + // On windows, when building as a static lib the FLS cleanup happens to early for the main thread. // To avoid this, set the FLS value for the main thread to NULL so the fls cleanup // will not call _mi_thread_done on the (still executing) main thread. See issue #508. - FlsSetValue(mi_fls_key, NULL); + _mi_prim_thread_associate_default_heap(NULL); #endif mi_stats_reset(); // only call stat reset *after* thread init (or the heap tld == NULL) + mi_track_init(); if (mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { size_t pages = mi_option_get_clamp(mi_option_reserve_huge_os_pages, 0, 128*1024); @@ -629,10 +602,9 @@ static void mi_cdecl mi_process_done(void) { if (process_done) return; process_done = true; - #if defined(_WIN32) && !defined(MI_SHARED_LIB) - FlsFree(mi_fls_key); // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208 - #endif - + // release any thread specific resources and ensure _mi_thread_done is called on all but the main thread + _mi_prim_thread_done_auto_done(); + #ifndef MI_SKIP_COLLECT_ON_EXIT #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB) // free all memory if possible on process exit. This is not needed for a stand-alone process diff --git a/source/luametatex/source/libraries/mimalloc/src/options.c b/source/luametatex/source/libraries/mimalloc/src/options.c index e53538f5f..c39e20526 100644 --- a/source/luametatex/source/libraries/mimalloc/src/options.c +++ b/source/luametatex/source/libraries/mimalloc/src/options.c @@ -5,19 +5,14 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" // mi_prim_out_stderr -#include -#include // strtol -#include // strncpy, strncat, strlen, strstr -#include // toupper +#include // FILE +#include // abort #include -#ifdef _MSC_VER -#pragma warning(disable:4996) // strncpy, strncat -#endif - static long mi_max_error_count = 16; // stop outputting errors after this (use < 0 for no limit) static long mi_max_warning_count = 16; // stop outputting warnings after this (use < 0 for no limit) @@ -28,9 +23,6 @@ int mi_version(void) mi_attr_noexcept { return MI_MALLOC_VERSION; } -#ifdef _WIN32 -#include -#endif // -------------------------------------------------------- // Options @@ -171,41 +163,11 @@ void mi_option_disable(mi_option_t option) { mi_option_set_enabled(option,false); } - static void mi_cdecl mi_out_stderr(const char* msg, void* arg) { MI_UNUSED(arg); - if (msg == NULL) return; - #ifdef _WIN32 - // on windows with redirection, the C runtime cannot handle locale dependent output - // after the main thread closes so we use direct console output. - if (!_mi_preloading()) { - // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console - static HANDLE hcon = INVALID_HANDLE_VALUE; - static bool hconIsConsole; - if (hcon == INVALID_HANDLE_VALUE) { - CONSOLE_SCREEN_BUFFER_INFO sbi; - hcon = GetStdHandle(STD_ERROR_HANDLE); - hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); - } - const size_t len = strlen(msg); - if (len > 0 && len < UINT32_MAX) { - DWORD written = 0; - if (hconIsConsole) { - WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); - } - else if (hcon != INVALID_HANDLE_VALUE) { - // use direct write if stderr was redirected - WriteFile(hcon, msg, (DWORD)len, &written, NULL); - } - else { - // finally fall back to fputs after all - fputs(msg, stderr); - } - } + if (msg != NULL && msg[0] != 0) { + _mi_prim_out_stderr(msg); } - #else - fputs(msg, stderr); - #endif } // Since an output function can be registered earliest in the `main` @@ -222,7 +184,7 @@ static void mi_cdecl mi_out_buf(const char* msg, void* arg) { MI_UNUSED(arg); if (msg==NULL) return; if (mi_atomic_load_relaxed(&out_len)>=MI_MAX_DELAY_OUTPUT) return; - size_t n = strlen(msg); + size_t n = _mi_strlen(msg); if (n==0) return; // claim space size_t start = mi_atomic_add_acq_rel(&out_len, n); @@ -314,7 +276,7 @@ static mi_decl_noinline void mi_recurse_exit_prim(void) { static bool mi_recurse_enter(void) { #if defined(__APPLE__) || defined(MI_TLS_RECURSE_GUARD) - if (_mi_preloading()) return true; + if (_mi_preloading()) return false; #endif return mi_recurse_enter_prim(); } @@ -359,9 +321,9 @@ void _mi_fprintf( mi_output_fun* out, void* arg, const char* fmt, ... ) { } static void mi_vfprintf_thread(mi_output_fun* out, void* arg, const char* prefix, const char* fmt, va_list args) { - if (prefix != NULL && strlen(prefix) <= 32 && !_mi_is_main_thread()) { + if (prefix != NULL && _mi_strnlen(prefix,33) <= 32 && !_mi_is_main_thread()) { char tprefix[64]; -/* HH */ snprintf(tprefix, sizeof(tprefix), "%sthread 0x%x: ", prefix, (unsigned) _mi_thread_id()); /* HH: %z is unknown */ + snprintf(tprefix, sizeof(tprefix), "%sthread 0x%llx: ", prefix, (unsigned long long)_mi_thread_id()); mi_vfprintf(out, arg, tprefix, fmt, args); } else { @@ -464,8 +426,20 @@ void _mi_error_message(int err, const char* fmt, ...) { // -------------------------------------------------------- // Initialize options by checking the environment // -------------------------------------------------------- +char _mi_toupper(char c) { + if (c >= 'a' && c <= 'z') return (c - 'a' + 'A'); + else return c; +} -static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { +int _mi_strnicmp(const char* s, const char* t, size_t n) { + if (n == 0) return 0; + for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { + if (_mi_toupper(*s) != _mi_toupper(*t)) break; + } + return (n == 0 ? 0 : *s - *t); +} + +void _mi_strlcpy(char* dest, const char* src, size_t dest_size) { if (dest==NULL || src==NULL || dest_size == 0) return; // copy until end of src, or when dest is (almost) full while (*src != 0 && dest_size > 1) { @@ -476,7 +450,7 @@ static void mi_strlcpy(char* dest, const char* src, size_t dest_size) { *dest = 0; } -static void mi_strlcat(char* dest, const char* src, size_t dest_size) { +void _mi_strlcat(char* dest, const char* src, size_t dest_size) { if (dest==NULL || src==NULL || dest_size == 0) return; // find end of string in the dest buffer while (*dest != 0 && dest_size > 1) { @@ -484,7 +458,21 @@ static void mi_strlcat(char* dest, const char* src, size_t dest_size) { dest_size--; } // and catenate - mi_strlcpy(dest, src, dest_size); + _mi_strlcpy(dest, src, dest_size); +} + +size_t _mi_strlen(const char* s) { + if (s==NULL) return 0; + size_t len = 0; + while(s[len] != 0) { len++; } + return len; +} + +size_t _mi_strnlen(const char* s, size_t max_len) { + if (s==NULL) return 0; + size_t len = 0; + while(s[len] != 0 && len < max_len) { len++; } + return len; } #ifdef MI_NO_GETENV @@ -495,93 +483,27 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) { return false; } #else -#if defined _WIN32 -// On Windows use GetEnvironmentVariable instead of getenv to work -// reliably even when this is invoked before the C runtime is initialized. -// i.e. when `_mi_preloading() == true`. -// Note: on windows, environment names are not case sensitive. -#include static bool mi_getenv(const char* name, char* result, size_t result_size) { - result[0] = 0; - size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); - return (len > 0 && len < result_size); -} -#elif !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) -// On Posix systemsr use `environ` to acces environment variables -// even before the C runtime is initialized. -#if defined(__APPLE__) && defined(__has_include) && __has_include() -#include -static char** mi_get_environ(void) { - return (*_NSGetEnviron()); -} -#else -extern char** environ; -static char** mi_get_environ(void) { - return environ; + if (name==NULL || result == NULL || result_size < 64) return false; + return _mi_prim_getenv(name,result,result_size); } #endif -static int mi_strnicmp(const char* s, const char* t, size_t n) { - if (n == 0) return 0; - for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) { - if (toupper(*s) != toupper(*t)) break; - } - return (n == 0 ? 0 : *s - *t); -} -static bool mi_getenv(const char* name, char* result, size_t result_size) { - if (name==NULL) return false; - const size_t len = strlen(name); - if (len == 0) return false; - char** env = mi_get_environ(); - if (env == NULL) return false; - // compare up to 256 entries - for (int i = 0; i < 256 && env[i] != NULL; i++) { - const char* s = env[i]; - if (mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive - // found it - mi_strlcpy(result, s + len + 1, result_size); - return true; - } - } - return false; -} -#else -// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime -static bool mi_getenv(const char* name, char* result, size_t result_size) { - // cannot call getenv() when still initializing the C runtime. - if (_mi_preloading()) return false; - const char* s = getenv(name); - if (s == NULL) { - // we check the upper case name too. - char buf[64+1]; - size_t len = strlen(name); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; - for (size_t i = 0; i < len; i++) { - buf[i] = toupper(name[i]); - } - buf[len] = 0; - s = getenv(buf); - } - if (s != NULL && strlen(s) < result_size) { - mi_strlcpy(result, s, result_size); - return true; - } - else { - return false; - } -} -#endif // !MI_USE_ENVIRON -#endif // !MI_NO_GETENV + +// TODO: implement ourselves to reduce dependencies on the C runtime +#include // strtol +#include // strstr + static void mi_option_init(mi_option_desc_t* desc) { // Read option value from the environment char s[64+1]; char buf[64+1]; - mi_strlcpy(buf, "mimalloc_", sizeof(buf)); - mi_strlcat(buf, desc->name, sizeof(buf)); + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->name, sizeof(buf)); bool found = mi_getenv(buf,s,sizeof(s)); if (!found && desc->legacy_name != NULL) { - mi_strlcpy(buf, "mimalloc_", sizeof(buf)); - mi_strlcat(buf, desc->legacy_name, sizeof(buf)); + _mi_strlcpy(buf, "mimalloc_", sizeof(buf)); + _mi_strlcat(buf, desc->legacy_name, sizeof(buf)); found = mi_getenv(buf,s,sizeof(s)); if (found) { _mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name ); @@ -589,10 +511,9 @@ static void mi_option_init(mi_option_desc_t* desc) { } if (found) { - size_t len = strlen(s); - if (len >= sizeof(buf)) len = sizeof(buf) - 1; + size_t len = _mi_strnlen(s,sizeof(buf)-1); for (size_t i = 0; i < len; i++) { - buf[i] = (char)toupper(s[i]); + buf[i] = _mi_toupper(s[i]); } buf[len] = 0; if (buf[0]==0 || strstr("1;TRUE;YES;ON", buf) != NULL) { diff --git a/source/luametatex/source/libraries/mimalloc/src/os.c b/source/luametatex/source/libraries/mimalloc/src/os.c index 0f9847417..75895c1b1 100644 --- a/source/luametatex/source/libraries/mimalloc/src/os.c +++ b/source/luametatex/source/libraries/mimalloc/src/os.c @@ -1,118 +1,48 @@ /* ---------------------------------------------------------------------------- -Copyright (c) 2018-2021, Microsoft Research, Daan Leijen +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // ensure mmap flags are defined -#endif - -#if defined(__sun) -// illumos provides new mman.h api when any of these are defined -// otherwise the old api based on caddr_t which predates the void pointers one. -// stock solaris provides only the former, chose to atomically to discard those -// flags only here rather than project wide tough. -#undef _XOPEN_SOURCE -#undef _POSIX_C_SOURCE -#endif #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" -#include // strerror - -#ifdef _MSC_VER -#pragma warning(disable:4996) // strerror -#endif - -#if defined(__wasi__) -#define MI_USE_SBRK -#endif - -#if defined(_WIN32) -#include -#elif defined(__wasi__) -#include // sbrk -#else -#include // mmap -#include // sysconf -#if defined(__linux__) -#include -#include -#if defined(__GLIBC__) -#include // linux mmap flags -#else -#include -#endif -#endif -#if defined(__APPLE__) -#include -#if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR -#include -#endif -#endif -#if defined(__FreeBSD__) || defined(__DragonFly__) -#include -#if __FreeBSD_version >= 1200000 -#include -#include -#endif -#include -#endif -#endif /* ----------------------------------------------------------- Initialization. On windows initializes support for aligned allocation and large OS pages (if MIMALLOC_LARGE_OS_PAGES is true). ----------------------------------------------------------- */ -bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); -bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); -static void* mi_align_up_ptr(void* p, size_t alignment) { - return (void*)_mi_align_up((uintptr_t)p, alignment); -} - -static void* mi_align_down_ptr(void* p, size_t alignment) { - return (void*)_mi_align_down((uintptr_t)p, alignment); -} - - -// page size (initialized properly in `os_init`) -static size_t os_page_size = 4096; - -// minimal allocation granularity -static size_t os_alloc_granularity = 4096; - -// if non-zero, use large page allocation -static size_t large_os_page_size = 0; - -// is memory overcommit allowed? -// set dynamically in _mi_os_init (and if true we use MAP_NORESERVE) -static bool os_overcommit = true; +static mi_os_mem_config_t mi_os_mem_config = { + 4096, // page size + 0, // large page size (usually 2MiB) + 4096, // allocation granularity + true, // has overcommit? (if true we use MAP_NORESERVE on mmap systems) + false // must free whole? (on mmap systems we can free anywhere in a mapped range, but on Windows we must free the entire span) +}; bool _mi_os_has_overcommit(void) { - return os_overcommit; + return mi_os_mem_config.has_overcommit; } // OS (small) page size size_t _mi_os_page_size(void) { - return os_page_size; + return mi_os_mem_config.page_size; } // if large OS pages are supported (2 or 4MiB), then return the size, otherwise return the small page size (4KiB) size_t _mi_os_large_page_size(void) { - return (large_os_page_size != 0 ? large_os_page_size : _mi_os_page_size()); + return (mi_os_mem_config.large_page_size != 0 ? mi_os_mem_config.large_page_size : _mi_os_page_size()); } -#if !defined(MI_USE_SBRK) && !defined(__wasi__) -static bool use_large_os_page(size_t size, size_t alignment) { +bool _mi_os_use_large_page(size_t size, size_t alignment) { // if we have access, check the size and alignment requirements - if (large_os_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; - return ((size % large_os_page_size) == 0 && (alignment % large_os_page_size) == 0); + if (mi_os_mem_config.large_page_size == 0 || !mi_option_is_enabled(mi_option_large_os_pages)) return false; + return ((size % mi_os_mem_config.large_page_size) == 0 && (alignment % mi_os_mem_config.large_page_size) == 0); } -#endif // round to a good OS allocation size (bounded by max 12.5% waste) size_t _mi_os_good_alloc_size(size_t size) { @@ -126,177 +56,24 @@ size_t _mi_os_good_alloc_size(size_t size) { return _mi_align_up(size, align_size); } -#if defined(_WIN32) -// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. -// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) -// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) -// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. -typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { - MiMemExtendedParameterInvalidType = 0, - MiMemExtendedParameterAddressRequirements, - MiMemExtendedParameterNumaNode, - MiMemExtendedParameterPartitionHandle, - MiMemExtendedParameterUserPhysicalHandle, - MiMemExtendedParameterAttributeFlags, - MiMemExtendedParameterMax -} MI_MEM_EXTENDED_PARAMETER_TYPE; - -typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { - struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; - union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; -} MI_MEM_EXTENDED_PARAMETER; - -typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { - PVOID LowestStartingAddress; - PVOID HighestEndingAddress; - SIZE_T Alignment; -} MI_MEM_ADDRESS_REQUIREMENTS; - -#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 - -#include -typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); -static PVirtualAlloc2 pVirtualAlloc2 = NULL; -static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; - -// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 -typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; - -typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); -typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); -typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); -typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); -static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; -static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; -static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; -static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; - -static bool mi_win_enable_large_os_pages(void) -{ - if (large_os_page_size > 0) return true; - - // Try to see if large OS pages are supported - // To use large pages on Windows, we first need access permission - // Set "Lock pages in memory" permission in the group policy editor - // - unsigned long err = 0; - HANDLE token = NULL; - BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); - if (ok) { - TOKEN_PRIVILEGES tp; - ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); - if (ok) { - tp.PrivilegeCount = 1; - tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; - ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); - if (ok) { - err = GetLastError(); - ok = (err == ERROR_SUCCESS); - if (ok) { - large_os_page_size = GetLargePageMinimum(); - } - } - } - CloseHandle(token); - } - if (!ok) { - if (err == 0) err = GetLastError(); - _mi_warning_message("cannot enable large OS page support, error %lu\n", err); - } - return (ok!=0); -} - -void _mi_os_init(void) -{ - os_overcommit = false; - // get the page size - SYSTEM_INFO si; - GetSystemInfo(&si); - if (si.dwPageSize > 0) os_page_size = si.dwPageSize; - if (si.dwAllocationGranularity > 0) os_alloc_granularity = si.dwAllocationGranularity; - // get the VirtualAlloc2 function - HINSTANCE hDll; - hDll = LoadLibrary(TEXT("kernelbase.dll")); - if (hDll != NULL) { - // use VirtualAlloc2FromApp if possible as it is available to Windows store apps - pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); - if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); - FreeLibrary(hDll); - } - // NtAllocateVirtualMemoryEx is used for huge page allocation - hDll = LoadLibrary(TEXT("ntdll.dll")); - if (hDll != NULL) { - pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); - FreeLibrary(hDll); - } - // Try to use Win7+ numa API - hDll = LoadLibrary(TEXT("kernel32.dll")); - if (hDll != NULL) { - pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); - pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); - pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); - pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); - FreeLibrary(hDll); - } - if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { - mi_win_enable_large_os_pages(); - } -} -#elif defined(__wasi__) void _mi_os_init(void) { - os_overcommit = false; - os_page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB - os_alloc_granularity = 16; + _mi_prim_mem_init(&mi_os_mem_config); } -#else // generic unix - -static void os_detect_overcommit(void) { -#if defined(__linux__) - int fd = open("/proc/sys/vm/overcommit_memory", O_RDONLY); - if (fd < 0) return; - char buf[32]; - ssize_t nread = read(fd, &buf, sizeof(buf)); - close(fd); - // - // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) - if (nread >= 1) { - os_overcommit = (buf[0] == '0' || buf[0] == '1'); - } -#elif defined(__FreeBSD__) - int val = 0; - size_t olen = sizeof(val); - if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { - os_overcommit = (val != 0); - } -#else - // default: overcommit is true -#endif -} -void _mi_os_init(void) { - // get the page size - long result = sysconf(_SC_PAGESIZE); - if (result > 0) { - os_page_size = (size_t)result; - os_alloc_granularity = os_page_size; - } - large_os_page_size = 2*MI_MiB; // TODO: can we query the OS for this? - os_detect_overcommit(); -} -#endif +/* ----------------------------------------------------------- + Util +-------------------------------------------------------------- */ +bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats); +bool _mi_os_commit(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); +static void* mi_align_up_ptr(void* p, size_t alignment) { + return (void*)_mi_align_up((uintptr_t)p, alignment); +} -#if defined(MADV_NORMAL) -static int mi_madvise(void* addr, size_t length, int advice) { - #if defined(__sun) - return madvise((caddr_t)addr, length, advice); // Solaris needs cast (issue #520) - #else - return madvise(addr, length, advice); - #endif +static void* mi_align_down_ptr(void* p, size_t alignment) { + return (void*)_mi_align_down((uintptr_t)p, alignment); } -#endif /* ----------------------------------------------------------- @@ -319,7 +96,7 @@ static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; #define MI_HINT_AREA ((uintptr_t)4 << 40) // upto 6TiB (since before win8 there is "only" 8TiB available to processes) #define MI_HINT_MAX ((uintptr_t)30 << 40) // wrap after 30TiB (area after 32TiB is used for huge OS pages) -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { if (try_alignment <= 1 || try_alignment > MI_SEGMENT_SIZE) return NULL; size = _mi_align_up(size, MI_SEGMENT_SIZE); @@ -332,7 +109,7 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) if (hint == 0 || hint > MI_HINT_MAX) { // wrap or initialize uintptr_t init = MI_HINT_BASE; #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of aligned allocations unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); init = init + ((MI_SEGMENT_SIZE * ((r>>17) & 0xFFFFF)) % MI_HINT_AREA); // (randomly 20 bits)*4MiB == 0 to 4TiB #endif uintptr_t expected = hint + size; @@ -343,361 +120,39 @@ static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) return (void*)hint; } #else -static void* mi_os_get_aligned_hint(size_t try_alignment, size_t size) { +void* _mi_os_get_aligned_hint(size_t try_alignment, size_t size) { MI_UNUSED(try_alignment); MI_UNUSED(size); return NULL; } #endif + /* ----------------------------------------------------------- Free memory -------------------------------------------------------------- */ -static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* stats) -{ - if (addr == NULL || size == 0) return true; // || _mi_os_is_huge_reserved(addr) - bool err = false; -#if defined(_WIN32) - DWORD errcode = 0; - err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); - if (err) { errcode = GetLastError(); } - if (errcode == ERROR_INVALID_ADDRESS) { - // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside - // the memory region returned by VirtualAlloc; in that case we need to free using - // the start of the region. - MEMORY_BASIC_INFORMATION info = { 0 }; - VirtualQuery(addr, &info, sizeof(info)); - if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { - errcode = 0; - err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); - if (err) { errcode = GetLastError(); } - } - } - if (errcode != 0) { - _mi_warning_message("unable to release OS memory: error code 0x%x, addr: %p, size: %zu\n", errcode, addr, size); - } -#elif defined(MI_USE_SBRK) || defined(__wasi__) - err = false; // sbrk heap cannot be shrunk -#else - err = (munmap(addr, size) == -1); - if (err) { - _mi_warning_message("unable to release OS memory: %s, addr: %p, size: %zu\n", strerror(errno), addr, size); +static void mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { + MI_UNUSED(tld_stats); + mi_assert_internal((size % _mi_os_page_size()) == 0); + if (addr == NULL || size == 0) return; // || _mi_os_is_huge_reserved(addr) + int err = _mi_prim_free(addr, size); + if (err != 0) { + _mi_warning_message("unable to free OS memory (error: %d (0x%x), size: 0x%zx bytes, address: %p)\n", err, err, size, addr); } -#endif + mi_stats_t* stats = &_mi_stats_main; if (was_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); - return !err; -} - - -/* ----------------------------------------------------------- - Raw allocation on Windows (VirtualAlloc) --------------------------------------------------------------- */ - -#ifdef _WIN32 - -#define MEM_COMMIT_RESERVE (MEM_COMMIT|MEM_RESERVE) - -static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment, DWORD flags) { -#if (MI_INTPTR_SIZE >= 8) - // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations - if (addr == NULL) { - void* hint = mi_os_get_aligned_hint(try_alignment,size); - if (hint != NULL) { - void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); - if (p != NULL) return p; - _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); - // fall through on error - } - } -#endif - // on modern Windows try use VirtualAlloc2 for aligned allocation - if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { - MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; - reqs.Alignment = try_alignment; - MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; - param.Type.Type = MiMemExtendedParameterAddressRequirements; - param.Arg.Pointer = &reqs; - void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); - if (p != NULL) return p; - _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); - // fall through on error - } - // last resort - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } -static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { - mi_assert_internal(!(large_only && !allow_large)); - static _Atomic(size_t) large_page_try_ok; // = 0; - void* p = NULL; - // Try to allocate large OS pages (2MiB) if allowed or required. - if ((large_only || use_large_os_page(size, try_alignment)) - && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { - size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. - // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. - mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); - } - else { - // large OS pages must always reserve and commit. - *is_large = true; - p = mi_win_virtual_allocx(addr, size, try_alignment, flags | MEM_LARGE_PAGES); - if (large_only) return p; - // fall back to non-large page allocation on error (`p == NULL`). - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations - } - } - } - // Fall back to regular page allocation - if (p == NULL) { - *is_large = ((flags&MEM_LARGE_PAGES) != 0); - p = mi_win_virtual_allocx(addr, size, try_alignment, flags); - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); - } - return p; -} - -/* ----------------------------------------------------------- - Raw allocation using `sbrk` or `wasm_memory_grow` --------------------------------------------------------------- */ - -#elif defined(MI_USE_SBRK) || defined(__wasi__) -#if defined(MI_USE_SBRK) - static void* mi_memory_grow( size_t size ) { - void* p = sbrk(size); - if (p == (void*)(-1)) return NULL; - #if !defined(__wasi__) // on wasi this is always zero initialized already (?) - memset(p,0,size); - #endif - return p; - } -#elif defined(__wasi__) - static void* mi_memory_grow( size_t size ) { - size_t base = (size > 0 ? __builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) - : __builtin_wasm_memory_size(0)); - if (base == SIZE_MAX) return NULL; - return (void*)(base * _mi_os_page_size()); - } -#endif - -#if defined(MI_USE_PTHREADS) -static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; -#endif - -static void* mi_heap_grow(size_t size, size_t try_alignment) { - void* p = NULL; - if (try_alignment <= 1) { - // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) - #if defined(MI_USE_PTHREADS) - pthread_mutex_lock(&mi_heap_grow_mutex); - #endif - p = mi_memory_grow(size); - #if defined(MI_USE_PTHREADS) - pthread_mutex_unlock(&mi_heap_grow_mutex); - #endif - } - else { - void* base = NULL; - size_t alloc_size = 0; - // to allocate aligned use a lock to try to avoid thread interaction - // between getting the current size and actual allocation - // (also, `sbrk` is not thread safe in general) - #if defined(MI_USE_PTHREADS) - pthread_mutex_lock(&mi_heap_grow_mutex); - #endif - { - void* current = mi_memory_grow(0); // get current size - if (current != NULL) { - void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space - alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); - base = mi_memory_grow(alloc_size); - } - } - #if defined(MI_USE_PTHREADS) - pthread_mutex_unlock(&mi_heap_grow_mutex); - #endif - if (base != NULL) { - p = mi_align_up_ptr(base, try_alignment); - if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { - // another thread used wasm_memory_grow/sbrk in-between and we do not have enough - // space after alignment. Give up (and waste the space as we cannot shrink :-( ) - // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) - p = NULL; - } - } - } - if (p == NULL) { - _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); - errno = ENOMEM; - return NULL; - } - mi_assert_internal( try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); - return p; -} - -/* ----------------------------------------------------------- - Raw allocation on Unix's (mmap) --------------------------------------------------------------- */ -#else -#define MI_OS_USE_MMAP -static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { - MI_UNUSED(try_alignment); - #if defined(MAP_ALIGNED) // BSD - if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - size_t n = mi_bsr(try_alignment); - if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB - flags |= MAP_ALIGNED(n); - void* p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - } - #elif defined(MAP_ALIGN) // Solaris - if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { - void* p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - #endif - #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) - // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations - if (addr == NULL) { - void* hint = mi_os_get_aligned_hint(try_alignment, size); - if (hint != NULL) { - void* p = mmap(hint, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; - // fall back to regular mmap - } - } - #endif - // regular mmap - void* p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; - // failed to allocate - return NULL; -} -static int mi_unix_mmap_fd(void) { -#if defined(VM_MAKE_TAG) - // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) - int os_tag = (int)mi_option_get(mi_option_os_tag); - if (os_tag < 100 || os_tag > 255) os_tag = 100; - return VM_MAKE_TAG(os_tag); -#else - return -1; -#endif +void _mi_os_free_ex(void* addr, size_t size, bool was_committed, mi_stats_t* tld_stats) { + const size_t csize = _mi_os_good_alloc_size(size); + mi_os_mem_free(addr,csize,was_committed,tld_stats); } -static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { - void* p = NULL; - #if !defined(MAP_ANONYMOUS) - #define MAP_ANONYMOUS MAP_ANON - #endif - #if !defined(MAP_NORESERVE) - #define MAP_NORESERVE 0 - #endif - const int fd = mi_unix_mmap_fd(); - int flags = MAP_PRIVATE | MAP_ANONYMOUS; - if (_mi_os_has_overcommit()) { - flags |= MAP_NORESERVE; - } - #if defined(PROT_MAX) - protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD - #endif - // huge page allocation - if ((large_only || use_large_os_page(size, try_alignment)) && allow_large) { - static _Atomic(size_t) large_page_try_ok; // = 0; - size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); - if (!large_only && try_ok > 0) { - // If the OS is not configured for large OS pages, or the user does not have - // enough permission, the `mmap` will always fail (but it might also fail for other reasons). - // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times - // to avoid too many failing calls to mmap. - mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); - } - else { - int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux - int lfd = fd; - #ifdef MAP_ALIGNED_SUPER - lflags |= MAP_ALIGNED_SUPER; - #endif - #ifdef MAP_HUGETLB - lflags |= MAP_HUGETLB; - #endif - #ifdef MAP_HUGE_1GB - static bool mi_huge_pages_available = true; - if ((size % MI_GiB) == 0 && mi_huge_pages_available) { - lflags |= MAP_HUGE_1GB; - } - else - #endif - { - #ifdef MAP_HUGE_2MB - lflags |= MAP_HUGE_2MB; - #endif - } - #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB - lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; - #endif - if (large_only || lflags != flags) { - // try large OS page allocation - *is_large = true; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - #ifdef MAP_HUGE_1GB - if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { - mi_huge_pages_available = false; // don't try huge 1GiB pages again - _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (error %i)\n", errno); - lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, lflags, lfd); - } - #endif - if (large_only) return p; - if (p == NULL) { - mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations - } - } - } - } - // regular allocation - if (p == NULL) { - *is_large = false; - p = mi_unix_mmapx(addr, size, try_alignment, protect_flags, flags, fd); - if (p != NULL) { - #if defined(MADV_HUGEPAGE) - // Many Linux systems don't allow MAP_HUGETLB but they support instead - // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE - // though since properly aligned allocations will already use large pages if available - // in that case -- in particular for our large regions (in `memory.c`). - // However, some systems only allow THP if called with explicit `madvise`, so - // when large OS pages are enabled for mimalloc, we call `madvise` anyways. - if (allow_large && use_large_os_page(size, try_alignment)) { - if (mi_madvise(p, size, MADV_HUGEPAGE) == 0) { - *is_large = true; // possibly - }; - } - #elif defined(__sun) - if (allow_large && use_large_os_page(size, try_alignment)) { - struct memcntl_mha cmd = {0}; - cmd.mha_pagesize = large_os_page_size; - cmd.mha_cmd = MHA_MAPSIZE_VA; - if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { - *is_large = true; - } - } - #endif - } - } - if (p == NULL) { - _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: %i, address: %p, large only: %d, allow large: %d)\n", size, errno, addr, large_only, allow_large); - } - return p; +void _mi_os_free(void* p, size_t size, mi_stats_t* tld_stats) { + _mi_os_free_ex(p, size, true, tld_stats); } -#endif /* ----------------------------------------------------------- @@ -711,7 +166,11 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo if (!commit) allow_large = false; if (try_alignment == 0) try_alignment = 1; // avoid 0 to ensure there will be no divide by zero when aligning - void* p = NULL; + void* p = NULL; + int err = _mi_prim_alloc(size, try_alignment, commit, allow_large, is_large, &p); + if (err != 0) { + _mi_warning_message("unable to allocate OS memory (error: %d (0x%x), size: 0x%zx bytes, align: 0x%zx, commit: %d, allow large: %d)\n", err, err, size, try_alignment, commit, allow_large); + } /* if (commit && allow_large) { p = _mi_os_try_alloc_from_huge_reserved(size, try_alignment); @@ -722,18 +181,6 @@ static void* mi_os_mem_alloc(size_t size, size_t try_alignment, bool commit, boo } */ - #if defined(_WIN32) - int flags = MEM_RESERVE; - if (commit) { flags |= MEM_COMMIT; } - p = mi_win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); - #elif defined(MI_USE_SBRK) || defined(__wasi__) - MI_UNUSED(allow_large); - *is_large = false; - p = mi_heap_grow(size, try_alignment); - #else - int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); - p = mi_unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); - #endif mi_stat_counter_increase(stats->mmap_calls, 1); if (p != NULL) { _mi_stat_increase(&stats->reserved, size); @@ -760,39 +207,40 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { mi_os_mem_free(p, size, commit, stats); - _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (%zu bytes, address: %p, alignment: %zu, commit: %d)\n", size, p, alignment, commit); + _mi_warning_message("unable to allocate aligned OS memory directly, fall back to over-allocation (size: 0x%zx bytes, address: %p, alignment: 0x%zx, commit: %d)\n", size, p, alignment, commit); if (size >= (SIZE_MAX - alignment)) return NULL; // overflow const size_t over_size = size + alignment; -#if _WIN32 - // over-allocate uncommitted (virtual) memory - p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); - if (p == NULL) return NULL; + if (mi_os_mem_config.must_free_whole) { // win32 virtualAlloc cannot free parts of an allocate block + // over-allocate uncommitted (virtual) memory + p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); + if (p == NULL) return NULL; - // set p to the aligned part in the full region - // note: this is dangerous on Windows as VirtualFree needs the actual region pointer - // but in mi_os_mem_free we handle this (hopefully exceptional) situation. - p = mi_align_up_ptr(p, alignment); + // set p to the aligned part in the full region + // note: this is dangerous on Windows as VirtualFree needs the actual region pointer + // but in mi_os_mem_free we handle this (hopefully exceptional) situation. + p = mi_align_up_ptr(p, alignment); - // explicitly commit only the aligned part - if (commit) { - _mi_os_commit(p, size, NULL, stats); + // explicitly commit only the aligned part + if (commit) { + _mi_os_commit(p, size, NULL, stats); + } + } + else { // mmap can free inside an allocation + // overallocate... + p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats); + if (p == NULL) return NULL; + // and selectively unmap parts around the over-allocated area. (noop on sbrk) + void* aligned_p = mi_align_up_ptr(p, alignment); + size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; + size_t mid_size = _mi_align_up(size, _mi_os_page_size()); + size_t post_size = over_size - pre_size - mid_size; + mi_assert_internal(pre_size < over_size&& post_size < over_size&& mid_size >= size); + if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); + if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); + // we can return the aligned pointer on `mmap` (and sbrk) systems + p = aligned_p; } -#else - // overallocate... - p = mi_os_mem_alloc(over_size, 1, commit, false, is_large, stats); - if (p == NULL) return NULL; - // and selectively unmap parts around the over-allocated area. (noop on sbrk) - void* aligned_p = mi_align_up_ptr(p, alignment); - size_t pre_size = (uint8_t*)aligned_p - (uint8_t*)p; - size_t mid_size = _mi_align_up(size, _mi_os_page_size()); - size_t post_size = over_size - pre_size - mid_size; - mi_assert_internal(pre_size < over_size && post_size < over_size && mid_size >= size); - if (pre_size > 0) mi_os_mem_free(p, pre_size, commit, stats); - if (post_size > 0) mi_os_mem_free((uint8_t*)aligned_p + mid_size, post_size, commit, stats); - // we can return the aligned pointer on `mmap` (and sbrk) systems - p = aligned_p; -#endif } mi_assert_internal(p == NULL || (p != NULL && ((uintptr_t)p % alignment) == 0)); @@ -801,7 +249,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, /* ----------------------------------------------------------- - OS API: alloc, free, alloc_aligned + OS API: alloc and alloc_aligned ----------------------------------------------------------- */ void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { @@ -813,21 +261,9 @@ void* _mi_os_alloc(size_t size, mi_stats_t* tld_stats) { return mi_os_mem_alloc(size, 0, true, false, &is_large, stats); } -void _mi_os_free_ex(void* p, size_t size, bool was_committed, mi_stats_t* tld_stats) { - MI_UNUSED(tld_stats); - mi_stats_t* stats = &_mi_stats_main; - if (size == 0 || p == NULL) return; - size = _mi_os_good_alloc_size(size); - mi_os_mem_free(p, size, was_committed, stats); -} - -void _mi_os_free(void* p, size_t size, mi_stats_t* stats) { - _mi_os_free_ex(p, size, true, stats); -} - void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* large, mi_stats_t* tld_stats) { - MI_UNUSED(&mi_os_get_aligned_hint); // suppress unused warnings + MI_UNUSED(&_mi_os_get_aligned_hint); // suppress unused warnings MI_UNUSED(tld_stats); if (size == 0) return NULL; size = _mi_os_good_alloc_size(size); @@ -880,11 +316,11 @@ void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_of _mi_os_free_ex(start, size + extra, was_committed, tld_stats); } + /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. ----------------------------------------------------------- */ - // OS page align within a given area, either conservative (pages inside the area only), // or not (straddling pages outside the area is possible) static void* mi_os_page_align_areax(bool conservative, void* addr, size_t size, size_t* newsize) { @@ -909,18 +345,6 @@ static void* mi_os_page_align_area_conservative(void* addr, size_t size, size_t* return mi_os_page_align_areax(true, addr, size, newsize); } -static void mi_mprotect_hint(int err) { -#if defined(MI_OS_USE_MMAP) && (MI_SECURE>=2) // guard page around every mimalloc page - if (err == ENOMEM) { - _mi_warning_message("the previous warning may have been caused by a low memory map limit.\n" - " On Linux this is controlled by the vm.max_map_count. For example:\n" - " > sudo sysctl -w vm.max_map_count=262144\n"); - } -#else - MI_UNUSED(err); -#endif -} - // Commit/Decommit memory. // Usually commit is aligned liberal, while decommit is aligned conservative. // (but not for the reset version where we want commit to be conservative as well) @@ -930,7 +354,6 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ size_t csize; void* start = mi_os_page_align_areax(conservative, addr, size, &csize); if (csize == 0) return true; // || _mi_os_is_huge_reserved(addr)) - int err = 0; if (commit) { _mi_stat_increase(&stats->committed, size); // use size for precise commit vs. decommit _mi_stat_counter_increase(&stats->commit_calls, 1); @@ -939,56 +362,9 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ _mi_stat_decrease(&stats->committed, size); } - #if defined(_WIN32) - if (commit) { - // *is_zero = true; // note: if the memory was already committed, the call succeeds but the memory is not zero'd - void* p = VirtualAlloc(start, csize, MEM_COMMIT, PAGE_READWRITE); - err = (p == start ? 0 : GetLastError()); - } - else { - BOOL ok = VirtualFree(start, csize, MEM_DECOMMIT); - err = (ok ? 0 : GetLastError()); - } - #elif defined(__wasi__) - // WebAssembly guests can't control memory protection - #elif 0 && defined(MAP_FIXED) && !defined(__APPLE__) - // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) - if (commit) { - // commit: just change the protection - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) - const int fd = mi_unix_mmap_fd(); - void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); - if (p != start) { err = errno; } - } - #else - // Linux, macOSX and others. - if (commit) { - // commit: ensure we can access the area - err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } - } - else { - #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 - // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) - // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) - err = madvise(start, csize, MADV_DONTNEED); - #else - // decommit: just disable access (also used in debug and secure mode to trap on illegal access) - err = mprotect(start, csize, PROT_NONE); - if (err != 0) { err = errno; } - #endif - //#if defined(MADV_FREE_REUSE) - // while ((err = mi_madvise(start, csize, MADV_FREE_REUSE)) != 0 && errno == EAGAIN) { errno = 0; } - //#endif - } - #endif + int err = _mi_prim_commit(start, csize, commit); if (err != 0) { - _mi_warning_message("%s error: start: %p, csize: 0x%zx, err: %i\n", commit ? "commit" : "decommit", start, csize, err); - mi_mprotect_hint(err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", commit ? "commit" : "decommit", err, err, start, csize); } mi_assert_internal(err == 0); return (err == 0); @@ -1027,45 +403,17 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) else _mi_stat_decrease(&stats->reset, csize); if (!reset) return true; // nothing to do on unreset! - #if (MI_DEBUG>1) && !MI_TRACK_ENABLED + #if (MI_DEBUG>1) && !MI_TRACK_ENABLED // && !MI_TSAN if (MI_SECURE==0) { memset(start, 0, csize); // pretend it is eagerly reset } #endif -#if defined(_WIN32) - // Testing shows that for us (on `malloc-large`) MEM_RESET is 2x faster than DiscardVirtualMemory - void* p = VirtualAlloc(start, csize, MEM_RESET, PAGE_READWRITE); - mi_assert_internal(p == start); - #if 1 - if (p == start && start != NULL) { - VirtualUnlock(start,csize); // VirtualUnlock after MEM_RESET removes the memory from the working set - } - #endif - if (p != start) return false; -#else -#if defined(MADV_FREE) - static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); - int oadvice = (int)mi_atomic_load_relaxed(&advice); - int err; - while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; - if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { - // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on - mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); - err = mi_madvise(start, csize, MADV_DONTNEED); - } -#elif defined(__wasi__) - int err = 0; -#else - int err = mi_madvise(start, csize, MADV_DONTNEED); -#endif + int err = _mi_prim_reset(start, csize); if (err != 0) { - _mi_warning_message("madvise reset error: start: %p, csize: 0x%zx, errno: %i\n", start, csize, errno); + _mi_warning_message("cannot reset OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", err, err, start, csize); } - //mi_assert(err == 0); - if (err != 0) return false; -#endif - return true; + return (err == 0); } // Signal to the OS that the address range is no longer in use @@ -1098,20 +446,9 @@ static bool mi_os_protectx(void* addr, size_t size, bool protect) { _mi_warning_message("cannot mprotect memory allocated in huge OS pages\n"); } */ - int err = 0; -#ifdef _WIN32 - DWORD oldprotect = 0; - BOOL ok = VirtualProtect(start, csize, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); - err = (ok ? 0 : GetLastError()); -#elif defined(__wasi__) - err = 0; -#else - err = mprotect(start, csize, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); - if (err != 0) { err = errno; } -#endif + int err = _mi_prim_protect(start,csize,protect); if (err != 0) { - _mi_warning_message("mprotect error: start: %p, csize: 0x%zx, err: %i\n", start, csize, err); - mi_mprotect_hint(err); + _mi_warning_message("cannot %s OS memory (error: %d (0x%x), address: %p, size: 0x%zx bytes)\n", (protect ? "protect" : "unprotect"), err, err, start, csize); } return (err == 0); } @@ -1126,115 +463,12 @@ bool _mi_os_unprotect(void* addr, size_t size) { -bool _mi_os_shrink(void* p, size_t oldsize, size_t newsize, mi_stats_t* stats) { - // page align conservatively within the range - mi_assert_internal(oldsize > newsize && p != NULL); - if (oldsize < newsize || p == NULL) return false; - if (oldsize == newsize) return true; - - // oldsize and newsize should be page aligned or we cannot shrink precisely - void* addr = (uint8_t*)p + newsize; - size_t size = 0; - void* start = mi_os_page_align_area_conservative(addr, oldsize - newsize, &size); - if (size == 0 || start != addr) return false; - -#ifdef _WIN32 - // we cannot shrink on windows, but we can decommit - return _mi_os_decommit(start, size, stats); -#else - return mi_os_mem_free(start, size, true, stats); -#endif -} - - /* ---------------------------------------------------------------------------- Support for allocating huge OS pages (1Gib) that are reserved up-front and possibly associated with a specific NUMA node. (use `numa_node>=0`) -----------------------------------------------------------------------------*/ #define MI_HUGE_OS_PAGE_SIZE (MI_GiB) -#if defined(_WIN32) && (MI_INTPTR_SIZE >= 8) -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) -{ - mi_assert_internal(size%MI_GiB == 0); - mi_assert_internal(addr != NULL); - const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; - - mi_win_enable_large_os_pages(); - - MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; - // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages - static bool mi_huge_pages_available = true; - if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { - params[0].Type.Type = MiMemExtendedParameterAttributeFlags; - params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; - ULONG param_count = 1; - if (numa_node >= 0) { - param_count++; - params[1].Type.Type = MiMemExtendedParameterNumaNode; - params[1].Arg.ULong = (unsigned)numa_node; - } - SIZE_T psize = size; - void* base = addr; - NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); - if (err == 0 && base != NULL) { - return base; - } - else { - // fall back to regular large pages - mi_huge_pages_available = false; // don't try further huge pages - _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); - } - } - // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation - if (pVirtualAlloc2 != NULL && numa_node >= 0) { - params[0].Type.Type = MiMemExtendedParameterNumaNode; - params[0].Arg.ULong = (unsigned)numa_node; - return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); - } - - // otherwise use regular virtual alloc on older windows - return VirtualAlloc(addr, size, flags, PAGE_READWRITE); -} - -#elif defined(MI_OS_USE_MMAP) && (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) -#include -#ifndef MPOL_PREFERRED -#define MPOL_PREFERRED 1 -#endif -#if defined(SYS_mbind) -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); -} -#else -static long mi_os_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { - MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); - return 0; -} -#endif -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - mi_assert_internal(size%MI_GiB == 0); - bool is_large = true; - void* p = mi_unix_mmap(addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); - if (p == NULL) return NULL; - if (numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes - unsigned long numa_mask = (1UL << numa_node); - // TODO: does `mbind` work correctly for huge OS pages? should we - // use `set_mempolicy` before calling mmap instead? - // see: - long err = mi_os_mbind(p, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); - if (err != 0) { - _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d: %s\n", numa_node, strerror(errno)); - } - } - return p; -} -#else -static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) { - MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(numa_node); - return NULL; -} -#endif #if (MI_INTPTR_SIZE >= 8) // To ensure proper alignment, use our own area for huge OS pages @@ -1253,10 +487,10 @@ static uint8_t* mi_os_claim_huge_pages(size_t pages, size_t* total_size) { if (start == 0) { // Initialize the start address after the 32TiB area start = ((uintptr_t)32 << 40); // 32TiB virtual start address -#if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode - uintptr_t r = _mi_heap_random_next(mi_get_default_heap()); + #if (MI_SECURE>0 || MI_DEBUG==0) // security: randomize start of huge pages unless in debug mode + uintptr_t r = _mi_heap_random_next(mi_prim_get_default_heap()); start = start + ((uintptr_t)MI_HUGE_OS_PAGE_SIZE * ((r>>17) & 0x0FFF)); // (randomly 12bits)*1GiB == between 0 to 4TiB -#endif + #endif } end = start + size; mi_assert_internal(end % MI_SEGMENT_SIZE == 0); @@ -1285,23 +519,29 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse // We allocate one page at the time to be able to abort if it takes too long // or to at least allocate as many as available on the system. mi_msecs_t start_t = _mi_clock_start(); - size_t page; - for (page = 0; page < pages; page++) { + size_t page = 0; + while (page < pages) { // allocate a page void* addr = start + (page * MI_HUGE_OS_PAGE_SIZE); - void* p = mi_os_alloc_huge_os_pagesx(addr, MI_HUGE_OS_PAGE_SIZE, numa_node); + void* p = NULL; + int err = _mi_prim_alloc_huge_os_pages(addr, MI_HUGE_OS_PAGE_SIZE, numa_node, &p); + if (err != 0) { + _mi_warning_message("unable to allocate huge OS page (error: %d (0x%x), address: %p, size: %zx bytes)\n", err, err, addr, MI_HUGE_OS_PAGE_SIZE); + break; + } // Did we succeed at a contiguous address? if (p != addr) { // no success, issue a warning and break if (p != NULL) { - _mi_warning_message("could not allocate contiguous huge page %zu at %p\n", page, addr); + _mi_warning_message("could not allocate contiguous huge OS page %zu at %p\n", page, addr); _mi_os_free(p, MI_HUGE_OS_PAGE_SIZE, &_mi_stats_main); } break; } // success, record it + page++; // increase before timeout check (see issue #711) _mi_stat_increase(&_mi_stats_main.committed, MI_HUGE_OS_PAGE_SIZE); _mi_stat_increase(&_mi_stats_main.reserved, MI_HUGE_OS_PAGE_SIZE); @@ -1315,7 +555,7 @@ void* _mi_os_alloc_huge_os_pages(size_t pages, int numa_node, mi_msecs_t max_mse } } if (elapsed > max_msecs) { - _mi_warning_message("huge page allocation timed out\n"); + _mi_warning_message("huge OS page allocation timed out (after allocating %zu page(s))\n", page); break; } } @@ -1341,113 +581,6 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -#ifdef _WIN32 -static size_t mi_os_numa_nodex(void) { - USHORT numa_node = 0; - if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { - // Extended API is supported - MI_PROCESSOR_NUMBER pnum; - (*pGetCurrentProcessorNumberEx)(&pnum); - USHORT nnode = 0; - BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); - if (ok) { numa_node = nnode; } - } - else if (pGetNumaProcessorNode != NULL) { - // Vista or earlier, use older API that is limited to 64 processors. Issue #277 - DWORD pnum = GetCurrentProcessorNumber(); - UCHAR nnode = 0; - BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); - if (ok) { numa_node = nnode; } - } - return numa_node; -} - -static size_t mi_os_numa_node_countx(void) { - ULONG numa_max = 0; - GetNumaHighestNodeNumber(&numa_max); - // find the highest node number that has actual processors assigned to it. Issue #282 - while(numa_max > 0) { - if (pGetNumaNodeProcessorMaskEx != NULL) { - // Extended API is supported - GROUP_AFFINITY affinity; - if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { - if (affinity.Mask != 0) break; // found the maximum non-empty node - } - } - else { - // Vista or earlier, use older API that is limited to 64 processors. - ULONGLONG mask; - if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { - if (mask != 0) break; // found the maximum non-empty node - }; - } - // max node was invalid or had no processor assigned, try again - numa_max--; - } - return ((size_t)numa_max + 1); -} -#elif defined(__linux__) -#include // getcpu -#include // access - -static size_t mi_os_numa_nodex(void) { -#ifdef SYS_getcpu - unsigned long node = 0; - unsigned long ncpu = 0; - long err = syscall(SYS_getcpu, &ncpu, &node, NULL); - if (err != 0) return 0; - return node; -#else - return 0; -#endif -} -static size_t mi_os_numa_node_countx(void) { - char buf[128]; - unsigned node = 0; - for(node = 0; node < 256; node++) { - // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) - snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); - if (access(buf,R_OK) != 0) break; - } - return (node+1); -} -#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 -static size_t mi_os_numa_nodex(void) { - domainset_t dom; - size_t node; - int policy; - if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; - for (node = 0; node < MAXMEMDOM; node++) { - if (DOMAINSET_ISSET(node, &dom)) return node; - } - return 0ul; -} -static size_t mi_os_numa_node_countx(void) { - size_t ndomains = 0; - size_t len = sizeof(ndomains); - if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; - return ndomains; -} -#elif defined(__DragonFly__) -static size_t mi_os_numa_nodex(void) { - // TODO: DragonFly does not seem to provide any userland means to get this information. - return 0ul; -} -static size_t mi_os_numa_node_countx(void) { - size_t ncpus = 0, nvirtcoresperphys = 0; - size_t len = sizeof(size_t); - if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; - if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; - return nvirtcoresperphys * ncpus; -} -#else -static size_t mi_os_numa_nodex(void) { - return 0; -} -static size_t mi_os_numa_node_countx(void) { - return 1; -} -#endif _Atomic(size_t) _mi_numa_node_count; // = 0 // cache the node count @@ -1459,7 +592,7 @@ size_t _mi_os_numa_node_count_get(void) { count = (size_t)ncount; } else { - count = mi_os_numa_node_countx(); // or detect dynamically + count = _mi_prim_numa_node_count(); // or detect dynamically if (count == 0) count = 1; } mi_atomic_store_release(&_mi_numa_node_count, count); // save it @@ -1473,7 +606,7 @@ int _mi_os_numa_node_get(mi_os_tld_t* tld) { size_t numa_count = _mi_os_numa_node_count(); if (numa_count<=1) return 0; // optimize on single numa node systems: always node 0 // never more than the node count and >= 0 - size_t numa_node = mi_os_numa_nodex(); + size_t numa_node = _mi_prim_numa_node(); if (numa_node >= numa_count) { numa_node = numa_node % numa_count; } return (int)numa_node; } diff --git a/source/luametatex/source/libraries/mimalloc/src/page.c b/source/luametatex/source/libraries/mimalloc/src/page.c index 4250ff358..d0da87a1f 100644 --- a/source/luametatex/source/libraries/mimalloc/src/page.c +++ b/source/luametatex/source/libraries/mimalloc/src/page.c @@ -12,8 +12,8 @@ terms of the MIT license. A copy of the license can be found in the file ----------------------------------------------------------- */ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" /* ----------------------------------------------------------- Definition of page queues for each block size @@ -92,10 +92,12 @@ static bool mi_page_is_valid_init(mi_page_t* page) { } #endif + #if !MI_TRACK_ENABLED && !MI_TSAN mi_block_t* tfree = mi_page_thread_free(page); mi_assert_internal(mi_page_list_is_valid(page, tfree)); //size_t tfree_count = mi_page_list_count(page, tfree); //mi_assert_internal(tfree_count <= page->thread_freed + 1); + #endif size_t free_count = mi_page_list_count(page, page->free) + mi_page_list_count(page, page->local_free); mi_assert_internal(page->used + free_count == page->capacity); @@ -103,6 +105,8 @@ static bool mi_page_is_valid_init(mi_page_t* page) { return true; } +extern bool _mi_process_is_initialized; // has mi_process_init been called? + bool _mi_page_is_valid(mi_page_t* page) { mi_assert_internal(mi_page_is_valid_init(page)); #if MI_SECURE @@ -663,7 +667,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page_size / block_size < (1L<<16)); page->reserved = (uint16_t)(page_size / block_size); mi_assert_internal(page->reserved > 0); - #ifdef MI_ENCODE_FREELIST + #if (MI_PADDING || MI_ENCODE_FREELIST) page->keys[0] = _mi_heap_random_next(heap); page->keys[1] = _mi_heap_random_next(heap); #endif @@ -683,7 +687,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi mi_assert_internal(page->prev == NULL); mi_assert_internal(page->retire_expire == 0); mi_assert_internal(!mi_page_has_aligned(page)); - #if (MI_ENCODE_FREELIST) + #if (MI_PADDING || MI_ENCODE_FREELIST) mi_assert_internal(page->keys[0] != 0); mi_assert_internal(page->keys[1] != 0); #endif @@ -703,12 +707,16 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* pq, bool first_try) { // search through the pages in "next fit" order + #if MI_STAT size_t count = 0; + #endif mi_page_t* page = pq->first; while (page != NULL) { mi_page_t* next = page->next; // remember next + #if MI_STAT count++; + #endif // 0. collect freed blocks by us and other threads _mi_page_free_collect(page, false); @@ -869,7 +877,9 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignme } else { // otherwise find a page with free blocks in our size segregated queues - mi_assert_internal(size >= MI_PADDING_SIZE); + #if MI_PADDING + mi_assert_internal(size >= MI_PADDING_SIZE); + #endif return mi_find_free_page(heap, size); } } @@ -884,8 +894,7 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_al // initialize if necessary if mi_unlikely(!mi_heap_is_initialized(heap)) { - mi_thread_init(); // calls `_mi_heap_init` in turn - heap = mi_get_default_heap(); + heap = mi_heap_get_default(); // calls mi_thread_init if mi_unlikely(!mi_heap_is_initialized(heap)) { return NULL; } } mi_assert_internal(mi_heap_is_initialized(heap)); diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c b/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c new file mode 100644 index 000000000..80bcfa939 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c @@ -0,0 +1,458 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" + +#if defined(MI_MALLOC_OVERRIDE) + +#if !defined(__APPLE__) +#error "this file should only be included on macOS" +#endif + +/* ------------------------------------------------------ + Override system malloc on macOS + This is done through the malloc zone interface. + It seems to be most robust in combination with interposing + though or otherwise we may get zone errors as there are could + be allocations done by the time we take over the + zone. +------------------------------------------------------ */ + +#include +#include +#include // memset +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +// only available from OSX 10.6 +extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import)); +#endif + +/* ------------------------------------------------------ + malloc zone members +------------------------------------------------------ */ + +static size_t zone_size(malloc_zone_t* zone, const void* p) { + MI_UNUSED(zone); + if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out + return mi_usable_size(p); +} + +static void* zone_malloc(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); + return mi_malloc(size); +} + +static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) { + MI_UNUSED(zone); + return mi_calloc(count, size); +} + +static void* zone_valloc(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); + return mi_malloc_aligned(size, _mi_os_page_size()); +} + +static void zone_free(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); + mi_cfree(p); +} + +static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { + MI_UNUSED(zone); + return mi_realloc(p, newsize); +} + +static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { + MI_UNUSED(zone); + return mi_malloc_aligned(size,alignment); +} + +static void zone_destroy(malloc_zone_t* zone) { + MI_UNUSED(zone); + // todo: ignore for now? +} + +static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { + size_t i; + for (i = 0; i < count; i++) { + ps[i] = zone_malloc(zone, size); + if (ps[i] == NULL) break; + } + return i; +} + +static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { + for(size_t i = 0; i < count; i++) { + zone_free(zone, ps[i]); + ps[i] = NULL; + } +} + +static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); MI_UNUSED(size); + mi_collect(false); + return 0; +} + +static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) { + MI_UNUSED(size); + zone_free(zone,p); +} + +static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); + return mi_is_in_heap_region(p); +} + + +/* ------------------------------------------------------ + Introspection members +------------------------------------------------------ */ + +static kern_return_t intro_enumerator(task_t task, void* p, + unsigned type_mask, vm_address_t zone_address, + memory_reader_t reader, + vm_range_recorder_t recorder) +{ + // todo: enumerate all memory + MI_UNUSED(task); MI_UNUSED(p); MI_UNUSED(type_mask); MI_UNUSED(zone_address); + MI_UNUSED(reader); MI_UNUSED(recorder); + return KERN_SUCCESS; +} + +static size_t intro_good_size(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); + return mi_good_size(size); +} + +static boolean_t intro_check(malloc_zone_t* zone) { + MI_UNUSED(zone); + return true; +} + +static void intro_print(malloc_zone_t* zone, boolean_t verbose) { + MI_UNUSED(zone); MI_UNUSED(verbose); + mi_stats_print(NULL); +} + +static void intro_log(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); MI_UNUSED(p); + // todo? +} + +static void intro_force_lock(malloc_zone_t* zone) { + MI_UNUSED(zone); + // todo? +} + +static void intro_force_unlock(malloc_zone_t* zone) { + MI_UNUSED(zone); + // todo? +} + +static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { + MI_UNUSED(zone); + // todo... + stats->blocks_in_use = 0; + stats->size_in_use = 0; + stats->max_size_in_use = 0; + stats->size_allocated = 0; +} + +static boolean_t intro_zone_locked(malloc_zone_t* zone) { + MI_UNUSED(zone); + return false; +} + + +/* ------------------------------------------------------ + At process start, override the default allocator +------------------------------------------------------ */ + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wc99-extensions" +#endif + +static malloc_introspection_t mi_introspect = { + .enumerator = &intro_enumerator, + .good_size = &intro_good_size, + .check = &intro_check, + .print = &intro_print, + .log = &intro_log, + .force_lock = &intro_force_lock, + .force_unlock = &intro_force_unlock, +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) + .statistics = &intro_statistics, + .zone_locked = &intro_zone_locked, +#endif +}; + +static malloc_zone_t mi_malloc_zone = { + // note: even with designators, the order is important for C++ compilation + //.reserved1 = NULL, + //.reserved2 = NULL, + .size = &zone_size, + .malloc = &zone_malloc, + .calloc = &zone_calloc, + .valloc = &zone_valloc, + .free = &zone_free, + .realloc = &zone_realloc, + .destroy = &zone_destroy, + .zone_name = "mimalloc", + .batch_malloc = &zone_batch_malloc, + .batch_free = &zone_batch_free, + .introspect = &mi_introspect, +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) + #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) + .version = 10, + #else + .version = 9, + #endif + // switch to version 9+ on OSX 10.6 to support memalign. + .memalign = &zone_memalign, + .free_definite_size = &zone_free_definite_size, + .pressure_relief = &zone_pressure_relief, + #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) + .claimed_address = &zone_claimed_address, + #endif +#else + .version = 4, +#endif +}; + +#ifdef __cplusplus +} +#endif + + +#if defined(MI_OSX_INTERPOSE) && defined(MI_SHARED_LIB_EXPORT) + +// ------------------------------------------------------ +// Override malloc_xxx and malloc_zone_xxx api's to use only +// our mimalloc zone. Since even the loader uses malloc +// on macOS, this ensures that all allocations go through +// mimalloc (as all calls are interposed). +// The main `malloc`, `free`, etc calls are interposed in `alloc-override.c`, +// Here, we also override macOS specific API's like +// `malloc_zone_calloc` etc. see +// ------------------------------------------------------ + +static inline malloc_zone_t* mi_get_default_zone(void) +{ + static bool init; + if mi_unlikely(!init) { + init = true; + malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see ) + } + return &mi_malloc_zone; +} + +mi_decl_externc int malloc_jumpstart(uintptr_t cookie); +mi_decl_externc void _malloc_fork_prepare(void); +mi_decl_externc void _malloc_fork_parent(void); +mi_decl_externc void _malloc_fork_child(void); + + +static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) { + MI_UNUSED(size); MI_UNUSED(flags); + return mi_get_default_zone(); +} + +static malloc_zone_t* mi_malloc_default_zone (void) { + return mi_get_default_zone(); +} + +static malloc_zone_t* mi_malloc_default_purgeable_zone(void) { + return mi_get_default_zone(); +} + +static void mi_malloc_destroy_zone(malloc_zone_t* zone) { + MI_UNUSED(zone); + // nothing. +} + +static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) { + MI_UNUSED(task); MI_UNUSED(mr); + if (addresses != NULL) *addresses = NULL; + if (count != NULL) *count = 0; + return KERN_SUCCESS; +} + +static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) { + return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name); +} + +static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) { + MI_UNUSED(zone); MI_UNUSED(name); +} + +static int mi_malloc_jumpstart(uintptr_t cookie) { + MI_UNUSED(cookie); + return 1; // or 0 for no error? +} + +static void mi__malloc_fork_prepare(void) { + // nothing +} +static void mi__malloc_fork_parent(void) { + // nothing +} +static void mi__malloc_fork_child(void) { + // nothing +} + +static void mi_malloc_printf(const char* fmt, ...) { + MI_UNUSED(fmt); +} + +static bool zone_check(malloc_zone_t* zone) { + MI_UNUSED(zone); + return true; +} + +static malloc_zone_t* zone_from_ptr(const void* p) { + MI_UNUSED(p); + return mi_get_default_zone(); +} + +static void zone_log(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); MI_UNUSED(p); +} + +static void zone_print(malloc_zone_t* zone, bool b) { + MI_UNUSED(zone); MI_UNUSED(b); +} + +static void zone_print_ptr_info(void* p) { + MI_UNUSED(p); +} + +static void zone_register(malloc_zone_t* zone) { + MI_UNUSED(zone); +} + +static void zone_unregister(malloc_zone_t* zone) { + MI_UNUSED(zone); +} + +// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` +// See: +struct mi_interpose_s { + const void* replacement; + const void* target; +}; +#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } +#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) +#define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun) +__attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) = +{ + + MI_INTERPOSE_MI(malloc_create_zone), + MI_INTERPOSE_MI(malloc_default_purgeable_zone), + MI_INTERPOSE_MI(malloc_default_zone), + MI_INTERPOSE_MI(malloc_destroy_zone), + MI_INTERPOSE_MI(malloc_get_all_zones), + MI_INTERPOSE_MI(malloc_get_zone_name), + MI_INTERPOSE_MI(malloc_jumpstart), + MI_INTERPOSE_MI(malloc_printf), + MI_INTERPOSE_MI(malloc_set_zone_name), + MI_INTERPOSE_MI(_malloc_fork_child), + MI_INTERPOSE_MI(_malloc_fork_parent), + MI_INTERPOSE_MI(_malloc_fork_prepare), + + MI_INTERPOSE_ZONE(zone_batch_free), + MI_INTERPOSE_ZONE(zone_batch_malloc), + MI_INTERPOSE_ZONE(zone_calloc), + MI_INTERPOSE_ZONE(zone_check), + MI_INTERPOSE_ZONE(zone_free), + MI_INTERPOSE_ZONE(zone_from_ptr), + MI_INTERPOSE_ZONE(zone_log), + MI_INTERPOSE_ZONE(zone_malloc), + MI_INTERPOSE_ZONE(zone_memalign), + MI_INTERPOSE_ZONE(zone_print), + MI_INTERPOSE_ZONE(zone_print_ptr_info), + MI_INTERPOSE_ZONE(zone_realloc), + MI_INTERPOSE_ZONE(zone_register), + MI_INTERPOSE_ZONE(zone_unregister), + MI_INTERPOSE_ZONE(zone_valloc) +}; + + +#else + +// ------------------------------------------------------ +// hook into the zone api's without interposing +// This is the official way of adding an allocator but +// it seems less robust than using interpose. +// ------------------------------------------------------ + +static inline malloc_zone_t* mi_get_default_zone(void) +{ + // The first returned zone is the real default + malloc_zone_t** zones = NULL; + unsigned count = 0; + kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count); + if (ret == KERN_SUCCESS && count > 0) { + return zones[0]; + } + else { + // fallback + return malloc_default_zone(); + } +} + +#if defined(__clang__) +__attribute__((constructor(0))) +#else +__attribute__((constructor)) // seems not supported by g++-11 on the M1 +#endif +static void _mi_macos_override_malloc(void) { + malloc_zone_t* purgeable_zone = NULL; + + #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) + // force the purgeable zone to exist to avoid strange bugs + if (malloc_default_purgeable_zone) { + purgeable_zone = malloc_default_purgeable_zone(); + } + #endif + + // Register our zone. + // thomcc: I think this is still needed to put us in the zone list. + malloc_zone_register(&mi_malloc_zone); + // Unregister the default zone, this makes our zone the new default + // as that was the last registered. + malloc_zone_t *default_zone = mi_get_default_zone(); + // thomcc: Unsure if the next test is *always* false or just false in the + // cases I've tried. I'm also unsure if the code inside is needed. at all + if (default_zone != &mi_malloc_zone) { + malloc_zone_unregister(default_zone); + + // Reregister the default zone so free and realloc in that zone keep working. + malloc_zone_register(default_zone); + } + + // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs + // earlier than the default zone. + if (purgeable_zone != NULL) { + malloc_zone_unregister(purgeable_zone); + malloc_zone_register(purgeable_zone); + } + +} +#endif // MI_OSX_INTERPOSE + +#endif // MI_MALLOC_OVERRIDE diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/osx/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/osx/prim.c new file mode 100644 index 000000000..8a2f4e8aa --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/osx/prim.c @@ -0,0 +1,9 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// We use the unix/prim.c with the mmap API on macOSX +#include "../unix/prim.c" diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/prim.c new file mode 100644 index 000000000..9a597d8eb --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/prim.c @@ -0,0 +1,24 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// Select the implementation of the primitives +// depending on the OS. + +#if defined(_WIN32) +#include "windows/prim.c" // VirtualAlloc (Windows) + +#elif defined(__APPLE__) +#include "osx/prim.c" // macOSX (actually defers to mmap in unix/prim.c) + +#elif defined(__wasi__) +#define MI_USE_SBRK +#include "wasi/prim.c" // memory-grow or sbrk (Wasm) + +#else +#include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) + +#endif diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/readme.md b/source/luametatex/source/libraries/mimalloc/src/prim/readme.md new file mode 100644 index 000000000..380dd3a71 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/readme.md @@ -0,0 +1,9 @@ +## Portability Primitives + +This is the portability layer where all primitives needed from the OS are defined. + +- `include/mimalloc/prim.h`: primitive portability API definition. +- `prim.c`: Selects one of `unix/prim.c`, `wasi/prim.c`, or `windows/prim.c` depending on the host platform + (and on macOS, `osx/prim.c` defers to `unix/prim.c`). + +Note: still work in progress, there may still be places in the sources that still depend on OS ifdef's. \ No newline at end of file diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c new file mode 100644 index 000000000..8d9c7a723 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c @@ -0,0 +1,838 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined +#endif + +#if defined(__sun) +// illumos provides new mman.h api when any of these are defined +// otherwise the old api based on caddr_t which predates the void pointers one. +// stock solaris provides only the former, chose to atomically to discard those +// flags only here rather than project wide tough. +#undef _XOPEN_SOURCE +#undef _POSIX_C_SOURCE +#endif + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +#include // mmap +#include // sysconf + +#if defined(__linux__) + #include + #include + #if defined(__GLIBC__) + #include // linux mmap flags + #else + #include + #endif +#elif defined(__APPLE__) + #include + #if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR + #include + #endif +#elif defined(__FreeBSD__) || defined(__DragonFly__) + #include + #if __FreeBSD_version >= 1200000 + #include + #include + #endif + #include +#endif + +#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) + #define MI_HAS_SYSCALL_H + #include +#endif + +//------------------------------------------------------------------------------------ +// Use syscalls for some primitives to allow for libraries that override open/read/close etc. +// and do allocation themselves; using syscalls prevents recursion when mimalloc is +// still initializing (issue #713) +//------------------------------------------------------------------------------------ + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access) + +static int mi_prim_open(const char* fpath, int open_flags) { + return syscall(SYS_open,fpath,open_flags,0); +} +static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return syscall(SYS_read,fd,buf,bufsize); +} +static int mi_prim_close(int fd) { + return syscall(SYS_close,fd); +} +static int mi_prim_access(const char *fpath, int mode) { + return syscall(SYS_access,fpath,mode); +} + +#elif !defined(__APPLE__) // avoid unused warnings + +static int mi_prim_open(const char* fpath, int open_flags) { + return open(fpath,open_flags,0); +} +static mi_ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return read(fd,buf,bufsize); +} +static int mi_prim_close(int fd) { + return close(fd); +} +static int mi_prim_access(const char *fpath, int mode) { + return access(fpath,mode); +} + +#endif + + + +//--------------------------------------------- +// init +//--------------------------------------------- + +static bool unix_detect_overcommit(void) { + bool os_overcommit = true; +#if defined(__linux__) + int fd = mi_prim_open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd >= 0) { + char buf[32]; + ssize_t nread = mi_prim_read(fd, &buf, sizeof(buf)); + mi_prim_close(fd); + // + // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) + if (nread >= 1) { + os_overcommit = (buf[0] == '0' || buf[0] == '1'); + } + } +#elif defined(__FreeBSD__) + int val = 0; + size_t olen = sizeof(val); + if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { + os_overcommit = (val != 0); + } +#else + // default: overcommit is true +#endif + return os_overcommit; +} + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) { + long psize = sysconf(_SC_PAGESIZE); + if (psize > 0) { + config->page_size = (size_t)psize; + config->alloc_granularity = (size_t)psize; + } + config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? + config->has_overcommit = unix_detect_overcommit(); + config->must_free_whole = false; // mmap can free in parts +} + + +//--------------------------------------------- +// free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + bool err = (munmap(addr, size) == -1); + return (err ? errno : 0); +} + + +//--------------------------------------------- +// mmap +//--------------------------------------------- + +static int unix_madvise(void* addr, size_t size, int advice) { + #if defined(__sun) + return madvise((caddr_t)addr, size, advice); // Solaris needs cast (issue #520) + #else + return madvise(addr, size, advice); + #endif +} + +static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + MI_UNUSED(try_alignment); + void* p = NULL; + #if defined(MAP_ALIGNED) // BSD + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + size_t n = mi_bsr(try_alignment); + if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB + p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #elif defined(MAP_ALIGN) // Solaris + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + #endif + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = _mi_os_get_aligned_hint(try_alignment, size); + if (hint != NULL) { + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #endif + // regular mmap + p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p!=MAP_FAILED) return p; + // failed to allocate + return NULL; +} + +static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { + void* p = NULL; + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + int fd = -1; + if (_mi_os_has_overcommit()) { + flags |= MAP_NORESERVE; + } + #if defined(PROT_MAX) + protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD + #endif + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) { os_tag = 100; } + fd = VM_MAKE_TAG(os_tag); + #endif + // huge page allocation + if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) { + static _Atomic(size_t) large_page_try_ok; // = 0; + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // If the OS is not configured for large OS pages, or the user does not have + // enough permission, the `mmap` will always fail (but it might also fail for other reasons). + // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times + // to avoid too many failing calls to mmap. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux + int lfd = fd; + #ifdef MAP_ALIGNED_SUPER + lflags |= MAP_ALIGNED_SUPER; + #endif + #ifdef MAP_HUGETLB + lflags |= MAP_HUGETLB; + #endif + #ifdef MAP_HUGE_1GB + static bool mi_huge_pages_available = true; + if ((size % MI_GiB) == 0 && mi_huge_pages_available) { + lflags |= MAP_HUGE_1GB; + } + else + #endif + { + #ifdef MAP_HUGE_2MB + lflags |= MAP_HUGE_2MB; + #endif + } + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif + if (large_only || lflags != flags) { + // try large OS page allocation + *is_large = true; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif + if (large_only) return p; + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations + } + } + } + } + // regular allocation + if (p == NULL) { + *is_large = false; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); + if (p != NULL) { + #if defined(MADV_HUGEPAGE) + // Many Linux systems don't allow MAP_HUGETLB but they support instead + // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE + // though since properly aligned allocations will already use large pages if available + // in that case -- in particular for our large regions (in `memory.c`). + // However, some systems only allow THP if called with explicit `madvise`, so + // when large OS pages are enabled for mimalloc, we call `madvise` anyways. + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) { + *is_large = true; // possibly + }; + } + #elif defined(__sun) + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + struct memcntl_mha cmd = {0}; + cmd.mha_pagesize = large_os_page_size; + cmd.mha_cmd = MHA_MAPSIZE_VA; + if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { + *is_large = true; + } + } + #endif + } + } + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : errno); +} + + +//--------------------------------------------- +// Commit/Reset +//--------------------------------------------- + +static void unix_mprotect_hint(int err) { + #if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("The next warning may be caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n" + " For example: sudo sysctl -w vm.max_map_count=262144\n"); + } + #else + MI_UNUSED(err); + #endif +} + + +int _mi_prim_commit(void* start, size_t size, bool commit) { + /* + #if 0 && defined(MAP_FIXED) && !defined(__APPLE__) + // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) + if (commit) { + // commit: just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { + // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) + const int fd = mi_unix_mmap_fd(); + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } + } + #else + */ + int err = 0; + if (commit) { + // commit: ensure we can access the area + err = mprotect(start, size, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { + #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) + err = unix_madvise(start, size, MADV_DONTNEED); + #else + // decommit: just disable access (also used in debug and secure mode to trap on illegal access) + err = mprotect(start, size, PROT_NONE); + if (err != 0) { err = errno; } + #endif + } + unix_mprotect_hint(err); + return err; +} + +int _mi_prim_reset(void* start, size_t size) { + #if defined(MADV_FREE) + static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); + int oadvice = (int)mi_atomic_load_relaxed(&advice); + int err; + while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; + if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { + // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on + mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); + } + #else + int err = unix_madvise(start, csize, MADV_DONTNEED); + #endif + return err; +} + +int _mi_prim_protect(void* start, size_t size, bool protect) { + int err = mprotect(start, size, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + unix_mprotect_hint(err); + return err; +} + + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +#if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) && !defined(__CYGWIN__) + +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_mbind) +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); + return 0; +} +#endif + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + bool is_large = true; + *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + unsigned long numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? + // see: + long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + err = errno; + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err); + } + } + return (*addr != NULL ? 0 : errno); +} + +#else + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOMEM; +} + +#endif + +//--------------------------------------------- +// NUMA nodes +//--------------------------------------------- + +#if defined(__linux__) + +#include // snprintf + +size_t _mi_prim_numa_node(void) { + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu) + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); + if (err != 0) return 0; + return node; + #else + return 0; + #endif +} + +size_t _mi_prim_numa_node_count(void) { + char buf[128]; + unsigned node = 0; + for(node = 0; node < 256; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); + if (mi_prim_access(buf,R_OK) != 0) break; + } + return (node+1); +} + +#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 + +size_t _mi_prim_numa_node(void) { + domainset_t dom; + size_t node; + int policy; + if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; + for (node = 0; node < MAXMEMDOM; node++) { + if (DOMAINSET_ISSET(node, &dom)) return node; + } + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ndomains = 0; + size_t len = sizeof(ndomains); + if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; + return ndomains; +} + +#elif defined(__DragonFly__) + +size_t _mi_prim_numa_node(void) { + // TODO: DragonFly does not seem to provide any userland means to get this information. + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ncpus = 0, nvirtcoresperphys = 0; + size_t len = sizeof(size_t); + if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; + if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; + return nvirtcoresperphys * ncpus; +} + +#else + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + +#endif + +// ---------------------------------------------------------------- +// Clock +// ---------------------------------------------------------------- + +#include + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif +} + +#endif + + + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +#if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__) +#include +#include +#include + +#if defined(__APPLE__) +#include +#endif + +#if defined(__HAIKU__) +#include +#endif + +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); +} + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + pinfo->utime = timeval_secs(&rusage.ru_utime); + pinfo->stime = timeval_secs(&rusage.ru_stime); +#if !defined(__HAIKU__) + pinfo->page_faults = rusage.ru_majflt; +#endif +#if defined(__HAIKU__) + // Haiku does not have (yet?) a way to + // get these stats per process + thread_info tid; + area_info mem; + ssize_t c; + get_thread_info(find_thread(0), &tid); + while (get_next_area_info(tid.team, &c, &mem) == B_OK) { + pinfo->peak_rss += mem.ram_size; + } + pinfo->page_faults = 0; +#elif defined(__APPLE__) + pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } +#else + pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB +#endif + // use defaults for commit +} + +#else + +#ifndef __wasi__ +// WebAssembly instances are not processes +#pragma message("define a way to get process info") +#endif + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + +#endif + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) { + fputs(msg,stderr); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +#if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) +// On Posix systemsr use `environ` to access environment variables +// even before the C runtime is initialized. +#if defined(__APPLE__) && defined(__has_include) && __has_include() +#include +static char** mi_get_environ(void) { + return (*_NSGetEnviron()); +} +#else +extern char** environ; +static char** mi_get_environ(void) { + return environ; +} +#endif +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + if (name==NULL) return false; + const size_t len = _mi_strlen(name); + if (len == 0) return false; + char** env = mi_get_environ(); + if (env == NULL) return false; + // compare up to 10000 entries + for (int i = 0; i < 10000 && env[i] != NULL; i++) { + const char* s = env[i]; + if (_mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive + // found it + _mi_strlcpy(result, s + len + 1, result_size); + return true; + } + } + return false; +} +#else +// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. + char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} +#endif // !MI_USE_ENVIRON + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(__APPLE__) + +#include +#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 +#include +#include +#endif +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 + // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf + // may fail silently on macOS. See PR #390, and + return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); + #else + // fall back on older macOS + arc4random_buf(buf, buf_len); + return true; + #endif +} + +#elif defined(__ANDROID__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__sun) + +#include +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} + +#elif defined(__linux__) || defined(__HAIKU__) + +#include +#include +#include +#include + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. + // (see ) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getrandom) + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_load_acquire(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (errno != ENOSYS) return false; + mi_atomic_store_release(&no_getrandom, (uintptr_t)1); // don't call again, and fall back to /dev/urandom + } + #endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = mi_prim_open("/dev/urandom", flags); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = mi_prim_read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + mi_prim_close(fd); + return (count==buf_len); +} + +#else + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} + +#endif + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if defined(MI_USE_PTHREADS) + +// use pthread local storage keys to detect thread ending +// (and used with MI_TLS_PTHREADS for the default heap) +pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); + +static void mi_pthread_done(void* value) { + if (value!=NULL) { + _mi_thread_done((mi_heap_t*)value); + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); + pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing to do +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD + pthread_setspecific(_mi_heap_default_key, heap); + } +} + +#else + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} + +#endif diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c new file mode 100644 index 000000000..cb3ce1a7f --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c @@ -0,0 +1,265 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +//--------------------------------------------- +// Initialize +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) { + config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB + config->alloc_granularity = 16; + config->has_overcommit = false; + config->must_free_whole = true; +} + +//--------------------------------------------- +// Free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + MI_UNUSED(addr); MI_UNUSED(size); + // wasi heap cannot be shrunk + return 0; +} + + +//--------------------------------------------- +// Allocation: sbrk or memory_grow +//--------------------------------------------- + +#if defined(MI_USE_SBRK) + static void* mi_memory_grow( size_t size ) { + void* p = sbrk(size); + if (p == (void*)(-1)) return NULL; + #if !defined(__wasi__) // on wasi this is always zero initialized already (?) + memset(p,0,size); + #endif + return p; + } +#elif defined(__wasi__) + static void* mi_memory_grow( size_t size ) { + size_t base = (size > 0 ? __builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) + : __builtin_wasm_memory_size(0)); + if (base == SIZE_MAX) return NULL; + return (void*)(base * _mi_os_page_size()); + } +#endif + +#if defined(MI_USE_PTHREADS) +static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif + +static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { + void* p = NULL; + if (try_alignment <= 1) { + // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + p = mi_memory_grow(size); + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + } + else { + void* base = NULL; + size_t alloc_size = 0; + // to allocate aligned use a lock to try to avoid thread interaction + // between getting the current size and actual allocation + // (also, `sbrk` is not thread safe in general) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + { + void* current = mi_memory_grow(0); // get current size + if (current != NULL) { + void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space + alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); + base = mi_memory_grow(alloc_size); + } + } + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + if (base != NULL) { + p = mi_align_up_ptr(base, try_alignment); + if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { + // another thread used wasm_memory_grow/sbrk in-between and we do not have enough + // space after alignment. Give up (and waste the space as we cannot shrink :-( ) + // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) + p = NULL; + } + } + } + /* + if (p == NULL) { + _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); + errno = ENOMEM; + return NULL; + } + */ + mi_assert_internal( p == NULL || try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { + MI_UNUSED(allow_large); MI_UNUSED(commit); + *is_large = false; + *addr = mi_prim_mem_grow(size, try_alignment); + return (*addr != NULL ? 0 : ENOMEM); +} + + +//--------------------------------------------- +// Commit/Reset/Protect +//--------------------------------------------- + +int _mi_prim_commit(void* addr, size_t size, bool commit) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(commit); + return 0; +} + +int _mi_prim_reset(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); + return 0; +} + + +//--------------------------------------------- +// Huge pages and NUMA nodes +//--------------------------------------------- + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOSYS; +} + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +#include + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif +} + +#endif + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) { + fputs(msg,stderr); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. + char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw-mimalloc.wprp b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw-mimalloc.wprp new file mode 100644 index 000000000..b00cd7adf --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw-mimalloc.wprp @@ -0,0 +1,61 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.h b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.h new file mode 100644 index 000000000..4e0a092a1 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.h @@ -0,0 +1,905 @@ +//**********************************************************************` +//* This is an include file generated by Message Compiler. *` +//* *` +//* Copyright (c) Microsoft Corporation. All Rights Reserved. *` +//**********************************************************************` +#pragma once + +//***************************************************************************** +// +// Notes on the ETW event code generated by MC: +// +// - Structures and arrays of structures are treated as an opaque binary blob. +// The caller is responsible for packing the data for the structure into a +// single region of memory, with no padding between values. The macro will +// have an extra parameter for the length of the blob. +// - Arrays of nul-terminated strings must be packed by the caller into a +// single binary blob containing the correct number of strings, with a nul +// after each string. The size of the blob is specified in characters, and +// includes the final nul. +// - Arrays of SID are treated as a single binary blob. The caller is +// responsible for packing the SID values into a single region of memory with +// no padding. +// - The length attribute on the data element in the manifest is significant +// for values with intype win:UnicodeString, win:AnsiString, or win:Binary. +// The length attribute must be specified for win:Binary, and is optional for +// win:UnicodeString and win:AnsiString (if no length is given, the strings +// are assumed to be nul-terminated). For win:UnicodeString, the length is +// measured in characters, not bytes. +// - For an array of win:UnicodeString, win:AnsiString, or win:Binary, the +// length attribute applies to every value in the array, so every value in +// the array must have the same length. The values in the array are provided +// to the macro via a single pointer -- the caller is responsible for packing +// all of the values into a single region of memory with no padding between +// values. +// - Values of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary can be generated and collected on Vista or later. +// However, they may not decode properly without the Windows 10 2018 Fall +// Update. +// - Arrays of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary must be packed by the caller into a single region of +// memory. The format for each item is a UINT16 byte-count followed by that +// many bytes of data. When providing the array to the generated macro, you +// must provide the total size of the packed array data, including the UINT16 +// sizes for each item. In the case of win:CountedUnicodeString, the data +// size is specified in WCHAR (16-bit) units. In the case of +// win:CountedAnsiString and win:CountedBinary, the data size is specified in +// bytes. +// +//***************************************************************************** + +#include +#include +#include + +#ifndef ETW_INLINE + #ifdef _ETW_KM_ + // In kernel mode, save stack space by never inlining templates. + #define ETW_INLINE DECLSPEC_NOINLINE __inline + #else + // In user mode, save code size by inlining templates as appropriate. + #define ETW_INLINE __inline + #endif +#endif // ETW_INLINE + +#if defined(__cplusplus) +extern "C" { +#endif + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_USE_KERNEL_MODE_APIS macro: +// Controls whether the generated code uses kernel-mode or user-mode APIs. +// - Set to 0 to use Windows user-mode APIs such as EventRegister. +// - Set to 1 to use Windows kernel-mode APIs such as EtwRegister. +// Default is based on whether the _ETW_KM_ macro is defined (i.e. by wdm.h). +// Note that the APIs can also be overridden directly, e.g. by setting the +// MCGEN_EVENTWRITETRANSFER or MCGEN_EVENTREGISTER macros. +// +#ifndef MCGEN_USE_KERNEL_MODE_APIS + #ifdef _ETW_KM_ + #define MCGEN_USE_KERNEL_MODE_APIS 1 + #else + #define MCGEN_USE_KERNEL_MODE_APIS 0 + #endif +#endif // MCGEN_USE_KERNEL_MODE_APIS + +// +// MCGEN_HAVE_EVENTSETINFORMATION macro: +// Controls how McGenEventSetInformation uses the EventSetInformation API. +// - Set to 0 to disable the use of EventSetInformation +// (McGenEventSetInformation will always return an error). +// - Set to 1 to directly invoke MCGEN_EVENTSETINFORMATION. +// - Set to 2 to to locate EventSetInformation at runtime via GetProcAddress +// (user-mode) or MmGetSystemRoutineAddress (kernel-mode). +// Default is determined as follows: +// - If MCGEN_EVENTSETINFORMATION has been customized, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else if the target OS version has EventSetInformation, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else set to 2 (i.e. try to dynamically locate EventSetInformation). +// Note that an McGenEventSetInformation function will only be generated if one +// or more provider in a manifest has provider traits. +// +#ifndef MCGEN_HAVE_EVENTSETINFORMATION + #ifdef MCGEN_EVENTSETINFORMATION // if MCGEN_EVENTSETINFORMATION has been customized, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #elif MCGEN_USE_KERNEL_MODE_APIS // else if using kernel-mode APIs, + #if NTDDI_VERSION >= 0x06040000 // if target OS is Windows 10 or later, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EtwSetInformation" via MmGetSystemRoutineAddress. + #endif // else (using user-mode APIs) + #else // if target OS and SDK is Windows 8 or later, + #if WINVER >= 0x0602 && defined(EVENT_FILTER_TYPE_SCHEMATIZED) + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EventSetInformation" via GetModuleHandleExW/GetProcAddress. + #endif + #endif +#endif // MCGEN_HAVE_EVENTSETINFORMATION + +// +// MCGEN Override Macros +// +// The following override macros may be defined before including this header +// to control the APIs used by this header: +// +// - MCGEN_EVENTREGISTER +// - MCGEN_EVENTUNREGISTER +// - MCGEN_EVENTSETINFORMATION +// - MCGEN_EVENTWRITETRANSFER +// +// If the the macro is undefined, the MC implementation will default to the +// corresponding ETW APIs. For example, if the MCGEN_EVENTREGISTER macro is +// undefined, the EventRegister[MyProviderName] macro will use EventRegister +// in user mode and will use EtwRegister in kernel mode. +// +// To prevent issues from conflicting definitions of these macros, the value +// of the override macro will be used as a suffix in certain internal function +// names. Because of this, the override macros must follow certain rules: +// +// - The macro must be defined before any MC-generated header is included and +// must not be undefined or redefined after any MC-generated header is +// included. Different translation units (i.e. different .c or .cpp files) +// may set the macros to different values, but within a translation unit +// (within a single .c or .cpp file), the macro must be set once and not +// changed. +// - The override must be an object-like macro, not a function-like macro +// (i.e. the override macro must not have a parameter list). +// - The override macro's value must be a simple identifier, i.e. must be +// something that starts with a letter or '_' and contains only letters, +// numbers, and '_' characters. +// - If the override macro's value is the name of a second object-like macro, +// the second object-like macro must follow the same rules. (The override +// macro's value can also be the name of a function-like macro, in which +// case the function-like macro does not need to follow the same rules.) +// +// For example, the following will cause compile errors: +// +// #define MCGEN_EVENTWRITETRANSFER MyNamespace::MyClass::MyFunction // Value has non-identifier characters (colon). +// #define MCGEN_EVENTWRITETRANSFER GetEventWriteFunctionPointer(7) // Value has non-identifier characters (parentheses). +// #define MCGEN_EVENTWRITETRANSFER(h,e,a,r,c,d) EventWrite(h,e,c,d) // Override is defined as a function-like macro. +// #define MY_OBJECT_LIKE_MACRO MyNamespace::MyClass::MyEventWriteFunction +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // Evaluates to something with non-identifier characters (colon). +// +// The following would be ok: +// +// #define MCGEN_EVENTWRITETRANSFER MyEventWriteFunction1 // OK, suffix will be "MyEventWriteFunction1". +// #define MY_OBJECT_LIKE_MACRO MyEventWriteFunction2 +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // OK, suffix will be "MyEventWriteFunction2". +// #define MY_FUNCTION_LIKE_MACRO(h,e,a,r,c,d) MyNamespace::MyClass::MyEventWriteFunction3(h,e,c,d) +// #define MCGEN_EVENTWRITETRANSFER MY_FUNCTION_LIKE_MACRO // OK, suffix will be "MY_FUNCTION_LIKE_MACRO". +// +#ifndef MCGEN_EVENTREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTREGISTER EtwRegister + #else + #define MCGEN_EVENTREGISTER EventRegister + #endif +#endif // MCGEN_EVENTREGISTER +#ifndef MCGEN_EVENTUNREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTUNREGISTER EtwUnregister + #else + #define MCGEN_EVENTUNREGISTER EventUnregister + #endif +#endif // MCGEN_EVENTUNREGISTER +#ifndef MCGEN_EVENTSETINFORMATION + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTSETINFORMATION EtwSetInformation + #else + #define MCGEN_EVENTSETINFORMATION EventSetInformation + #endif +#endif // MCGEN_EVENTSETINFORMATION +#ifndef MCGEN_EVENTWRITETRANSFER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTWRITETRANSFER EtwWriteTransfer + #else + #define MCGEN_EVENTWRITETRANSFER EventWriteTransfer + #endif +#endif // MCGEN_EVENTWRITETRANSFER + +// +// MCGEN_EVENT_ENABLED macro: +// Override to control how the EventWrite[EventName] macros determine whether +// an event is enabled. The default behavior is for EventWrite[EventName] to +// use the EventEnabled[EventName] macros. +// +#ifndef MCGEN_EVENT_ENABLED +#define MCGEN_EVENT_ENABLED(EventName) EventEnabled##EventName() +#endif + +// +// MCGEN_EVENT_ENABLED_FORCONTEXT macro: +// Override to control how the EventWrite[EventName]_ForContext macros +// determine whether an event is enabled. The default behavior is for +// EventWrite[EventName]_ForContext to use the +// EventEnabled[EventName]_ForContext macros. +// +#ifndef MCGEN_EVENT_ENABLED_FORCONTEXT +#define MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, EventName) EventEnabled##EventName##_ForContext(pContext) +#endif + +// +// MCGEN_ENABLE_CHECK macro: +// Determines whether the specified event would be considered as enabled +// based on the state of the specified context. Slightly faster than calling +// McGenEventEnabled directly. +// +#ifndef MCGEN_ENABLE_CHECK +#define MCGEN_ENABLE_CHECK(Context, Descriptor) (Context.IsEnabled && McGenEventEnabled(&Context, &Descriptor)) +#endif + +#if !defined(MCGEN_TRACE_CONTEXT_DEF) +#define MCGEN_TRACE_CONTEXT_DEF +// This structure is for use by MC-generated code and should not be used directly. +typedef struct _MCGEN_TRACE_CONTEXT +{ + TRACEHANDLE RegistrationHandle; + TRACEHANDLE Logger; // Used as pointer to provider traits. + ULONGLONG MatchAnyKeyword; + ULONGLONG MatchAllKeyword; + ULONG Flags; + ULONG IsEnabled; + UCHAR Level; + UCHAR Reserve; + USHORT EnableBitsCount; + PULONG EnableBitMask; + const ULONGLONG* EnableKeyWords; + const UCHAR* EnableLevel; +} MCGEN_TRACE_CONTEXT, *PMCGEN_TRACE_CONTEXT; +#endif // MCGEN_TRACE_CONTEXT_DEF + +#if !defined(MCGEN_LEVEL_KEYWORD_ENABLED_DEF) +#define MCGEN_LEVEL_KEYWORD_ENABLED_DEF +// +// Determines whether an event with a given Level and Keyword would be +// considered as enabled based on the state of the specified context. +// Note that you may want to use MCGEN_ENABLE_CHECK instead of calling this +// function directly. +// +FORCEINLINE +BOOLEAN +McGenLevelKeywordEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ UCHAR Level, + _In_ ULONGLONG Keyword + ) +{ + // + // Check if the event Level is lower than the level at which + // the channel is enabled. + // If the event Level is 0 or the channel is enabled at level 0, + // all levels are enabled. + // + + if ((Level <= EnableInfo->Level) || // This also covers the case of Level == 0. + (EnableInfo->Level == 0)) { + + // + // Check if Keyword is enabled + // + + if ((Keyword == (ULONGLONG)0) || + ((Keyword & EnableInfo->MatchAnyKeyword) && + ((Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) { + return TRUE; + } + } + + return FALSE; +} +#endif // MCGEN_LEVEL_KEYWORD_ENABLED_DEF + +#if !defined(MCGEN_EVENT_ENABLED_DEF) +#define MCGEN_EVENT_ENABLED_DEF +// +// Determines whether the specified event would be considered as enabled based +// on the state of the specified context. Note that you may want to use +// MCGEN_ENABLE_CHECK instead of calling this function directly. +// +FORCEINLINE +BOOLEAN +McGenEventEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ PCEVENT_DESCRIPTOR EventDescriptor + ) +{ + return McGenLevelKeywordEnabled(EnableInfo, EventDescriptor->Level, EventDescriptor->Keyword); +} +#endif // MCGEN_EVENT_ENABLED_DEF + +#if !defined(MCGEN_CONTROL_CALLBACK) +#define MCGEN_CONTROL_CALLBACK + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +VOID +__stdcall +McGenControlCallbackV2( + _In_ LPCGUID SourceId, + _In_ ULONG ControlCode, + _In_ UCHAR Level, + _In_ ULONGLONG MatchAnyKeyword, + _In_ ULONGLONG MatchAllKeyword, + _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, + _Inout_opt_ PVOID CallbackContext + ) +/*++ + +Routine Description: + + This is the notification callback for Windows Vista and later. + +Arguments: + + SourceId - The GUID that identifies the session that enabled the provider. + + ControlCode - The parameter indicates whether the provider + is being enabled or disabled. + + Level - The level at which the event is enabled. + + MatchAnyKeyword - The bitmask of keywords that the provider uses to + determine the category of events that it writes. + + MatchAllKeyword - This bitmask additionally restricts the category + of events that the provider writes. + + FilterData - The provider-defined data. + + CallbackContext - The context of the callback that is defined when the provider + called EtwRegister to register itself. + +Remarks: + + ETW calls this function to notify provider of enable/disable + +--*/ +{ + PMCGEN_TRACE_CONTEXT Ctx = (PMCGEN_TRACE_CONTEXT)CallbackContext; + ULONG Ix; +#ifndef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + UNREFERENCED_PARAMETER(SourceId); + UNREFERENCED_PARAMETER(FilterData); +#endif + + if (Ctx == NULL) { + return; + } + + switch (ControlCode) { + + case EVENT_CONTROL_CODE_ENABLE_PROVIDER: + Ctx->Level = Level; + Ctx->MatchAnyKeyword = MatchAnyKeyword; + Ctx->MatchAllKeyword = MatchAllKeyword; + Ctx->IsEnabled = EVENT_CONTROL_CODE_ENABLE_PROVIDER; + + for (Ix = 0; Ix < Ctx->EnableBitsCount; Ix += 1) { + if (McGenLevelKeywordEnabled(Ctx, Ctx->EnableLevel[Ix], Ctx->EnableKeyWords[Ix]) != FALSE) { + Ctx->EnableBitMask[Ix >> 5] |= (1 << (Ix % 32)); + } else { + Ctx->EnableBitMask[Ix >> 5] &= ~(1 << (Ix % 32)); + } + } + break; + + case EVENT_CONTROL_CODE_DISABLE_PROVIDER: + Ctx->IsEnabled = EVENT_CONTROL_CODE_DISABLE_PROVIDER; + Ctx->Level = 0; + Ctx->MatchAnyKeyword = 0; + Ctx->MatchAllKeyword = 0; + if (Ctx->EnableBitsCount > 0) { +#pragma warning(suppress: 26451) // Arithmetic overflow cannot occur, no matter the value of EnableBitCount + RtlZeroMemory(Ctx->EnableBitMask, (((Ctx->EnableBitsCount - 1) / 32) + 1) * sizeof(ULONG)); + } + break; + + default: + break; + } + +#ifdef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + // + // Call user defined callback + // + MCGEN_PRIVATE_ENABLE_CALLBACK_V2( + SourceId, + ControlCode, + Level, + MatchAnyKeyword, + MatchAllKeyword, + FilterData, + CallbackContext + ); +#endif // MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + + return; +} + +#endif // MCGEN_CONTROL_CALLBACK + +#ifndef _mcgen_PENABLECALLBACK + #if MCGEN_USE_KERNEL_MODE_APIS + #define _mcgen_PENABLECALLBACK PETWENABLECALLBACK + #else + #define _mcgen_PENABLECALLBACK PENABLECALLBACK + #endif +#endif // _mcgen_PENABLECALLBACK + +#if !defined(_mcgen_PASTE2) +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_PASTE2(a, b) _mcgen_PASTE2_imp(a, b) +#define _mcgen_PASTE2_imp(a, b) a##b +#endif // _mcgen_PASTE2 + +#if !defined(_mcgen_PASTE3) +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_PASTE3(a, b, c) _mcgen_PASTE3_imp(a, b, c) +#define _mcgen_PASTE3_imp(a, b, c) a##b##_##c +#endif // _mcgen_PASTE3 + +// +// Macro validation +// + +// Validate MCGEN_EVENTREGISTER: + +// Trigger an error if MCGEN_EVENTREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER); + +// Trigger an error if MCGEN_EVENTREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER) + MCGEN_EVENTREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTREGISTER); + +// Validate MCGEN_EVENTUNREGISTER: + +// Trigger an error if MCGEN_EVENTUNREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER); + +// Trigger an error if MCGEN_EVENTUNREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER) + MCGEN_EVENTUNREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTUNREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTUNREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTUNREGISTER); + +// Validate MCGEN_EVENTSETINFORMATION: + +// Trigger an error if MCGEN_EVENTSETINFORMATION is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION); + +// Trigger an error if MCGEN_EVENTSETINFORMATION is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION) + MCGEN_EVENTSETINFORMATION_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTSETINFORMATION is defined as a function-like macro: +typedef void MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_MCGEN_EVENTSETINFORMATION; +typedef int _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_, MCGEN_EVENTSETINFORMATION); + +// Validate MCGEN_EVENTWRITETRANSFER: + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER); + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER) + MCGEN_EVENTWRITETRANSFER_must_not_be_redefined_between_headers;; + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is defined as a function-like macro: +typedef void MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_MCGEN_EVENTWRITETRANSFER; +typedef int _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_, MCGEN_EVENTWRITETRANSFER); + +#ifndef McGenEventWrite_def +#define McGenEventWrite_def + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventWrite _mcgen_PASTE2(McGenEventWrite_, MCGEN_EVENTWRITETRANSFER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventWrite( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_opt_ LPCGUID ActivityId, + _In_range_(1, 128) ULONG EventDataCount, + _Pre_cap_(EventDataCount) EVENT_DATA_DESCRIPTOR* EventData + ) +{ + const USHORT UNALIGNED* Traits; + + // Some customized MCGEN_EVENTWRITETRANSFER macros might ignore ActivityId. + UNREFERENCED_PARAMETER(ActivityId); + + Traits = (const USHORT UNALIGNED*)(UINT_PTR)Context->Logger; + + if (Traits == NULL) { + EventData[0].Ptr = 0; + EventData[0].Size = 0; + EventData[0].Reserved = 0; + } else { + EventData[0].Ptr = (ULONG_PTR)Traits; + EventData[0].Size = *Traits; + EventData[0].Reserved = 2; // EVENT_DATA_DESCRIPTOR_TYPE_PROVIDER_METADATA + } + + return MCGEN_EVENTWRITETRANSFER( + Context->RegistrationHandle, + Descriptor, + ActivityId, + NULL, + EventDataCount, + EventData); +} +#endif // McGenEventWrite_def + +#if !defined(McGenEventRegisterUnregister) +#define McGenEventRegisterUnregister + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventRegister _mcgen_PASTE2(McGenEventRegister_, MCGEN_EVENTREGISTER) + +#pragma warning(push) +#pragma warning(disable:6103) +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventRegister( + _In_ LPCGUID ProviderId, + _In_opt_ _mcgen_PENABLECALLBACK EnableCallback, + _In_opt_ PVOID CallbackContext, + _Inout_ PREGHANDLE RegHandle + ) +/*++ + +Routine Description: + + This function registers the provider with ETW. + +Arguments: + + ProviderId - Provider ID to register with ETW. + + EnableCallback - Callback to be used. + + CallbackContext - Context for the callback. + + RegHandle - Pointer to registration handle. + +Remarks: + + Should not be called if the provider is already registered (i.e. should not + be called if *RegHandle != 0). Repeatedly registering a provider is a bug + and may indicate a race condition. However, for compatibility with previous + behavior, this function will return SUCCESS in this case. + +--*/ +{ + ULONG Error; + + if (*RegHandle != 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTREGISTER(ProviderId, EnableCallback, CallbackContext, RegHandle); + } + + return Error; +} +#pragma warning(pop) + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventUnregister _mcgen_PASTE2(McGenEventUnregister_, MCGEN_EVENTUNREGISTER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventUnregister(_Inout_ PREGHANDLE RegHandle) +/*++ + +Routine Description: + + Unregister from ETW and set *RegHandle = 0. + +Arguments: + + RegHandle - the pointer to the provider registration handle + +Remarks: + + If provider has not been registered (i.e. if *RegHandle == 0), + return SUCCESS. It is safe to call McGenEventUnregister even if the + call to McGenEventRegister returned an error. + +--*/ +{ + ULONG Error; + + if(*RegHandle == 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTUNREGISTER(*RegHandle); + *RegHandle = (REGHANDLE)0; + } + + return Error; +} + +#endif // McGenEventRegisterUnregister + +#ifndef _mcgen_EVENT_BIT_SET + #if defined(_M_IX86) || defined(_M_X64) + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((((const unsigned char*)EnableBits)[BitPosition >> 3] & (1u << (BitPosition & 7))) != 0) + #else // CPU type + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((EnableBits[BitPosition >> 5] & (1u << (BitPosition & 31))) != 0) + #endif // CPU type +#endif // _mcgen_EVENT_BIT_SET + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// Provider "microsoft-windows-mimalloc" event count 2 +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +// Provider GUID = 138f4dbb-ee04-4899-aa0a-572ad4475779 +EXTERN_C __declspec(selectany) const GUID ETW_MI_Provider = {0x138f4dbb, 0xee04, 0x4899, {0xaa, 0x0a, 0x57, 0x2a, 0xd4, 0x47, 0x57, 0x79}}; + +#ifndef ETW_MI_Provider_Traits +#define ETW_MI_Provider_Traits NULL +#endif // ETW_MI_Provider_Traits + +// +// Event Descriptors +// +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_ALLOC = {0x64, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_ALLOC_value 0x64 +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_FREE = {0x65, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_FREE_value 0x65 + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Event Enablement Bits +// These variables are for use by MC-generated code and should not be used directly. +// +EXTERN_C __declspec(selectany) DECLSPEC_CACHEALIGN ULONG microsoft_windows_mimallocEnableBits[1]; +EXTERN_C __declspec(selectany) const ULONGLONG microsoft_windows_mimallocKeywords[1] = {0x0}; +EXTERN_C __declspec(selectany) const unsigned char microsoft_windows_mimallocLevels[1] = {4}; + +// +// Provider context +// +EXTERN_C __declspec(selectany) MCGEN_TRACE_CONTEXT ETW_MI_Provider_Context = {0, (ULONG_PTR)ETW_MI_Provider_Traits, 0, 0, 0, 0, 0, 0, 1, microsoft_windows_mimallocEnableBits, microsoft_windows_mimallocKeywords, microsoft_windows_mimallocLevels}; + +// +// Provider REGHANDLE +// +#define microsoft_windows_mimallocHandle (ETW_MI_Provider_Context.RegistrationHandle) + +// +// This macro is set to 0, indicating that the EventWrite[Name] macros do not +// have an Activity parameter. This is controlled by the -km and -um options. +// +#define ETW_MI_Provider_EventWriteActivity 0 + +// +// Register with ETW using the control GUID specified in the manifest. +// Invoke this macro during module initialization (i.e. program startup, +// DLL process attach, or driver load) to initialize the provider. +// Note that if this function returns an error, the error means that +// will not work, but no action needs to be taken -- even if EventRegister +// returns an error, it is generally safe to use EventWrite and +// EventUnregister macros (they will be no-ops if EventRegister failed). +// +#ifndef EventRegistermicrosoft_windows_mimalloc +#define EventRegistermicrosoft_windows_mimalloc() McGenEventRegister(&ETW_MI_Provider, McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) +#endif + +// +// Register with ETW using a specific control GUID (i.e. a GUID other than what +// is specified in the manifest). Advanced scenarios only. +// +#ifndef EventRegisterByGuidmicrosoft_windows_mimalloc +#define EventRegisterByGuidmicrosoft_windows_mimalloc(Guid) McGenEventRegister(&(Guid), McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) +#endif + +// +// Unregister with ETW and close the provider. +// Invoke this macro during module shutdown (i.e. program exit, DLL process +// detach, or driver unload) to unregister the provider. +// Note that you MUST call EventUnregister before DLL or driver unload +// (not optional): failure to unregister a provider before DLL or driver unload +// will result in crashes. +// +#ifndef EventUnregistermicrosoft_windows_mimalloc +#define EventUnregistermicrosoft_windows_mimalloc() McGenEventUnregister(µsoft_windows_mimallocHandle) +#endif + +// +// MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION macro: +// Define this macro to enable support for caller-allocated provider context. +// +#ifdef MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION + +// +// Advanced scenarios: Caller-allocated provider context. +// Use when multiple differently-configured provider handles are needed, +// e.g. for container-aware drivers, one context per container. +// +// Usage: +// +// - Caller enables the feature before including this header, e.g. +// #define MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION 1 +// - Caller allocates memory, e.g. pContext = malloc(sizeof(McGenContext_microsoft_windows_mimalloc)); +// - Caller registers the provider, e.g. EventRegistermicrosoft_windows_mimalloc_ForContext(pContext); +// - Caller writes events, e.g. EventWriteMyEvent_ForContext(pContext, ...); +// - Caller unregisters, e.g. EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext); +// - Caller frees memory, e.g. free(pContext); +// + +typedef struct tagMcGenContext_microsoft_windows_mimalloc { + // The fields of this structure are subject to change and should + // not be accessed directly. To access the provider's REGHANDLE, + // use microsoft_windows_mimallocHandle_ForContext(pContext). + MCGEN_TRACE_CONTEXT Context; + ULONG EnableBits[1]; +} McGenContext_microsoft_windows_mimalloc; + +#define EventRegistermicrosoft_windows_mimalloc_ForContext(pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&ETW_MI_Provider, pContext) +#define EventRegisterByGuidmicrosoft_windows_mimalloc_ForContext(Guid, pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&(Guid), pContext) +#define EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext) McGenEventUnregister(&(pContext)->Context.RegistrationHandle) + +// +// Provider REGHANDLE for caller-allocated context. +// +#define microsoft_windows_mimallocHandle_ForContext(pContext) ((pContext)->Context.RegistrationHandle) + +// This function is for use by MC-generated code and should not be used directly. +// Initialize and register the caller-allocated context. +__inline +ULONG __stdcall +_mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)( + _In_ LPCGUID pProviderId, + _Out_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + RtlZeroMemory(pContext, sizeof(*pContext)); + pContext->Context.Logger = (ULONG_PTR)ETW_MI_Provider_Traits; + pContext->Context.EnableBitsCount = 1; + pContext->Context.EnableBitMask = pContext->EnableBits; + pContext->Context.EnableKeyWords = microsoft_windows_mimallocKeywords; + pContext->Context.EnableLevel = microsoft_windows_mimallocLevels; + return McGenEventRegister( + pProviderId, + McGenControlCallbackV2, + &pContext->Context, + &pContext->Context.RegistrationHandle); +} + +// This function is for use by MC-generated code and should not be used directly. +// Trigger a compile error if called with the wrong parameter type. +FORCEINLINE +_Ret_ McGenContext_microsoft_windows_mimalloc* +_mcgen_CheckContextType_microsoft_windows_mimalloc(_In_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + return pContext; +} + +#endif // MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION + +// +// Enablement check macro for event "ETW_MI_ALLOC" +// +#define EventEnabledETW_MI_ALLOC() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_ALLOC_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_ALLOC" +// +#define EventWriteETW_MI_ALLOC(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) +#define EventWriteETW_MI_ALLOC_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&(pContext)->Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_ALLOC, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +// +// Enablement check macro for event "ETW_MI_FREE" +// +#define EventEnabledETW_MI_FREE() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_FREE_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_FREE" +// +#define EventWriteETW_MI_FREE(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_FREE) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) +#define EventWriteETW_MI_FREE_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_FREE) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&(pContext)->Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_FREE, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_FREE _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Template Functions +// + +// +// Function for template "ETW_CUSTOM_HEAP_ALLOC_DATA" (and possibly others). +// This function is for use by MC-generated code and should not be used directly. +// +#ifndef McTemplateU0xx_def +#define McTemplateU0xx_def +ETW_INLINE +ULONG +_mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER)( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_ const unsigned __int64 _Arg0, + _In_ const unsigned __int64 _Arg1 + ) +{ +#define McTemplateU0xx_ARGCOUNT 2 + + EVENT_DATA_DESCRIPTOR EventData[McTemplateU0xx_ARGCOUNT + 1]; + + EventDataDescCreate(&EventData[1],&_Arg0, sizeof(const unsigned __int64) ); + + EventDataDescCreate(&EventData[2],&_Arg1, sizeof(const unsigned __int64) ); + + return McGenEventWrite(Context, Descriptor, NULL, McTemplateU0xx_ARGCOUNT + 1, EventData); +} +#endif // McTemplateU0xx_def + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +#if defined(__cplusplus) +} +#endif diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.man b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.man new file mode 100644 index 000000000..cfd1f8a9e Binary files /dev/null and b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.man differ diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c new file mode 100644 index 000000000..e3dc33e32 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c @@ -0,0 +1,607 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" +#include // fputs, stderr + + +//--------------------------------------------- +// Dynamically bind Windows API points for portability +//--------------------------------------------- + +// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. +// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) +// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) +// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. +typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { + MiMemExtendedParameterInvalidType = 0, + MiMemExtendedParameterAddressRequirements, + MiMemExtendedParameterNumaNode, + MiMemExtendedParameterPartitionHandle, + MiMemExtendedParameterUserPhysicalHandle, + MiMemExtendedParameterAttributeFlags, + MiMemExtendedParameterMax +} MI_MEM_EXTENDED_PARAMETER_TYPE; + +typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { + struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; + union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; +} MI_MEM_EXTENDED_PARAMETER; + +typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { + PVOID LowestStartingAddress; + PVOID HighestEndingAddress; + SIZE_T Alignment; +} MI_MEM_ADDRESS_REQUIREMENTS; + +#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 + +#include +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +static PVirtualAlloc2 pVirtualAlloc2 = NULL; +static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; + +// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 +typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; + +typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); +typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); +typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); +static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; +static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; +static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; +static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; + +//--------------------------------------------- +// Enable large page support dynamically (if possible) +//--------------------------------------------- + +static bool win_enable_large_os_pages(size_t* large_page_size) +{ + static bool large_initialized = false; + if (large_initialized) return (_mi_os_large_page_size() > 0); + large_initialized = true; + + // Try to see if large OS pages are supported + // To use large pages on Windows, we first need access permission + // Set "Lock pages in memory" permission in the group policy editor + // + unsigned long err = 0; + HANDLE token = NULL; + BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + if (ok) { + TOKEN_PRIVILEGES tp; + ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); + if (ok) { + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + if (ok) { + err = GetLastError(); + ok = (err == ERROR_SUCCESS); + if (ok && large_page_size != NULL) { + *large_page_size = GetLargePageMinimum(); + } + } + } + CloseHandle(token); + } + if (!ok) { + if (err == 0) err = GetLastError(); + _mi_warning_message("cannot enable large OS page support, error %lu\n", err); + } + return (ok!=0); +} + + +//--------------------------------------------- +// Initialize +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) +{ + config->has_overcommit = false; + config->must_free_whole = true; + // get the page size + SYSTEM_INFO si; + GetSystemInfo(&si); + if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; } + if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; } + // get the VirtualAlloc2 function + HINSTANCE hDll; + hDll = LoadLibrary(TEXT("kernelbase.dll")); + if (hDll != NULL) { + // use VirtualAlloc2FromApp if possible as it is available to Windows store apps + pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + FreeLibrary(hDll); + } + // NtAllocateVirtualMemoryEx is used for huge page allocation + hDll = LoadLibrary(TEXT("ntdll.dll")); + if (hDll != NULL) { + pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); + FreeLibrary(hDll); + } + // Try to use Win7+ numa API + hDll = LoadLibrary(TEXT("kernel32.dll")); + if (hDll != NULL) { + pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); + pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); + pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); + pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); + FreeLibrary(hDll); + } + if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + win_enable_large_os_pages(&config->large_page_size); + } +} + + +//--------------------------------------------- +// Free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + MI_UNUSED(size); + DWORD errcode = 0; + bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + if (errcode == ERROR_INVALID_ADDRESS) { + // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside + // the memory region returned by VirtualAlloc; in that case we need to free using + // the start of the region. + MEMORY_BASIC_INFORMATION info = { 0 }; + VirtualQuery(addr, &info, sizeof(info)); + if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { + errcode = 0; + err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + } + } + return (int)errcode; +} + + +//--------------------------------------------- +// VirtualAlloc +//--------------------------------------------- + +static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) { + #if (MI_INTPTR_SIZE >= 8) + // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = _mi_os_get_aligned_hint(try_alignment,size); + if (hint != NULL) { + void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); + if (p != NULL) return p; + _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); + // fall through on error + } + } + #endif + // on modern Windows try use VirtualAlloc2 for aligned allocation + if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { + MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; + reqs.Alignment = try_alignment; + MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; + param.Type.Type = MiMemExtendedParameterAddressRequirements; + param.Arg.Pointer = &reqs; + void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); + if (p != NULL) return p; + _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); + // fall through on error + } + // last resort + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); +} + +static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { + mi_assert_internal(!(large_only && !allow_large)); + static _Atomic(size_t) large_page_try_ok; // = 0; + void* p = NULL; + // Try to allocate large OS pages (2MiB) if allowed or required. + if ((large_only || _mi_os_use_large_page(size, try_alignment)) + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. + // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + // large OS pages must always reserve and commit. + *is_large = true; + p = win_virtual_alloc_prim(addr, size, try_alignment, flags | MEM_LARGE_PAGES); + if (large_only) return p; + // fall back to non-large page allocation on error (`p == NULL`). + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations + } + } + } + // Fall back to regular page allocation + if (p == NULL) { + *is_large = ((flags&MEM_LARGE_PAGES) != 0); + p = win_virtual_alloc_prim(addr, size, try_alignment, flags); + } + //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); } + return p; +} + +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + int flags = MEM_RESERVE; + if (commit) { flags |= MEM_COMMIT; } + *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Commit/Reset/Protect +//--------------------------------------------- +#ifdef _MSC_VER +#pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit) +#endif + +int _mi_prim_commit(void* addr, size_t size, bool commit) { + if (commit) { + void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); + return (p == addr ? 0 : (int)GetLastError()); + } + else { + BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); + return (ok ? 0 : (int)GetLastError()); + } +} + +int _mi_prim_reset(void* addr, size_t size) { + void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); + mi_assert_internal(p == addr); + #if 1 + if (p == addr && addr != NULL) { + VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory from the working set + } + #endif + return (p == addr ? 0 : (int)GetLastError()); +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + DWORD oldprotect = 0; + BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); + return (ok ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node) +{ + const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + + win_enable_large_os_pages(NULL); + + MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { + params[0].Type.Type = MiMemExtendedParameterAttributeFlags; + params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + ULONG param_count = 1; + if (numa_node >= 0) { + param_count++; + params[1].Type.Type = MiMemExtendedParameterNumaNode; + params[1].Arg.ULong = (unsigned)numa_node; + } + SIZE_T psize = size; + void* base = hint_addr; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0 && base != NULL) { + return base; + } + else { + // fall back to regular large pages + mi_huge_pages_available = false; // don't try further huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + } + } + // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation + if (pVirtualAlloc2 != NULL && numa_node >= 0) { + params[0].Type.Type = MiMemExtendedParameterNumaNode; + params[0].Arg.ULong = (unsigned)numa_node; + return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1); + } + + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); +} + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); + return (*addr != NULL ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Numa nodes +//--------------------------------------------- + +size_t _mi_prim_numa_node(void) { + USHORT numa_node = 0; + if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { + // Extended API is supported + MI_PROCESSOR_NUMBER pnum; + (*pGetCurrentProcessorNumberEx)(&pnum); + USHORT nnode = 0; + BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); + if (ok) { numa_node = nnode; } + } + else if (pGetNumaProcessorNode != NULL) { + // Vista or earlier, use older API that is limited to 64 processors. Issue #277 + DWORD pnum = GetCurrentProcessorNumber(); + UCHAR nnode = 0; + BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); + if (ok) { numa_node = nnode; } + } + return numa_node; +} + +size_t _mi_prim_numa_node_count(void) { + ULONG numa_max = 0; + GetNumaHighestNodeNumber(&numa_max); + // find the highest node number that has actual processors assigned to it. Issue #282 + while(numa_max > 0) { + if (pGetNumaNodeProcessorMaskEx != NULL) { + // Extended API is supported + GROUP_AFFINITY affinity; + if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { + if (affinity.Mask != 0) break; // found the maximum non-empty node + } + } + else { + // Vista or earlier, use older API that is limited to 64 processors. + ULONGLONG mask; + if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + }; + } + // max node was invalid or had no processor assigned, try again + numa_max--; + } + return ((size_t)numa_max + 1); +} + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { + LARGE_INTEGER f; + QueryPerformanceFrequency(&f); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; + } + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); +} + +mi_msecs_t _mi_prim_clock_now(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return mi_to_msecs(t); +} + + +//---------------------------------------------------------------- +// Process Info +//---------------------------------------------------------------- + +#include +#include + +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { + ULARGE_INTEGER i; + i.LowPart = ftime->dwLowDateTime; + i.HighPart = ftime->dwHighDateTime; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; +} + +typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); +static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + FILETIME ct; + FILETIME ut; + FILETIME st; + FILETIME et; + GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); + pinfo->utime = filetime_msecs(&ut); + pinfo->stime = filetime_msecs(&st); + + // load psapi on demand + if (pGetProcessMemoryInfo == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); + if (hDll != NULL) { + pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); + } + } + + // get process info + PROCESS_MEMORY_COUNTERS info; + memset(&info, 0, sizeof(info)); + if (pGetProcessMemoryInfo != NULL) { + pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + } + pinfo->current_rss = (size_t)info.WorkingSetSize; + pinfo->peak_rss = (size_t)info.PeakWorkingSetSize; + pinfo->current_commit = (size_t)info.PagefileUsage; + pinfo->peak_commit = (size_t)info.PeakPagefileUsage; + pinfo->page_faults = (size_t)info.PageFaultCount; +} + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) +{ + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so we use direct console output. + if (!_mi_preloading()) { + // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console + static HANDLE hcon = INVALID_HANDLE_VALUE; + static bool hconIsConsole; + if (hcon == INVALID_HANDLE_VALUE) { + CONSOLE_SCREEN_BUFFER_INFO sbi; + hcon = GetStdHandle(STD_ERROR_HANDLE); + hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); + } + const size_t len = _mi_strlen(msg); + if (len > 0 && len < UINT32_MAX) { + DWORD written = 0; + if (hconIsConsole) { + WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + } + else if (hcon != INVALID_HANDLE_VALUE) { + // use direct write if stderr was redirected + WriteFile(hcon, msg, (DWORD)len, &written, NULL); + } + else { + // finally fall back to fputs after all + fputs(msg, stderr); + } + } + } +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +// On Windows use GetEnvironmentVariable instead of getenv to work +// reliably even when this is invoked before the C runtime is initialized. +// i.e. when `_mi_preloading() == true`. +// Note: on windows, environment names are not case sensitive. +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + result[0] = 0; + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + return (len > 0 && len < result_size); +} + + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) +// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using +// dynamic overriding, we observed it can raise an exception when compiled with C++, and +// sometimes deadlocks when also running under the VS debugger. +// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. +// To be continued.. +#pragma comment (lib,"advapi32.lib") +#define RtlGenRandom SystemFunction036 +mi_decl_externc BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return (RtlGenRandom(buf, (ULONG)buf_len) != 0); +} + +#else + +#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG +#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 +#endif + +typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); +static PBCryptGenRandom pBCryptGenRandom = NULL; + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + if (pBCryptGenRandom == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); + if (hDll != NULL) { + pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); + } + if (pBCryptGenRandom == NULL) return false; + } + return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} + +#endif // MI_USE_RTLGENRANDOM + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if !defined(MI_SHARED_LIB) + +// use thread local storage keys to detect thread ending +#include +#if (_WIN32_WINNT < 0x600) // before Windows Vista +WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); +WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); +WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); +WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); +#endif + +static DWORD mi_fls_key = (DWORD)(-1); + +static void NTAPI mi_fls_done(PVOID value) { + mi_heap_t* heap = (mi_heap_t*)value; + if (heap != NULL) { + _mi_thread_done(heap); + FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_fls_key = FlsAlloc(&mi_fls_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // call thread-done on all threads (except the main thread) to prevent + // dangling callback pointer if statically linked with a DLL; Issue #208 + FlsFree(mi_fls_key); +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + mi_assert_internal(mi_fls_key != (DWORD)(-1)); + FlsSetValue(mi_fls_key, heap); +} + +#else + +// Dll; nothing to do as in that case thread_done is handled through the DLL_THREAD_DETACH event. + +void _mi_prim_thread_init_auto_done(void) { +} + +void _mi_prim_thread_done_auto_done(void) { +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} + +#endif diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/readme.md b/source/luametatex/source/libraries/mimalloc/src/prim/windows/readme.md new file mode 100644 index 000000000..217c3d174 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/readme.md @@ -0,0 +1,17 @@ +## Primitives: + +- `prim.c` contains Windows primitives for OS allocation. + +## Event Tracing for Windows (ETW) + +- `etw.h` is generated from `etw.man` which contains the manifest for mimalloc events. + (100 is an allocation, 101 is for a free) + +- `etw-mimalloc.wprp` is a profile for the Windows Performance Recorder (WPR). + In an admin prompt, you can use: + ``` + > wpr -start src\prim\windows\etw-mimalloc.wprp -filemode + > + > wpr -stop test.etl + ``` + and then open `test.etl` in the Windows Performance Analyzer (WPA). \ No newline at end of file diff --git a/source/luametatex/source/libraries/mimalloc/src/random.c b/source/luametatex/source/libraries/mimalloc/src/random.c index 06d4ba4ad..4fc8b2f8f 100644 --- a/source/luametatex/source/libraries/mimalloc/src/random.c +++ b/source/luametatex/source/libraries/mimalloc/src/random.c @@ -4,14 +4,10 @@ This is free software; you can redistribute it and/or modify it under the terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ -#ifndef _DEFAULT_SOURCE -#define _DEFAULT_SOURCE // for syscall() on Linux -#endif - #include "mimalloc.h" -#include "mimalloc-internal.h" - -#include // memset +#include "mimalloc/internal.h" +#include "mimalloc/prim.h" // _mi_prim_random_buf +#include // memset /* ---------------------------------------------------------------------------- We use our own PRNG to keep predictable performance of random number generation @@ -158,159 +154,13 @@ uintptr_t _mi_random_next(mi_random_ctx_t* ctx) { /* ---------------------------------------------------------------------------- -To initialize a fresh random context we rely on the OS: -- Windows : BCryptGenRandom (or RtlGenRandom) -- macOS : CCRandomGenerateBytes, arc4random_buf -- bsd,wasi : arc4random_buf -- Linux : getrandom,/dev/urandom +To initialize a fresh random context. If we cannot get good randomness, we fall back to weak randomness based on a timer and ASLR. -----------------------------------------------------------------------------*/ -#if defined(_WIN32) - -#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) -// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using -// dynamic overriding, we observed it can raise an exception when compiled with C++, and -// sometimes deadlocks when also running under the VS debugger. -// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. -// To be continued.. -#pragma comment (lib,"advapi32.lib") -#define RtlGenRandom SystemFunction036 -#ifdef __cplusplus -extern "C" { -#endif -BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); -#ifdef __cplusplus -} -#endif -static bool os_random_buf(void* buf, size_t buf_len) { - return (RtlGenRandom(buf, (ULONG)buf_len) != 0); -} -#else - -#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG -#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 -#endif - -typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); -static PBCryptGenRandom pBCryptGenRandom = NULL; - -static bool os_random_buf(void* buf, size_t buf_len) { - if (pBCryptGenRandom == NULL) { - HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); - if (hDll != NULL) { - pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); - } - } - if (pBCryptGenRandom == NULL) { - return false; - } - else { - return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); - } -} -#endif - -#elif defined(__APPLE__) -#include -#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 -#include -#include -#endif -static bool os_random_buf(void* buf, size_t buf_len) { - #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 - // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf - // may fail silently on macOS. See PR #390, and - return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); - #else - // fall back on older macOS - arc4random_buf(buf, buf_len); - return true; - #endif -} - -#elif defined(__ANDROID__) || defined(__DragonFly__) || \ - defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ - defined(__sun) // todo: what to use with __wasi__? -#include -static bool os_random_buf(void* buf, size_t buf_len) { - arc4random_buf(buf, buf_len); - return true; -} -#elif defined(__linux__) || defined(__HAIKU__) -#if defined(__linux__) -#include -#endif -#include -#include -#include -#include -#include -static bool os_random_buf(void* buf, size_t buf_len) { - // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` - // and for the latter the actual `getrandom` call is not always defined. - // (see ) - // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. -#ifdef SYS_getrandom - #ifndef GRND_NONBLOCK - #define GRND_NONBLOCK (1) - #endif - static _Atomic(uintptr_t) no_getrandom; // = 0 - if (mi_atomic_load_acquire(&no_getrandom)==0) { - ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); - if (ret >= 0) return (buf_len == (size_t)ret); - if (errno != ENOSYS) return false; - mi_atomic_store_release(&no_getrandom, 1UL); // don't call again, and fall back to /dev/urandom - } -#endif - int flags = O_RDONLY; - #if defined(O_CLOEXEC) - flags |= O_CLOEXEC; - #endif - int fd = open("/dev/urandom", flags, 0); - if (fd < 0) return false; - size_t count = 0; - while(count < buf_len) { - ssize_t ret = read(fd, (char*)buf + count, buf_len - count); - if (ret<=0) { - if (errno!=EAGAIN && errno!=EINTR) break; - } - else { - count += ret; - } - } - close(fd); - return (count==buf_len); -} -#else -static bool os_random_buf(void* buf, size_t buf_len) { - return false; -} -#endif - -#if defined(_WIN32) -#include -#elif defined(__APPLE__) -#include -#else -#include -#endif - uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random - - #if defined(_WIN32) - LARGE_INTEGER pcount; - QueryPerformanceCounter(&pcount); - x ^= (uintptr_t)(pcount.QuadPart); - #elif defined(__APPLE__) - x ^= (uintptr_t)mach_absolute_time(); - #else - struct timespec time; - clock_gettime(CLOCK_MONOTONIC, &time); - x ^= (uintptr_t)time.tv_sec; - x ^= (uintptr_t)time.tv_nsec; - #endif + x ^= _mi_prim_clock_now(); // and do a few randomization steps uintptr_t max = ((x ^ (x >> 17)) & 0x0F) + 1; for (uintptr_t i = 0; i < max; i++) { @@ -322,7 +172,7 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) { static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) { uint8_t key[32]; - if (use_weak || !os_random_buf(key, sizeof(key))) { + if (use_weak || !_mi_prim_random_buf(key, sizeof(key))) { // if we fail to get random data from the OS, we fall back to a // weak random source based on the current time #if !defined(__wasi__) diff --git a/source/luametatex/source/libraries/mimalloc/src/region.c b/source/luametatex/source/libraries/mimalloc/src/region.c index 3571abb60..6c8ffb79c 100644 --- a/source/luametatex/source/libraries/mimalloc/src/region.c +++ b/source/luametatex/source/libraries/mimalloc/src/region.c @@ -32,30 +32,15 @@ Possible issues: do this better without adding too much complexity? -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include // memset #include "bitmap.h" -// Internal raw OS interface -size_t _mi_os_large_page_size(void); -bool _mi_os_protect(void* addr, size_t size); -bool _mi_os_unprotect(void* addr, size_t size); -bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); -bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); -bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats); - -// arena.c -mi_arena_id_t _mi_arena_id_none(void); -void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); -void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); - - +// os.c +bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats); // Constants #if (MI_INTPTR_SIZE==8) @@ -330,7 +315,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* } mi_assert_internal(!_mi_bitmap_is_any_claimed(®ion->reset, 1, blocks, bit_idx)); - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED + #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN if (*commit) { ((uint8_t*)p)[0] = 0; } #endif @@ -376,7 +361,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, if (p != NULL) { mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0); - #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED + #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED // && !MI_TSAN if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed #endif } diff --git a/source/luametatex/source/libraries/mimalloc/src/segment-cache.c b/source/luametatex/source/libraries/mimalloc/src/segment-cache.c index d93fd6441..eeae1b508 100644 --- a/source/luametatex/source/libraries/mimalloc/src/segment-cache.c +++ b/source/luametatex/source/libraries/mimalloc/src/segment-cache.c @@ -11,10 +11,10 @@ terms of the MIT license. A copy of the license can be found in the file The full memory map of all segments is also implemented here. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" -#include "bitmap.h" // atomic bitmap +#include "./bitmap.h" // atomic bitmap //#define MI_CACHE_DISABLE 1 // define to completely disable the segment cache @@ -35,8 +35,8 @@ typedef struct mi_cache_slot_s { static mi_decl_cache_align mi_cache_slot_t cache[MI_CACHE_MAX]; // = 0 -static mi_decl_cache_align mi_bitmap_field_t cache_available[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! -static mi_decl_cache_align mi_bitmap_field_t cache_available_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; +static mi_decl_cache_align mi_bitmap_field_t cache_unavailable[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; // zero bit = available! +static mi_decl_cache_align mi_bitmap_field_t cache_unavailable_large[MI_CACHE_FIELDS] = { MI_CACHE_BITS_SET }; static mi_decl_cache_align mi_bitmap_field_t cache_inuse[MI_CACHE_FIELDS]; // zero bit = free static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void* arg) { @@ -48,7 +48,8 @@ static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void mi_decl_noinline static void* mi_segment_cache_pop_ex( bool all_suitable, size_t size, mi_commit_mask_t* commit_mask, - mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, + mi_commit_mask_t* decommit_mask, bool large_allowed, + bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { #ifdef MI_CACHE_DISABLE @@ -66,23 +67,28 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // find an available slot + // find an available slot and make it unavailable mi_bitmap_index_t bitidx = 0; bool claimed = false; mi_arena_id_t req_arena_id = _req_arena_id; mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache? - if (*large) { // large allowed? - claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); + if (large_allowed) { // large allowed? + claimed = _mi_bitmap_try_find_from_claim_pred(cache_unavailable_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = true; } if (!claimed) { - claimed = _mi_bitmap_try_find_from_claim_pred (cache_available, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); + claimed = _mi_bitmap_try_find_from_claim_pred (cache_unavailable, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx); if (claimed) *large = false; } if (!claimed) return NULL; + // no longer available but still in-use + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); + // found a slot mi_cache_slot_t* slot = &cache[mi_bitmap_index_bit(bitidx)]; void* p = slot->p; @@ -95,16 +101,15 @@ mi_decl_noinline static void* mi_segment_cache_pop_ex( mi_atomic_storei64_release(&slot->expire,(mi_msecs_t)0); // mark the slot as free again - mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); _mi_bitmap_unclaim(cache_inuse, MI_CACHE_FIELDS, 1, bitidx); return p; #endif } -mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) +mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool large_allowed, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld) { - return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large, is_pinned, is_zero, _req_arena_id, memid, tld); + return mi_segment_cache_pop_ex(false, size, commit_mask, decommit_mask, large_allowed, large, is_pinned, is_zero, _req_arena_id, memid, tld); } static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats) @@ -113,10 +118,11 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo // nothing } else if (mi_commit_mask_is_full(cmask)) { + // decommit the whole in one call _mi_os_decommit(p, total, stats); } else { - // todo: one call to decommit the whole at once? + // decommit parts mi_assert_internal((total%MI_COMMIT_MASK_BITS)==0); size_t part = total/MI_COMMIT_MASK_BITS; size_t idx; @@ -148,21 +154,25 @@ static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, if (expire != 0 && (force || now >= expire)) { // racy read // seems expired, first claim it from available purged++; - mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); - if (_mi_bitmap_claim(cache_available, MI_CACHE_FIELDS, 1, bitidx, NULL)) { - // was available, we claimed it + mi_bitmap_index_t bitidx = mi_bitmap_index_create_from_bit(idx); + if (_mi_bitmap_claim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx, NULL)) { // no need to check large as those cannot be decommitted anyways + // it was available, we claimed it (and made it unavailable) + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); + // we can now access it safely expire = mi_atomic_loadi64_acquire(&slot->expire); if (expire != 0 && (force || now >= expire)) { // safe read + mi_assert_internal(_mi_bitmap_is_claimed(cache_inuse, MI_CACHE_FIELDS, 1, bitidx)); // still expired, decommit it mi_atomic_storei64_relaxed(&slot->expire,(mi_msecs_t)0); - mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask) && _mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(!mi_commit_mask_is_empty(&slot->commit_mask)); _mi_abandoned_await_readers(); // wait until safe to decommit // decommit committed parts // TODO: instead of decommit, we could also free to the OS? mi_commit_mask_decommit(&slot->commit_mask, slot->p, MI_SEGMENT_SIZE, tld->stats); mi_commit_mask_create_empty(&slot->decommit_mask); } - _mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop + _mi_bitmap_unclaim(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop } if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push } @@ -184,23 +194,20 @@ void _mi_segment_cache_free_all(mi_os_tld_t* tld) { mi_commit_mask_t decommit_mask; bool is_pinned; bool is_zero; + bool is_large; size_t memid; const size_t size = MI_SEGMENT_SIZE; - // iterate twice: first large pages, then regular memory - for (int i = 0; i < 2; i++) { - void* p; - do { - // keep popping and freeing the memory - bool large = (i == 0); - p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, - &large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); - if (p != NULL) { - size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); - if (csize > 0 && !is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); - _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); - } - } while (p != NULL); - } + void* p; + do { + // keep popping and freeing the memory + p = mi_segment_cache_pop_ex(true /* all */, size, &commit_mask, &decommit_mask, + true /* allow large */, &is_large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld); + if (p != NULL) { + size_t csize = _mi_commit_mask_committed_size(&commit_mask, size); + if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } + _mi_arena_free(p, size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats); + } + } while (p != NULL); } mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld) @@ -209,27 +216,34 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me return false; #else - // only for normal segment blocks + // purge expired entries + mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); + + // only cache normal segment blocks if (size != MI_SEGMENT_SIZE || ((uintptr_t)start % MI_SEGMENT_ALIGN) != 0) return false; + // Also do not cache arena allocated segments that cannot be decommitted. (as arena allocation is fast) + // This is a common case with reserved huge OS pages. + // + // (note: we could also allow segments that are already fully decommitted but that never happens + // as the first slice is always committed (for the segment metadata)) + if (!_mi_arena_is_os_allocated(memid) && is_pinned) return false; + // numa node determines start field int numa_node = _mi_os_numa_node(NULL); size_t start_field = 0; if (numa_node > 0) { - start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count())*numa_node; + start_field = (MI_CACHE_FIELDS / _mi_os_numa_node_count()) * numa_node; if (start_field >= MI_CACHE_FIELDS) start_field = 0; } - // purge expired entries - mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld); - // find an available slot mi_bitmap_index_t bitidx; bool claimed = _mi_bitmap_try_find_from_claim(cache_inuse, MI_CACHE_FIELDS, start_field, 1, &bitidx); if (!claimed) return false; - mi_assert_internal(_mi_bitmap_is_claimed(cache_available, MI_CACHE_FIELDS, 1, bitidx)); - mi_assert_internal(_mi_bitmap_is_claimed(cache_available_large, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable, MI_CACHE_FIELDS, 1, bitidx)); + mi_assert_internal(_mi_bitmap_is_claimed(cache_unavailable_large, MI_CACHE_FIELDS, 1, bitidx)); #if MI_DEBUG>1 if (is_pinned || is_large) { mi_assert_internal(mi_commit_mask_is_full(commit_mask)); @@ -257,7 +271,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me } // make it available - _mi_bitmap_unclaim((is_large ? cache_available_large : cache_available), MI_CACHE_FIELDS, 1, bitidx); + _mi_bitmap_unclaim((is_large ? cache_unavailable_large : cache_unavailable), MI_CACHE_FIELDS, 1, bitidx); return true; #endif } @@ -273,7 +287,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me #if (MI_INTPTR_SIZE==8) -#define MI_MAX_ADDRESS ((size_t)20 << 40) // 20TB +#define MI_MAX_ADDRESS ((size_t)40 << 40) // 20TB #else #define MI_MAX_ADDRESS ((size_t)2 << 30) // 2Gb #endif diff --git a/source/luametatex/source/libraries/mimalloc/src/segment.c b/source/luametatex/source/libraries/mimalloc/src/segment.c index dc98e3e7b..3e56d50f5 100644 --- a/source/luametatex/source/libraries/mimalloc/src/segment.c +++ b/source/luametatex/source/libraries/mimalloc/src/segment.c @@ -5,8 +5,8 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" #include // memset #include @@ -316,7 +316,13 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c ptrdiff_t idx = slice - segment->slices; size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE; // make the start not OS page aligned for smaller blocks to avoid page/cache effects - size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 3*MI_MAX_ALIGN_GUARANTEE : 0); + // note: the offset must always be an xblock_size multiple since we assume small allocations + // are aligned (see `mi_heap_malloc_aligned`). + size_t start_offset = 0; + if (xblock_size >= MI_INTPTR_SIZE) { + if (xblock_size <= 64) { start_offset = 3*xblock_size; } + else if (xblock_size <= 512) { start_offset = xblock_size; } + } if (page_size != NULL) { *page_size = psize - start_offset; } return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset); } @@ -391,8 +397,10 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) { if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) { - const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); - if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize); + if (!segment->mem_is_pinned) { + const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size); + if (csize > 0) { _mi_stat_decrease(&_mi_stats_main.committed, csize); } + } _mi_abandoned_await_readers(); // wait until safe to free _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats); } @@ -503,6 +511,7 @@ static bool mi_segment_ensure_committed(mi_segment_t* segment, uint8_t* p, size_ mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask)); // note: assumes commit_mask is always full for huge segments as otherwise the commit mask bits can overflow if (mi_commit_mask_is_full(&segment->commit_mask) && mi_commit_mask_is_empty(&segment->decommit_mask)) return true; // fully committed + mi_assert_internal(segment->kind != MI_SEGMENT_HUGE); return mi_segment_commitx(segment,true,p,size,stats); } @@ -632,7 +641,8 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_ // for huge pages, just mark as free but don't add to the queues if (segment->kind == MI_SEGMENT_HUGE) { - mi_assert_internal(segment->used == 1); // decreased right after this call in `mi_segment_page_clear` + // issue #691: segment->used can be 0 if the huge page block was freed while abandoned (reclaim will get here in that case) + mi_assert_internal((segment->used==0 && slice->xblock_size==0) || segment->used == 1); // decreased right after this call in `mi_segment_page_clear` slice->xblock_size = 0; // mark as free anyways // we should mark the last slice `xblock_size=0` now to maintain invariants but we skip it to // avoid a possible cache miss (and the segment is about to be freed) @@ -795,15 +805,13 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment const size_t extra = align_offset - info_size; // recalculate due to potential guard pages *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices); - //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE); - //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE; } const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE; mi_segment_t* segment = NULL; // get from cache? if (page_alignment == 0) { - segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); + segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, mem_large, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld); } // get from OS @@ -830,7 +838,10 @@ static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment if (!ok) return NULL; // failed to commit mi_commit_mask_set(pcommit_mask, &commit_needed_mask); } - mi_track_mem_undefined(segment,commit_needed); + else if (*is_zero) { + // track zero initialization for valgrind + mi_track_mem_defined(segment, commit_needed * MI_COMMIT_SIZE); + } segment->memid = memid; segment->mem_is_pinned = is_pinned; segment->mem_is_large = mem_large; @@ -874,10 +885,13 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi // zero the segment info? -- not always needed as it may be zero initialized from the OS mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan - if (!is_zero) { - ptrdiff_t ofs = offsetof(mi_segment_t, next); + { + ptrdiff_t ofs = offsetof(mi_segment_t, next); size_t prefix = offsetof(mi_segment_t, slices) - ofs; - memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more + size_t zsize = prefix + (sizeof(mi_slice_t) * (segment_slices + 1)); // one more + if (!is_zero) { + memset((uint8_t*)segment + ofs, 0, zsize); + } } segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed @@ -893,6 +907,10 @@ static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask)); #endif } + else { + segment->decommit_expire = 0; + mi_commit_mask_create_empty( &segment->decommit_mask ); + } // initialize segment info const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices); @@ -954,7 +972,9 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t // Remove the free pages mi_slice_t* slice = &segment->slices[0]; const mi_slice_t* end = mi_segment_slices_end(segment); + #if MI_DEBUG>1 size_t page_count = 0; + #endif while (slice < end) { mi_assert_internal(slice->slice_count > 0); mi_assert_internal(slice->slice_offset == 0); @@ -962,7 +982,9 @@ static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t if (slice->xblock_size == 0 && segment->kind != MI_SEGMENT_HUGE) { mi_segment_span_remove_from_queue(slice, tld); } + #if MI_DEBUG>1 page_count++; + #endif slice = slice + slice->slice_count; } mi_assert_internal(page_count == 2); // first page is allocated by the segment itself @@ -1048,7 +1070,7 @@ We maintain a global list of abandoned segments that are reclaimed on demand. Since this is shared among threads the implementation needs to avoid the A-B-A problem on popping abandoned segments: -We use tagged pointers to avoid accidentially identifying +We use tagged pointers to avoid accidentally identifying reused segments, much like stamped references in Java. Secondly, we maintain a reader counter to avoid resetting or decommitting segments that have a pending read operation. diff --git a/source/luametatex/source/libraries/mimalloc/src/static.c b/source/luametatex/source/libraries/mimalloc/src/static.c index 5b34ddbb6..d992f4daf 100644 --- a/source/luametatex/source/libraries/mimalloc/src/static.c +++ b/source/luametatex/source/libraries/mimalloc/src/static.c @@ -14,26 +14,27 @@ terms of the MIT license. A copy of the license can be found in the file #endif #include "mimalloc.h" -#include "mimalloc-internal.h" +#include "mimalloc/internal.h" // For a static override we create a single object file // containing the whole library. If it is linked first // it will override all the standard library allocation // functions (on Unix's). -#include "stats.c" -#include "random.c" -#include "os.c" -#include "bitmap.c" -#include "arena.c" -#include "segment-cache.c" -#include "segment.c" -#include "page.c" -#include "heap.c" -#include "alloc.c" +#include "alloc.c" // includes alloc-override.c #include "alloc-aligned.c" #include "alloc-posix.c" -#if MI_OSX_ZONE -#include "alloc-override-osx.c" -#endif +#include "arena.c" +#include "bitmap.c" +#include "heap.c" #include "init.c" #include "options.c" +#include "os.c" +#include "page.c" // includes page-queue.c +#include "random.c" +#include "segment.c" +#include "segment-cache.c" +#include "stats.c" +#include "prim/prim.c" +#if MI_OSX_ZONE +#include "prim/osx/alloc-override-zone.c" +#endif diff --git a/source/luametatex/source/libraries/mimalloc/src/stats.c b/source/luametatex/source/libraries/mimalloc/src/stats.c index 2a8b9404f..d2a316818 100644 --- a/source/luametatex/source/libraries/mimalloc/src/stats.c +++ b/source/luametatex/source/libraries/mimalloc/src/stats.c @@ -5,10 +5,11 @@ terms of the MIT license. A copy of the license can be found in the file "LICENSE" at the root of this distribution. -----------------------------------------------------------------------------*/ #include "mimalloc.h" -#include "mimalloc-internal.h" -#include "mimalloc-atomic.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" -#include // fputs, stderr +#include // snprintf #include // memset #if defined(_MSC_VER) && (_MSC_VER < 1920) @@ -291,8 +292,6 @@ static void mi_cdecl mi_buffered_out(const char* msg, void* arg) { // Print statistics //------------------------------------------------------------ -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults); - static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_attr_noexcept { // wrap the output function to be line buffered char buf[256]; @@ -337,15 +336,15 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0) mi_stat_counter_print_avg(&stats->searches, "searches", out, arg); _mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count()); - mi_msecs_t elapsed; - mi_msecs_t user_time; - mi_msecs_t sys_time; + size_t elapsed; + size_t user_time; + size_t sys_time; size_t current_rss; size_t peak_rss; size_t current_commit; size_t peak_commit; size_t page_faults; - mi_stat_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); + mi_process_info(&elapsed, &user_time, &sys_time, ¤t_rss, &peak_rss, ¤t_commit, &peak_commit, &page_faults); _mi_fprintf(out, arg, "%10s: %7ld.%03ld s\n", "elapsed", elapsed/1000, elapsed%1000); _mi_fprintf(out, arg, "%10s: user: %ld.%03ld s, system: %ld.%03ld s, faults: %lu, rss: ", "process", user_time/1000, user_time%1000, sys_time/1000, sys_time%1000, (unsigned long)page_faults ); @@ -404,46 +403,12 @@ void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept { // ---------------------------------------------------------------- // Basic timer for convenience; use milli-seconds to avoid doubles // ---------------------------------------------------------------- -#ifdef _WIN32 -#include -static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { - static LARGE_INTEGER mfreq; // = 0 - if (mfreq.QuadPart == 0LL) { - LARGE_INTEGER f; - QueryPerformanceFrequency(&f); - mfreq.QuadPart = f.QuadPart/1000LL; - if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; - } - return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); -} + +static mi_msecs_t mi_clock_diff; mi_msecs_t _mi_clock_now(void) { - LARGE_INTEGER t; - QueryPerformanceCounter(&t); - return mi_to_msecs(t); -} -#else -#include -#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) -mi_msecs_t _mi_clock_now(void) { - struct timespec t; - #ifdef CLOCK_MONOTONIC - clock_gettime(CLOCK_MONOTONIC, &t); - #else - clock_gettime(CLOCK_REALTIME, &t); - #endif - return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); -} -#else -// low resolution timer -mi_msecs_t _mi_clock_now(void) { - return ((mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000)); + return _mi_prim_clock_now(); } -#endif -#endif - - -static mi_msecs_t mi_clock_diff; mi_msecs_t _mi_clock_start(void) { if (mi_clock_diff == 0.0) { @@ -463,156 +428,27 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) { // Basic process statistics // -------------------------------------------------------- -#if defined(_WIN32) -#include - -static mi_msecs_t filetime_msecs(const FILETIME* ftime) { - ULARGE_INTEGER i; - i.LowPart = ftime->dwLowDateTime; - i.HighPart = ftime->dwHighDateTime; - mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds - return msecs; -} - -typedef struct _PROCESS_MEMORY_COUNTERS { - DWORD cb; - DWORD PageFaultCount; - SIZE_T PeakWorkingSetSize; - SIZE_T WorkingSetSize; - SIZE_T QuotaPeakPagedPoolUsage; - SIZE_T QuotaPagedPoolUsage; - SIZE_T QuotaPeakNonPagedPoolUsage; - SIZE_T QuotaNonPagedPoolUsage; - SIZE_T PagefileUsage; - SIZE_T PeakPagefileUsage; -} PROCESS_MEMORY_COUNTERS; -typedef PROCESS_MEMORY_COUNTERS* PPROCESS_MEMORY_COUNTERS; -typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); -static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - FILETIME ct; - FILETIME ut; - FILETIME st; - FILETIME et; - GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); - *utime = filetime_msecs(&ut); - *stime = filetime_msecs(&st); - - // load psapi on demand - if (pGetProcessMemoryInfo == NULL) { - HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); - if (hDll != NULL) { - pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); - } - } - - // get process info - PROCESS_MEMORY_COUNTERS info; - memset(&info, 0, sizeof(info)); - if (pGetProcessMemoryInfo != NULL) { - pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); - } - *current_rss = (size_t)info.WorkingSetSize; - *peak_rss = (size_t)info.PeakWorkingSetSize; - *current_commit = (size_t)info.PagefileUsage; - *peak_commit = (size_t)info.PeakPagefileUsage; - *page_faults = (size_t)info.PageFaultCount; -} - -#elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__)) -#include -#include -#include - -#if defined(__APPLE__) -#include -#endif - -#if defined(__HAIKU__) -#include -#endif - -static mi_msecs_t timeval_secs(const struct timeval* tv) { - return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); -} - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - struct rusage rusage; - getrusage(RUSAGE_SELF, &rusage); - *utime = timeval_secs(&rusage.ru_utime); - *stime = timeval_secs(&rusage.ru_stime); -#if !defined(__HAIKU__) - *page_faults = rusage.ru_majflt; -#endif - // estimate commit using our stats - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *current_rss = *current_commit; // estimate -#if defined(__HAIKU__) - // Haiku does not have (yet?) a way to - // get these stats per process - thread_info tid; - area_info mem; - ssize_t c; - get_thread_info(find_thread(0), &tid); - while (get_next_area_info(tid.team, &c, &mem) == B_OK) { - *peak_rss += mem.ram_size; - } - *page_faults = 0; -#elif defined(__APPLE__) - *peak_rss = rusage.ru_maxrss; // BSD reports in bytes - struct mach_task_basic_info info; - mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; - if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { - *current_rss = (size_t)info.resident_size; - } -#else - *peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB -#endif -} - -#else -#ifndef __wasi__ -// WebAssembly instances are not processes -#pragma message("define a way to get process info") -#endif - -static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) -{ - *elapsed = _mi_clock_end(mi_process_start); - *peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); - *current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); - *peak_rss = *peak_commit; - *current_rss = *current_commit; - *page_faults = 0; - *utime = 0; - *stime = 0; -} -#endif - - mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept { - mi_msecs_t elapsed = 0; - mi_msecs_t utime = 0; - mi_msecs_t stime = 0; - size_t current_rss0 = 0; - size_t peak_rss0 = 0; - size_t current_commit0 = 0; - size_t peak_commit0 = 0; - size_t page_faults0 = 0; - mi_stat_process_info(&elapsed,&utime, &stime, ¤t_rss0, &peak_rss0, ¤t_commit0, &peak_commit0, &page_faults0); - if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX)); - if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX)); - if (system_msecs!=NULL) *system_msecs = (stime < 0 ? 0 : (stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)stime : PTRDIFF_MAX)); - if (current_rss!=NULL) *current_rss = current_rss0; - if (peak_rss!=NULL) *peak_rss = peak_rss0; - if (current_commit!=NULL) *current_commit = current_commit0; - if (peak_commit!=NULL) *peak_commit = peak_commit0; - if (page_faults!=NULL) *page_faults = page_faults0; + mi_process_info_t pinfo; + _mi_memzero(&pinfo,sizeof(pinfo)); + pinfo.elapsed = _mi_clock_end(mi_process_start); + pinfo.current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current)); + pinfo.peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak)); + pinfo.current_rss = pinfo.current_commit; + pinfo.peak_rss = pinfo.peak_commit; + pinfo.utime = 0; + pinfo.stime = 0; + pinfo.page_faults = 0; + + _mi_prim_process_info(&pinfo); + + if (elapsed_msecs!=NULL) *elapsed_msecs = (pinfo.elapsed < 0 ? 0 : (pinfo.elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.elapsed : PTRDIFF_MAX)); + if (user_msecs!=NULL) *user_msecs = (pinfo.utime < 0 ? 0 : (pinfo.utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.utime : PTRDIFF_MAX)); + if (system_msecs!=NULL) *system_msecs = (pinfo.stime < 0 ? 0 : (pinfo.stime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)pinfo.stime : PTRDIFF_MAX)); + if (current_rss!=NULL) *current_rss = pinfo.current_rss; + if (peak_rss!=NULL) *peak_rss = pinfo.peak_rss; + if (current_commit!=NULL) *current_commit = pinfo.current_commit; + if (peak_commit!=NULL) *peak_commit = pinfo.peak_commit; + if (page_faults!=NULL) *page_faults = pinfo.page_faults; } -- cgit v1.2.3