diff options
Diffstat (limited to 'source/luametatex/source/libraries/mimalloc/include')
6 files changed, 241 insertions, 95 deletions
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h index 7ad5da585..c66f80493 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h @@ -11,9 +11,9 @@ terms of the MIT license. A copy of the license can be found in the file // -------------------------------------------------------------------------------------------- // Atomics // We need to be portable between C, C++, and MSVC. -// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. -// This is why we try to use only `uintptr_t` and `<type>*` as atomic types. -// To gain better insight in the range of used atomics, we use explicitly named memory order operations +// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode. +// This is why we try to use only `uintptr_t` and `<type>*` as atomic types. +// To gain better insight in the range of used atomics, we use explicitly named memory order operations // instead of passing the memory order as a parameter. // ----------------------------------------------------------------------------------------------- @@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file #endif #elif defined(_MSC_VER) // Use MSVC C wrapper for C11 atomics -#define _Atomic(tp) tp +#define _Atomic(tp) tp #define MI_ATOMIC_VAR_INIT(x) x #define mi_atomic(name) mi_atomic_##name #define mi_memory_order(name) mi_memory_order_##name @@ -275,7 +275,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) { return (intptr_t)mi_atomic_addi(p, -sub); } -// Yield +// Yield #if defined(__cplusplus) #include <thread> static inline void mi_atomic_yield(void) { diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h index 550b65433..a68e69662 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h @@ -41,10 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file #if defined(__cplusplus) #define mi_decl_externc extern "C" #else -#define mi_decl_externc +#define mi_decl_externc #endif -#if !defined(_WIN32) && !defined(__wasi__) +#if !defined(_WIN32) && !defined(__wasi__) #define MI_USE_PTHREADS #include <pthread.h> #endif @@ -60,6 +60,8 @@ void _mi_error_message(int err, const char* fmt, ...); // random.c void _mi_random_init(mi_random_ctx_t* ctx); +void _mi_random_init_weak(mi_random_ctx_t* ctx); +void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx); void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx); uintptr_t _mi_random_next(mi_random_ctx_t* ctx); uintptr_t _mi_heap_random_next(mi_heap_t* heap); @@ -87,11 +89,15 @@ bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats); // bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats); size_t _mi_os_good_alloc_size(size_t size); bool _mi_os_has_overcommit(void); +bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats); + +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats); +void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats); // arena.c -void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); +void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); -void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld); +void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats); mi_arena_id_t _mi_arena_id_none(void); bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id); @@ -99,16 +105,22 @@ bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id) void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld); bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld); void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld); +void _mi_segment_cache_free_all(mi_os_tld_t* tld); void _mi_segment_map_allocated_at(const mi_segment_t* segment); void _mi_segment_map_freed_at(const mi_segment_t* segment); // "segment.c" -mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); +mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld); void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld); void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld); bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld); void _mi_segment_thread_collect(mi_segments_tld_t* tld); + +#if MI_HUGE_PAGE_ABANDON void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); +#else +void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block); +#endif uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld); @@ -118,7 +130,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* // "page.c" -void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept mi_attr_malloc; +void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc; void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks void _mi_page_unfull(mi_page_t* page); @@ -144,6 +156,7 @@ void _mi_heap_destroy_pages(mi_heap_t* heap); void _mi_heap_collect_abandon(mi_heap_t* heap); void _mi_heap_set_default_direct(mi_heap_t* heap); bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid); +void _mi_heap_destroy_all(void); // "stats.c" void _mi_stats_done(mi_stats_t* stats); @@ -155,9 +168,11 @@ mi_msecs_t _mi_clock_start(void); // "alloc.c" void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic` void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept; +void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned` void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept; mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p); bool _mi_free_delayed_block(mi_block_t* block); +void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration #if MI_DEBUG>1 bool _mi_page_is_valid(mi_page_t* page); @@ -345,14 +360,14 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea #if defined(MI_MALLOC_OVERRIDE) #if defined(__APPLE__) // macOS -#define MI_TLS_SLOT 89 // seems unused? -// #define MI_TLS_RECURSE_GUARD 1 +#define MI_TLS_SLOT 89 // seems unused? +// #define MI_TLS_RECURSE_GUARD 1 // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89) // see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h> #elif defined(__OpenBSD__) -// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) +// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16) // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371> -#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) +#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24) // #elif defined(__DragonFly__) // #warning "mimalloc is not working correctly on DragonFly yet." // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458> @@ -392,7 +407,7 @@ static inline mi_heap_t* mi_get_default_heap(void) { #ifdef __GNUC__ __asm(""); // prevent conditional load of the address of _mi_heap_empty #endif - heap = (mi_heap_t*)&_mi_heap_empty; + heap = (mi_heap_t*)&_mi_heap_empty; } return heap; #elif defined(MI_TLS_PTHREAD_SLOT_OFS) @@ -402,7 +417,7 @@ static inline mi_heap_t* mi_get_default_heap(void) { mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key)); return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap); #else - #if defined(MI_TLS_RECURSE_GUARD) + #if defined(MI_TLS_RECURSE_GUARD) if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get(); #endif return _mi_heap_default; @@ -445,9 +460,12 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) { } // Segment that contains the pointer +// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE), +// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it; +// therefore we align one byte before `p`. static inline mi_segment_t* _mi_ptr_segment(const void* p) { - // mi_assert_internal(p != NULL); - return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK); + mi_assert_internal(p != NULL); + return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK); } static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) { @@ -475,12 +493,13 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) { return start; } -// Get the page containing the pointer +// Get the page containing the pointer (performance critical as it is called in mi_free) static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) { + mi_assert_internal(p > (void*)segment); ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment; - mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE); + mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE); size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT; - mi_assert_internal(idx < segment->slice_entries); + mi_assert_internal(idx <= segment->slice_entries); mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx]; mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data mi_assert_internal(slice->slice_offset == 0); @@ -512,6 +531,10 @@ static inline size_t mi_page_block_size(const mi_page_t* page) { } } +static inline bool mi_page_is_huge(const mi_page_t* page) { + return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE); +} + // Get the usable block size of a page without fixed padding. // This may still include internal padding due to alignment and rounding up size classes. static inline size_t mi_page_usable_block_size(const mi_page_t* page) { @@ -683,7 +706,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl next = (mi_block_t*)block->next; #endif mi_track_mem_noaccess(block,sizeof(mi_block_t)); - return next; + return next; } static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) { @@ -825,7 +848,7 @@ static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept { return (uintptr_t)NtCurrentTeb(); } -// We use assembly for a fast thread id on the main platforms. The TLS layout depends on +// We use assembly for a fast thread id on the main platforms. The TLS layout depends on // both the OS and libc implementation so we use specific tests for each main platform. // If you test on another platform and it works please send a PR :-) // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register. @@ -938,7 +961,7 @@ static inline size_t mi_ctz(uintptr_t x) { #endif } -#elif defined(_MSC_VER) +#elif defined(_MSC_VER) #include <limits.h> // LONG_MAX #define MI_HAVE_FAST_BITSCAN @@ -949,7 +972,7 @@ static inline size_t mi_clz(uintptr_t x) { _BitScanReverse(&idx, x); #else _BitScanReverse64(&idx, x); -#endif +#endif return ((MI_INTPTR_BITS - 1) - idx); } static inline size_t mi_ctz(uintptr_t x) { @@ -959,7 +982,7 @@ static inline size_t mi_ctz(uintptr_t x) { _BitScanForward(&idx, x); #else _BitScanForward64(&idx, x); -#endif +#endif return idx; } @@ -989,7 +1012,7 @@ static inline size_t mi_clz32(uint32_t x) { } static inline size_t mi_clz(uintptr_t x) { - if (x==0) return MI_INTPTR_BITS; + if (x==0) return MI_INTPTR_BITS; #if (MI_INTPTR_BITS <= 32) return mi_clz32((uint32_t)x); #else @@ -1020,9 +1043,9 @@ static inline size_t mi_bsr(uintptr_t x) { // --------------------------------------------------------------------------------- // Provide our own `_mi_memcpy` for potential performance optimizations. // -// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if -// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support -// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. +// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if +// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support +// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253. // --------------------------------------------------------------------------------- #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64)) @@ -1057,7 +1080,7 @@ static inline void _mi_memzero(void* dst, size_t n) { // ------------------------------------------------------------------------------- -// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned +// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned // This is used for example in `mi_realloc`. // ------------------------------------------------------------------------------- diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-new-delete.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-new-delete.h index 1c12fad2f..c16f4a665 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-new-delete.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-new-delete.h @@ -22,17 +22,26 @@ terms of the MIT license. A copy of the license can be found in the file #include <new> #include <mimalloc.h> + #if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_) + // stay consistent with VCRT definitions + #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n) + #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n) + #else + #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict + #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict + #endif + void operator delete(void* p) noexcept { mi_free(p); }; void operator delete[](void* p) noexcept { mi_free(p); }; void operator delete (void* p, const std::nothrow_t&) noexcept { mi_free(p); } void operator delete[](void* p, const std::nothrow_t&) noexcept { mi_free(p); } - void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } - void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } + mi_decl_new(n) void* operator new(std::size_t n) noexcept(false) { return mi_new(n); } + mi_decl_new(n) void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); } - void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } - void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + mi_decl_new_nothrow(n) void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } + mi_decl_new_nothrow(n) void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); } #if (__cplusplus >= 201402L || _MSC_VER >= 1916) void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); }; @@ -46,7 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); }; void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); } - + void* operator new (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); } void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); } diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h index bb9df4fa3..f60d7acd0 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h @@ -10,33 +10,52 @@ terms of the MIT license. A copy of the license can be found in the file // ------------------------------------------------------ // Track memory ranges with macros for tools like Valgrind -// or other memory checkers. +// address sanitizer, or other memory checkers. // ------------------------------------------------------ #if MI_VALGRIND #define MI_TRACK_ENABLED 1 +#define MI_TRACK_TOOL "valgrind" #include <valgrind/valgrind.h> #include <valgrind/memcheck.h> #define mi_track_malloc(p,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero) -#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) +#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/) #define mi_track_free(p) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/) +#define mi_track_free_size(p,_size) mi_track_free(p) #define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size) #define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size) #define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size) +#elif MI_ASAN + +#define MI_TRACK_ENABLED 1 +#define MI_TRACK_TOOL "asan" + +#include <sanitizer/asan_interface.h> + +#define mi_track_malloc(p,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_resize(p,oldsize,newsize) ASAN_POISON_MEMORY_REGION(p,oldsize); ASAN_UNPOISON_MEMORY_REGION(p,newsize) +#define mi_track_free(p) ASAN_POISON_MEMORY_REGION(p,mi_usable_size(p)) +#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size) +#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size) +#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size) + #else #define MI_TRACK_ENABLED 0 - -#define mi_track_malloc(p,size,zero) -#define mi_track_resize(p,oldsize,newsize) -#define mi_track_free(p) -#define mi_track_mem_defined(p,size) -#define mi_track_mem_undefined(p,size) -#define mi_track_mem_noaccess(p,size) +#define MI_TRACK_TOOL "none" + +#define mi_track_malloc(p,size,zero) +#define mi_track_resize(p,oldsize,newsize) +#define mi_track_free(p) +#define mi_track_free_size(p,_size) +#define mi_track_mem_defined(p,size) +#define mi_track_mem_undefined(p,size) +#define mi_track_mem_noaccess(p,size) #endif diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h index 1387a7200..f3af528e5 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h @@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifdef _MSC_VER #pragma warning(disable:4214) // bitfield is not int -#endif +#endif // Minimal alignment necessary. On most platforms 16 bytes are needed // due to SSE registers for example. This must be at least `sizeof(void*)` @@ -71,6 +71,13 @@ terms of the MIT license. A copy of the license can be found in the file #endif +// We used to abandon huge pages but to eagerly deallocate if freed from another thread, +// but that makes it not possible to visit them during a heap walk or include them in a +// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from +// another thread so most memory is available until it gets properly freed by the owning thread. +// #define MI_HUGE_PAGE_ABANDON 1 + + // ------------------------------------------------------ // Platform specific values // ------------------------------------------------------ @@ -135,7 +142,7 @@ typedef int32_t mi_ssize_t; #define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit) #if MI_INTPTR_SIZE > 4 -#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB +#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB #else #define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit #endif @@ -147,7 +154,7 @@ typedef int32_t mi_ssize_t; // Derived constants #define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT) #define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE -#define MI_SEGMENT_MASK (MI_SEGMENT_SIZE - 1) +#define MI_SEGMENT_MASK (MI_SEGMENT_ALIGN - 1) #define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT) #define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024 @@ -166,12 +173,6 @@ typedef int32_t mi_ssize_t; #if (MI_MEDIUM_OBJ_WSIZE_MAX >= 655360) #error "mimalloc internal: define more bins" #endif -#if (MI_ALIGNMENT_MAX > MI_SEGMENT_SIZE/2) -#error "mimalloc internal: the max aligned boundary is too large for the segment size" -#endif -#if (MI_ALIGNED_MAX % MI_SEGMENT_SLICE_SIZE != 0) -#error "mimalloc internal: the max aligned boundary must be an integral multiple of the segment slice size" -#endif // Maximum slice offset (15) #define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1) @@ -182,7 +183,8 @@ typedef int32_t mi_ssize_t; // blocks up to this size are always allocated aligned #define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE) - +// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments +#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1) // ------------------------------------------------------ @@ -253,49 +255,50 @@ typedef uintptr_t mi_thread_free_t; // We don't count `freed` (as |free|) but use `used` to reduce // the number of memory accesses in the `mi_page_all_free` function(s). // -// Notes: +// Notes: // - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`) // - Using `uint16_t` does not seem to slow things down // - The size is 8 words on 64-bit which helps the page index calculations -// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 +// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10 // and 12 are still good for address calculation) -// - To limit the structure size, the `xblock_size` is 32-bits only; for +// - To limit the structure size, the `xblock_size` is 32-bits only; for // blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size // - `thread_free` uses the bottom bits as a delayed-free flags to optimize // concurrent frees where only the first concurrent free adds to the owning // heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`). // The invariant is that no-delayed-free is only set if there is -// at least one block that will be added, or as already been added, to +// at least one block that will be added, or as already been added, to // the owning heap `thread_delayed_free` list. This guarantees that pages // will be freed correctly even if only other threads free blocks. typedef struct mi_page_s { // "owned" by the segment uint32_t slice_count; // slices in this page (0 if not a page) uint32_t slice_offset; // distance from the actual page data slice (0 if a page) - uint8_t is_reset : 1; // `true` if the page memory was reset - uint8_t is_committed : 1; // `true` if the page virtual memory is committed - uint8_t is_zero_init : 1; // `true` if the page was zero initialized + uint8_t is_reset : 1; // `true` if the page memory was reset + uint8_t is_committed : 1; // `true` if the page virtual memory is committed + uint8_t is_zero_init : 1; // `true` if the page was zero initialized // layout like this to optimize access in `mi_malloc` and `mi_free` uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear` uint16_t reserved; // number of blocks reserved in memory mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits) - uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized - uint8_t retire_expire : 7; // expiration count for retired blocks + uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized + uint8_t retire_expire : 7; // expiration count for retired blocks mi_block_t* free; // list of available free blocks (`malloc` allocates from this list) + uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) + uint32_t xblock_size; // size available in each block (always `>0`) + mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) + #ifdef MI_ENCODE_FREELIST uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`) #endif - uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`) - uint32_t xblock_size; // size available in each block (always `>0`) - mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`) _Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads _Atomic(uintptr_t) xheap; - struct mi_page_s* next; // next page owned by this thread with the same `block_size` - struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` + struct mi_page_s* next; // next page owned by this thread with the same `block_size` + struct mi_page_s* prev; // previous page owned by this thread with the same `block_size` // 64-bit 9 words, 32-bit 12 words, (+2 for secure) #if MI_INTPTR_SIZE==8 @@ -329,7 +332,7 @@ typedef enum mi_segment_kind_e { // is still tracked in fine-grained MI_COMMIT_SIZE chunks) // ------------------------------------------------------ -#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB) +#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB #define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB #define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE) #define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS @@ -355,6 +358,8 @@ typedef struct mi_segment_s { bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages) bool mem_is_large; // in large/huge os pages? bool mem_is_committed; // `true` if the whole segment is eagerly committed + size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX) + size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX) bool allow_decommit; mi_msecs_t decommit_expire; @@ -376,9 +381,10 @@ typedef struct mi_segment_s { // layout like this to optimize access in `mi_free` mi_segment_kind_t kind; - _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT` - mi_slice_t slices[MI_SLICES_PER_SEGMENT]; + _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment + + mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment } mi_segment_t; @@ -412,6 +418,7 @@ typedef struct mi_random_cxt_s { uint32_t input[16]; uint32_t output[16]; int output_available; + bool weak; } mi_random_ctx_t; diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc.h index 32eab19ea..9b72fbfda 100644 --- a/source/luametatex/source/libraries/mimalloc/include/mimalloc.h +++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc.h @@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file #ifndef MIMALLOC_H #define MIMALLOC_H -#define MI_MALLOC_VERSION 207 // major + 2 digits minor +#define MI_MALLOC_VERSION 209 // major + 2 digits minor // ------------------------------------------------------ // Compiler specific attributes @@ -28,6 +28,8 @@ terms of the MIT license. A copy of the license can be found in the file #define mi_decl_nodiscard [[nodiscard]] #elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl #define mi_decl_nodiscard __attribute__((warn_unused_result)) +#elif defined(_HAS_NODISCARD) + #define mi_decl_nodiscard _NODISCARD #elif (_MSC_VER >= 1700) #define mi_decl_nodiscard _Check_return_ #else @@ -158,8 +160,8 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept; mi_decl_export void mi_thread_done(void) mi_attr_noexcept; mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept; -mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, - size_t* current_rss, size_t* peak_rss, +mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs, + size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept; // ------------------------------------------------------------------------------------- @@ -167,11 +169,6 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s // Note that `alignment` always follows `size` for consistency with unaligned // allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`. // ------------------------------------------------------------------------------------- -#if (INTPTR_MAX > INT32_MAX) -#define MI_ALIGNMENT_MAX (16*1024*1024UL) // maximum supported alignment is 16MiB -#else -#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment for 32-bit systems is 1MiB -#endif mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2); mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1); @@ -288,6 +285,7 @@ mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allo mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept; #if MI_MALLOC_VERSION >= 200 +// Create a heap that only allocates in the specified arena mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id); #endif @@ -347,6 +345,7 @@ typedef enum mi_option_e { mi_option_allow_decommit, mi_option_segment_decommit_delay, mi_option_decommit_extend_delay, + mi_option_destroy_on_exit, _mi_option_last } mi_option_t; @@ -405,6 +404,9 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, s mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2); mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2); +mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3); + #ifdef __cplusplus } #endif @@ -422,7 +424,7 @@ mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, #include <utility> // std::forward #endif -template<class T> struct mi_stl_allocator { +template<class T> struct _mi_stl_allocator_common { typedef T value_type; typedef std::size_t size_type; typedef std::ptrdiff_t difference_type; @@ -430,6 +432,27 @@ template<class T> struct mi_stl_allocator { typedef value_type const& const_reference; typedef value_type* pointer; typedef value_type const* const_pointer; + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using propagate_on_container_copy_assignment = std::true_type; + using propagate_on_container_move_assignment = std::true_type; + using propagate_on_container_swap = std::true_type; + template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); } + template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); } + #else + void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } + void destroy(pointer p) { p->~value_type(); } + #endif + + size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } + pointer address(reference x) const { return &x; } + const_pointer address(const_reference x) const { return &x; } +}; + +template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> { + using typename _mi_stl_allocator_common<T>::size_type; + using typename _mi_stl_allocator_common<T>::value_type; + using typename _mi_stl_allocator_common<T>::pointer; template <class U> struct rebind { typedef mi_stl_allocator<U> other; }; mi_stl_allocator() mi_attr_noexcept = default; @@ -446,24 +469,89 @@ template<class T> struct mi_stl_allocator { #endif #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 - using propagate_on_container_copy_assignment = std::true_type; - using propagate_on_container_move_assignment = std::true_type; - using propagate_on_container_swap = std::true_type; - using is_always_equal = std::true_type; - template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); } - template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); } - #else - void construct(pointer p, value_type const& val) { ::new(p) value_type(val); } - void destroy(pointer p) { p->~value_type(); } + using is_always_equal = std::true_type; #endif - - size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); } - pointer address(reference x) const { return &x; } - const_pointer address(const_reference x) const { return &x; } }; template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; } template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; } + + +#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11 +#include <memory> // std::shared_ptr + +// Common base class for STL allocators in a specific heap +template<class T, bool destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> { + using typename _mi_stl_allocator_common<T>::size_type; + using typename _mi_stl_allocator_common<T>::value_type; + using typename _mi_stl_allocator_common<T>::pointer; + + _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp) { } /* will not delete nor destroy the passed in heap */ + + #if (__cplusplus >= 201703L) // C++17 + mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); } + mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); } + #else + mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); } + #endif + + #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11 + using is_always_equal = std::false_type; + #endif + + void collect(bool force) { mi_heap_collect(this->heap.get(), force); } + template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, destroy>& x) const { return (this->heap == x.heap); } + +protected: + std::shared_ptr<mi_heap_t> heap; + template<class U, bool D> friend struct _mi_heap_stl_allocator_common; + + _mi_heap_stl_allocator_common() { + mi_heap_t* hp = mi_heap_new(); + this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */ + } + _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { } + template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, destroy>& x) mi_attr_noexcept : heap(x.heap) { } + +private: + static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } } + static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } } +}; + +// STL allocator allocation in a specific heap +template<class T> struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common<T, false> { + using typename _mi_heap_stl_allocator_common<T, false>::size_type; + mi_heap_stl_allocator() : _mi_heap_stl_allocator_common<T, false>() { } // creates fresh heap that is deleted when the destructor is called + mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, false>(hp) { } // no delete nor destroy on the passed in heap + template<class U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, false>(x) { } + + mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T* p, size_type) { mi_free(p); } + template<class U> struct rebind { typedef mi_heap_stl_allocator<U> other; }; +}; + +template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); } +template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); } + + +// STL allocator allocation in a specific heap, where `free` does nothing and +// the heap is destroyed in one go on destruction -- use with care! +template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T, true> { + using typename _mi_heap_stl_allocator_common<T, true>::size_type; + mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common<T, true>() { } // creates fresh heap that is destroyed when the destructor is called + mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, true>(hp) { } // no delete nor destroy on the passed in heap + template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, true>(x) { } + + mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; } + void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ } + template<class U> struct rebind { typedef mi_heap_destroy_stl_allocator<U> other; }; +}; + +template<class T1, class T2> bool operator==(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); } +template<class T1, class T2> bool operator!=(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); } + +#endif // C++11 + #endif // __cplusplus #endif |