Diffstat (limited to 'source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h')
-rw-r--r-- | source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h | 77
1 file changed, 50 insertions, 27 deletions
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h
index 550b65433..a68e69662 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h
@@ -41,10 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file
 #if defined(__cplusplus)
 #define mi_decl_externc extern "C"
 #else
-#define mi_decl_externc
+#define mi_decl_externc
 #endif
-#if !defined(_WIN32) && !defined(__wasi__)
+#if !defined(_WIN32) && !defined(__wasi__)
 #define MI_USE_PTHREADS
 #include <pthread.h>
 #endif
@@ -60,6 +60,8 @@ void _mi_error_message(int err, const char* fmt, ...);
 
 // random.c
 void _mi_random_init(mi_random_ctx_t* ctx);
+void _mi_random_init_weak(mi_random_ctx_t* ctx);
+void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx);
 void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
 uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
 uintptr_t _mi_heap_random_next(mi_heap_t* heap);
@@ -87,11 +89,15 @@ bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
 // bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
 size_t _mi_os_good_alloc_size(size_t size);
 bool _mi_os_has_overcommit(void);
+bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
+
+void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
+void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
 
 // arena.c
-void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
 void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
-void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld);
+void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
 mi_arena_id_t _mi_arena_id_none(void);
 bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id);
 
@@ -99,16 +105,22 @@ bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id)
 void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
 bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
 void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
+void _mi_segment_cache_free_all(mi_os_tld_t* tld);
 void _mi_segment_map_allocated_at(const mi_segment_t* segment);
 void _mi_segment_map_freed_at(const mi_segment_t* segment);
 
 // "segment.c"
-mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
+mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
 void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
 void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
 bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
 void _mi_segment_thread_collect(mi_segments_tld_t* tld);
+
+#if MI_HUGE_PAGE_ABANDON
 void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
+#else
+void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
+#endif
 
 uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
 void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
@@ -118,7 +130,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t*
 
 // "page.c"
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept mi_attr_malloc;
+void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
 
 void _mi_page_retire(mi_page_t* page) mi_attr_noexcept;  // free the page if there are no other pages with many free blocks
 void _mi_page_unfull(mi_page_t* page);
@@ -144,6 +156,7 @@ void _mi_heap_destroy_pages(mi_heap_t* heap);
 void _mi_heap_collect_abandon(mi_heap_t* heap);
 void _mi_heap_set_default_direct(mi_heap_t* heap);
 bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid);
+void _mi_heap_destroy_all(void);
 
 // "stats.c"
 void _mi_stats_done(mi_stats_t* stats);
@@ -155,9 +168,11 @@ mi_msecs_t _mi_clock_start(void);
 // "alloc.c"
 void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept;  // called from `_mi_malloc_generic`
 void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
+void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept;  // called from `_mi_heap_malloc_aligned`
 void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
 mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
 bool _mi_free_delayed_block(mi_block_t* block);
+void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept;  // for runtime integration
 
 #if MI_DEBUG>1
 bool _mi_page_is_valid(mi_page_t* page);
@@ -345,14 +360,14 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea
 
 #if defined(MI_MALLOC_OVERRIDE)
 #if defined(__APPLE__) // macOS
-#define MI_TLS_SLOT 89 // seems unused?
-// #define MI_TLS_RECURSE_GUARD 1
+#define MI_TLS_SLOT 89 // seems unused?
+// #define MI_TLS_RECURSE_GUARD 1
 // other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
 // see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
 #elif defined(__OpenBSD__)
-// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
+// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
 // see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
-#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
+#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
 // #elif defined(__DragonFly__)
 // #warning "mimalloc is not working correctly on DragonFly yet."
 // #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
@@ -392,7 +407,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
 #ifdef __GNUC__
   __asm("");  // prevent conditional load of the address of _mi_heap_empty
 #endif
-  heap = (mi_heap_t*)&_mi_heap_empty;
+  heap = (mi_heap_t*)&_mi_heap_empty;
   }
   return heap;
 #elif defined(MI_TLS_PTHREAD_SLOT_OFS)
@@ -402,7 +417,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
   mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
   return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
 #else
-  #if defined(MI_TLS_RECURSE_GUARD)
+  #if defined(MI_TLS_RECURSE_GUARD)
   if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
   #endif
   return _mi_heap_default;
@@ -445,9 +460,12 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) {
 }
 
 // Segment that contains the pointer
+// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
+// and we need align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
+// therefore we align one byte before `p`.
 static inline mi_segment_t* _mi_ptr_segment(const void* p) {
-  // mi_assert_internal(p != NULL);
-  return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK);
+  mi_assert_internal(p != NULL);
+  return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
 }
 
 static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
@@ -475,12 +493,13 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
   return start;
 }
 
-// Get the page containing the pointer
+// Get the page containing the pointer (performance critical as it is called in mi_free)
 static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
+  mi_assert_internal(p > (void*)segment);
   ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
-  mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE);
+  mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE);
   size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
-  mi_assert_internal(idx < segment->slice_entries);
+  mi_assert_internal(idx <= segment->slice_entries);
   mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
   mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data
   mi_assert_internal(slice->slice_offset == 0);
@@ -512,6 +531,10 @@ static inline size_t mi_page_block_size(const mi_page_t* page) {
   }
 }
 
+static inline bool mi_page_is_huge(const mi_page_t* page) {
+  return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
+}
+
 // Get the usable block size of a page without fixed padding.
 // This may still include internal padding due to alignment and rounding up size classes.
 static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
@@ -683,7 +706,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl
   next = (mi_block_t*)block->next;
 #endif
   mi_track_mem_noaccess(block,sizeof(mi_block_t));
-  return next;
+  return next;
 }
 
 static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
@@ -825,7 +848,7 @@ static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
   return (uintptr_t)NtCurrentTeb();
 }
 
-// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
+// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
 // both the OS and libc implementation so we use specific tests for each main platform.
 // If you test on another platform and it works please send a PR :-)
 // see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
@@ -938,7 +961,7 @@ static inline size_t mi_ctz(uintptr_t x) {
 #endif
 }
 
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER)
 
 #include <limits.h> // LONG_MAX
 #define MI_HAVE_FAST_BITSCAN
@@ -949,7 +972,7 @@ static inline size_t mi_clz(uintptr_t x) {
   _BitScanReverse(&idx, x);
 #else
   _BitScanReverse64(&idx, x);
-#endif
+#endif
   return ((MI_INTPTR_BITS - 1) - idx);
 }
 static inline size_t mi_ctz(uintptr_t x) {
@@ -959,7 +982,7 @@ static inline size_t mi_ctz(uintptr_t x) {
   _BitScanForward(&idx, x);
 #else
   _BitScanForward64(&idx, x);
-#endif
+#endif
   return idx;
 }
 
@@ -989,7 +1012,7 @@ static inline size_t mi_clz32(uint32_t x) {
 }
 
 static inline size_t mi_clz(uintptr_t x) {
-  if (x==0) return MI_INTPTR_BITS;
+  if (x==0) return MI_INTPTR_BITS;
 #if (MI_INTPTR_BITS <= 32)
   return mi_clz32((uint32_t)x);
 #else
@@ -1020,9 +1043,9 @@ static inline size_t mi_bsr(uintptr_t x) {
 // ---------------------------------------------------------------------------------
 // Provide our own `_mi_memcpy` for potential performance optimizations.
 //
-// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
-// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
-// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
+// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
+// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
+// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
 // ---------------------------------------------------------------------------------
 
 #if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
@@ -1057,7 +1080,7 @@ static inline void _mi_memzero(void* dst, size_t n) {
 
 
 // -------------------------------------------------------------------------------
-// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
+// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
 // This is used for example in `mi_realloc`.
 // -------------------------------------------------------------------------------
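A note on the `_mi_ptr_segment` hunk above: with the new over-aligned allocation support, a large aligned block can start exactly on an N*MI_SEGMENT_SIZE boundary inside a huge segment, so masking the pointer directly would land on the block itself rather than on the segment info that, per the added comment, sits MI_SEGMENT_SIZE bytes before it; stepping back one byte before masking fixes that, while ordinary interior pointers are unaffected. The same shift is presumably why `_mi_segment_page_of` now asserts `diff > 0 && diff <= MI_SEGMENT_SIZE` and `idx <= segment->slice_entries`. A minimal standalone sketch of the arithmetic, using made-up stand-in constants rather than the real mimalloc definitions:

#include <assert.h>
#include <stdint.h>

/* Stand-in constants for illustration only; the real MI_SEGMENT_SIZE and
   MI_SEGMENT_MASK come from the mimalloc headers. */
#define SEGMENT_SIZE ((uintptr_t)(32u * 1024u * 1024u))
#define SEGMENT_MASK (SEGMENT_SIZE - 1)

/* Old form: mask the pointer directly. A block that starts exactly on a
   segment boundary maps to its own address, not to the owning segment. */
static uintptr_t segment_of_old(uintptr_t p) {
  return p & ~SEGMENT_MASK;
}

/* New form (as in this patch): step back one byte before masking, so a
   boundary-aligned block resolves to the SEGMENT_SIZE-aligned address just
   before it, where the segment info lives. */
static uintptr_t segment_of_new(uintptr_t p) {
  return (p - 1) & ~SEGMENT_MASK;
}

int main(void) {
  uintptr_t segment  = 4 * SEGMENT_SIZE;        /* start of a huge segment            */
  uintptr_t block    = segment + SEGMENT_SIZE;  /* block aligned at the next boundary */
  uintptr_t interior = segment + 100;           /* ordinary pointer inside the segment */

  assert(segment_of_old(block) == block);       /* old: misses the segment info  */
  assert(segment_of_new(block) == segment);     /* new: finds the owning segment */

  assert(segment_of_old(interior) == segment);  /* interior pointers: both agree */
  assert(segment_of_new(interior) == segment);
  return 0;
}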