diff options
Diffstat (limited to 'source/luametatex/source/libraries/mimalloc/src/os.c')
-rw-r--r-- | source/luametatex/source/libraries/mimalloc/src/os.c | 122 |
1 files changed, 79 insertions, 43 deletions
diff --git a/source/luametatex/source/libraries/mimalloc/src/os.c b/source/luametatex/source/libraries/mimalloc/src/os.c index 6d7249873..0f9847417 100644 --- a/source/luametatex/source/libraries/mimalloc/src/os.c +++ b/source/luametatex/source/libraries/mimalloc/src/os.c @@ -88,7 +88,7 @@ static size_t os_alloc_granularity = 4096; // if non-zero, use large page allocation static size_t large_os_page_size = 0; -// is memory overcommit allowed? +// is memory overcommit allowed? // set dynamically in _mi_os_init (and if true we use MAP_NORESERVE) static bool os_overcommit = true; @@ -139,7 +139,7 @@ typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { MiMemExtendedParameterUserPhysicalHandle, MiMemExtendedParameterAttributeFlags, MiMemExtendedParameterMax -} MI_MEM_EXTENDED_PARAMETER_TYPE; +} MI_MEM_EXTENDED_PARAMETER_TYPE; typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; @@ -166,9 +166,11 @@ typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); +typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; +static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; static bool mi_win_enable_large_os_pages(void) { @@ -205,7 +207,7 @@ static bool mi_win_enable_large_os_pages(void) return (ok!=0); } -void _mi_os_init(void) +void _mi_os_init(void) { os_overcommit = false; // get the page size @@ -234,6 +236,7 @@ void _mi_os_init(void) pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); + pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); FreeLibrary(hDll); } if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { @@ -266,9 +269,9 @@ static void os_detect_overcommit(void) { size_t olen = sizeof(val); if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { os_overcommit = (val != 0); - } + } #else - // default: overcommit is true + // default: overcommit is true #endif } @@ -306,10 +309,10 @@ static int mi_madvise(void* addr, size_t length, int advice) { static mi_decl_cache_align _Atomic(uintptr_t)aligned_base; // Return a MI_SEGMENT_SIZE aligned address that is probably available. -// If this returns NULL, the OS will determine the address but on some OS's that may not be +// If this returns NULL, the OS will determine the address but on some OS's that may not be // properly aligned which can be more costly as it needs to be adjusted afterwards. -// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; -// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses +// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization; +// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses // in the middle of the 2TiB - 6TiB address range (see issue #372)) #define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start @@ -383,12 +386,12 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats #endif if (was_committed) { _mi_stat_decrease(&stats->committed, size); } _mi_stat_decrease(&stats->reserved, size); - return !err; + return !err; } /* ----------------------------------------------------------- - Raw allocation on Windows (VirtualAlloc) + Raw allocation on Windows (VirtualAlloc) -------------------------------------------------------------- */ #ifdef _WIN32 @@ -406,7 +409,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); // fall through on error } - } + } #endif // on modern Windows try use VirtualAlloc2 for aligned allocation if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { @@ -464,12 +467,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, -------------------------------------------------------------- */ #elif defined(MI_USE_SBRK) || defined(__wasi__) -#if defined(MI_USE_SBRK) +#if defined(MI_USE_SBRK) static void* mi_memory_grow( size_t size ) { void* p = sbrk(size); if (p == (void*)(-1)) return NULL; #if !defined(__wasi__) // on wasi this is always zero initialized already (?) - memset(p,0,size); + memset(p,0,size); #endif return p; } @@ -477,8 +480,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment, static void* mi_memory_grow( size_t size ) { size_t base = (size > 0 ? __builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) : __builtin_wasm_memory_size(0)); - if (base == SIZE_MAX) return NULL; - return (void*)(base * _mi_os_page_size()); + if (base == SIZE_MAX) return NULL; + return (void*)(base * _mi_os_page_size()); } #endif @@ -490,7 +493,7 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) { void* p = NULL; if (try_alignment <= 1) { // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) - #if defined(MI_USE_PTHREADS) + #if defined(MI_USE_PTHREADS) pthread_mutex_lock(&mi_heap_grow_mutex); #endif p = mi_memory_grow(size); @@ -512,7 +515,7 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) { if (current != NULL) { void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); - base = mi_memory_grow(alloc_size); + base = mi_memory_grow(alloc_size); } } #if defined(MI_USE_PTHREADS) @@ -529,7 +532,7 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) { } } if (p == NULL) { - _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); + _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); errno = ENOMEM; return NULL; } @@ -540,10 +543,10 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) { /* ----------------------------------------------------------- Raw allocation on Unix's (mmap) -------------------------------------------------------------- */ -#else +#else #define MI_OS_USE_MMAP static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { - MI_UNUSED(try_alignment); + MI_UNUSED(try_alignment); #if defined(MAP_ALIGNED) // BSD if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { size_t n = mi_bsr(try_alignment); @@ -574,7 +577,7 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr #endif // regular mmap void* p = mmap(addr, size, protect_flags, flags, fd, 0); - if (p!=MAP_FAILED) return p; + if (p!=MAP_FAILED) return p; // failed to allocate return NULL; } @@ -602,7 +605,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro int flags = MAP_PRIVATE | MAP_ANONYMOUS; if (_mi_os_has_overcommit()) { flags |= MAP_NORESERVE; - } + } #if defined(PROT_MAX) protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD #endif @@ -685,7 +688,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { *is_large = true; } - } + } #endif } } @@ -753,7 +756,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // try first with a hint (this will be aligned directly on Win 10+ or BSD) void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats); if (p == NULL) return NULL; - + // if not aligned, free it, overallocate, and unmap around it if (((uintptr_t)p % alignment != 0)) { mi_os_mem_free(p, size, commit, stats); @@ -765,7 +768,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit, // over-allocate uncommitted (virtual) memory p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats); if (p == NULL) return NULL; - + // set p to the aligned part in the full region // note: this is dangerous on Windows as VirtualFree needs the actual region pointer // but in mi_os_mem_free we handle this (hopefully exceptional) situation. @@ -837,7 +840,45 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ ); } +/* ----------------------------------------------------------- + OS aligned allocation with an offset. This is used + for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc + page where the object can be aligned at an offset from the start of the segment. + As we may need to overallocate, we need to free such pointers using `mi_free_aligned` + to use the actual start of the memory region. +----------------------------------------------------------- */ +void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) { + mi_assert(offset <= MI_SEGMENT_SIZE); + mi_assert(offset <= size); + mi_assert((alignment % _mi_os_page_size()) == 0); + if (offset > MI_SEGMENT_SIZE) return NULL; + if (offset == 0) { + // regular aligned allocation + return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats); + } + else { + // overallocate to align at an offset + const size_t extra = _mi_align_up(offset, alignment) - offset; + const size_t oversize = size + extra; + void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats); + if (start == NULL) return NULL; + void* p = (uint8_t*)start + extra; + mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment)); + // decommit the overallocation at the start + if (commit && extra > _mi_os_page_size()) { + _mi_os_decommit(start, extra, tld_stats); + } + return p; + } +} + +void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) { + mi_assert(align_offset <= MI_SEGMENT_SIZE); + const size_t extra = _mi_align_up(align_offset, alignment) - align_offset; + void* start = (uint8_t*)p - extra; + _mi_os_free_ex(start, size + extra, was_committed, tld_stats); +} /* ----------------------------------------------------------- OS memory API: reset, commit, decommit, protect, unprotect. @@ -916,7 +957,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ // commit: just change the protection err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } - } + } else { // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) const int fd = mi_unix_mmap_fd(); @@ -926,10 +967,10 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ #else // Linux, macOSX and others. if (commit) { - // commit: ensure we can access the area + // commit: ensure we can access the area err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); if (err != 0) { err = errno; } - } + } else { #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) @@ -1008,7 +1049,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats) int oadvice = (int)mi_atomic_load_relaxed(&advice); int err; while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; - if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { + if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); err = mi_madvise(start, csize, MADV_DONTNEED); @@ -1041,13 +1082,8 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) { bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) { MI_UNUSED(tld_stats); mi_stats_t* stats = &_mi_stats_main; - if (mi_option_is_enabled(mi_option_reset_decommits)) { - return mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!) - } - else { - *is_zero = false; - return mi_os_resetx(addr, size, false, stats); - } + *is_zero = false; + return mi_os_resetx(addr, size, false, stats); } */ @@ -1156,7 +1192,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node) params[0].Arg.ULong = (unsigned)numa_node; return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1); } - + // otherwise use regular virtual alloc on older windows return VirtualAlloc(addr, size, flags, PAGE_READWRITE); } @@ -1305,7 +1341,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) { /* ---------------------------------------------------------------------------- Support NUMA aware allocation -----------------------------------------------------------------------------*/ -#ifdef _WIN32 +#ifdef _WIN32 static size_t mi_os_numa_nodex(void) { USHORT numa_node = 0; if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { @@ -1314,14 +1350,14 @@ static size_t mi_os_numa_nodex(void) { (*pGetCurrentProcessorNumberEx)(&pnum); USHORT nnode = 0; BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); - if (ok) numa_node = nnode; + if (ok) { numa_node = nnode; } } - else { + else if (pGetNumaProcessorNode != NULL) { // Vista or earlier, use older API that is limited to 64 processors. Issue #277 DWORD pnum = GetCurrentProcessorNumber(); UCHAR nnode = 0; - BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode); - if (ok) numa_node = nnode; + BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); + if (ok) { numa_node = nnode; } } return numa_node; } @@ -1425,7 +1461,7 @@ size_t _mi_os_numa_node_count_get(void) { else { count = mi_os_numa_node_countx(); // or detect dynamically if (count == 0) count = 1; - } + } mi_atomic_store_release(&_mi_numa_node_count, count); // save it _mi_verbose_message("using %zd numa regions\n", count); } |