/* ----------------------------------------------------------------------------
Copyright (c) 2019-2020, Microsoft Research, Daan Leijen
This is free software; you can redistribute it and/or modify it under the
terms of the MIT license. A copy of the license can be found in the file
"LICENSE" at the root of this distribution.
-----------------------------------------------------------------------------*/

/* ----------------------------------------------------------------------------
This implements a layer between the raw OS memory (VirtualAlloc/mmap/sbrk/..)
and the segment and huge object allocation by mimalloc. There may be multiple
implementations of this (one could be the identity going directly to the OS,
another could be a simple cache etc), but the current one uses large "regions".
In contrast to the rest of mimalloc, the "regions" are shared between threads
and need to be accessed using atomic operations.

We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
   to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
   an OS allocation/free is still (much) too expensive relative to the accesses
   in that object :-( (`malloc-large` tests this). This means we need a cheaper
   way to reuse memory.
3. This layer allows for NUMA aware allocation.

Possible issues:
- (2) can potentially be addressed too with a small cache per thread which is much
  simpler. Generally though that requires shrinking of huge pages, and may overuse
  memory per thread. (and is not compatible with `sbrk`).
- Since the current regions are per-process, we need atomic operations to claim
  blocks, which may be contended.
- In the worst case, we need to search the whole region map (16KiB for 256GiB)
  linearly. At what point will direct OS calls be faster? Is there a way to
  do this better without adding too much complexity?
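
For scale (a rough worked example, using the default 64-bit constants defined
below): 256GiB of heap space divided into 256MiB regions gives at most 1024
region descriptors of 8 machine words (64 bytes) each, and a worst-case claim
scans on the order of a thousand `in_use` bitmap fields before falling back to
a fresh region.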
-----------------------------------------------------------------------------*/
#include "mimalloc.h"
#include "mimalloc-internal.h"
#include "mimalloc-atomic.h"

#include <string.h>  // memset

#include "bitmap.h"

// Internal raw OS interface
size_t  _mi_os_large_page_size(void);
bool    _mi_os_protect(void* addr, size_t size);
bool    _mi_os_unprotect(void* addr, size_t size);
bool    _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool    _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool    _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool    _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool    _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats);

// arena.c
mi_arena_id_t _mi_arena_id_none(void);
void  _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);


// Constants
#if (MI_INTPTR_SIZE==8)
#define MI_HEAP_REGION_MAX_SIZE    (256 * MI_GiB)  // 64KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE    (3 * MI_GiB)    // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif

#define MI_REGION_MAX_BLOCKS      MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE            (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS)    // 256MiB  (64MiB on 32 bits)
#define MI_REGION_MAX             (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE)  // 1024  (48 on 32 bits)
#define MI_REGION_MAX_OBJ_BLOCKS  (MI_REGION_MAX_BLOCKS/4)                    // 64MiB
#define MI_REGION_MAX_OBJ_SIZE    (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)

// Region info
typedef union mi_region_info_u {
  size_t value;
  struct {
    bool  valid;        // initialized?
    bool  is_large:1;   // allocated in fixed large/huge OS pages
    bool  is_pinned:1;  // pinned memory cannot be decommitted
    short numa_node;    // the associated NUMA node (where -1 means no associated node)
  } x;
} mi_region_info_t;


// A region owns a chunk of REGION_SIZE (256MiB) (virtual) memory with
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
  _Atomic(size_t)    info;        // mi_region_info_t.value
  _Atomic(void*)     start;       // start of the memory area
  mi_bitmap_field_t  in_use;      // bit per in-use block
  mi_bitmap_field_t  dirty;       // track if non-zero per block
  mi_bitmap_field_t  commit;      // track if committed per block
  mi_bitmap_field_t  reset;       // track if reset per block
  _Atomic(size_t)    arena_memid; // if allocated from a (huge page) arena
  _Atomic(size_t)    padding;     // round to 8 fields (needs to be atomic for msvc, see issue #508)
} mem_region_t;

// The region map
static mem_region_t regions[MI_REGION_MAX];

// Allocated regions
static _Atomic(size_t) regions_count; // = 0;


/* ----------------------------------------------------------------------------
Utility functions
-----------------------------------------------------------------------------*/

// Blocks (of 4MiB) needed for the given size.
static size_t mi_region_block_count(size_t size) {
  return _mi_divide_up(size, MI_SEGMENT_SIZE);
}
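
/*
// Illustrative sketch only (not used by the allocator): how request sizes round up
// to 4MiB blocks within a region, assuming the default MI_SEGMENT_SIZE.
static void mi_region_block_count_example(void) {
  mi_assert_internal(mi_region_block_count(1) == 1);                    // any non-zero size needs at least one block
  mi_assert_internal(mi_region_block_count(MI_SEGMENT_SIZE) == 1);      // exactly one 4MiB block
  mi_assert_internal(mi_region_block_count(MI_SEGMENT_SIZE + 1) == 2);  // rounds up per block
  mi_assert_internal(mi_region_block_count(MI_REGION_MAX_OBJ_SIZE) == MI_REGION_MAX_OBJ_BLOCKS); // largest object handled by regions
}
*/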
/*
// Return a rounded commit/reset size such that we don't fragment large OS pages into small ones.
static size_t mi_good_commit_size(size_t size) {
  if (size > (SIZE_MAX - _mi_os_large_page_size())) return size;
  return _mi_align_up(size, _mi_os_large_page_size());
}
*/

// Return if a pointer points into a region reserved by us.
mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
  if (p==NULL) return false;
  size_t count = mi_atomic_load_relaxed(&regions_count);
  for (size_t i = 0; i < count; i++) {
    uint8_t* start = (uint8_t*)mi_atomic_load_ptr_relaxed(uint8_t, &regions[i].start);
    if (start != NULL && (uint8_t*)p >= start && (uint8_t*)p < start + MI_REGION_SIZE) return true;
  }
  return false;
}


static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
  uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start);
  mi_assert_internal(start != NULL);
  return (start + (bit_idx * MI_SEGMENT_SIZE));
}

static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
  mi_assert_internal(bit_idx < MI_BITMAP_FIELD_BITS);
  size_t idx = region - regions;
  mi_assert_internal(&regions[idx] == region);
  return (idx*MI_BITMAP_FIELD_BITS + bit_idx)<<1;
}

static size_t mi_memid_create_from_arena(size_t arena_memid) {
  return (arena_memid << 1) | 1;
}


static bool mi_memid_is_arena(size_t id, mem_region_t** region, mi_bitmap_index_t* bit_idx, size_t* arena_memid) {
  if ((id&1)==1) {
    if (arena_memid != NULL) *arena_memid = (id>>1);
    return true;
  }
  else {
    size_t idx = (id >> 1) / MI_BITMAP_FIELD_BITS;
    *bit_idx   = (mi_bitmap_index_t)(id>>1) % MI_BITMAP_FIELD_BITS;
    *region    = &regions[idx];
    return false;
  }
}
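
/*
// Illustrative sketch only (not used by the allocator): the `memid` encoding round-trip.
// A region memid stores (region index * MI_BITMAP_FIELD_BITS + bit index) shifted left
// by one with the low bit clear; an arena memid sets the low bit instead.
static void mi_memid_example(void) {
  mem_region_t*     r           = NULL;
  mi_bitmap_index_t bit_idx     = 0;
  size_t            arena_memid = 0;
  // e.g. block 5 in region 2 encodes as ((2*MI_BITMAP_FIELD_BITS + 5) << 1)
  size_t id = mi_memid_create(&regions[2], 5);
  mi_assert_internal(!mi_memid_is_arena(id, &r, &bit_idx, &arena_memid));  // low bit is 0: a region allocation
  mi_assert_internal(r == &regions[2] && bit_idx == 5);
  // an arena allocation tags the low bit and carries the arena memid in the upper bits
  size_t aid = mi_memid_create_from_arena(42);
  mi_assert_internal(mi_memid_is_arena(aid, &r, &bit_idx, &arena_memid) && arena_memid == 42);
}
*/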
/* ----------------------------------------------------------------------------
  Allocate a region from the OS (or an arena)
-----------------------------------------------------------------------------*/

static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
  // not out of regions yet?
  if (mi_atomic_load_relaxed(&regions_count) >= MI_REGION_MAX - 1) return false;

  // try to allocate a fresh region from the OS
  bool region_commit = (commit && mi_option_is_enabled(mi_option_eager_region_commit));
  bool region_large  = (commit && allow_large);
  bool is_zero   = false;
  bool is_pinned = false;
  size_t arena_memid = 0;
  void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, &region_commit, &region_large, &is_pinned, &is_zero, _mi_arena_id_none(), &arena_memid, tld);
  if (start == NULL) return false;
  mi_assert_internal(!(region_large && !allow_large));
  mi_assert_internal(!region_large || region_commit);

  // claim a fresh slot
  const size_t idx = mi_atomic_increment_acq_rel(&regions_count);
  if (idx >= MI_REGION_MAX) {
    mi_atomic_decrement_acq_rel(&regions_count);
    _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats);
    _mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB));
    return false;
  }

  // allocated, initialize and claim the initial blocks
  mem_region_t* r = &regions[idx];
  r->arena_memid  = arena_memid;
  mi_atomic_store_release(&r->in_use, (size_t)0);
  mi_atomic_store_release(&r->dirty, (is_zero ? 0 : MI_BITMAP_FIELD_FULL));
  mi_atomic_store_release(&r->commit, (region_commit ? MI_BITMAP_FIELD_FULL : 0));
  mi_atomic_store_release(&r->reset, (size_t)0);
  *bit_idx = 0;
  _mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
  mi_atomic_store_ptr_release(void,&r->start, start);

  // and share it
  mi_region_info_t info;
  info.value = 0;                      // initialize the full union to zero
  info.x.valid     = true;
  info.x.is_large  = region_large;
  info.x.is_pinned = is_pinned;
  info.x.numa_node = (short)_mi_os_numa_node(tld);
  mi_atomic_store_release(&r->info, info.value); // now make it available to others
  *region = r;
  return true;
}

/* ----------------------------------------------------------------------------
  Try to claim blocks in suitable regions
-----------------------------------------------------------------------------*/

static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, bool allow_large) {
  // initialized at all?
  mi_region_info_t info;
  info.value = mi_atomic_load_relaxed(&((mem_region_t*)region)->info);
  if (info.value==0) return false;

  // numa correct
  if (numa_node >= 0) {  // use negative numa node to always succeed
    int rnode = info.x.numa_node;
    if (rnode >= 0 && rnode != numa_node) return false;
  }

  // check allow-large
  if (!allow_large && info.x.is_large) return false;

  return true;
}


static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
  // try all regions for a free slot
  const size_t count = mi_atomic_load_relaxed(&regions_count); // monotonic, so ok to be relaxed
  size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
  for (size_t visited = 0; visited < count; visited++, idx++) {
    if (idx >= count) idx = 0;  // wrap around
    mem_region_t* r = &regions[idx];
    // if this region suits our demand (numa node matches, large OS page matches)
    if (mi_region_is_suitable(r, numa_node, allow_large)) {
      // then try to atomically claim one or more segments in this region
      if (_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, bit_idx)) {
        tld->region_idx = idx;  // remember the last found position
        *region = r;
        return true;
      }
    }
  }
  return false;
}
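
/*
// Illustrative sketch only: the claim protocol used above. A claim is a single atomic
// find-and-set of `blocks` consecutive bits in one `in_use` field, so no lock is held
// while searching; a failed or abandoned claim is undone with `mi_bitmap_unclaim`.
static bool mi_region_claim_example(mem_region_t* r, size_t blocks) {
  mi_bitmap_index_t bit_idx;
  if (!_mi_bitmap_try_find_claim_field(&r->in_use, 0, blocks, &bit_idx)) {
    return false;  // no run of `blocks` free bits here; the caller tries the next region
  }
  // ... the claimed blocks start at mi_region_blocks_start(r, bit_idx) ...
  mi_bitmap_unclaim(&r->in_use, 1, blocks, bit_idx);  // release the claim again
  return true;
}
*/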
static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
  mi_assert_internal(blocks <= MI_BITMAP_FIELD_BITS);
  mem_region_t* region;
  mi_bitmap_index_t bit_idx;
  const int numa_node = (_mi_os_numa_node_count() <= 1 ? -1 : _mi_os_numa_node(tld));
  // try to claim in existing regions
  if (!mi_region_try_claim(numa_node, blocks, *large, &region, &bit_idx, tld)) {
    // otherwise try to allocate a fresh region and claim in there
    if (!mi_region_try_alloc_os(blocks, *commit, *large, &region, &bit_idx, tld)) {
      // out of regions or memory
      return NULL;
    }
  }

  // ------------------------------------------------
  // found a region and claimed `blocks` at `bit_idx`, initialize them now
  mi_assert_internal(region != NULL);
  mi_assert_internal(_mi_bitmap_is_claimed(&region->in_use, 1, blocks, bit_idx));

  mi_region_info_t info;
  info.value = mi_atomic_load_acquire(&region->info);
  uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&region->start);
  mi_assert_internal(!(info.x.is_large && !*large));
  mi_assert_internal(start != NULL);

  *is_zero   = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL);
  *large     = info.x.is_large;
  *is_pinned = info.x.is_pinned;
  *memid     = mi_memid_create(region, bit_idx);
  void* p = start + (mi_bitmap_index_bit_in_field(bit_idx) * MI_SEGMENT_SIZE);

  // commit
  if (*commit) {
    // ensure commit
    bool any_uncommitted;
    _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, &any_uncommitted);
    if (any_uncommitted) {
      mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
      bool commit_zero = false;
      if (!_mi_mem_commit(p, blocks * MI_SEGMENT_SIZE, &commit_zero, tld)) {
        // failed to commit! unclaim and return
        mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
        return NULL;
      }
      if (commit_zero) *is_zero = true;
    }
  }
  else {
    // no need to commit, but check if already fully committed
    *commit = _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx);
  }
  mi_assert_internal(!*commit || _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx));

  // unreset reset blocks
  if (_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
    // some blocks are still reset
    mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
    mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
    mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
    if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
      bool reset_zero = false;
      _mi_mem_unreset(p, blocks * MI_SEGMENT_SIZE, &reset_zero, tld);
      if (reset_zero) *is_zero = true;
    }
  }
  mi_assert_internal(!_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx));

  #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED
  if (*commit) { ((uint8_t*)p)[0] = 0; }
  #endif

  // and return the allocation
  mi_assert_internal(p != NULL);
  return p;
}
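
/*
// Illustrative sketch only: the in/out flag protocol of `mi_region_try_alloc` as used by
// `_mi_mem_alloc_aligned` below. On entry `*commit` and `*large` express the caller's
// request; on return they reflect what the claimed region actually provides.
static void* mi_region_try_alloc_example(mi_os_tld_t* tld) {
  bool commit    = true;   // in: request committed memory; out: true if the blocks are committed
  bool large     = false;  // in: allow large OS pages?     out: true if the region uses large pages
  bool is_pinned = false;  // out: true if the memory cannot be decommitted
  bool is_zero   = false;  // out: true if the blocks are guaranteed zero-initialized
  size_t memid   = 0;      // out: opaque id, later passed to `_mi_mem_free`
  return mi_region_try_alloc(2, &commit, &large, &is_pinned, &is_zero, &memid, tld); // 2 blocks = 2*MI_SEGMENT_SIZE
}
*/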
/* ----------------------------------------------------------------------------
 Allocation
-----------------------------------------------------------------------------*/

// Allocate `size` memory aligned at `alignment`. Return non-NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_BITMAP_FIELD_BITS + bitidx`)
void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
  mi_assert_internal(memid != NULL && tld != NULL);
  mi_assert_internal(size > 0);
  *memid     = 0;
  *is_zero   = false;
  *is_pinned = false;
  bool default_large = false;
  if (large==NULL) large = &default_large;  // ensure `large != NULL`
  if (size == 0) return NULL;
  size = _mi_align_up(size, _mi_os_page_size());

  // allocate from regions if possible
  void* p = NULL;
  size_t arena_memid;
  const size_t blocks = mi_region_block_count(size);
  if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
    p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld);
    if (p == NULL) {
      _mi_warning_message("unable to allocate from region: size %zu\n", size);
    }
  }
  if (p == NULL) {
    // and otherwise fall back to the OS
    p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), &arena_memid, tld);
    *memid = mi_memid_create_from_arena(arena_memid);
  }

  if (p != NULL) {
    mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0);
    #if (MI_DEBUG>=2) && !MI_TRACK_ENABLED
    if (*commit) { ((uint8_t*)p)[0] = 0; }  // ensure the memory is committed
    #endif
  }
  return p;
}



/* ----------------------------------------------------------------------------
Free
-----------------------------------------------------------------------------*/

// Free previously allocated memory with a given id.
void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
  mi_assert_internal(size > 0 && tld != NULL);
  if (p==NULL) return;
  if (size==0) return;
  size = _mi_align_up(size, _mi_os_page_size());

  size_t arena_memid = 0;
  mi_bitmap_index_t bit_idx;
  mem_region_t* region;
  if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) {
    // was a direct arena allocation, pass through
    _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats);
  }
  else {
    // allocated in a region
    mi_assert_internal(align_offset == 0);
    mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
    const size_t blocks = mi_region_block_count(size);
    mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
    mi_region_info_t info;
    info.value = mi_atomic_load_acquire(&region->info);
    mi_assert_internal(info.value != 0);
    void* blocks_start = mi_region_blocks_start(region, bit_idx);
    mi_assert_internal(blocks_start == p); // not a pointer in our area?
    mi_assert_internal(bit_idx + blocks <= MI_BITMAP_FIELD_BITS);
    if (blocks_start != p || bit_idx + blocks > MI_BITMAP_FIELD_BITS) return; // or `abort`?

    // committed?
    if (full_commit && (size % MI_SEGMENT_SIZE) == 0) {
      _mi_bitmap_claim(&region->commit, 1, blocks, bit_idx, NULL);
    }

    if (any_reset) {
      // set the is_reset bits if any pages were reset
      _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, NULL);
    }

    // reset the blocks to reduce the working set.
    if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset)
       && (mi_option_is_enabled(mi_option_eager_commit) ||
           mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
    {
      bool any_unreset;
      _mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
      if (any_unreset) {
        _mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit)
        _mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
      }
    }

    // and unclaim
    bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
    mi_assert_internal(all_unclaimed); MI_UNUSED(all_unclaimed);
  }
}
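
/*
// Illustrative sketch only: how a caller threads the opaque `memid` from allocation to free;
// `full_commit` and `any_reset` describe what the caller did with the memory so the region
// bitmaps can be brought back in sync.
static void mi_mem_alloc_free_example(mi_os_tld_t* tld) {
  bool commit = true, large = false, is_pinned = false, is_zero = false;
  size_t memid = 0;
  void* p = _mi_mem_alloc_aligned(2*MI_SEGMENT_SIZE, MI_SEGMENT_ALIGN, 0,
                                  &commit, &large, &is_pinned, &is_zero, &memid, tld);
  if (p == NULL) return;
  // ... use the memory ...
  _mi_mem_free(p, 2*MI_SEGMENT_SIZE, MI_SEGMENT_ALIGN, 0, memid,
               commit, false, tld);  // full_commit = commit, any_reset = false
}
*/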
/* ----------------------------------------------------------------------------
  collection
-----------------------------------------------------------------------------*/
void _mi_mem_collect(mi_os_tld_t* tld) {
  // free every region that has no segments in use.
  size_t rcount = mi_atomic_load_relaxed(&regions_count);
  for (size_t i = 0; i < rcount; i++) {
    mem_region_t* region = &regions[i];
    if (mi_atomic_load_relaxed(&region->info) != 0) {
      // if no segments used, try to claim the whole region
      size_t m = mi_atomic_load_relaxed(&region->in_use);
      while (m == 0 && !mi_atomic_cas_weak_release(&region->in_use, &m, MI_BITMAP_FIELD_FULL)) { /* nothing */ };
      if (m == 0) {
        // on success, free the whole region
        uint8_t* start     = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t,&regions[i].start);
        size_t arena_memid = mi_atomic_load_relaxed(&regions[i].arena_memid);
        size_t commit      = mi_atomic_load_relaxed(&regions[i].commit);
        memset((void*)&regions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning
        // and release the whole region
        mi_atomic_store_release(&region->info, (size_t)0);
        if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
          _mi_abandoned_await_readers(); // ensure no pending reads
          _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats);
        }
      }
    }
  }
}


/* ----------------------------------------------------------------------------
  Other
-----------------------------------------------------------------------------*/

bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
  if (mi_option_is_enabled(mi_option_reset_decommits)) {
    return _mi_os_decommit(p, size, tld->stats);
  }
  else {
    return _mi_os_reset(p, size, tld->stats);
  }
}

bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
  if (mi_option_is_enabled(mi_option_reset_decommits)) {
    return _mi_os_commit(p, size, is_zero, tld->stats);
  }
  else {
    return _mi_os_unreset(p, size, is_zero, tld->stats);
  }
}

bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
  return _mi_os_commit(p, size, is_zero, tld->stats);
}

bool _mi_mem_decommit(void* p, size_t size, mi_os_tld_t* tld) {
  return _mi_os_decommit(p, size, tld->stats);
}

bool _mi_mem_protect(void* p, size_t size) {
  return _mi_os_protect(p, size);
}

bool _mi_mem_unprotect(void* p, size_t size) {
  return _mi_os_unprotect(p, size);
}
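
// Note: `_mi_mem_reset` and `_mi_mem_unreset` are used as a pair (see `_mi_mem_free` and
// `mi_region_try_alloc` above). When `mi_option_reset_decommits` is enabled a reset actually
// decommits and the matching unreset recommits (possibly returning zeroed memory via `is_zero`);
// otherwise the blocks stay committed and the reset only hints to the OS that the pages may be
// discarded.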