diff options
Diffstat (limited to 'source/luametatex/source/libraries/mimalloc/src/prim')
11 files changed, 3193 insertions, 0 deletions
diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c b/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c new file mode 100644 index 000000000..80bcfa939 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/osx/alloc-override-zone.c @@ -0,0 +1,458 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2022, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +#include "mimalloc.h" +#include "mimalloc/internal.h" + +#if defined(MI_MALLOC_OVERRIDE) + +#if !defined(__APPLE__) +#error "this file should only be included on macOS" +#endif + +/* ------------------------------------------------------ + Override system malloc on macOS + This is done through the malloc zone interface. + It seems to be most robust in combination with interposing + though or otherwise we may get zone errors as there are could + be allocations done by the time we take over the + zone. +------------------------------------------------------ */ + +#include <AvailabilityMacros.h> +#include <malloc/malloc.h> +#include <string.h> // memset +#include <stdlib.h> + +#ifdef __cplusplus +extern "C" { +#endif + +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) +// only available from OSX 10.6 +extern malloc_zone_t* malloc_default_purgeable_zone(void) __attribute__((weak_import)); +#endif + +/* ------------------------------------------------------ + malloc zone members +------------------------------------------------------ */ + +static size_t zone_size(malloc_zone_t* zone, const void* p) { + MI_UNUSED(zone); + if (!mi_is_in_heap_region(p)){ return 0; } // not our pointer, bail out + return mi_usable_size(p); +} + +static void* zone_malloc(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); + return mi_malloc(size); +} + +static void* zone_calloc(malloc_zone_t* zone, size_t count, size_t size) { + MI_UNUSED(zone); + return mi_calloc(count, size); +} + +static void* zone_valloc(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); + return mi_malloc_aligned(size, _mi_os_page_size()); +} + +static void zone_free(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); + mi_cfree(p); +} + +static void* zone_realloc(malloc_zone_t* zone, void* p, size_t newsize) { + MI_UNUSED(zone); + return mi_realloc(p, newsize); +} + +static void* zone_memalign(malloc_zone_t* zone, size_t alignment, size_t size) { + MI_UNUSED(zone); + return mi_malloc_aligned(size,alignment); +} + +static void zone_destroy(malloc_zone_t* zone) { + MI_UNUSED(zone); + // todo: ignore for now? +} + +static unsigned zone_batch_malloc(malloc_zone_t* zone, size_t size, void** ps, unsigned count) { + size_t i; + for (i = 0; i < count; i++) { + ps[i] = zone_malloc(zone, size); + if (ps[i] == NULL) break; + } + return i; +} + +static void zone_batch_free(malloc_zone_t* zone, void** ps, unsigned count) { + for(size_t i = 0; i < count; i++) { + zone_free(zone, ps[i]); + ps[i] = NULL; + } +} + +static size_t zone_pressure_relief(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); MI_UNUSED(size); + mi_collect(false); + return 0; +} + +static void zone_free_definite_size(malloc_zone_t* zone, void* p, size_t size) { + MI_UNUSED(size); + zone_free(zone,p); +} + +static boolean_t zone_claimed_address(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); + return mi_is_in_heap_region(p); +} + + +/* ------------------------------------------------------ + Introspection members +------------------------------------------------------ */ + +static kern_return_t intro_enumerator(task_t task, void* p, + unsigned type_mask, vm_address_t zone_address, + memory_reader_t reader, + vm_range_recorder_t recorder) +{ + // todo: enumerate all memory + MI_UNUSED(task); MI_UNUSED(p); MI_UNUSED(type_mask); MI_UNUSED(zone_address); + MI_UNUSED(reader); MI_UNUSED(recorder); + return KERN_SUCCESS; +} + +static size_t intro_good_size(malloc_zone_t* zone, size_t size) { + MI_UNUSED(zone); + return mi_good_size(size); +} + +static boolean_t intro_check(malloc_zone_t* zone) { + MI_UNUSED(zone); + return true; +} + +static void intro_print(malloc_zone_t* zone, boolean_t verbose) { + MI_UNUSED(zone); MI_UNUSED(verbose); + mi_stats_print(NULL); +} + +static void intro_log(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); MI_UNUSED(p); + // todo? +} + +static void intro_force_lock(malloc_zone_t* zone) { + MI_UNUSED(zone); + // todo? +} + +static void intro_force_unlock(malloc_zone_t* zone) { + MI_UNUSED(zone); + // todo? +} + +static void intro_statistics(malloc_zone_t* zone, malloc_statistics_t* stats) { + MI_UNUSED(zone); + // todo... + stats->blocks_in_use = 0; + stats->size_in_use = 0; + stats->max_size_in_use = 0; + stats->size_allocated = 0; +} + +static boolean_t intro_zone_locked(malloc_zone_t* zone) { + MI_UNUSED(zone); + return false; +} + + +/* ------------------------------------------------------ + At process start, override the default allocator +------------------------------------------------------ */ + +#if defined(__GNUC__) && !defined(__clang__) +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +#endif + +#if defined(__clang__) +#pragma clang diagnostic ignored "-Wc99-extensions" +#endif + +static malloc_introspection_t mi_introspect = { + .enumerator = &intro_enumerator, + .good_size = &intro_good_size, + .check = &intro_check, + .print = &intro_print, + .log = &intro_log, + .force_lock = &intro_force_lock, + .force_unlock = &intro_force_unlock, +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) + .statistics = &intro_statistics, + .zone_locked = &intro_zone_locked, +#endif +}; + +static malloc_zone_t mi_malloc_zone = { + // note: even with designators, the order is important for C++ compilation + //.reserved1 = NULL, + //.reserved2 = NULL, + .size = &zone_size, + .malloc = &zone_malloc, + .calloc = &zone_calloc, + .valloc = &zone_valloc, + .free = &zone_free, + .realloc = &zone_realloc, + .destroy = &zone_destroy, + .zone_name = "mimalloc", + .batch_malloc = &zone_batch_malloc, + .batch_free = &zone_batch_free, + .introspect = &mi_introspect, +#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) + #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) + .version = 10, + #else + .version = 9, + #endif + // switch to version 9+ on OSX 10.6 to support memalign. + .memalign = &zone_memalign, + .free_definite_size = &zone_free_definite_size, + .pressure_relief = &zone_pressure_relief, + #if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14) + .claimed_address = &zone_claimed_address, + #endif +#else + .version = 4, +#endif +}; + +#ifdef __cplusplus +} +#endif + + +#if defined(MI_OSX_INTERPOSE) && defined(MI_SHARED_LIB_EXPORT) + +// ------------------------------------------------------ +// Override malloc_xxx and malloc_zone_xxx api's to use only +// our mimalloc zone. Since even the loader uses malloc +// on macOS, this ensures that all allocations go through +// mimalloc (as all calls are interposed). +// The main `malloc`, `free`, etc calls are interposed in `alloc-override.c`, +// Here, we also override macOS specific API's like +// `malloc_zone_calloc` etc. see <https://github.com/aosm/libmalloc/blob/master/man/malloc_zone_malloc.3> +// ------------------------------------------------------ + +static inline malloc_zone_t* mi_get_default_zone(void) +{ + static bool init; + if mi_unlikely(!init) { + init = true; + malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see <http://eatmyrandom.blogspot.com/2010/03/mallocfree-interception-on-mac-os-x.html>) + } + return &mi_malloc_zone; +} + +mi_decl_externc int malloc_jumpstart(uintptr_t cookie); +mi_decl_externc void _malloc_fork_prepare(void); +mi_decl_externc void _malloc_fork_parent(void); +mi_decl_externc void _malloc_fork_child(void); + + +static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) { + MI_UNUSED(size); MI_UNUSED(flags); + return mi_get_default_zone(); +} + +static malloc_zone_t* mi_malloc_default_zone (void) { + return mi_get_default_zone(); +} + +static malloc_zone_t* mi_malloc_default_purgeable_zone(void) { + return mi_get_default_zone(); +} + +static void mi_malloc_destroy_zone(malloc_zone_t* zone) { + MI_UNUSED(zone); + // nothing. +} + +static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, vm_address_t** addresses, unsigned* count) { + MI_UNUSED(task); MI_UNUSED(mr); + if (addresses != NULL) *addresses = NULL; + if (count != NULL) *count = 0; + return KERN_SUCCESS; +} + +static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) { + return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name); +} + +static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) { + MI_UNUSED(zone); MI_UNUSED(name); +} + +static int mi_malloc_jumpstart(uintptr_t cookie) { + MI_UNUSED(cookie); + return 1; // or 0 for no error? +} + +static void mi__malloc_fork_prepare(void) { + // nothing +} +static void mi__malloc_fork_parent(void) { + // nothing +} +static void mi__malloc_fork_child(void) { + // nothing +} + +static void mi_malloc_printf(const char* fmt, ...) { + MI_UNUSED(fmt); +} + +static bool zone_check(malloc_zone_t* zone) { + MI_UNUSED(zone); + return true; +} + +static malloc_zone_t* zone_from_ptr(const void* p) { + MI_UNUSED(p); + return mi_get_default_zone(); +} + +static void zone_log(malloc_zone_t* zone, void* p) { + MI_UNUSED(zone); MI_UNUSED(p); +} + +static void zone_print(malloc_zone_t* zone, bool b) { + MI_UNUSED(zone); MI_UNUSED(b); +} + +static void zone_print_ptr_info(void* p) { + MI_UNUSED(p); +} + +static void zone_register(malloc_zone_t* zone) { + MI_UNUSED(zone); +} + +static void zone_unregister(malloc_zone_t* zone) { + MI_UNUSED(zone); +} + +// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1` +// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73> +struct mi_interpose_s { + const void* replacement; + const void* target; +}; +#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun } +#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun) +#define MI_INTERPOSE_ZONE(fun) MI_INTERPOSE_FUN(malloc_##fun,fun) +__attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[] __attribute__((section("__DATA, __interpose"))) = +{ + + MI_INTERPOSE_MI(malloc_create_zone), + MI_INTERPOSE_MI(malloc_default_purgeable_zone), + MI_INTERPOSE_MI(malloc_default_zone), + MI_INTERPOSE_MI(malloc_destroy_zone), + MI_INTERPOSE_MI(malloc_get_all_zones), + MI_INTERPOSE_MI(malloc_get_zone_name), + MI_INTERPOSE_MI(malloc_jumpstart), + MI_INTERPOSE_MI(malloc_printf), + MI_INTERPOSE_MI(malloc_set_zone_name), + MI_INTERPOSE_MI(_malloc_fork_child), + MI_INTERPOSE_MI(_malloc_fork_parent), + MI_INTERPOSE_MI(_malloc_fork_prepare), + + MI_INTERPOSE_ZONE(zone_batch_free), + MI_INTERPOSE_ZONE(zone_batch_malloc), + MI_INTERPOSE_ZONE(zone_calloc), + MI_INTERPOSE_ZONE(zone_check), + MI_INTERPOSE_ZONE(zone_free), + MI_INTERPOSE_ZONE(zone_from_ptr), + MI_INTERPOSE_ZONE(zone_log), + MI_INTERPOSE_ZONE(zone_malloc), + MI_INTERPOSE_ZONE(zone_memalign), + MI_INTERPOSE_ZONE(zone_print), + MI_INTERPOSE_ZONE(zone_print_ptr_info), + MI_INTERPOSE_ZONE(zone_realloc), + MI_INTERPOSE_ZONE(zone_register), + MI_INTERPOSE_ZONE(zone_unregister), + MI_INTERPOSE_ZONE(zone_valloc) +}; + + +#else + +// ------------------------------------------------------ +// hook into the zone api's without interposing +// This is the official way of adding an allocator but +// it seems less robust than using interpose. +// ------------------------------------------------------ + +static inline malloc_zone_t* mi_get_default_zone(void) +{ + // The first returned zone is the real default + malloc_zone_t** zones = NULL; + unsigned count = 0; + kern_return_t ret = malloc_get_all_zones(0, NULL, (vm_address_t**)&zones, &count); + if (ret == KERN_SUCCESS && count > 0) { + return zones[0]; + } + else { + // fallback + return malloc_default_zone(); + } +} + +#if defined(__clang__) +__attribute__((constructor(0))) +#else +__attribute__((constructor)) // seems not supported by g++-11 on the M1 +#endif +static void _mi_macos_override_malloc(void) { + malloc_zone_t* purgeable_zone = NULL; + + #if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6) + // force the purgeable zone to exist to avoid strange bugs + if (malloc_default_purgeable_zone) { + purgeable_zone = malloc_default_purgeable_zone(); + } + #endif + + // Register our zone. + // thomcc: I think this is still needed to put us in the zone list. + malloc_zone_register(&mi_malloc_zone); + // Unregister the default zone, this makes our zone the new default + // as that was the last registered. + malloc_zone_t *default_zone = mi_get_default_zone(); + // thomcc: Unsure if the next test is *always* false or just false in the + // cases I've tried. I'm also unsure if the code inside is needed. at all + if (default_zone != &mi_malloc_zone) { + malloc_zone_unregister(default_zone); + + // Reregister the default zone so free and realloc in that zone keep working. + malloc_zone_register(default_zone); + } + + // Unregister, and re-register the purgeable_zone to avoid bugs if it occurs + // earlier than the default zone. + if (purgeable_zone != NULL) { + malloc_zone_unregister(purgeable_zone); + malloc_zone_register(purgeable_zone); + } + +} +#endif // MI_OSX_INTERPOSE + +#endif // MI_MALLOC_OVERRIDE diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/osx/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/osx/prim.c new file mode 100644 index 000000000..8a2f4e8aa --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/osx/prim.c @@ -0,0 +1,9 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// We use the unix/prim.c with the mmap API on macOSX +#include "../unix/prim.c" diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/prim.c new file mode 100644 index 000000000..9a597d8eb --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/prim.c @@ -0,0 +1,24 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// Select the implementation of the primitives +// depending on the OS. + +#if defined(_WIN32) +#include "windows/prim.c" // VirtualAlloc (Windows) + +#elif defined(__APPLE__) +#include "osx/prim.c" // macOSX (actually defers to mmap in unix/prim.c) + +#elif defined(__wasi__) +#define MI_USE_SBRK +#include "wasi/prim.c" // memory-grow or sbrk (Wasm) + +#else +#include "unix/prim.c" // mmap() (Linux, macOSX, BSD, Illumnos, Haiku, DragonFly, etc.) + +#endif diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/readme.md b/source/luametatex/source/libraries/mimalloc/src/prim/readme.md new file mode 100644 index 000000000..380dd3a71 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/readme.md @@ -0,0 +1,9 @@ +## Portability Primitives + +This is the portability layer where all primitives needed from the OS are defined. + +- `include/mimalloc/prim.h`: primitive portability API definition. +- `prim.c`: Selects one of `unix/prim.c`, `wasi/prim.c`, or `windows/prim.c` depending on the host platform + (and on macOS, `osx/prim.c` defers to `unix/prim.c`). + +Note: still work in progress, there may still be places in the sources that still depend on OS ifdef's.
\ No newline at end of file diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c new file mode 100644 index 000000000..8d9c7a723 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/unix/prim.c @@ -0,0 +1,838 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#ifndef _DEFAULT_SOURCE +#define _DEFAULT_SOURCE // ensure mmap flags and syscall are defined +#endif + +#if defined(__sun) +// illumos provides new mman.h api when any of these are defined +// otherwise the old api based on caddr_t which predates the void pointers one. +// stock solaris provides only the former, chose to atomically to discard those +// flags only here rather than project wide tough. +#undef _XOPEN_SOURCE +#undef _POSIX_C_SOURCE +#endif + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +#include <sys/mman.h> // mmap +#include <unistd.h> // sysconf + +#if defined(__linux__) + #include <features.h> + #include <fcntl.h> + #if defined(__GLIBC__) + #include <linux/mman.h> // linux mmap flags + #else + #include <sys/mman.h> + #endif +#elif defined(__APPLE__) + #include <TargetConditionals.h> + #if !TARGET_IOS_IPHONE && !TARGET_IOS_SIMULATOR + #include <mach/vm_statistics.h> + #endif +#elif defined(__FreeBSD__) || defined(__DragonFly__) + #include <sys/param.h> + #if __FreeBSD_version >= 1200000 + #include <sys/cpuset.h> + #include <sys/domainset.h> + #endif + #include <sys/sysctl.h> +#endif + +#if !defined(__HAIKU__) && !defined(__APPLE__) && !defined(__CYGWIN__) + #define MI_HAS_SYSCALL_H + #include <sys/syscall.h> +#endif + +//------------------------------------------------------------------------------------ +// Use syscalls for some primitives to allow for libraries that override open/read/close etc. +// and do allocation themselves; using syscalls prevents recursion when mimalloc is +// still initializing (issue #713) +//------------------------------------------------------------------------------------ + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_open) && defined(SYS_close) && defined(SYS_read) && defined(SYS_access) + +static int mi_prim_open(const char* fpath, int open_flags) { + return syscall(SYS_open,fpath,open_flags,0); +} +static ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return syscall(SYS_read,fd,buf,bufsize); +} +static int mi_prim_close(int fd) { + return syscall(SYS_close,fd); +} +static int mi_prim_access(const char *fpath, int mode) { + return syscall(SYS_access,fpath,mode); +} + +#elif !defined(__APPLE__) // avoid unused warnings + +static int mi_prim_open(const char* fpath, int open_flags) { + return open(fpath,open_flags,0); +} +static mi_ssize_t mi_prim_read(int fd, void* buf, size_t bufsize) { + return read(fd,buf,bufsize); +} +static int mi_prim_close(int fd) { + return close(fd); +} +static int mi_prim_access(const char *fpath, int mode) { + return access(fpath,mode); +} + +#endif + + + +//--------------------------------------------- +// init +//--------------------------------------------- + +static bool unix_detect_overcommit(void) { + bool os_overcommit = true; +#if defined(__linux__) + int fd = mi_prim_open("/proc/sys/vm/overcommit_memory", O_RDONLY); + if (fd >= 0) { + char buf[32]; + ssize_t nread = mi_prim_read(fd, &buf, sizeof(buf)); + mi_prim_close(fd); + // <https://www.kernel.org/doc/Documentation/vm/overcommit-accounting> + // 0: heuristic overcommit, 1: always overcommit, 2: never overcommit (ignore NORESERVE) + if (nread >= 1) { + os_overcommit = (buf[0] == '0' || buf[0] == '1'); + } + } +#elif defined(__FreeBSD__) + int val = 0; + size_t olen = sizeof(val); + if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) { + os_overcommit = (val != 0); + } +#else + // default: overcommit is true +#endif + return os_overcommit; +} + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) { + long psize = sysconf(_SC_PAGESIZE); + if (psize > 0) { + config->page_size = (size_t)psize; + config->alloc_granularity = (size_t)psize; + } + config->large_page_size = 2*MI_MiB; // TODO: can we query the OS for this? + config->has_overcommit = unix_detect_overcommit(); + config->must_free_whole = false; // mmap can free in parts +} + + +//--------------------------------------------- +// free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + bool err = (munmap(addr, size) == -1); + return (err ? errno : 0); +} + + +//--------------------------------------------- +// mmap +//--------------------------------------------- + +static int unix_madvise(void* addr, size_t size, int advice) { + #if defined(__sun) + return madvise((caddr_t)addr, size, advice); // Solaris needs cast (issue #520) + #else + return madvise(addr, size, advice); + #endif +} + +static void* unix_mmap_prim(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) { + MI_UNUSED(try_alignment); + void* p = NULL; + #if defined(MAP_ALIGNED) // BSD + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + size_t n = mi_bsr(try_alignment); + if (((size_t)1 << n) == try_alignment && n >= 12 && n <= 30) { // alignment is a power of 2 and 4096 <= alignment <= 1GiB + p = mmap(addr, size, protect_flags, flags | MAP_ALIGNED(n), fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #elif defined(MAP_ALIGN) // Solaris + if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) { + p = mmap((void*)try_alignment, size, protect_flags, flags | MAP_ALIGN, fd, 0); // addr parameter is the required alignment + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + #endif + #if (MI_INTPTR_SIZE >= 8) && !defined(MAP_ALIGNED) + // on 64-bit systems, use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = _mi_os_get_aligned_hint(try_alignment, size); + if (hint != NULL) { + p = mmap(hint, size, protect_flags, flags, fd, 0); + if (p==MAP_FAILED || !_mi_is_aligned(p,try_alignment)) { + int err = errno; + _mi_warning_message("unable to directly request hinted aligned OS memory (error: %d (0x%x), size: 0x%zx bytes, alignment: 0x%zx, hint address: %p)\n", err, err, size, try_alignment, hint); + } + if (p!=MAP_FAILED) return p; + // fall back to regular mmap + } + } + #endif + // regular mmap + p = mmap(addr, size, protect_flags, flags, fd, 0); + if (p!=MAP_FAILED) return p; + // failed to allocate + return NULL; +} + +static void* unix_mmap(void* addr, size_t size, size_t try_alignment, int protect_flags, bool large_only, bool allow_large, bool* is_large) { + void* p = NULL; + #if !defined(MAP_ANONYMOUS) + #define MAP_ANONYMOUS MAP_ANON + #endif + #if !defined(MAP_NORESERVE) + #define MAP_NORESERVE 0 + #endif + int flags = MAP_PRIVATE | MAP_ANONYMOUS; + int fd = -1; + if (_mi_os_has_overcommit()) { + flags |= MAP_NORESERVE; + } + #if defined(PROT_MAX) + protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD + #endif + #if defined(VM_MAKE_TAG) + // macOS: tracking anonymous page with a specific ID. (All up to 98 are taken officially but LLVM sanitizers had taken 99) + int os_tag = (int)mi_option_get(mi_option_os_tag); + if (os_tag < 100 || os_tag > 255) { os_tag = 100; } + fd = VM_MAKE_TAG(os_tag); + #endif + // huge page allocation + if ((large_only || _mi_os_use_large_page(size, try_alignment)) && allow_large) { + static _Atomic(size_t) large_page_try_ok; // = 0; + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // If the OS is not configured for large OS pages, or the user does not have + // enough permission, the `mmap` will always fail (but it might also fail for other reasons). + // Therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times + // to avoid too many failing calls to mmap. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + int lflags = flags & ~MAP_NORESERVE; // using NORESERVE on huge pages seems to fail on Linux + int lfd = fd; + #ifdef MAP_ALIGNED_SUPER + lflags |= MAP_ALIGNED_SUPER; + #endif + #ifdef MAP_HUGETLB + lflags |= MAP_HUGETLB; + #endif + #ifdef MAP_HUGE_1GB + static bool mi_huge_pages_available = true; + if ((size % MI_GiB) == 0 && mi_huge_pages_available) { + lflags |= MAP_HUGE_1GB; + } + else + #endif + { + #ifdef MAP_HUGE_2MB + lflags |= MAP_HUGE_2MB; + #endif + } + #ifdef VM_FLAGS_SUPERPAGE_SIZE_2MB + lfd |= VM_FLAGS_SUPERPAGE_SIZE_2MB; + #endif + if (large_only || lflags != flags) { + // try large OS page allocation + *is_large = true; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + #ifdef MAP_HUGE_1GB + if (p == NULL && (lflags & MAP_HUGE_1GB) != 0) { + mi_huge_pages_available = false; // don't try huge 1GiB pages again + _mi_warning_message("unable to allocate huge (1GiB) page, trying large (2MiB) pages instead (errno: %i)\n", errno); + lflags = ((lflags & ~MAP_HUGE_1GB) | MAP_HUGE_2MB); + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, lflags, lfd); + } + #endif + if (large_only) return p; + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok, (size_t)8); // on error, don't try again for the next N allocations + } + } + } + } + // regular allocation + if (p == NULL) { + *is_large = false; + p = unix_mmap_prim(addr, size, try_alignment, protect_flags, flags, fd); + if (p != NULL) { + #if defined(MADV_HUGEPAGE) + // Many Linux systems don't allow MAP_HUGETLB but they support instead + // transparent huge pages (THP). Generally, it is not required to call `madvise` with MADV_HUGE + // though since properly aligned allocations will already use large pages if available + // in that case -- in particular for our large regions (in `memory.c`). + // However, some systems only allow THP if called with explicit `madvise`, so + // when large OS pages are enabled for mimalloc, we call `madvise` anyways. + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + if (unix_madvise(p, size, MADV_HUGEPAGE) == 0) { + *is_large = true; // possibly + }; + } + #elif defined(__sun) + if (allow_large && _mi_os_use_large_page(size, try_alignment)) { + struct memcntl_mha cmd = {0}; + cmd.mha_pagesize = large_os_page_size; + cmd.mha_cmd = MHA_MAPSIZE_VA; + if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) { + *is_large = true; + } + } + #endif + } + } + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + + int protect_flags = (commit ? (PROT_WRITE | PROT_READ) : PROT_NONE); + *addr = unix_mmap(NULL, size, try_alignment, protect_flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : errno); +} + + +//--------------------------------------------- +// Commit/Reset +//--------------------------------------------- + +static void unix_mprotect_hint(int err) { + #if defined(__linux__) && (MI_SECURE>=2) // guard page around every mimalloc page + if (err == ENOMEM) { + _mi_warning_message("The next warning may be caused by a low memory map limit.\n" + " On Linux this is controlled by the vm.max_map_count -- maybe increase it?\n" + " For example: sudo sysctl -w vm.max_map_count=262144\n"); + } + #else + MI_UNUSED(err); + #endif +} + + +int _mi_prim_commit(void* start, size_t size, bool commit) { + /* + #if 0 && defined(MAP_FIXED) && !defined(__APPLE__) + // Linux: disabled for now as mmap fixed seems much more expensive than MADV_DONTNEED (and splits VMA's?) + if (commit) { + // commit: just change the protection + err = mprotect(start, csize, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { + // decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss) + const int fd = mi_unix_mmap_fd(); + void* p = mmap(start, csize, PROT_NONE, (MAP_FIXED | MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE), fd, 0); + if (p != start) { err = errno; } + } + #else + */ + int err = 0; + if (commit) { + // commit: ensure we can access the area + err = mprotect(start, size, (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + } + else { + #if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0 + // decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE) + // (on the other hand, MADV_FREE would be good enough.. it is just not reflected in the stats :-( ) + err = unix_madvise(start, size, MADV_DONTNEED); + #else + // decommit: just disable access (also used in debug and secure mode to trap on illegal access) + err = mprotect(start, size, PROT_NONE); + if (err != 0) { err = errno; } + #endif + } + unix_mprotect_hint(err); + return err; +} + +int _mi_prim_reset(void* start, size_t size) { + #if defined(MADV_FREE) + static _Atomic(size_t) advice = MI_ATOMIC_VAR_INIT(MADV_FREE); + int oadvice = (int)mi_atomic_load_relaxed(&advice); + int err; + while ((err = unix_madvise(start, size, oadvice)) != 0 && errno == EAGAIN) { errno = 0; }; + if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) { + // if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on + mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED); + err = unix_madvise(start, size, MADV_DONTNEED); + } + #else + int err = unix_madvise(start, csize, MADV_DONTNEED); + #endif + return err; +} + +int _mi_prim_protect(void* start, size_t size, bool protect) { + int err = mprotect(start, size, protect ? PROT_NONE : (PROT_READ | PROT_WRITE)); + if (err != 0) { err = errno; } + unix_mprotect_hint(err); + return err; +} + + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +#if (MI_INTPTR_SIZE >= 8) && !defined(__HAIKU__) && !defined(__CYGWIN__) + +#ifndef MPOL_PREFERRED +#define MPOL_PREFERRED 1 +#endif + +#if defined(MI_HAS_SYSCALL_H) && defined(SYS_mbind) +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + return syscall(SYS_mbind, start, len, mode, nmask, maxnode, flags); +} +#else +static long mi_prim_mbind(void* start, unsigned long len, unsigned long mode, const unsigned long* nmask, unsigned long maxnode, unsigned flags) { + MI_UNUSED(start); MI_UNUSED(len); MI_UNUSED(mode); MI_UNUSED(nmask); MI_UNUSED(maxnode); MI_UNUSED(flags); + return 0; +} +#endif + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + bool is_large = true; + *addr = unix_mmap(hint_addr, size, MI_SEGMENT_SIZE, PROT_READ | PROT_WRITE, true, true, &is_large); + if (*addr != NULL && numa_node >= 0 && numa_node < 8*MI_INTPTR_SIZE) { // at most 64 nodes + unsigned long numa_mask = (1UL << numa_node); + // TODO: does `mbind` work correctly for huge OS pages? should we + // use `set_mempolicy` before calling mmap instead? + // see: <https://lkml.org/lkml/2017/2/9/875> + long err = mi_prim_mbind(*addr, size, MPOL_PREFERRED, &numa_mask, 8*MI_INTPTR_SIZE, 0); + if (err != 0) { + err = errno; + _mi_warning_message("failed to bind huge (1GiB) pages to numa node %d (error: %d (0x%x))\n", numa_node, err, err); + } + } + return (*addr != NULL ? 0 : errno); +} + +#else + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOMEM; +} + +#endif + +//--------------------------------------------- +// NUMA nodes +//--------------------------------------------- + +#if defined(__linux__) + +#include <stdio.h> // snprintf + +size_t _mi_prim_numa_node(void) { + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getcpu) + unsigned long node = 0; + unsigned long ncpu = 0; + long err = syscall(SYS_getcpu, &ncpu, &node, NULL); + if (err != 0) return 0; + return node; + #else + return 0; + #endif +} + +size_t _mi_prim_numa_node_count(void) { + char buf[128]; + unsigned node = 0; + for(node = 0; node < 256; node++) { + // enumerate node entries -- todo: it there a more efficient way to do this? (but ensure there is no allocation) + snprintf(buf, 127, "/sys/devices/system/node/node%u", node + 1); + if (mi_prim_access(buf,R_OK) != 0) break; + } + return (node+1); +} + +#elif defined(__FreeBSD__) && __FreeBSD_version >= 1200000 + +size_t _mi_prim_numa_node(void) { + domainset_t dom; + size_t node; + int policy; + if (cpuset_getdomain(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1, sizeof(dom), &dom, &policy) == -1) return 0ul; + for (node = 0; node < MAXMEMDOM; node++) { + if (DOMAINSET_ISSET(node, &dom)) return node; + } + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ndomains = 0; + size_t len = sizeof(ndomains); + if (sysctlbyname("vm.ndomains", &ndomains, &len, NULL, 0) == -1) return 0ul; + return ndomains; +} + +#elif defined(__DragonFly__) + +size_t _mi_prim_numa_node(void) { + // TODO: DragonFly does not seem to provide any userland means to get this information. + return 0ul; +} + +size_t _mi_prim_numa_node_count(void) { + size_t ncpus = 0, nvirtcoresperphys = 0; + size_t len = sizeof(size_t); + if (sysctlbyname("hw.ncpu", &ncpus, &len, NULL, 0) == -1) return 0ul; + if (sysctlbyname("hw.cpu_topology_ht_ids", &nvirtcoresperphys, &len, NULL, 0) == -1) return 0ul; + return nvirtcoresperphys * ncpus; +} + +#else + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + +#endif + +// ---------------------------------------------------------------- +// Clock +// ---------------------------------------------------------------- + +#include <time.h> + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif +} + +#endif + + + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +#if defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__) +#include <stdio.h> +#include <unistd.h> +#include <sys/resource.h> + +#if defined(__APPLE__) +#include <mach/mach.h> +#endif + +#if defined(__HAIKU__) +#include <kernel/OS.h> +#endif + +static mi_msecs_t timeval_secs(const struct timeval* tv) { + return ((mi_msecs_t)tv->tv_sec * 1000L) + ((mi_msecs_t)tv->tv_usec / 1000L); +} + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + struct rusage rusage; + getrusage(RUSAGE_SELF, &rusage); + pinfo->utime = timeval_secs(&rusage.ru_utime); + pinfo->stime = timeval_secs(&rusage.ru_stime); +#if !defined(__HAIKU__) + pinfo->page_faults = rusage.ru_majflt; +#endif +#if defined(__HAIKU__) + // Haiku does not have (yet?) a way to + // get these stats per process + thread_info tid; + area_info mem; + ssize_t c; + get_thread_info(find_thread(0), &tid); + while (get_next_area_info(tid.team, &c, &mem) == B_OK) { + pinfo->peak_rss += mem.ram_size; + } + pinfo->page_faults = 0; +#elif defined(__APPLE__) + pinfo->peak_rss = rusage.ru_maxrss; // macos reports in bytes + struct mach_task_basic_info info; + mach_msg_type_number_t infoCount = MACH_TASK_BASIC_INFO_COUNT; + if (task_info(mach_task_self(), MACH_TASK_BASIC_INFO, (task_info_t)&info, &infoCount) == KERN_SUCCESS) { + pinfo->current_rss = (size_t)info.resident_size; + } +#else + pinfo->peak_rss = rusage.ru_maxrss * 1024; // Linux/BSD report in KiB +#endif + // use defaults for commit +} + +#else + +#ifndef __wasi__ +// WebAssembly instances are not processes +#pragma message("define a way to get process info") +#endif + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + +#endif + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) { + fputs(msg,stderr); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +#if !defined(MI_USE_ENVIRON) || (MI_USE_ENVIRON!=0) +// On Posix systemsr use `environ` to access environment variables +// even before the C runtime is initialized. +#if defined(__APPLE__) && defined(__has_include) && __has_include(<crt_externs.h>) +#include <crt_externs.h> +static char** mi_get_environ(void) { + return (*_NSGetEnviron()); +} +#else +extern char** environ; +static char** mi_get_environ(void) { + return environ; +} +#endif +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + if (name==NULL) return false; + const size_t len = _mi_strlen(name); + if (len == 0) return false; + char** env = mi_get_environ(); + if (env == NULL) return false; + // compare up to 10000 entries + for (int i = 0; i < 10000 && env[i] != NULL; i++) { + const char* s = env[i]; + if (_mi_strnicmp(name, s, len) == 0 && s[len] == '=') { // case insensitive + // found it + _mi_strlcpy(result, s + len + 1, result_size); + return true; + } + } + return false; +} +#else +// fallback: use standard C `getenv` but this cannot be used while initializing the C runtime +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. + char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} +#endif // !MI_USE_ENVIRON + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(__APPLE__) + +#include <AvailabilityMacros.h> +#if defined(MAC_OS_X_VERSION_10_10) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_10 +#include <CommonCrypto/CommonCryptoError.h> +#include <CommonCrypto/CommonRandom.h> +#endif +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15 + // We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf + // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html> + return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess); + #else + // fall back on older macOS + arc4random_buf(buf, buf_len); + return true; + #endif +} + +#elif defined(__ANDROID__) || defined(__DragonFly__) || \ + defined(__FreeBSD__) || defined(__NetBSD__) || defined(__OpenBSD__) || \ + defined(__sun) + +#include <stdlib.h> +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + arc4random_buf(buf, buf_len); + return true; +} + +#elif defined(__linux__) || defined(__HAIKU__) + +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <errno.h> + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + // Modern Linux provides `getrandom` but different distributions either use `sys/random.h` or `linux/random.h` + // and for the latter the actual `getrandom` call is not always defined. + // (see <https://stackoverflow.com/questions/45237324/why-doesnt-getrandom-compile>) + // We therefore use a syscall directly and fall back dynamically to /dev/urandom when needed. + #if defined(MI_HAS_SYSCALL_H) && defined(SYS_getrandom) + #ifndef GRND_NONBLOCK + #define GRND_NONBLOCK (1) + #endif + static _Atomic(uintptr_t) no_getrandom; // = 0 + if (mi_atomic_load_acquire(&no_getrandom)==0) { + ssize_t ret = syscall(SYS_getrandom, buf, buf_len, GRND_NONBLOCK); + if (ret >= 0) return (buf_len == (size_t)ret); + if (errno != ENOSYS) return false; + mi_atomic_store_release(&no_getrandom, (uintptr_t)1); // don't call again, and fall back to /dev/urandom + } + #endif + int flags = O_RDONLY; + #if defined(O_CLOEXEC) + flags |= O_CLOEXEC; + #endif + int fd = mi_prim_open("/dev/urandom", flags); + if (fd < 0) return false; + size_t count = 0; + while(count < buf_len) { + ssize_t ret = mi_prim_read(fd, (char*)buf + count, buf_len - count); + if (ret<=0) { + if (errno!=EAGAIN && errno!=EINTR) break; + } + else { + count += ret; + } + } + mi_prim_close(fd); + return (count==buf_len); +} + +#else + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} + +#endif + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if defined(MI_USE_PTHREADS) + +// use pthread local storage keys to detect thread ending +// (and used with MI_TLS_PTHREADS for the default heap) +pthread_key_t _mi_heap_default_key = (pthread_key_t)(-1); + +static void mi_pthread_done(void* value) { + if (value!=NULL) { + _mi_thread_done((mi_heap_t*)value); + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_assert_internal(_mi_heap_default_key == (pthread_key_t)(-1)); + pthread_key_create(&_mi_heap_default_key, &mi_pthread_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing to do +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + if (_mi_heap_default_key != (pthread_key_t)(-1)) { // can happen during recursive invocation on freeBSD + pthread_setspecific(_mi_heap_default_key, heap); + } +} + +#else + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} + +#endif diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c new file mode 100644 index 000000000..cb3ce1a7f --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/wasi/prim.c @@ -0,0 +1,265 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" + +//--------------------------------------------- +// Initialize +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) { + config->page_size = 64*MI_KiB; // WebAssembly has a fixed page size: 64KiB + config->alloc_granularity = 16; + config->has_overcommit = false; + config->must_free_whole = true; +} + +//--------------------------------------------- +// Free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + MI_UNUSED(addr); MI_UNUSED(size); + // wasi heap cannot be shrunk + return 0; +} + + +//--------------------------------------------- +// Allocation: sbrk or memory_grow +//--------------------------------------------- + +#if defined(MI_USE_SBRK) + static void* mi_memory_grow( size_t size ) { + void* p = sbrk(size); + if (p == (void*)(-1)) return NULL; + #if !defined(__wasi__) // on wasi this is always zero initialized already (?) + memset(p,0,size); + #endif + return p; + } +#elif defined(__wasi__) + static void* mi_memory_grow( size_t size ) { + size_t base = (size > 0 ? __builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size())) + : __builtin_wasm_memory_size(0)); + if (base == SIZE_MAX) return NULL; + return (void*)(base * _mi_os_page_size()); + } +#endif + +#if defined(MI_USE_PTHREADS) +static pthread_mutex_t mi_heap_grow_mutex = PTHREAD_MUTEX_INITIALIZER; +#endif + +static void* mi_prim_mem_grow(size_t size, size_t try_alignment) { + void* p = NULL; + if (try_alignment <= 1) { + // `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + p = mi_memory_grow(size); + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + } + else { + void* base = NULL; + size_t alloc_size = 0; + // to allocate aligned use a lock to try to avoid thread interaction + // between getting the current size and actual allocation + // (also, `sbrk` is not thread safe in general) + #if defined(MI_USE_PTHREADS) + pthread_mutex_lock(&mi_heap_grow_mutex); + #endif + { + void* current = mi_memory_grow(0); // get current size + if (current != NULL) { + void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space + alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size()); + base = mi_memory_grow(alloc_size); + } + } + #if defined(MI_USE_PTHREADS) + pthread_mutex_unlock(&mi_heap_grow_mutex); + #endif + if (base != NULL) { + p = mi_align_up_ptr(base, try_alignment); + if ((uint8_t*)p + size > (uint8_t*)base + alloc_size) { + // another thread used wasm_memory_grow/sbrk in-between and we do not have enough + // space after alignment. Give up (and waste the space as we cannot shrink :-( ) + // (in `mi_os_mem_alloc_aligned` this will fall back to overallocation to align) + p = NULL; + } + } + } + /* + if (p == NULL) { + _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment); + errno = ENOMEM; + return NULL; + } + */ + mi_assert_internal( p == NULL || try_alignment == 0 || (uintptr_t)p % try_alignment == 0 ); + return p; +} + +// Note: the `try_alignment` is just a hint and the returned pointer is not guaranteed to be aligned. +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { + MI_UNUSED(allow_large); MI_UNUSED(commit); + *is_large = false; + *addr = mi_prim_mem_grow(size, try_alignment); + return (*addr != NULL ? 0 : ENOMEM); +} + + +//--------------------------------------------- +// Commit/Reset/Protect +//--------------------------------------------- + +int _mi_prim_commit(void* addr, size_t size, bool commit) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(commit); + return 0; +} + +int _mi_prim_reset(void* addr, size_t size) { + MI_UNUSED(addr); MI_UNUSED(size); + return 0; +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + MI_UNUSED(addr); MI_UNUSED(size); MI_UNUSED(protect); + return 0; +} + + +//--------------------------------------------- +// Huge pages and NUMA nodes +//--------------------------------------------- + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + MI_UNUSED(hint_addr); MI_UNUSED(size); MI_UNUSED(numa_node); + *addr = NULL; + return ENOSYS; +} + +size_t _mi_prim_numa_node(void) { + return 0; +} + +size_t _mi_prim_numa_node_count(void) { + return 1; +} + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +#include <time.h> + +#if defined(CLOCK_REALTIME) || defined(CLOCK_MONOTONIC) + +mi_msecs_t _mi_prim_clock_now(void) { + struct timespec t; + #ifdef CLOCK_MONOTONIC + clock_gettime(CLOCK_MONOTONIC, &t); + #else + clock_gettime(CLOCK_REALTIME, &t); + #endif + return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000); +} + +#else + +// low resolution timer +mi_msecs_t _mi_prim_clock_now(void) { + #if !defined(CLOCKS_PER_SEC) || (CLOCKS_PER_SEC == 1000) || (CLOCKS_PER_SEC == 0) + return (mi_msecs_t)clock(); + #elif (CLOCKS_PER_SEC < 1000) + return (mi_msecs_t)clock() * (1000 / (mi_msecs_t)CLOCKS_PER_SEC); + #else + return (mi_msecs_t)clock() / ((mi_msecs_t)CLOCKS_PER_SEC / 1000); + #endif +} + +#endif + + +//---------------------------------------------------------------- +// Process info +//---------------------------------------------------------------- + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + // use defaults + MI_UNUSED(pinfo); +} + + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) { + fputs(msg,stderr); +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + // cannot call getenv() when still initializing the C runtime. + if (_mi_preloading()) return false; + const char* s = getenv(name); + if (s == NULL) { + // we check the upper case name too. + char buf[64+1]; + size_t len = _mi_strnlen(name,sizeof(buf)-1); + for (size_t i = 0; i < len; i++) { + buf[i] = _mi_toupper(name[i]); + } + buf[len] = 0; + s = getenv(buf); + } + if (s == NULL || _mi_strnlen(s,result_size) >= result_size) return false; + _mi_strlcpy(result, s, result_size); + return true; +} + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return false; +} + + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +void _mi_prim_thread_init_auto_done(void) { + // nothing +} + +void _mi_prim_thread_done_auto_done(void) { + // nothing +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw-mimalloc.wprp b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw-mimalloc.wprp new file mode 100644 index 000000000..b00cd7adf --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw-mimalloc.wprp @@ -0,0 +1,61 @@ +<WindowsPerformanceRecorder Version="1.0"> + <Profiles> + <SystemCollector Id="WPR_initiated_WprApp_WPR_System_Collector" Name="WPR_initiated_WprApp_WPR System Collector"> + <BufferSize Value="1024" /> + <Buffers Value="100" /> + </SystemCollector> + <EventCollector Id="Mimalloc_Collector" Name="Mimalloc Collector"> + <BufferSize Value="1024" /> + <Buffers Value="100" /> + </EventCollector> + <SystemProvider Id="WPR_initiated_WprApp_WPR_System_Collector_Provider"> + <Keywords> + <Keyword Value="Loader" /> + </Keywords> + </SystemProvider> + <EventProvider Id="MimallocEventProvider" Name="138f4dbb-ee04-4899-aa0a-572ad4475779" NonPagedMemory="true" Stack="true"> + <EventFilters FilterIn="true"> + <EventId Value="100" /> + <EventId Value="101" /> + </EventFilters> + </EventProvider> + <Profile Id="CustomHeap.Verbose.File" Name="CustomHeap" Description="RunningProfile:CustomHeap.Verbose.File" LoggingMode="File" DetailLevel="Verbose"> + <ProblemCategories> + <ProblemCategory Value="Resource Analysis" /> + </ProblemCategories> + <Collectors> + <SystemCollectorId Value="WPR_initiated_WprApp_WPR_System_Collector"> + <SystemProviderId Value="WPR_initiated_WprApp_WPR_System_Collector_Provider" /> + </SystemCollectorId> + <EventCollectorId Value="Mimalloc_Collector"> + <EventProviders> + <EventProviderId Value="MimallocEventProvider" > + <Keywords> + <Keyword Value="100"/> + <Keyword Value="101"/> + </Keywords> + </EventProviderId> + </EventProviders> + </EventCollectorId> + </Collectors> + <TraceMergeProperties> + <TraceMergeProperty Id="BaseVerboseTraceMergeProperties" Name="BaseTraceMergeProperties"> + <DeletePreMergedTraceFiles Value="true" /> + <FileCompression Value="false" /> + <InjectOnly Value="false" /> + <SkipMerge Value="false" /> + <CustomEvents> + <CustomEvent Value="ImageId" /> + <CustomEvent Value="BuildInfo" /> + <CustomEvent Value="VolumeMapping" /> + <CustomEvent Value="EventMetadata" /> + <CustomEvent Value="PerfTrackMetadata" /> + <CustomEvent Value="WinSAT" /> + <CustomEvent Value="NetworkInterface" /> + </CustomEvents> + </TraceMergeProperty> + </TraceMergeProperties> + </Profile> + </Profiles> +</WindowsPerformanceRecorder> + diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.h b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.h new file mode 100644 index 000000000..4e0a092a1 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.h @@ -0,0 +1,905 @@ +//**********************************************************************` +//* This is an include file generated by Message Compiler. *` +//* *` +//* Copyright (c) Microsoft Corporation. All Rights Reserved. *` +//**********************************************************************` +#pragma once + +//***************************************************************************** +// +// Notes on the ETW event code generated by MC: +// +// - Structures and arrays of structures are treated as an opaque binary blob. +// The caller is responsible for packing the data for the structure into a +// single region of memory, with no padding between values. The macro will +// have an extra parameter for the length of the blob. +// - Arrays of nul-terminated strings must be packed by the caller into a +// single binary blob containing the correct number of strings, with a nul +// after each string. The size of the blob is specified in characters, and +// includes the final nul. +// - Arrays of SID are treated as a single binary blob. The caller is +// responsible for packing the SID values into a single region of memory with +// no padding. +// - The length attribute on the data element in the manifest is significant +// for values with intype win:UnicodeString, win:AnsiString, or win:Binary. +// The length attribute must be specified for win:Binary, and is optional for +// win:UnicodeString and win:AnsiString (if no length is given, the strings +// are assumed to be nul-terminated). For win:UnicodeString, the length is +// measured in characters, not bytes. +// - For an array of win:UnicodeString, win:AnsiString, or win:Binary, the +// length attribute applies to every value in the array, so every value in +// the array must have the same length. The values in the array are provided +// to the macro via a single pointer -- the caller is responsible for packing +// all of the values into a single region of memory with no padding between +// values. +// - Values of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary can be generated and collected on Vista or later. +// However, they may not decode properly without the Windows 10 2018 Fall +// Update. +// - Arrays of type win:CountedUnicodeString, win:CountedAnsiString, and +// win:CountedBinary must be packed by the caller into a single region of +// memory. The format for each item is a UINT16 byte-count followed by that +// many bytes of data. When providing the array to the generated macro, you +// must provide the total size of the packed array data, including the UINT16 +// sizes for each item. In the case of win:CountedUnicodeString, the data +// size is specified in WCHAR (16-bit) units. In the case of +// win:CountedAnsiString and win:CountedBinary, the data size is specified in +// bytes. +// +//***************************************************************************** + +#include <wmistr.h> +#include <evntrace.h> +#include <evntprov.h> + +#ifndef ETW_INLINE + #ifdef _ETW_KM_ + // In kernel mode, save stack space by never inlining templates. + #define ETW_INLINE DECLSPEC_NOINLINE __inline + #else + // In user mode, save code size by inlining templates as appropriate. + #define ETW_INLINE __inline + #endif +#endif // ETW_INLINE + +#if defined(__cplusplus) +extern "C" { +#endif + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_USE_KERNEL_MODE_APIS macro: +// Controls whether the generated code uses kernel-mode or user-mode APIs. +// - Set to 0 to use Windows user-mode APIs such as EventRegister. +// - Set to 1 to use Windows kernel-mode APIs such as EtwRegister. +// Default is based on whether the _ETW_KM_ macro is defined (i.e. by wdm.h). +// Note that the APIs can also be overridden directly, e.g. by setting the +// MCGEN_EVENTWRITETRANSFER or MCGEN_EVENTREGISTER macros. +// +#ifndef MCGEN_USE_KERNEL_MODE_APIS + #ifdef _ETW_KM_ + #define MCGEN_USE_KERNEL_MODE_APIS 1 + #else + #define MCGEN_USE_KERNEL_MODE_APIS 0 + #endif +#endif // MCGEN_USE_KERNEL_MODE_APIS + +// +// MCGEN_HAVE_EVENTSETINFORMATION macro: +// Controls how McGenEventSetInformation uses the EventSetInformation API. +// - Set to 0 to disable the use of EventSetInformation +// (McGenEventSetInformation will always return an error). +// - Set to 1 to directly invoke MCGEN_EVENTSETINFORMATION. +// - Set to 2 to to locate EventSetInformation at runtime via GetProcAddress +// (user-mode) or MmGetSystemRoutineAddress (kernel-mode). +// Default is determined as follows: +// - If MCGEN_EVENTSETINFORMATION has been customized, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else if the target OS version has EventSetInformation, set to 1 +// (i.e. use MCGEN_EVENTSETINFORMATION). +// - Else set to 2 (i.e. try to dynamically locate EventSetInformation). +// Note that an McGenEventSetInformation function will only be generated if one +// or more provider in a manifest has provider traits. +// +#ifndef MCGEN_HAVE_EVENTSETINFORMATION + #ifdef MCGEN_EVENTSETINFORMATION // if MCGEN_EVENTSETINFORMATION has been customized, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #elif MCGEN_USE_KERNEL_MODE_APIS // else if using kernel-mode APIs, + #if NTDDI_VERSION >= 0x06040000 // if target OS is Windows 10 or later, + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EtwSetInformation" via MmGetSystemRoutineAddress. + #endif // else (using user-mode APIs) + #else // if target OS and SDK is Windows 8 or later, + #if WINVER >= 0x0602 && defined(EVENT_FILTER_TYPE_SCHEMATIZED) + #define MCGEN_HAVE_EVENTSETINFORMATION 1 // directly invoke MCGEN_EVENTSETINFORMATION(...). + #else // else + #define MCGEN_HAVE_EVENTSETINFORMATION 2 // find "EventSetInformation" via GetModuleHandleExW/GetProcAddress. + #endif + #endif +#endif // MCGEN_HAVE_EVENTSETINFORMATION + +// +// MCGEN Override Macros +// +// The following override macros may be defined before including this header +// to control the APIs used by this header: +// +// - MCGEN_EVENTREGISTER +// - MCGEN_EVENTUNREGISTER +// - MCGEN_EVENTSETINFORMATION +// - MCGEN_EVENTWRITETRANSFER +// +// If the the macro is undefined, the MC implementation will default to the +// corresponding ETW APIs. For example, if the MCGEN_EVENTREGISTER macro is +// undefined, the EventRegister[MyProviderName] macro will use EventRegister +// in user mode and will use EtwRegister in kernel mode. +// +// To prevent issues from conflicting definitions of these macros, the value +// of the override macro will be used as a suffix in certain internal function +// names. Because of this, the override macros must follow certain rules: +// +// - The macro must be defined before any MC-generated header is included and +// must not be undefined or redefined after any MC-generated header is +// included. Different translation units (i.e. different .c or .cpp files) +// may set the macros to different values, but within a translation unit +// (within a single .c or .cpp file), the macro must be set once and not +// changed. +// - The override must be an object-like macro, not a function-like macro +// (i.e. the override macro must not have a parameter list). +// - The override macro's value must be a simple identifier, i.e. must be +// something that starts with a letter or '_' and contains only letters, +// numbers, and '_' characters. +// - If the override macro's value is the name of a second object-like macro, +// the second object-like macro must follow the same rules. (The override +// macro's value can also be the name of a function-like macro, in which +// case the function-like macro does not need to follow the same rules.) +// +// For example, the following will cause compile errors: +// +// #define MCGEN_EVENTWRITETRANSFER MyNamespace::MyClass::MyFunction // Value has non-identifier characters (colon). +// #define MCGEN_EVENTWRITETRANSFER GetEventWriteFunctionPointer(7) // Value has non-identifier characters (parentheses). +// #define MCGEN_EVENTWRITETRANSFER(h,e,a,r,c,d) EventWrite(h,e,c,d) // Override is defined as a function-like macro. +// #define MY_OBJECT_LIKE_MACRO MyNamespace::MyClass::MyEventWriteFunction +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // Evaluates to something with non-identifier characters (colon). +// +// The following would be ok: +// +// #define MCGEN_EVENTWRITETRANSFER MyEventWriteFunction1 // OK, suffix will be "MyEventWriteFunction1". +// #define MY_OBJECT_LIKE_MACRO MyEventWriteFunction2 +// #define MCGEN_EVENTWRITETRANSFER MY_OBJECT_LIKE_MACRO // OK, suffix will be "MyEventWriteFunction2". +// #define MY_FUNCTION_LIKE_MACRO(h,e,a,r,c,d) MyNamespace::MyClass::MyEventWriteFunction3(h,e,c,d) +// #define MCGEN_EVENTWRITETRANSFER MY_FUNCTION_LIKE_MACRO // OK, suffix will be "MY_FUNCTION_LIKE_MACRO". +// +#ifndef MCGEN_EVENTREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTREGISTER EtwRegister + #else + #define MCGEN_EVENTREGISTER EventRegister + #endif +#endif // MCGEN_EVENTREGISTER +#ifndef MCGEN_EVENTUNREGISTER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTUNREGISTER EtwUnregister + #else + #define MCGEN_EVENTUNREGISTER EventUnregister + #endif +#endif // MCGEN_EVENTUNREGISTER +#ifndef MCGEN_EVENTSETINFORMATION + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTSETINFORMATION EtwSetInformation + #else + #define MCGEN_EVENTSETINFORMATION EventSetInformation + #endif +#endif // MCGEN_EVENTSETINFORMATION +#ifndef MCGEN_EVENTWRITETRANSFER + #if MCGEN_USE_KERNEL_MODE_APIS + #define MCGEN_EVENTWRITETRANSFER EtwWriteTransfer + #else + #define MCGEN_EVENTWRITETRANSFER EventWriteTransfer + #endif +#endif // MCGEN_EVENTWRITETRANSFER + +// +// MCGEN_EVENT_ENABLED macro: +// Override to control how the EventWrite[EventName] macros determine whether +// an event is enabled. The default behavior is for EventWrite[EventName] to +// use the EventEnabled[EventName] macros. +// +#ifndef MCGEN_EVENT_ENABLED +#define MCGEN_EVENT_ENABLED(EventName) EventEnabled##EventName() +#endif + +// +// MCGEN_EVENT_ENABLED_FORCONTEXT macro: +// Override to control how the EventWrite[EventName]_ForContext macros +// determine whether an event is enabled. The default behavior is for +// EventWrite[EventName]_ForContext to use the +// EventEnabled[EventName]_ForContext macros. +// +#ifndef MCGEN_EVENT_ENABLED_FORCONTEXT +#define MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, EventName) EventEnabled##EventName##_ForContext(pContext) +#endif + +// +// MCGEN_ENABLE_CHECK macro: +// Determines whether the specified event would be considered as enabled +// based on the state of the specified context. Slightly faster than calling +// McGenEventEnabled directly. +// +#ifndef MCGEN_ENABLE_CHECK +#define MCGEN_ENABLE_CHECK(Context, Descriptor) (Context.IsEnabled && McGenEventEnabled(&Context, &Descriptor)) +#endif + +#if !defined(MCGEN_TRACE_CONTEXT_DEF) +#define MCGEN_TRACE_CONTEXT_DEF +// This structure is for use by MC-generated code and should not be used directly. +typedef struct _MCGEN_TRACE_CONTEXT +{ + TRACEHANDLE RegistrationHandle; + TRACEHANDLE Logger; // Used as pointer to provider traits. + ULONGLONG MatchAnyKeyword; + ULONGLONG MatchAllKeyword; + ULONG Flags; + ULONG IsEnabled; + UCHAR Level; + UCHAR Reserve; + USHORT EnableBitsCount; + PULONG EnableBitMask; + const ULONGLONG* EnableKeyWords; + const UCHAR* EnableLevel; +} MCGEN_TRACE_CONTEXT, *PMCGEN_TRACE_CONTEXT; +#endif // MCGEN_TRACE_CONTEXT_DEF + +#if !defined(MCGEN_LEVEL_KEYWORD_ENABLED_DEF) +#define MCGEN_LEVEL_KEYWORD_ENABLED_DEF +// +// Determines whether an event with a given Level and Keyword would be +// considered as enabled based on the state of the specified context. +// Note that you may want to use MCGEN_ENABLE_CHECK instead of calling this +// function directly. +// +FORCEINLINE +BOOLEAN +McGenLevelKeywordEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ UCHAR Level, + _In_ ULONGLONG Keyword + ) +{ + // + // Check if the event Level is lower than the level at which + // the channel is enabled. + // If the event Level is 0 or the channel is enabled at level 0, + // all levels are enabled. + // + + if ((Level <= EnableInfo->Level) || // This also covers the case of Level == 0. + (EnableInfo->Level == 0)) { + + // + // Check if Keyword is enabled + // + + if ((Keyword == (ULONGLONG)0) || + ((Keyword & EnableInfo->MatchAnyKeyword) && + ((Keyword & EnableInfo->MatchAllKeyword) == EnableInfo->MatchAllKeyword))) { + return TRUE; + } + } + + return FALSE; +} +#endif // MCGEN_LEVEL_KEYWORD_ENABLED_DEF + +#if !defined(MCGEN_EVENT_ENABLED_DEF) +#define MCGEN_EVENT_ENABLED_DEF +// +// Determines whether the specified event would be considered as enabled based +// on the state of the specified context. Note that you may want to use +// MCGEN_ENABLE_CHECK instead of calling this function directly. +// +FORCEINLINE +BOOLEAN +McGenEventEnabled( + _In_ PMCGEN_TRACE_CONTEXT EnableInfo, + _In_ PCEVENT_DESCRIPTOR EventDescriptor + ) +{ + return McGenLevelKeywordEnabled(EnableInfo, EventDescriptor->Level, EventDescriptor->Keyword); +} +#endif // MCGEN_EVENT_ENABLED_DEF + +#if !defined(MCGEN_CONTROL_CALLBACK) +#define MCGEN_CONTROL_CALLBACK + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +VOID +__stdcall +McGenControlCallbackV2( + _In_ LPCGUID SourceId, + _In_ ULONG ControlCode, + _In_ UCHAR Level, + _In_ ULONGLONG MatchAnyKeyword, + _In_ ULONGLONG MatchAllKeyword, + _In_opt_ PEVENT_FILTER_DESCRIPTOR FilterData, + _Inout_opt_ PVOID CallbackContext + ) +/*++ + +Routine Description: + + This is the notification callback for Windows Vista and later. + +Arguments: + + SourceId - The GUID that identifies the session that enabled the provider. + + ControlCode - The parameter indicates whether the provider + is being enabled or disabled. + + Level - The level at which the event is enabled. + + MatchAnyKeyword - The bitmask of keywords that the provider uses to + determine the category of events that it writes. + + MatchAllKeyword - This bitmask additionally restricts the category + of events that the provider writes. + + FilterData - The provider-defined data. + + CallbackContext - The context of the callback that is defined when the provider + called EtwRegister to register itself. + +Remarks: + + ETW calls this function to notify provider of enable/disable + +--*/ +{ + PMCGEN_TRACE_CONTEXT Ctx = (PMCGEN_TRACE_CONTEXT)CallbackContext; + ULONG Ix; +#ifndef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + UNREFERENCED_PARAMETER(SourceId); + UNREFERENCED_PARAMETER(FilterData); +#endif + + if (Ctx == NULL) { + return; + } + + switch (ControlCode) { + + case EVENT_CONTROL_CODE_ENABLE_PROVIDER: + Ctx->Level = Level; + Ctx->MatchAnyKeyword = MatchAnyKeyword; + Ctx->MatchAllKeyword = MatchAllKeyword; + Ctx->IsEnabled = EVENT_CONTROL_CODE_ENABLE_PROVIDER; + + for (Ix = 0; Ix < Ctx->EnableBitsCount; Ix += 1) { + if (McGenLevelKeywordEnabled(Ctx, Ctx->EnableLevel[Ix], Ctx->EnableKeyWords[Ix]) != FALSE) { + Ctx->EnableBitMask[Ix >> 5] |= (1 << (Ix % 32)); + } else { + Ctx->EnableBitMask[Ix >> 5] &= ~(1 << (Ix % 32)); + } + } + break; + + case EVENT_CONTROL_CODE_DISABLE_PROVIDER: + Ctx->IsEnabled = EVENT_CONTROL_CODE_DISABLE_PROVIDER; + Ctx->Level = 0; + Ctx->MatchAnyKeyword = 0; + Ctx->MatchAllKeyword = 0; + if (Ctx->EnableBitsCount > 0) { +#pragma warning(suppress: 26451) // Arithmetic overflow cannot occur, no matter the value of EnableBitCount + RtlZeroMemory(Ctx->EnableBitMask, (((Ctx->EnableBitsCount - 1) / 32) + 1) * sizeof(ULONG)); + } + break; + + default: + break; + } + +#ifdef MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + // + // Call user defined callback + // + MCGEN_PRIVATE_ENABLE_CALLBACK_V2( + SourceId, + ControlCode, + Level, + MatchAnyKeyword, + MatchAllKeyword, + FilterData, + CallbackContext + ); +#endif // MCGEN_PRIVATE_ENABLE_CALLBACK_V2 + + return; +} + +#endif // MCGEN_CONTROL_CALLBACK + +#ifndef _mcgen_PENABLECALLBACK + #if MCGEN_USE_KERNEL_MODE_APIS + #define _mcgen_PENABLECALLBACK PETWENABLECALLBACK + #else + #define _mcgen_PENABLECALLBACK PENABLECALLBACK + #endif +#endif // _mcgen_PENABLECALLBACK + +#if !defined(_mcgen_PASTE2) +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_PASTE2(a, b) _mcgen_PASTE2_imp(a, b) +#define _mcgen_PASTE2_imp(a, b) a##b +#endif // _mcgen_PASTE2 + +#if !defined(_mcgen_PASTE3) +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_PASTE3(a, b, c) _mcgen_PASTE3_imp(a, b, c) +#define _mcgen_PASTE3_imp(a, b, c) a##b##_##c +#endif // _mcgen_PASTE3 + +// +// Macro validation +// + +// Validate MCGEN_EVENTREGISTER: + +// Trigger an error if MCGEN_EVENTREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER); + +// Trigger an error if MCGEN_EVENTREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTREGISTER) + MCGEN_EVENTREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTREGISTER); + +// Validate MCGEN_EVENTUNREGISTER: + +// Trigger an error if MCGEN_EVENTUNREGISTER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER); + +// Trigger an error if MCGEN_EVENTUNREGISTER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTUNREGISTER) + MCGEN_EVENTUNREGISTER_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTUNREGISTER is defined as a function-like macro: +typedef void MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_MCGEN_EVENTUNREGISTER; +typedef int _mcgen_PASTE2(MCGEN_EVENTUNREGISTER_must_not_be_a_functionLike_macro_, MCGEN_EVENTUNREGISTER); + +// Validate MCGEN_EVENTSETINFORMATION: + +// Trigger an error if MCGEN_EVENTSETINFORMATION is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION); + +// Trigger an error if MCGEN_EVENTSETINFORMATION is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTSETINFORMATION) + MCGEN_EVENTSETINFORMATION_must_not_be_redefined_between_headers; + +// Trigger an error if MCGEN_EVENTSETINFORMATION is defined as a function-like macro: +typedef void MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_MCGEN_EVENTSETINFORMATION; +typedef int _mcgen_PASTE2(MCGEN_EVENTSETINFORMATION_must_not_be_a_functionLike_macro_, MCGEN_EVENTSETINFORMATION); + +// Validate MCGEN_EVENTWRITETRANSFER: + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is not an unqualified (simple) identifier: +struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER); + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is redefined: +typedef struct _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_definition_must_be_an_unqualified_identifier_, MCGEN_EVENTWRITETRANSFER) + MCGEN_EVENTWRITETRANSFER_must_not_be_redefined_between_headers;; + +// Trigger an error if MCGEN_EVENTWRITETRANSFER is defined as a function-like macro: +typedef void MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_MCGEN_EVENTWRITETRANSFER; +typedef int _mcgen_PASTE2(MCGEN_EVENTWRITETRANSFER_must_not_be_a_functionLike_macro_, MCGEN_EVENTWRITETRANSFER); + +#ifndef McGenEventWrite_def +#define McGenEventWrite_def + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventWrite _mcgen_PASTE2(McGenEventWrite_, MCGEN_EVENTWRITETRANSFER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventWrite( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_opt_ LPCGUID ActivityId, + _In_range_(1, 128) ULONG EventDataCount, + _Pre_cap_(EventDataCount) EVENT_DATA_DESCRIPTOR* EventData + ) +{ + const USHORT UNALIGNED* Traits; + + // Some customized MCGEN_EVENTWRITETRANSFER macros might ignore ActivityId. + UNREFERENCED_PARAMETER(ActivityId); + + Traits = (const USHORT UNALIGNED*)(UINT_PTR)Context->Logger; + + if (Traits == NULL) { + EventData[0].Ptr = 0; + EventData[0].Size = 0; + EventData[0].Reserved = 0; + } else { + EventData[0].Ptr = (ULONG_PTR)Traits; + EventData[0].Size = *Traits; + EventData[0].Reserved = 2; // EVENT_DATA_DESCRIPTOR_TYPE_PROVIDER_METADATA + } + + return MCGEN_EVENTWRITETRANSFER( + Context->RegistrationHandle, + Descriptor, + ActivityId, + NULL, + EventDataCount, + EventData); +} +#endif // McGenEventWrite_def + +#if !defined(McGenEventRegisterUnregister) +#define McGenEventRegisterUnregister + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventRegister _mcgen_PASTE2(McGenEventRegister_, MCGEN_EVENTREGISTER) + +#pragma warning(push) +#pragma warning(disable:6103) +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventRegister( + _In_ LPCGUID ProviderId, + _In_opt_ _mcgen_PENABLECALLBACK EnableCallback, + _In_opt_ PVOID CallbackContext, + _Inout_ PREGHANDLE RegHandle + ) +/*++ + +Routine Description: + + This function registers the provider with ETW. + +Arguments: + + ProviderId - Provider ID to register with ETW. + + EnableCallback - Callback to be used. + + CallbackContext - Context for the callback. + + RegHandle - Pointer to registration handle. + +Remarks: + + Should not be called if the provider is already registered (i.e. should not + be called if *RegHandle != 0). Repeatedly registering a provider is a bug + and may indicate a race condition. However, for compatibility with previous + behavior, this function will return SUCCESS in this case. + +--*/ +{ + ULONG Error; + + if (*RegHandle != 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTREGISTER(ProviderId, EnableCallback, CallbackContext, RegHandle); + } + + return Error; +} +#pragma warning(pop) + +// This macro is for use by MC-generated code and should not be used directly. +#define McGenEventUnregister _mcgen_PASTE2(McGenEventUnregister_, MCGEN_EVENTUNREGISTER) + +// This function is for use by MC-generated code and should not be used directly. +DECLSPEC_NOINLINE __inline +ULONG __stdcall +McGenEventUnregister(_Inout_ PREGHANDLE RegHandle) +/*++ + +Routine Description: + + Unregister from ETW and set *RegHandle = 0. + +Arguments: + + RegHandle - the pointer to the provider registration handle + +Remarks: + + If provider has not been registered (i.e. if *RegHandle == 0), + return SUCCESS. It is safe to call McGenEventUnregister even if the + call to McGenEventRegister returned an error. + +--*/ +{ + ULONG Error; + + if(*RegHandle == 0) + { + Error = 0; // ERROR_SUCCESS + } + else + { + Error = MCGEN_EVENTUNREGISTER(*RegHandle); + *RegHandle = (REGHANDLE)0; + } + + return Error; +} + +#endif // McGenEventRegisterUnregister + +#ifndef _mcgen_EVENT_BIT_SET + #if defined(_M_IX86) || defined(_M_X64) + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((((const unsigned char*)EnableBits)[BitPosition >> 3] & (1u << (BitPosition & 7))) != 0) + #else // CPU type + // This macro is for use by MC-generated code and should not be used directly. + #define _mcgen_EVENT_BIT_SET(EnableBits, BitPosition) ((EnableBits[BitPosition >> 5] & (1u << (BitPosition & 31))) != 0) + #endif // CPU type +#endif // _mcgen_EVENT_BIT_SET + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ +// Provider "microsoft-windows-mimalloc" event count 2 +//+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ + +// Provider GUID = 138f4dbb-ee04-4899-aa0a-572ad4475779 +EXTERN_C __declspec(selectany) const GUID ETW_MI_Provider = {0x138f4dbb, 0xee04, 0x4899, {0xaa, 0x0a, 0x57, 0x2a, 0xd4, 0x47, 0x57, 0x79}}; + +#ifndef ETW_MI_Provider_Traits +#define ETW_MI_Provider_Traits NULL +#endif // ETW_MI_Provider_Traits + +// +// Event Descriptors +// +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_ALLOC = {0x64, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_ALLOC_value 0x64 +EXTERN_C __declspec(selectany) const EVENT_DESCRIPTOR ETW_MI_FREE = {0x65, 0x1, 0x0, 0x4, 0x0, 0x0, 0x0}; +#define ETW_MI_FREE_value 0x65 + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Event Enablement Bits +// These variables are for use by MC-generated code and should not be used directly. +// +EXTERN_C __declspec(selectany) DECLSPEC_CACHEALIGN ULONG microsoft_windows_mimallocEnableBits[1]; +EXTERN_C __declspec(selectany) const ULONGLONG microsoft_windows_mimallocKeywords[1] = {0x0}; +EXTERN_C __declspec(selectany) const unsigned char microsoft_windows_mimallocLevels[1] = {4}; + +// +// Provider context +// +EXTERN_C __declspec(selectany) MCGEN_TRACE_CONTEXT ETW_MI_Provider_Context = {0, (ULONG_PTR)ETW_MI_Provider_Traits, 0, 0, 0, 0, 0, 0, 1, microsoft_windows_mimallocEnableBits, microsoft_windows_mimallocKeywords, microsoft_windows_mimallocLevels}; + +// +// Provider REGHANDLE +// +#define microsoft_windows_mimallocHandle (ETW_MI_Provider_Context.RegistrationHandle) + +// +// This macro is set to 0, indicating that the EventWrite[Name] macros do not +// have an Activity parameter. This is controlled by the -km and -um options. +// +#define ETW_MI_Provider_EventWriteActivity 0 + +// +// Register with ETW using the control GUID specified in the manifest. +// Invoke this macro during module initialization (i.e. program startup, +// DLL process attach, or driver load) to initialize the provider. +// Note that if this function returns an error, the error means that +// will not work, but no action needs to be taken -- even if EventRegister +// returns an error, it is generally safe to use EventWrite and +// EventUnregister macros (they will be no-ops if EventRegister failed). +// +#ifndef EventRegistermicrosoft_windows_mimalloc +#define EventRegistermicrosoft_windows_mimalloc() McGenEventRegister(&ETW_MI_Provider, McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) +#endif + +// +// Register with ETW using a specific control GUID (i.e. a GUID other than what +// is specified in the manifest). Advanced scenarios only. +// +#ifndef EventRegisterByGuidmicrosoft_windows_mimalloc +#define EventRegisterByGuidmicrosoft_windows_mimalloc(Guid) McGenEventRegister(&(Guid), McGenControlCallbackV2, &ETW_MI_Provider_Context, µsoft_windows_mimallocHandle) +#endif + +// +// Unregister with ETW and close the provider. +// Invoke this macro during module shutdown (i.e. program exit, DLL process +// detach, or driver unload) to unregister the provider. +// Note that you MUST call EventUnregister before DLL or driver unload +// (not optional): failure to unregister a provider before DLL or driver unload +// will result in crashes. +// +#ifndef EventUnregistermicrosoft_windows_mimalloc +#define EventUnregistermicrosoft_windows_mimalloc() McGenEventUnregister(µsoft_windows_mimallocHandle) +#endif + +// +// MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION macro: +// Define this macro to enable support for caller-allocated provider context. +// +#ifdef MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION + +// +// Advanced scenarios: Caller-allocated provider context. +// Use when multiple differently-configured provider handles are needed, +// e.g. for container-aware drivers, one context per container. +// +// Usage: +// +// - Caller enables the feature before including this header, e.g. +// #define MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION 1 +// - Caller allocates memory, e.g. pContext = malloc(sizeof(McGenContext_microsoft_windows_mimalloc)); +// - Caller registers the provider, e.g. EventRegistermicrosoft_windows_mimalloc_ForContext(pContext); +// - Caller writes events, e.g. EventWriteMyEvent_ForContext(pContext, ...); +// - Caller unregisters, e.g. EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext); +// - Caller frees memory, e.g. free(pContext); +// + +typedef struct tagMcGenContext_microsoft_windows_mimalloc { + // The fields of this structure are subject to change and should + // not be accessed directly. To access the provider's REGHANDLE, + // use microsoft_windows_mimallocHandle_ForContext(pContext). + MCGEN_TRACE_CONTEXT Context; + ULONG EnableBits[1]; +} McGenContext_microsoft_windows_mimalloc; + +#define EventRegistermicrosoft_windows_mimalloc_ForContext(pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&ETW_MI_Provider, pContext) +#define EventRegisterByGuidmicrosoft_windows_mimalloc_ForContext(Guid, pContext) _mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)(&(Guid), pContext) +#define EventUnregistermicrosoft_windows_mimalloc_ForContext(pContext) McGenEventUnregister(&(pContext)->Context.RegistrationHandle) + +// +// Provider REGHANDLE for caller-allocated context. +// +#define microsoft_windows_mimallocHandle_ForContext(pContext) ((pContext)->Context.RegistrationHandle) + +// This function is for use by MC-generated code and should not be used directly. +// Initialize and register the caller-allocated context. +__inline +ULONG __stdcall +_mcgen_PASTE2(_mcgen_RegisterForContext_microsoft_windows_mimalloc_, MCGEN_EVENTREGISTER)( + _In_ LPCGUID pProviderId, + _Out_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + RtlZeroMemory(pContext, sizeof(*pContext)); + pContext->Context.Logger = (ULONG_PTR)ETW_MI_Provider_Traits; + pContext->Context.EnableBitsCount = 1; + pContext->Context.EnableBitMask = pContext->EnableBits; + pContext->Context.EnableKeyWords = microsoft_windows_mimallocKeywords; + pContext->Context.EnableLevel = microsoft_windows_mimallocLevels; + return McGenEventRegister( + pProviderId, + McGenControlCallbackV2, + &pContext->Context, + &pContext->Context.RegistrationHandle); +} + +// This function is for use by MC-generated code and should not be used directly. +// Trigger a compile error if called with the wrong parameter type. +FORCEINLINE +_Ret_ McGenContext_microsoft_windows_mimalloc* +_mcgen_CheckContextType_microsoft_windows_mimalloc(_In_ McGenContext_microsoft_windows_mimalloc* pContext) +{ + return pContext; +} + +#endif // MCGEN_ENABLE_FORCONTEXT_CODE_GENERATION + +// +// Enablement check macro for event "ETW_MI_ALLOC" +// +#define EventEnabledETW_MI_ALLOC() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_ALLOC_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_ALLOC" +// +#define EventWriteETW_MI_ALLOC(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&ETW_MI_Provider_Context, &ETW_MI_ALLOC, Address, Size) +#define EventWriteETW_MI_ALLOC_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_ALLOC) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&(pContext)->Context, &ETW_MI_ALLOC, Address, Size) : 0 +#define EventWriteETW_MI_ALLOC_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_ALLOC, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_ALLOC _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +// +// Enablement check macro for event "ETW_MI_FREE" +// +#define EventEnabledETW_MI_FREE() _mcgen_EVENT_BIT_SET(microsoft_windows_mimallocEnableBits, 0) +#define EventEnabledETW_MI_FREE_ForContext(pContext) _mcgen_EVENT_BIT_SET(_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->EnableBits, 0) + +// +// Event write macros for event "ETW_MI_FREE" +// +#define EventWriteETW_MI_FREE(Address, Size) \ + MCGEN_EVENT_ENABLED(ETW_MI_FREE) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_AssumeEnabled(Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&ETW_MI_Provider_Context, &ETW_MI_FREE, Address, Size) +#define EventWriteETW_MI_FREE_ForContext(pContext, Address, Size) \ + MCGEN_EVENT_ENABLED_FORCONTEXT(pContext, ETW_MI_FREE) \ + ? _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&(pContext)->Context, &ETW_MI_FREE, Address, Size) : 0 +#define EventWriteETW_MI_FREE_ForContextAssumeEnabled(pContext, Address, Size) \ + _mcgen_TEMPLATE_FOR_ETW_MI_FREE(&_mcgen_CheckContextType_microsoft_windows_mimalloc(pContext)->Context, &ETW_MI_FREE, Address, Size) + +// This macro is for use by MC-generated code and should not be used directly. +#define _mcgen_TEMPLATE_FOR_ETW_MI_FREE _mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER) + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// MCGEN_DISABLE_PROVIDER_CODE_GENERATION macro: +// Define this macro to have the compiler skip the generated functions in this +// header. +// +#ifndef MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +// +// Template Functions +// + +// +// Function for template "ETW_CUSTOM_HEAP_ALLOC_DATA" (and possibly others). +// This function is for use by MC-generated code and should not be used directly. +// +#ifndef McTemplateU0xx_def +#define McTemplateU0xx_def +ETW_INLINE +ULONG +_mcgen_PASTE2(McTemplateU0xx_, MCGEN_EVENTWRITETRANSFER)( + _In_ PMCGEN_TRACE_CONTEXT Context, + _In_ PCEVENT_DESCRIPTOR Descriptor, + _In_ const unsigned __int64 _Arg0, + _In_ const unsigned __int64 _Arg1 + ) +{ +#define McTemplateU0xx_ARGCOUNT 2 + + EVENT_DATA_DESCRIPTOR EventData[McTemplateU0xx_ARGCOUNT + 1]; + + EventDataDescCreate(&EventData[1],&_Arg0, sizeof(const unsigned __int64) ); + + EventDataDescCreate(&EventData[2],&_Arg1, sizeof(const unsigned __int64) ); + + return McGenEventWrite(Context, Descriptor, NULL, McTemplateU0xx_ARGCOUNT + 1, EventData); +} +#endif // McTemplateU0xx_def + +#endif // MCGEN_DISABLE_PROVIDER_CODE_GENERATION + +#if defined(__cplusplus) +} +#endif diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.man b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.man Binary files differnew file mode 100644 index 000000000..cfd1f8a9e --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/etw.man diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c b/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c new file mode 100644 index 000000000..e3dc33e32 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/prim.c @@ -0,0 +1,607 @@ +/* ---------------------------------------------------------------------------- +Copyright (c) 2018-2023, Microsoft Research, Daan Leijen +This is free software; you can redistribute it and/or modify it under the +terms of the MIT license. A copy of the license can be found in the file +"LICENSE" at the root of this distribution. +-----------------------------------------------------------------------------*/ + +// This file is included in `src/prim/prim.c` + +#include "mimalloc.h" +#include "mimalloc/internal.h" +#include "mimalloc/atomic.h" +#include "mimalloc/prim.h" +#include <stdio.h> // fputs, stderr + + +//--------------------------------------------- +// Dynamically bind Windows API points for portability +//--------------------------------------------- + +// We use VirtualAlloc2 for aligned allocation, but it is only supported on Windows 10 and Windows Server 2016. +// So, we need to look it up dynamically to run on older systems. (use __stdcall for 32-bit compatibility) +// NtAllocateVirtualAllocEx is used for huge OS page allocation (1GiB) +// We define a minimal MEM_EXTENDED_PARAMETER ourselves in order to be able to compile with older SDK's. +typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E { + MiMemExtendedParameterInvalidType = 0, + MiMemExtendedParameterAddressRequirements, + MiMemExtendedParameterNumaNode, + MiMemExtendedParameterPartitionHandle, + MiMemExtendedParameterUserPhysicalHandle, + MiMemExtendedParameterAttributeFlags, + MiMemExtendedParameterMax +} MI_MEM_EXTENDED_PARAMETER_TYPE; + +typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S { + struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type; + union { DWORD64 ULong64; PVOID Pointer; SIZE_T Size; HANDLE Handle; DWORD ULong; } Arg; +} MI_MEM_EXTENDED_PARAMETER; + +typedef struct MI_MEM_ADDRESS_REQUIREMENTS_S { + PVOID LowestStartingAddress; + PVOID HighestEndingAddress; + SIZE_T Alignment; +} MI_MEM_ADDRESS_REQUIREMENTS; + +#define MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE 0x00000010 + +#include <winternl.h> +typedef PVOID (__stdcall *PVirtualAlloc2)(HANDLE, PVOID, SIZE_T, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +typedef NTSTATUS (__stdcall *PNtAllocateVirtualMemoryEx)(HANDLE, PVOID*, SIZE_T*, ULONG, ULONG, MI_MEM_EXTENDED_PARAMETER*, ULONG); +static PVirtualAlloc2 pVirtualAlloc2 = NULL; +static PNtAllocateVirtualMemoryEx pNtAllocateVirtualMemoryEx = NULL; + +// Similarly, GetNumaProcesorNodeEx is only supported since Windows 7 +typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; } MI_PROCESSOR_NUMBER; + +typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber); +typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber); +typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask); +typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber); +static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL; +static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL; +static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL; +static PGetNumaProcessorNode pGetNumaProcessorNode = NULL; + +//--------------------------------------------- +// Enable large page support dynamically (if possible) +//--------------------------------------------- + +static bool win_enable_large_os_pages(size_t* large_page_size) +{ + static bool large_initialized = false; + if (large_initialized) return (_mi_os_large_page_size() > 0); + large_initialized = true; + + // Try to see if large OS pages are supported + // To use large pages on Windows, we first need access permission + // Set "Lock pages in memory" permission in the group policy editor + // <https://devblogs.microsoft.com/oldnewthing/20110128-00/?p=11643> + unsigned long err = 0; + HANDLE token = NULL; + BOOL ok = OpenProcessToken(GetCurrentProcess(), TOKEN_ADJUST_PRIVILEGES | TOKEN_QUERY, &token); + if (ok) { + TOKEN_PRIVILEGES tp; + ok = LookupPrivilegeValue(NULL, TEXT("SeLockMemoryPrivilege"), &tp.Privileges[0].Luid); + if (ok) { + tp.PrivilegeCount = 1; + tp.Privileges[0].Attributes = SE_PRIVILEGE_ENABLED; + ok = AdjustTokenPrivileges(token, FALSE, &tp, 0, (PTOKEN_PRIVILEGES)NULL, 0); + if (ok) { + err = GetLastError(); + ok = (err == ERROR_SUCCESS); + if (ok && large_page_size != NULL) { + *large_page_size = GetLargePageMinimum(); + } + } + } + CloseHandle(token); + } + if (!ok) { + if (err == 0) err = GetLastError(); + _mi_warning_message("cannot enable large OS page support, error %lu\n", err); + } + return (ok!=0); +} + + +//--------------------------------------------- +// Initialize +//--------------------------------------------- + +void _mi_prim_mem_init( mi_os_mem_config_t* config ) +{ + config->has_overcommit = false; + config->must_free_whole = true; + // get the page size + SYSTEM_INFO si; + GetSystemInfo(&si); + if (si.dwPageSize > 0) { config->page_size = si.dwPageSize; } + if (si.dwAllocationGranularity > 0) { config->alloc_granularity = si.dwAllocationGranularity; } + // get the VirtualAlloc2 function + HINSTANCE hDll; + hDll = LoadLibrary(TEXT("kernelbase.dll")); + if (hDll != NULL) { + // use VirtualAlloc2FromApp if possible as it is available to Windows store apps + pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2FromApp"); + if (pVirtualAlloc2==NULL) pVirtualAlloc2 = (PVirtualAlloc2)(void (*)(void))GetProcAddress(hDll, "VirtualAlloc2"); + FreeLibrary(hDll); + } + // NtAllocateVirtualMemoryEx is used for huge page allocation + hDll = LoadLibrary(TEXT("ntdll.dll")); + if (hDll != NULL) { + pNtAllocateVirtualMemoryEx = (PNtAllocateVirtualMemoryEx)(void (*)(void))GetProcAddress(hDll, "NtAllocateVirtualMemoryEx"); + FreeLibrary(hDll); + } + // Try to use Win7+ numa API + hDll = LoadLibrary(TEXT("kernel32.dll")); + if (hDll != NULL) { + pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx"); + pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx"); + pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx"); + pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode"); + FreeLibrary(hDll); + } + if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) { + win_enable_large_os_pages(&config->large_page_size); + } +} + + +//--------------------------------------------- +// Free +//--------------------------------------------- + +int _mi_prim_free(void* addr, size_t size ) { + MI_UNUSED(size); + DWORD errcode = 0; + bool err = (VirtualFree(addr, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + if (errcode == ERROR_INVALID_ADDRESS) { + // In mi_os_mem_alloc_aligned the fallback path may have returned a pointer inside + // the memory region returned by VirtualAlloc; in that case we need to free using + // the start of the region. + MEMORY_BASIC_INFORMATION info = { 0 }; + VirtualQuery(addr, &info, sizeof(info)); + if (info.AllocationBase < addr && ((uint8_t*)addr - (uint8_t*)info.AllocationBase) < (ptrdiff_t)MI_SEGMENT_SIZE) { + errcode = 0; + err = (VirtualFree(info.AllocationBase, 0, MEM_RELEASE) == 0); + if (err) { errcode = GetLastError(); } + } + } + return (int)errcode; +} + + +//--------------------------------------------- +// VirtualAlloc +//--------------------------------------------- + +static void* win_virtual_alloc_prim(void* addr, size_t size, size_t try_alignment, DWORD flags) { + #if (MI_INTPTR_SIZE >= 8) + // on 64-bit systems, try to use the virtual address area after 2TiB for 4MiB aligned allocations + if (addr == NULL) { + void* hint = _mi_os_get_aligned_hint(try_alignment,size); + if (hint != NULL) { + void* p = VirtualAlloc(hint, size, flags, PAGE_READWRITE); + if (p != NULL) return p; + _mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags); + // fall through on error + } + } + #endif + // on modern Windows try use VirtualAlloc2 for aligned allocation + if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) { + MI_MEM_ADDRESS_REQUIREMENTS reqs = { 0, 0, 0 }; + reqs.Alignment = try_alignment; + MI_MEM_EXTENDED_PARAMETER param = { {0, 0}, {0} }; + param.Type.Type = MiMemExtendedParameterAddressRequirements; + param.Arg.Pointer = &reqs; + void* p = (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, ¶m, 1); + if (p != NULL) return p; + _mi_warning_message("unable to allocate aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), addr, try_alignment, flags); + // fall through on error + } + // last resort + return VirtualAlloc(addr, size, flags, PAGE_READWRITE); +} + +static void* win_virtual_alloc(void* addr, size_t size, size_t try_alignment, DWORD flags, bool large_only, bool allow_large, bool* is_large) { + mi_assert_internal(!(large_only && !allow_large)); + static _Atomic(size_t) large_page_try_ok; // = 0; + void* p = NULL; + // Try to allocate large OS pages (2MiB) if allowed or required. + if ((large_only || _mi_os_use_large_page(size, try_alignment)) + && allow_large && (flags&MEM_COMMIT)!=0 && (flags&MEM_RESERVE)!=0) { + size_t try_ok = mi_atomic_load_acquire(&large_page_try_ok); + if (!large_only && try_ok > 0) { + // if a large page allocation fails, it seems the calls to VirtualAlloc get very expensive. + // therefore, once a large page allocation failed, we don't try again for `large_page_try_ok` times. + mi_atomic_cas_strong_acq_rel(&large_page_try_ok, &try_ok, try_ok - 1); + } + else { + // large OS pages must always reserve and commit. + *is_large = true; + p = win_virtual_alloc_prim(addr, size, try_alignment, flags | MEM_LARGE_PAGES); + if (large_only) return p; + // fall back to non-large page allocation on error (`p == NULL`). + if (p == NULL) { + mi_atomic_store_release(&large_page_try_ok,10UL); // on error, don't try again for the next N allocations + } + } + } + // Fall back to regular page allocation + if (p == NULL) { + *is_large = ((flags&MEM_LARGE_PAGES) != 0); + p = win_virtual_alloc_prim(addr, size, try_alignment, flags); + } + //if (p == NULL) { _mi_warning_message("unable to allocate OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x, large only: %d, allow large: %d)\n", size, GetLastError(), addr, try_alignment, flags, large_only, allow_large); } + return p; +} + +int _mi_prim_alloc(size_t size, size_t try_alignment, bool commit, bool allow_large, bool* is_large, void** addr) { + mi_assert_internal(size > 0 && (size % _mi_os_page_size()) == 0); + mi_assert_internal(commit || !allow_large); + mi_assert_internal(try_alignment > 0); + int flags = MEM_RESERVE; + if (commit) { flags |= MEM_COMMIT; } + *addr = win_virtual_alloc(NULL, size, try_alignment, flags, false, allow_large, is_large); + return (*addr != NULL ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Commit/Reset/Protect +//--------------------------------------------- +#ifdef _MSC_VER +#pragma warning(disable:6250) // suppress warning calling VirtualFree without MEM_RELEASE (for decommit) +#endif + +int _mi_prim_commit(void* addr, size_t size, bool commit) { + if (commit) { + void* p = VirtualAlloc(addr, size, MEM_COMMIT, PAGE_READWRITE); + return (p == addr ? 0 : (int)GetLastError()); + } + else { + BOOL ok = VirtualFree(addr, size, MEM_DECOMMIT); + return (ok ? 0 : (int)GetLastError()); + } +} + +int _mi_prim_reset(void* addr, size_t size) { + void* p = VirtualAlloc(addr, size, MEM_RESET, PAGE_READWRITE); + mi_assert_internal(p == addr); + #if 1 + if (p == addr && addr != NULL) { + VirtualUnlock(addr,size); // VirtualUnlock after MEM_RESET removes the memory from the working set + } + #endif + return (p == addr ? 0 : (int)GetLastError()); +} + +int _mi_prim_protect(void* addr, size_t size, bool protect) { + DWORD oldprotect = 0; + BOOL ok = VirtualProtect(addr, size, protect ? PAGE_NOACCESS : PAGE_READWRITE, &oldprotect); + return (ok ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Huge page allocation +//--------------------------------------------- + +static void* _mi_prim_alloc_huge_os_pagesx(void* hint_addr, size_t size, int numa_node) +{ + const DWORD flags = MEM_LARGE_PAGES | MEM_COMMIT | MEM_RESERVE; + + win_enable_large_os_pages(NULL); + + MI_MEM_EXTENDED_PARAMETER params[3] = { {{0,0},{0}},{{0,0},{0}},{{0,0},{0}} }; + // on modern Windows try use NtAllocateVirtualMemoryEx for 1GiB huge pages + static bool mi_huge_pages_available = true; + if (pNtAllocateVirtualMemoryEx != NULL && mi_huge_pages_available) { + params[0].Type.Type = MiMemExtendedParameterAttributeFlags; + params[0].Arg.ULong64 = MI_MEM_EXTENDED_PARAMETER_NONPAGED_HUGE; + ULONG param_count = 1; + if (numa_node >= 0) { + param_count++; + params[1].Type.Type = MiMemExtendedParameterNumaNode; + params[1].Arg.ULong = (unsigned)numa_node; + } + SIZE_T psize = size; + void* base = hint_addr; + NTSTATUS err = (*pNtAllocateVirtualMemoryEx)(GetCurrentProcess(), &base, &psize, flags, PAGE_READWRITE, params, param_count); + if (err == 0 && base != NULL) { + return base; + } + else { + // fall back to regular large pages + mi_huge_pages_available = false; // don't try further huge pages + _mi_warning_message("unable to allocate using huge (1GiB) pages, trying large (2MiB) pages instead (status 0x%lx)\n", err); + } + } + // on modern Windows try use VirtualAlloc2 for numa aware large OS page allocation + if (pVirtualAlloc2 != NULL && numa_node >= 0) { + params[0].Type.Type = MiMemExtendedParameterNumaNode; + params[0].Arg.ULong = (unsigned)numa_node; + return (*pVirtualAlloc2)(GetCurrentProcess(), hint_addr, size, flags, PAGE_READWRITE, params, 1); + } + + // otherwise use regular virtual alloc on older windows + return VirtualAlloc(hint_addr, size, flags, PAGE_READWRITE); +} + +int _mi_prim_alloc_huge_os_pages(void* hint_addr, size_t size, int numa_node, void** addr) { + *addr = _mi_prim_alloc_huge_os_pagesx(hint_addr,size,numa_node); + return (*addr != NULL ? 0 : (int)GetLastError()); +} + + +//--------------------------------------------- +// Numa nodes +//--------------------------------------------- + +size_t _mi_prim_numa_node(void) { + USHORT numa_node = 0; + if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) { + // Extended API is supported + MI_PROCESSOR_NUMBER pnum; + (*pGetCurrentProcessorNumberEx)(&pnum); + USHORT nnode = 0; + BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode); + if (ok) { numa_node = nnode; } + } + else if (pGetNumaProcessorNode != NULL) { + // Vista or earlier, use older API that is limited to 64 processors. Issue #277 + DWORD pnum = GetCurrentProcessorNumber(); + UCHAR nnode = 0; + BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode); + if (ok) { numa_node = nnode; } + } + return numa_node; +} + +size_t _mi_prim_numa_node_count(void) { + ULONG numa_max = 0; + GetNumaHighestNodeNumber(&numa_max); + // find the highest node number that has actual processors assigned to it. Issue #282 + while(numa_max > 0) { + if (pGetNumaNodeProcessorMaskEx != NULL) { + // Extended API is supported + GROUP_AFFINITY affinity; + if ((*pGetNumaNodeProcessorMaskEx)((USHORT)numa_max, &affinity)) { + if (affinity.Mask != 0) break; // found the maximum non-empty node + } + } + else { + // Vista or earlier, use older API that is limited to 64 processors. + ULONGLONG mask; + if (GetNumaNodeProcessorMask((UCHAR)numa_max, &mask)) { + if (mask != 0) break; // found the maximum non-empty node + }; + } + // max node was invalid or had no processor assigned, try again + numa_max--; + } + return ((size_t)numa_max + 1); +} + + +//---------------------------------------------------------------- +// Clock +//---------------------------------------------------------------- + +static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) { + static LARGE_INTEGER mfreq; // = 0 + if (mfreq.QuadPart == 0LL) { + LARGE_INTEGER f; + QueryPerformanceFrequency(&f); + mfreq.QuadPart = f.QuadPart/1000LL; + if (mfreq.QuadPart == 0) mfreq.QuadPart = 1; + } + return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart); +} + +mi_msecs_t _mi_prim_clock_now(void) { + LARGE_INTEGER t; + QueryPerformanceCounter(&t); + return mi_to_msecs(t); +} + + +//---------------------------------------------------------------- +// Process Info +//---------------------------------------------------------------- + +#include <windows.h> +#include <psapi.h> + +static mi_msecs_t filetime_msecs(const FILETIME* ftime) { + ULARGE_INTEGER i; + i.LowPart = ftime->dwLowDateTime; + i.HighPart = ftime->dwHighDateTime; + mi_msecs_t msecs = (i.QuadPart / 10000); // FILETIME is in 100 nano seconds + return msecs; +} + +typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD); +static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL; + +void _mi_prim_process_info(mi_process_info_t* pinfo) +{ + FILETIME ct; + FILETIME ut; + FILETIME st; + FILETIME et; + GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut); + pinfo->utime = filetime_msecs(&ut); + pinfo->stime = filetime_msecs(&st); + + // load psapi on demand + if (pGetProcessMemoryInfo == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll")); + if (hDll != NULL) { + pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo"); + } + } + + // get process info + PROCESS_MEMORY_COUNTERS info; + memset(&info, 0, sizeof(info)); + if (pGetProcessMemoryInfo != NULL) { + pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info)); + } + pinfo->current_rss = (size_t)info.WorkingSetSize; + pinfo->peak_rss = (size_t)info.PeakWorkingSetSize; + pinfo->current_commit = (size_t)info.PagefileUsage; + pinfo->peak_commit = (size_t)info.PeakPagefileUsage; + pinfo->page_faults = (size_t)info.PageFaultCount; +} + +//---------------------------------------------------------------- +// Output +//---------------------------------------------------------------- + +void _mi_prim_out_stderr( const char* msg ) +{ + // on windows with redirection, the C runtime cannot handle locale dependent output + // after the main thread closes so we use direct console output. + if (!_mi_preloading()) { + // _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console + static HANDLE hcon = INVALID_HANDLE_VALUE; + static bool hconIsConsole; + if (hcon == INVALID_HANDLE_VALUE) { + CONSOLE_SCREEN_BUFFER_INFO sbi; + hcon = GetStdHandle(STD_ERROR_HANDLE); + hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi)); + } + const size_t len = _mi_strlen(msg); + if (len > 0 && len < UINT32_MAX) { + DWORD written = 0; + if (hconIsConsole) { + WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL); + } + else if (hcon != INVALID_HANDLE_VALUE) { + // use direct write if stderr was redirected + WriteFile(hcon, msg, (DWORD)len, &written, NULL); + } + else { + // finally fall back to fputs after all + fputs(msg, stderr); + } + } + } +} + + +//---------------------------------------------------------------- +// Environment +//---------------------------------------------------------------- + +// On Windows use GetEnvironmentVariable instead of getenv to work +// reliably even when this is invoked before the C runtime is initialized. +// i.e. when `_mi_preloading() == true`. +// Note: on windows, environment names are not case sensitive. +bool _mi_prim_getenv(const char* name, char* result, size_t result_size) { + result[0] = 0; + size_t len = GetEnvironmentVariableA(name, result, (DWORD)result_size); + return (len > 0 && len < result_size); +} + + + +//---------------------------------------------------------------- +// Random +//---------------------------------------------------------------- + +#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus) +// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using +// dynamic overriding, we observed it can raise an exception when compiled with C++, and +// sometimes deadlocks when also running under the VS debugger. +// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom. +// To be continued.. +#pragma comment (lib,"advapi32.lib") +#define RtlGenRandom SystemFunction036 +mi_decl_externc BOOLEAN NTAPI RtlGenRandom(PVOID RandomBuffer, ULONG RandomBufferLength); + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + return (RtlGenRandom(buf, (ULONG)buf_len) != 0); +} + +#else + +#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG +#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002 +#endif + +typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG); +static PBCryptGenRandom pBCryptGenRandom = NULL; + +bool _mi_prim_random_buf(void* buf, size_t buf_len) { + if (pBCryptGenRandom == NULL) { + HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll")); + if (hDll != NULL) { + pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom"); + } + if (pBCryptGenRandom == NULL) return false; + } + return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0); +} + +#endif // MI_USE_RTLGENRANDOM + +//---------------------------------------------------------------- +// Thread init/done +//---------------------------------------------------------------- + +#if !defined(MI_SHARED_LIB) + +// use thread local storage keys to detect thread ending +#include <fibersapi.h> +#if (_WIN32_WINNT < 0x600) // before Windows Vista +WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback ); +WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex ); +WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData ); +WINBASEAPI BOOL WINAPI FlsFree(_In_ DWORD dwFlsIndex); +#endif + +static DWORD mi_fls_key = (DWORD)(-1); + +static void NTAPI mi_fls_done(PVOID value) { + mi_heap_t* heap = (mi_heap_t*)value; + if (heap != NULL) { + _mi_thread_done(heap); + FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672 + } +} + +void _mi_prim_thread_init_auto_done(void) { + mi_fls_key = FlsAlloc(&mi_fls_done); +} + +void _mi_prim_thread_done_auto_done(void) { + // call thread-done on all threads (except the main thread) to prevent + // dangling callback pointer if statically linked with a DLL; Issue #208 + FlsFree(mi_fls_key); +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + mi_assert_internal(mi_fls_key != (DWORD)(-1)); + FlsSetValue(mi_fls_key, heap); +} + +#else + +// Dll; nothing to do as in that case thread_done is handled through the DLL_THREAD_DETACH event. + +void _mi_prim_thread_init_auto_done(void) { +} + +void _mi_prim_thread_done_auto_done(void) { +} + +void _mi_prim_thread_associate_default_heap(mi_heap_t* heap) { + MI_UNUSED(heap); +} + +#endif diff --git a/source/luametatex/source/libraries/mimalloc/src/prim/windows/readme.md b/source/luametatex/source/libraries/mimalloc/src/prim/windows/readme.md new file mode 100644 index 000000000..217c3d174 --- /dev/null +++ b/source/luametatex/source/libraries/mimalloc/src/prim/windows/readme.md @@ -0,0 +1,17 @@ +## Primitives: + +- `prim.c` contains Windows primitives for OS allocation. + +## Event Tracing for Windows (ETW) + +- `etw.h` is generated from `etw.man` which contains the manifest for mimalloc events. + (100 is an allocation, 101 is for a free) + +- `etw-mimalloc.wprp` is a profile for the Windows Performance Recorder (WPR). + In an admin prompt, you can use: + ``` + > wpr -start src\prim\windows\etw-mimalloc.wprp -filemode + > <my mimalloc program> + > wpr -stop test.etl + ``` + and then open `test.etl` in the Windows Performance Analyzer (WPA).
\ No newline at end of file |