summaryrefslogtreecommitdiff
path: root/source
diff options
context:
space:
mode:
Diffstat (limited to 'source')
-rw-r--r--source/luametatex/source/libraries/mimalloc/CMakeLists.txt63
-rw-r--r--source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake2
-rw-r--r--source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config.cmake8
-rw-r--r--source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h10
-rw-r--r--source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h77
-rw-r--r--source/luametatex/source/libraries/mimalloc/include/mimalloc-new-delete.h19
-rw-r--r--source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h37
-rw-r--r--source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h61
-rw-r--r--source/luametatex/source/libraries/mimalloc/include/mimalloc.h132
-rw-r--r--source/luametatex/source/libraries/mimalloc/readme.md61
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c70
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c24
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/alloc-override.c54
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/alloc-posix.c18
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/alloc.c307
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/arena.c85
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/bitmap.c22
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/heap.c33
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/init.c73
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/options.c45
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/os.c122
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/page-queue.c5
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/page.c84
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/random.c57
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/region.c96
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/segment-cache.c63
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/segment.c374
-rw-r--r--source/luametatex/source/libraries/mimalloc/src/stats.c92
-rw-r--r--source/luametatex/source/libraries/readme.txt4
-rw-r--r--source/luametatex/source/luametatex.h6
-rw-r--r--source/luametatex/source/tex/texalign.c12
-rw-r--r--source/luametatex/source/tex/texcommands.c2
-rw-r--r--source/luametatex/source/tex/texdumpdata.h2
-rw-r--r--source/luametatex/source/tex/texmainbody.c7
-rw-r--r--source/luametatex/source/tex/texnesting.c2
-rw-r--r--source/luametatex/source/tex/texpackaging.c241
-rw-r--r--source/luametatex/source/tex/texpackaging.h15
-rw-r--r--source/luametatex/source/tex/textoken.c10
38 files changed, 1526 insertions, 869 deletions
diff --git a/source/luametatex/source/libraries/mimalloc/CMakeLists.txt b/source/luametatex/source/libraries/mimalloc/CMakeLists.txt
index 6cd826650..74c1f2916 100644
--- a/source/luametatex/source/libraries/mimalloc/CMakeLists.txt
+++ b/source/luametatex/source/libraries/mimalloc/CMakeLists.txt
@@ -1,4 +1,4 @@
-cmake_minimum_required(VERSION 3.0)
+cmake_minimum_required(VERSION 3.13)
project(libmimalloc C CXX)
set(CMAKE_C_STANDARD 11)
@@ -11,10 +11,11 @@ option(MI_OVERRIDE "Override the standard malloc interface (e.g. define
option(MI_XMALLOC "Enable abort() call on memory allocation failure by default" OFF)
option(MI_SHOW_ERRORS "Show error and warning messages by default (only enabled by default in DEBUG mode)" OFF)
option(MI_VALGRIND "Compile with Valgrind support (adds a small overhead)" OFF)
+option(MI_ASAN "Compile with address sanitizer support (adds a small overhead)" OFF)
option(MI_USE_CXX "Use the C++ compiler to compile the library (instead of the C compiler)" OFF)
option(MI_SEE_ASM "Generate assembly files" OFF)
option(MI_OSX_INTERPOSE "Use interpose to override standard malloc on macOS" ON)
-option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
+option(MI_OSX_ZONE "Use malloc zone to override standard malloc on macOS" ON)
option(MI_WIN_REDIRECT "Use redirection module ('mimalloc-redirect') on Windows if compiling mimalloc as a DLL" ON)
option(MI_LOCAL_DYNAMIC_TLS "Use slightly slower, dlopen-compatible TLS mechanism (Unix)" OFF)
option(MI_BUILD_SHARED "Build shared library" ON)
@@ -55,7 +56,7 @@ set(mi_sources
# Convenience: set default build type depending on the build directory
# -----------------------------------------------------------------------------
-message(STATUS "")
+message(STATUS "")
if (NOT CMAKE_BUILD_TYPE)
if ("${CMAKE_BINARY_DIR}" MATCHES ".*(D|d)ebug$" OR MI_DEBUG_FULL)
message(STATUS "No build type selected, default to: Debug")
@@ -87,7 +88,7 @@ if(MI_OVERRIDE)
# use zone's on macOS
message(STATUS " Use malloc zone to override malloc (MI_OSX_ZONE=ON)")
list(APPEND mi_sources src/alloc-override-osx.c)
- list(APPEND mi_defines MI_OSX_ZONE=1)
+ list(APPEND mi_defines MI_OSX_ZONE=1)
if (NOT MI_OSX_INTERPOSE)
message(STATUS " WARNING: zone overriding usually also needs interpose (use -DMI_OSX_INTERPOSE=ON)")
endif()
@@ -139,6 +140,25 @@ if(MI_VALGRIND)
endif()
endif()
+if(MI_ASAN)  # address sanitizer tracking; refused below when MI_VALGRIND is also ON
+  if (MI_VALGRIND)
+    set(MI_ASAN OFF)
+    message(WARNING "Cannot enable address sanitizer support with also Valgrind support enabled (MI_ASAN=OFF)")
+  else()
+    CHECK_INCLUDE_FILES("sanitizer/asan_interface.h" MI_HAS_ASANH)  # NOTE(review): assumes include(CheckIncludeFiles) appears earlier in this file -- confirm
+    if (NOT MI_HAS_ASANH)
+      set(MI_ASAN OFF)
+      message(WARNING "Cannot find the 'sanitizer/asan_interface.h' -- install address sanitizer support first")
+      message(STATUS "Compile **without** address sanitizer support (MI_ASAN=OFF)")
+    else()
+      message(STATUS "Compile with address sanitizer support (MI_ASAN=ON)")
+      list(APPEND mi_defines MI_ASAN=1)
+      list(APPEND mi_cflags -fsanitize=address)
+      string(APPEND CMAKE_EXE_LINKER_FLAGS " -fsanitize=address")  # flag string is space-separated; list(APPEND) would insert a ';'
+    endif()
+  endif()
+endif()
+
if(MI_SEE_ASM)
message(STATUS "Generate assembly listings (MI_SEE_ASM=ON)")
list(APPEND mi_cflags -save-temps)
@@ -181,12 +201,12 @@ if(MI_DEBUG_TSAN)
list(APPEND mi_cflags -fsanitize=thread -g -O1)
list(APPEND CMAKE_EXE_LINKER_FLAGS -fsanitize=thread)
else()
- message(WARNING "Can only use thread sanitizer with clang (MI_DEBUG_TSAN=ON but ignored)")
- endif()
+ message(WARNING "Can only use thread sanitizer with clang (MI_DEBUG_TSAN=ON but ignored)")
+ endif()
endif()
if(MI_DEBUG_UBSAN)
- if(CMAKE_BUILD_TYPE MATCHES "Debug")
+ if(CMAKE_BUILD_TYPE MATCHES "Debug")
if(CMAKE_CXX_COMPILER_ID MATCHES "Clang")
message(STATUS "Build with undefined-behavior sanitizer (MI_DEBUG_UBSAN=ON)")
list(APPEND mi_cflags -fsanitize=undefined -g -fno-sanitize-recover=undefined)
@@ -196,10 +216,10 @@ if(MI_DEBUG_UBSAN)
set(MI_USE_CXX "ON")
endif()
else()
- message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)")
- endif()
+ message(WARNING "Can only use undefined-behavior sanitizer with clang++ (MI_DEBUG_UBSAN=ON but ignored)")
+ endif()
else()
- message(WARNING "Can only use thread sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})")
+ message(WARNING "Can only use undefined-behavior sanitizer with a debug build (CMAKE_BUILD_TYPE=${CMAKE_BUILD_TYPE})")
endif()
endif()
@@ -220,7 +240,7 @@ if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang|GNU")
list(APPEND mi_cflags -Wall -Wextra -Wno-unknown-pragmas -fvisibility=hidden)
if(NOT MI_USE_CXX)
list(APPEND mi_cflags -Wstrict-prototypes)
- endif()
+ endif()
if(CMAKE_C_COMPILER_ID MATCHES "AppleClang|Clang")
list(APPEND mi_cflags -Wpedantic -Wno-static-in-inline)
endif()
@@ -252,7 +272,7 @@ if(WIN32)
else()
set(pc_libraries "")
find_library(MI_LIBPTHREAD pthread)
- if (MI_LIBPTHREAD)
+ if (MI_LIBPTHREAD)
list(APPEND mi_libraries ${MI_LIBPTHREAD})
set(pc_libraries "${pc_libraries} -pthread")
endif()
@@ -260,12 +280,12 @@ else()
if(MI_LIBRT)
list(APPEND mi_libraries ${MI_LIBRT})
set(pc_libraries "${pc_libraries} -lrt")
- endif()
+ endif()
find_library(MI_LIBATOMIC atomic)
- if (NOT MI_LIBATOMIC AND MI_USE_LIBATOMIC)
+ if (NOT MI_LIBATOMIC AND MI_USE_LIBATOMIC)
set(MI_LIBATOMIC atomic)
endif()
- if (MI_LIBATOMIC)
+ if (MI_LIBATOMIC)
list(APPEND mi_libraries ${MI_LIBATOMIC})
set(pc_libraries "${pc_libraries} -latomic")
endif()
@@ -282,7 +302,7 @@ set(mi_install_libdir "${CMAKE_INSTALL_LIBDIR}")
# are either installed at top level, or use versioned directories for side-by-side installation (default)
if (MI_INSTALL_TOPLEVEL)
set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}")
- set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}")
+ set(mi_install_incdir "${CMAKE_INSTALL_INCLUDEDIR}")
set(mi_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc")
else()
set(mi_install_objdir "${CMAKE_INSTALL_LIBDIR}/mimalloc-${mi_version}") # for static library and object files
@@ -290,13 +310,16 @@ else()
set(mi_install_cmakedir "${CMAKE_INSTALL_LIBDIR}/cmake/mimalloc-${mi_version}") # for cmake package info
endif()
- set(mi_basename "mimalloc")
+set(mi_basename "mimalloc")
if(MI_SECURE)
set(mi_basename "${mi_basename}-secure")
endif()
if(MI_VALGRIND)
set(mi_basename "${mi_basename}-valgrind")
endif()
+if(MI_ASAN)
+ set(mi_basename "${mi_basename}-asan")
+endif()
string(TOLOWER "${CMAKE_BUILD_TYPE}" CMAKE_BUILD_TYPE_LC)
if(NOT(CMAKE_BUILD_TYPE_LC MATCHES "^(release|relwithdebinfo|minsizerel|none)$"))
set(mi_basename "${mi_basename}-${CMAKE_BUILD_TYPE_LC}") #append build type (e.g. -debug) if not a release version
@@ -340,7 +363,7 @@ if(MI_BUILD_SHARED)
set_target_properties(mimalloc PROPERTIES VERSION ${mi_version} SOVERSION ${mi_version_major} OUTPUT_NAME ${mi_basename} )
target_compile_definitions(mimalloc PRIVATE ${mi_defines} MI_SHARED_LIB MI_SHARED_LIB_EXPORT)
target_compile_options(mimalloc PRIVATE ${mi_cflags})
- target_link_libraries(mimalloc PUBLIC ${mi_libraries})
+ target_link_libraries(mimalloc PRIVATE ${mi_libraries})
target_include_directories(mimalloc PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${mi_install_incdir}>
@@ -360,7 +383,7 @@ if(MI_BUILD_SHARED)
install(FILES "$<TARGET_FILE_DIR:mimalloc>/mimalloc-redirect${MIMALLOC_REDIRECT_SUFFIX}.dll" DESTINATION ${mi_install_libdir})
endif()
- install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY)
+ install(TARGETS mimalloc EXPORT mimalloc DESTINATION ${mi_install_libdir} LIBRARY)
install(EXPORT mimalloc DESTINATION ${mi_install_cmakedir})
endif()
@@ -370,7 +393,7 @@ if (MI_BUILD_STATIC)
set_property(TARGET mimalloc-static PROPERTY POSITION_INDEPENDENT_CODE ON)
target_compile_definitions(mimalloc-static PRIVATE ${mi_defines} MI_STATIC_LIB)
target_compile_options(mimalloc-static PRIVATE ${mi_cflags})
- target_link_libraries(mimalloc-static PUBLIC ${mi_libraries})
+ target_link_libraries(mimalloc-static PRIVATE ${mi_libraries})
target_include_directories(mimalloc-static PUBLIC
$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:${mi_install_incdir}>
diff --git a/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake b/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake
index f0669c84d..64d710533 100644
--- a/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake
+++ b/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config-version.cmake
@@ -1,6 +1,6 @@
set(mi_version_major 2)
set(mi_version_minor 0)
-set(mi_version_patch 7)
+set(mi_version_patch 9)
set(mi_version ${mi_version_major}.${mi_version_minor})
set(PACKAGE_VERSION ${mi_version})
diff --git a/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config.cmake b/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config.cmake
index 8a28e37e7..a49b02a25 100644
--- a/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config.cmake
+++ b/source/luametatex/source/libraries/mimalloc/cmake/mimalloc-config.cmake
@@ -2,13 +2,13 @@ include(${CMAKE_CURRENT_LIST_DIR}/mimalloc.cmake)
get_filename_component(MIMALLOC_CMAKE_DIR "${CMAKE_CURRENT_LIST_DIR}" PATH) # one up from the cmake dir, e.g. /usr/local/lib/cmake/mimalloc-2.0
get_filename_component(MIMALLOC_VERSION_DIR "${CMAKE_CURRENT_LIST_DIR}" NAME)
string(REPLACE "/lib/cmake" "/lib" MIMALLOC_LIBRARY_DIR "${MIMALLOC_CMAKE_DIR}")
-if("${MIMALLOC_VERSION_DIR}" EQUAL "mimalloc")
+if("${MIMALLOC_VERSION_DIR}" STREQUAL "mimalloc")  # string compare; EQUAL is numeric and never matches a name
# top level install
string(REPLACE "/lib/cmake" "/include" MIMALLOC_INCLUDE_DIR "${MIMALLOC_CMAKE_DIR}")
set(MIMALLOC_OBJECT_DIR "${MIMALLOC_LIBRARY_DIR}")
-else()
+else()
# versioned
string(REPLACE "/lib/cmake/" "/include/" MIMALLOC_INCLUDE_DIR "${CMAKE_CURRENT_LIST_DIR}")
- string(REPLACE "/lib/cmake/" "/lib/" MIMALLOC_OBJECT_DIR "${CMAKE_CURRENT_LIST_DIR}")
-endif()
+ string(REPLACE "/lib/cmake/" "/lib/" MIMALLOC_OBJECT_DIR "${CMAKE_CURRENT_LIST_DIR}")
+endif()
set(MIMALLOC_TARGET_DIR "${MIMALLOC_LIBRARY_DIR}") # legacy
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h
index 7ad5da585..c66f80493 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-atomic.h
@@ -11,9 +11,9 @@ terms of the MIT license. A copy of the license can be found in the file
// --------------------------------------------------------------------------------------------
// Atomics
// We need to be portable between C, C++, and MSVC.
-// We base the primitives on the C/C++ atomics and create a mimimal wrapper for MSVC in C compilation mode.
-// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
-// To gain better insight in the range of used atomics, we use explicitly named memory order operations
+// We base the primitives on the C/C++ atomics and create a minimal wrapper for MSVC in C compilation mode.
+// This is why we try to use only `uintptr_t` and `<type>*` as atomic types.
+// To gain better insight in the range of used atomics, we use explicitly named memory order operations
// instead of passing the memory order as a parameter.
// -----------------------------------------------------------------------------------------------
@@ -30,7 +30,7 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
#elif defined(_MSC_VER)
// Use MSVC C wrapper for C11 atomics
-#define _Atomic(tp) tp
+#define _Atomic(tp) tp
#define MI_ATOMIC_VAR_INIT(x) x
#define mi_atomic(name) mi_atomic_##name
#define mi_memory_order(name) mi_memory_order_##name
@@ -275,7 +275,7 @@ static inline intptr_t mi_atomic_subi(_Atomic(intptr_t)*p, intptr_t sub) {
return (intptr_t)mi_atomic_addi(p, -sub);
}
-// Yield
+// Yield
#if defined(__cplusplus)
#include <thread>
static inline void mi_atomic_yield(void) {
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h
index 550b65433..a68e69662 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-internal.h
@@ -41,10 +41,10 @@ terms of the MIT license. A copy of the license can be found in the file
#if defined(__cplusplus)
#define mi_decl_externc extern "C"
#else
-#define mi_decl_externc
+#define mi_decl_externc
#endif
-#if !defined(_WIN32) && !defined(__wasi__)
+#if !defined(_WIN32) && !defined(__wasi__)
#define MI_USE_PTHREADS
#include <pthread.h>
#endif
@@ -60,6 +60,8 @@ void _mi_error_message(int err, const char* fmt, ...);
// random.c
void _mi_random_init(mi_random_ctx_t* ctx);
+void _mi_random_init_weak(mi_random_ctx_t* ctx);
+void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx);
void _mi_random_split(mi_random_ctx_t* ctx, mi_random_ctx_t* new_ctx);
uintptr_t _mi_random_next(mi_random_ctx_t* ctx);
uintptr_t _mi_heap_random_next(mi_heap_t* heap);
@@ -87,11 +89,15 @@ bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
// bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
size_t _mi_os_good_alloc_size(size_t size);
bool _mi_os_has_overcommit(void);
+bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats);
+
+void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t align_offset, bool commit, bool* large, mi_stats_t* tld_stats);
+void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats);
// arena.c
-void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
-void _mi_arena_free(void* p, size_t size, size_t memid, bool is_committed, mi_os_tld_t* tld);
+void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
mi_arena_id_t _mi_arena_id_none(void);
bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id);
@@ -99,16 +105,22 @@ bool _mi_arena_memid_is_suitable(size_t memid, mi_arena_id_t req_arena_id)
void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld);
void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld);
+void _mi_segment_cache_free_all(mi_os_tld_t* tld);
void _mi_segment_map_allocated_at(const mi_segment_t* segment);
void _mi_segment_map_freed_at(const mi_segment_t* segment);
// "segment.c"
-mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_wsize, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
+mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld);
void _mi_segment_page_free(mi_page_t* page, bool force, mi_segments_tld_t* tld);
void _mi_segment_page_abandon(mi_page_t* page, mi_segments_tld_t* tld);
bool _mi_segment_try_reclaim_abandoned( mi_heap_t* heap, bool try_all, mi_segments_tld_t* tld);
void _mi_segment_thread_collect(mi_segments_tld_t* tld);
+
+#if MI_HUGE_PAGE_ABANDON
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
+#else
+void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block);
+#endif
uint8_t* _mi_segment_page_start(const mi_segment_t* segment, const mi_page_t* page, size_t* page_size); // page start for any page
void _mi_abandoned_reclaim_all(mi_heap_t* heap, mi_segments_tld_t* tld);
@@ -118,7 +130,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t*
// "page.c"
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept mi_attr_malloc;
+void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept mi_attr_malloc;
void _mi_page_retire(mi_page_t* page) mi_attr_noexcept; // free the page if there are no other pages with many free blocks
void _mi_page_unfull(mi_page_t* page);
@@ -144,6 +156,7 @@ void _mi_heap_destroy_pages(mi_heap_t* heap);
void _mi_heap_collect_abandon(mi_heap_t* heap);
void _mi_heap_set_default_direct(mi_heap_t* heap);
bool _mi_heap_memid_is_suitable(mi_heap_t* heap, size_t memid);
+void _mi_heap_destroy_all(void);
// "stats.c"
void _mi_stats_done(mi_stats_t* stats);
@@ -155,9 +168,11 @@ mi_msecs_t _mi_clock_start(void);
// "alloc.c"
void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t size, bool zero) mi_attr_noexcept; // called from `_mi_malloc_generic`
void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept;
+void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept; // called from `_mi_heap_malloc_aligned`
void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero) mi_attr_noexcept;
mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* page, const void* p);
bool _mi_free_delayed_block(mi_block_t* block);
+void _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept; // for runtime integration
#if MI_DEBUG>1
bool _mi_page_is_valid(mi_page_t* page);
@@ -345,14 +360,14 @@ mi_heap_t* _mi_heap_main_get(void); // statically allocated main backing hea
#if defined(MI_MALLOC_OVERRIDE)
#if defined(__APPLE__) // macOS
-#define MI_TLS_SLOT 89 // seems unused?
-// #define MI_TLS_RECURSE_GUARD 1
+#define MI_TLS_SLOT 89 // seems unused?
+// #define MI_TLS_RECURSE_GUARD 1
// other possible unused ones are 9, 29, __PTK_FRAMEWORK_JAVASCRIPTCORE_KEY4 (94), __PTK_FRAMEWORK_GC_KEY9 (112) and __PTK_FRAMEWORK_OLDGC_KEY9 (89)
// see <https://github.com/rweichler/substrate/blob/master/include/pthread_machdep.h>
#elif defined(__OpenBSD__)
-// use end bytes of a name; goes wrong if anyone uses names > 23 characters (ptrhread specifies 16)
+// use end bytes of a name; goes wrong if anyone uses names > 23 characters (pthread specifies 16)
// see <https://github.com/openbsd/src/blob/master/lib/libc/include/thread_private.h#L371>
-#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
+#define MI_TLS_PTHREAD_SLOT_OFS (6*sizeof(int) + 4*sizeof(void*) + 24)
// #elif defined(__DragonFly__)
// #warning "mimalloc is not working correctly on DragonFly yet."
// #define MI_TLS_PTHREAD_SLOT_OFS (4 + 1*sizeof(void*)) // offset `uniqueid` (also used by gdb?) <https://github.com/DragonFlyBSD/DragonFlyBSD/blob/master/lib/libthread_xu/thread/thr_private.h#L458>
@@ -392,7 +407,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
#ifdef __GNUC__
__asm(""); // prevent conditional load of the address of _mi_heap_empty
#endif
- heap = (mi_heap_t*)&_mi_heap_empty;
+ heap = (mi_heap_t*)&_mi_heap_empty;
}
return heap;
#elif defined(MI_TLS_PTHREAD_SLOT_OFS)
@@ -402,7 +417,7 @@ static inline mi_heap_t* mi_get_default_heap(void) {
mi_heap_t* heap = (mi_unlikely(_mi_heap_default_key == (pthread_key_t)(-1)) ? _mi_heap_main_get() : (mi_heap_t*)pthread_getspecific(_mi_heap_default_key));
return (mi_unlikely(heap == NULL) ? (mi_heap_t*)&_mi_heap_empty : heap);
#else
- #if defined(MI_TLS_RECURSE_GUARD)
+ #if defined(MI_TLS_RECURSE_GUARD)
if (mi_unlikely(!_mi_process_is_initialized)) return _mi_heap_main_get();
#endif
return _mi_heap_default;
@@ -445,9 +460,12 @@ static inline mi_page_t* _mi_get_free_small_page(size_t size) {
}
// Segment that contains the pointer
+// Large aligned blocks may be aligned at N*MI_SEGMENT_SIZE (inside a huge segment > MI_SEGMENT_SIZE),
+// and we need to align "down" to the segment info which is `MI_SEGMENT_SIZE` bytes before it;
+// therefore we align one byte before `p`.
static inline mi_segment_t* _mi_ptr_segment(const void* p) {
- // mi_assert_internal(p != NULL);
- return (mi_segment_t*)((uintptr_t)p & ~MI_SEGMENT_MASK);
+ mi_assert_internal(p != NULL);
+ return (mi_segment_t*)(((uintptr_t)p - 1) & ~MI_SEGMENT_MASK);
}
static inline mi_page_t* mi_slice_to_page(mi_slice_t* s) {
@@ -475,12 +493,13 @@ static inline mi_slice_t* mi_slice_first(const mi_slice_t* slice) {
return start;
}
-// Get the page containing the pointer
+// Get the page containing the pointer (performance critical as it is called in mi_free)
static inline mi_page_t* _mi_segment_page_of(const mi_segment_t* segment, const void* p) {
+ mi_assert_internal(p > (void*)segment);
ptrdiff_t diff = (uint8_t*)p - (uint8_t*)segment;
- mi_assert_internal(diff >= 0 && diff < (ptrdiff_t)MI_SEGMENT_SIZE);
+ mi_assert_internal(diff > 0 && diff <= (ptrdiff_t)MI_SEGMENT_SIZE);
size_t idx = (size_t)diff >> MI_SEGMENT_SLICE_SHIFT;
- mi_assert_internal(idx < segment->slice_entries);
+ mi_assert_internal(idx <= segment->slice_entries);
mi_slice_t* slice0 = (mi_slice_t*)&segment->slices[idx];
mi_slice_t* slice = mi_slice_first(slice0); // adjust to the block that holds the page data
mi_assert_internal(slice->slice_offset == 0);
@@ -512,6 +531,10 @@ static inline size_t mi_page_block_size(const mi_page_t* page) {
}
}
+static inline bool mi_page_is_huge(const mi_page_t* page) {
+ return (_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
+}
+
// Get the usable block size of a page without fixed padding.
// This may still include internal padding due to alignment and rounding up size classes.
static inline size_t mi_page_usable_block_size(const mi_page_t* page) {
@@ -683,7 +706,7 @@ static inline mi_block_t* mi_block_nextx( const void* null, const mi_block_t* bl
next = (mi_block_t*)block->next;
#endif
mi_track_mem_noaccess(block,sizeof(mi_block_t));
- return next;
+ return next;
}
static inline void mi_block_set_nextx(const void* null, mi_block_t* block, const mi_block_t* next, const uintptr_t* keys) {
@@ -825,7 +848,7 @@ static inline mi_threadid_t _mi_thread_id(void) mi_attr_noexcept {
return (uintptr_t)NtCurrentTeb();
}
-// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
+// We use assembly for a fast thread id on the main platforms. The TLS layout depends on
// both the OS and libc implementation so we use specific tests for each main platform.
// If you test on another platform and it works please send a PR :-)
// see also https://akkadia.org/drepper/tls.pdf for more info on the TLS register.
@@ -938,7 +961,7 @@ static inline size_t mi_ctz(uintptr_t x) {
#endif
}
-#elif defined(_MSC_VER)
+#elif defined(_MSC_VER)
#include <limits.h> // LONG_MAX
#define MI_HAVE_FAST_BITSCAN
@@ -949,7 +972,7 @@ static inline size_t mi_clz(uintptr_t x) {
_BitScanReverse(&idx, x);
#else
_BitScanReverse64(&idx, x);
-#endif
+#endif
return ((MI_INTPTR_BITS - 1) - idx);
}
static inline size_t mi_ctz(uintptr_t x) {
@@ -959,7 +982,7 @@ static inline size_t mi_ctz(uintptr_t x) {
_BitScanForward(&idx, x);
#else
_BitScanForward64(&idx, x);
-#endif
+#endif
return idx;
}
@@ -989,7 +1012,7 @@ static inline size_t mi_clz32(uint32_t x) {
}
static inline size_t mi_clz(uintptr_t x) {
- if (x==0) return MI_INTPTR_BITS;
+ if (x==0) return MI_INTPTR_BITS;
#if (MI_INTPTR_BITS <= 32)
return mi_clz32((uint32_t)x);
#else
@@ -1020,9 +1043,9 @@ static inline size_t mi_bsr(uintptr_t x) {
// ---------------------------------------------------------------------------------
// Provide our own `_mi_memcpy` for potential performance optimizations.
//
-// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
-// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
-// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017). See also issue #201 and pr #253.
+// For now, only on Windows with msvc/clang-cl we optimize to `rep movsb` if
+// we happen to run on x86/x64 cpu's that have "fast short rep movsb" (FSRM) support
+// (AMD Zen3+ (~2020) or Intel Ice Lake+ (~2017)). See also issue #201 and pr #253.
// ---------------------------------------------------------------------------------
#if !MI_TRACK_ENABLED && defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
@@ -1057,7 +1080,7 @@ static inline void _mi_memzero(void* dst, size_t n) {
// -------------------------------------------------------------------------------
-// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
+// The `_mi_memcpy_aligned` can be used if the pointers are machine-word aligned
// This is used for example in `mi_realloc`.
// -------------------------------------------------------------------------------
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-new-delete.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-new-delete.h
index 1c12fad2f..c16f4a665 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-new-delete.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-new-delete.h
@@ -22,17 +22,26 @@ terms of the MIT license. A copy of the license can be found in the file
#include <new>
#include <mimalloc.h>
+ #if defined(_MSC_VER) && defined(_Ret_notnull_) && defined(_Post_writable_byte_size_)
+ // stay consistent with VCRT definitions
+ #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict _Ret_notnull_ _Post_writable_byte_size_(n)
+ #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict _Ret_maybenull_ _Success_(return != NULL) _Post_writable_byte_size_(n)
+ #else
+ #define mi_decl_new(n) mi_decl_nodiscard mi_decl_restrict
+ #define mi_decl_new_nothrow(n) mi_decl_nodiscard mi_decl_restrict
+ #endif
+
void operator delete(void* p) noexcept { mi_free(p); };
void operator delete[](void* p) noexcept { mi_free(p); };
void operator delete (void* p, const std::nothrow_t&) noexcept { mi_free(p); }
void operator delete[](void* p, const std::nothrow_t&) noexcept { mi_free(p); }
- void* operator new(std::size_t n) noexcept(false) { return mi_new(n); }
- void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); }
+ mi_decl_new(n) void* operator new(std::size_t n) noexcept(false) { return mi_new(n); }
+ mi_decl_new(n) void* operator new[](std::size_t n) noexcept(false) { return mi_new(n); }
- void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
- void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
+ mi_decl_new_nothrow(n) void* operator new (std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
+ mi_decl_new_nothrow(n) void* operator new[](std::size_t n, const std::nothrow_t& tag) noexcept { (void)(tag); return mi_new_nothrow(n); }
#if (__cplusplus >= 201402L || _MSC_VER >= 1916)
void operator delete (void* p, std::size_t n) noexcept { mi_free_size(p,n); };
@@ -46,7 +55,7 @@ terms of the MIT license. A copy of the license can be found in the file
void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
-
+
void* operator new (std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
void* operator new[](std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h
index bb9df4fa3..f60d7acd0 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-track.h
@@ -10,33 +10,52 @@ terms of the MIT license. A copy of the license can be found in the file
// ------------------------------------------------------
// Track memory ranges with macros for tools like Valgrind
-// or other memory checkers.
+// address sanitizer, or other memory checkers.
// ------------------------------------------------------
#if MI_VALGRIND
#define MI_TRACK_ENABLED 1
+#define MI_TRACK_TOOL "valgrind"
#include <valgrind/valgrind.h>
#include <valgrind/memcheck.h>
#define mi_track_malloc(p,size,zero) VALGRIND_MALLOCLIKE_BLOCK(p,size,MI_PADDING_SIZE /*red zone*/,zero)
-#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
+#define mi_track_resize(p,oldsize,newsize) VALGRIND_RESIZEINPLACE_BLOCK(p,oldsize,newsize,MI_PADDING_SIZE /*red zone*/)
#define mi_track_free(p) VALGRIND_FREELIKE_BLOCK(p,MI_PADDING_SIZE /*red zone*/)
+#define mi_track_free_size(p,_size) mi_track_free(p)
#define mi_track_mem_defined(p,size) VALGRIND_MAKE_MEM_DEFINED(p,size)
#define mi_track_mem_undefined(p,size) VALGRIND_MAKE_MEM_UNDEFINED(p,size)
#define mi_track_mem_noaccess(p,size) VALGRIND_MAKE_MEM_NOACCESS(p,size)
+#elif MI_ASAN
+
+#define MI_TRACK_ENABLED 1
+#define MI_TRACK_TOOL "asan"
+
+#include <sanitizer/asan_interface.h>
+
+#define mi_track_malloc(p,size,zero) ASAN_UNPOISON_MEMORY_REGION(p,size)
+#define mi_track_resize(p,oldsize,newsize) ASAN_POISON_MEMORY_REGION(p,oldsize); ASAN_UNPOISON_MEMORY_REGION(p,newsize)
+#define mi_track_free(p) ASAN_POISON_MEMORY_REGION(p,mi_usable_size(p))
+#define mi_track_free_size(p,size) ASAN_POISON_MEMORY_REGION(p,size)
+#define mi_track_mem_defined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
+#define mi_track_mem_undefined(p,size) ASAN_UNPOISON_MEMORY_REGION(p,size)
+#define mi_track_mem_noaccess(p,size) ASAN_POISON_MEMORY_REGION(p,size)
+
#else
#define MI_TRACK_ENABLED 0
-
-#define mi_track_malloc(p,size,zero)
-#define mi_track_resize(p,oldsize,newsize)
-#define mi_track_free(p)
-#define mi_track_mem_defined(p,size)
-#define mi_track_mem_undefined(p,size)
-#define mi_track_mem_noaccess(p,size)
+#define MI_TRACK_TOOL "none"
+
+#define mi_track_malloc(p,size,zero)
+#define mi_track_resize(p,oldsize,newsize)
+#define mi_track_free(p)
+#define mi_track_free_size(p,_size)
+#define mi_track_mem_defined(p,size)
+#define mi_track_mem_undefined(p,size)
+#define mi_track_mem_noaccess(p,size)
#endif
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h
index 1387a7200..f3af528e5 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc-types.h
@@ -14,7 +14,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifdef _MSC_VER
#pragma warning(disable:4214) // bitfield is not int
-#endif
+#endif
// Minimal alignment necessary. On most platforms 16 bytes are needed
// due to SSE registers for example. This must be at least `sizeof(void*)`
@@ -71,6 +71,13 @@ terms of the MIT license. A copy of the license can be found in the file
#endif
+// We used to abandon huge pages in order to eagerly deallocate them if freed from another thread,
+// but that makes it impossible to visit them during a heap walk or to include them in a
+// `mi_heap_destroy`. We therefore instead reset/decommit the huge blocks if freed from
+// another thread so most memory is available until it gets properly freed by the owning thread.
+// #define MI_HUGE_PAGE_ABANDON 1
+
+
// ------------------------------------------------------
// Platform specific values
// ------------------------------------------------------
@@ -135,7 +142,7 @@ typedef int32_t mi_ssize_t;
#define MI_SEGMENT_SLICE_SHIFT (13 + MI_INTPTR_SHIFT) // 64KiB (32KiB on 32-bit)
#if MI_INTPTR_SIZE > 4
-#define MI_SEGMENT_SHIFT (10 + MI_SEGMENT_SLICE_SHIFT) // 64MiB
+#define MI_SEGMENT_SHIFT ( 9 + MI_SEGMENT_SLICE_SHIFT) // 32MiB
#else
#define MI_SEGMENT_SHIFT ( 7 + MI_SEGMENT_SLICE_SHIFT) // 4MiB on 32-bit
#endif
@@ -147,7 +154,7 @@ typedef int32_t mi_ssize_t;
// Derived constants
#define MI_SEGMENT_SIZE (MI_ZU(1)<<MI_SEGMENT_SHIFT)
#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
-#define MI_SEGMENT_MASK (MI_SEGMENT_SIZE - 1)
+#define MI_SEGMENT_MASK (MI_SEGMENT_ALIGN - 1)
#define MI_SEGMENT_SLICE_SIZE (MI_ZU(1)<< MI_SEGMENT_SLICE_SHIFT)
#define MI_SLICES_PER_SEGMENT (MI_SEGMENT_SIZE / MI_SEGMENT_SLICE_SIZE) // 1024
@@ -166,12 +173,6 @@ typedef int32_t mi_ssize_t;
#if (MI_MEDIUM_OBJ_WSIZE_MAX >= 655360)
#error "mimalloc internal: define more bins"
#endif
-#if (MI_ALIGNMENT_MAX > MI_SEGMENT_SIZE/2)
-#error "mimalloc internal: the max aligned boundary is too large for the segment size"
-#endif
-#if (MI_ALIGNED_MAX % MI_SEGMENT_SLICE_SIZE != 0)
-#error "mimalloc internal: the max aligned boundary must be an integral multiple of the segment slice size"
-#endif
// Maximum slice offset (15)
#define MI_MAX_SLICE_OFFSET ((MI_ALIGNMENT_MAX / MI_SEGMENT_SLICE_SIZE) - 1)
@@ -182,7 +183,8 @@ typedef int32_t mi_ssize_t;
// blocks up to this size are always allocated aligned
#define MI_MAX_ALIGN_GUARANTEE (8*MI_MAX_ALIGN_SIZE)
-
+// Alignments over MI_ALIGNMENT_MAX are allocated in dedicated huge page segments
+#define MI_ALIGNMENT_MAX (MI_SEGMENT_SIZE >> 1)
// ------------------------------------------------------
@@ -253,49 +255,50 @@ typedef uintptr_t mi_thread_free_t;
// We don't count `freed` (as |free|) but use `used` to reduce
// the number of memory accesses in the `mi_page_all_free` function(s).
//
-// Notes:
+// Notes:
// - Access is optimized for `mi_free` and `mi_page_alloc` (in `alloc.c`)
// - Using `uint16_t` does not seem to slow things down
// - The size is 8 words on 64-bit which helps the page index calculations
-// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
+// (and 10 words on 32-bit, and encoded free lists add 2 words. Sizes 10
// and 12 are still good for address calculation)
-// - To limit the structure size, the `xblock_size` is 32-bits only; for
+// - To limit the structure size, the `xblock_size` is 32-bits only; for
// blocks > MI_HUGE_BLOCK_SIZE the size is determined from the segment page size
// - `thread_free` uses the bottom bits as a delayed-free flags to optimize
// concurrent frees where only the first concurrent free adds to the owning
// heap `thread_delayed_free` list (see `alloc.c:mi_free_block_mt`).
// The invariant is that no-delayed-free is only set if there is
-// at least one block that will be added, or as already been added, to
+// at least one block that will be added, or has already been added, to
// the owning heap `thread_delayed_free` list. This guarantees that pages
// will be freed correctly even if only other threads free blocks.
typedef struct mi_page_s {
// "owned" by the segment
uint32_t slice_count; // slices in this page (0 if not a page)
uint32_t slice_offset; // distance from the actual page data slice (0 if a page)
- uint8_t is_reset : 1; // `true` if the page memory was reset
- uint8_t is_committed : 1; // `true` if the page virtual memory is committed
- uint8_t is_zero_init : 1; // `true` if the page was zero initialized
+ uint8_t is_reset : 1; // `true` if the page memory was reset
+ uint8_t is_committed : 1; // `true` if the page virtual memory is committed
+ uint8_t is_zero_init : 1; // `true` if the page was zero initialized
// layout like this to optimize access in `mi_malloc` and `mi_free`
uint16_t capacity; // number of blocks committed, must be the first field, see `segment.c:page_clear`
uint16_t reserved; // number of blocks reserved in memory
mi_page_flags_t flags; // `in_full` and `has_aligned` flags (8 bits)
- uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
- uint8_t retire_expire : 7; // expiration count for retired blocks
+ uint8_t is_zero : 1; // `true` if the blocks in the free list are zero initialized
+ uint8_t retire_expire : 7; // expiration count for retired blocks
mi_block_t* free; // list of available free blocks (`malloc` allocates from this list)
+ uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
+ uint32_t xblock_size; // size available in each block (always `>0`)
+ mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
+
#ifdef MI_ENCODE_FREELIST
uintptr_t keys[2]; // two random keys to encode the free lists (see `_mi_block_next`)
#endif
- uint32_t used; // number of blocks in use (including blocks in `local_free` and `thread_free`)
- uint32_t xblock_size; // size available in each block (always `>0`)
- mi_block_t* local_free; // list of deferred free blocks by this thread (migrates to `free`)
_Atomic(mi_thread_free_t) xthread_free; // list of deferred free blocks freed by other threads
_Atomic(uintptr_t) xheap;
- struct mi_page_s* next; // next page owned by this thread with the same `block_size`
- struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
+ struct mi_page_s* next; // next page owned by this thread with the same `block_size`
+ struct mi_page_s* prev; // previous page owned by this thread with the same `block_size`
// 64-bit 9 words, 32-bit 12 words, (+2 for secure)
#if MI_INTPTR_SIZE==8
@@ -329,7 +332,7 @@ typedef enum mi_segment_kind_e {
// is still tracked in fine-grained MI_COMMIT_SIZE chunks)
// ------------------------------------------------------
-#define MI_MINIMAL_COMMIT_SIZE (2*MI_MiB)
+#define MI_MINIMAL_COMMIT_SIZE (16*MI_SEGMENT_SLICE_SIZE) // 1MiB
#define MI_COMMIT_SIZE (MI_SEGMENT_SLICE_SIZE) // 64KiB
#define MI_COMMIT_MASK_BITS (MI_SEGMENT_SIZE / MI_COMMIT_SIZE)
#define MI_COMMIT_MASK_FIELD_BITS MI_SIZE_BITS
@@ -355,6 +358,8 @@ typedef struct mi_segment_s {
bool mem_is_pinned; // `true` if we cannot decommit/reset/protect in this memory (i.e. when allocated using large OS pages)
bool mem_is_large; // in large/huge os pages?
bool mem_is_committed; // `true` if the whole segment is eagerly committed
+ size_t mem_alignment; // page alignment for huge pages (only used for alignment > MI_ALIGNMENT_MAX)
+ size_t mem_align_offset; // offset for huge page alignment (only used for alignment > MI_ALIGNMENT_MAX)
bool allow_decommit;
mi_msecs_t decommit_expire;
@@ -376,9 +381,10 @@ typedef struct mi_segment_s {
// layout like this to optimize access in `mi_free`
mi_segment_kind_t kind;
- _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
size_t slice_entries; // entries in the `slices` array, at most `MI_SLICES_PER_SEGMENT`
- mi_slice_t slices[MI_SLICES_PER_SEGMENT];
+ _Atomic(mi_threadid_t) thread_id; // unique id of the thread owning this segment
+
+ mi_slice_t slices[MI_SLICES_PER_SEGMENT+1]; // one more for huge blocks with large alignment
} mi_segment_t;
@@ -412,6 +418,7 @@ typedef struct mi_random_cxt_s {
uint32_t input[16];
uint32_t output[16];
int output_available;
+ bool weak;
} mi_random_ctx_t;
diff --git a/source/luametatex/source/libraries/mimalloc/include/mimalloc.h b/source/luametatex/source/libraries/mimalloc/include/mimalloc.h
index 32eab19ea..9b72fbfda 100644
--- a/source/luametatex/source/libraries/mimalloc/include/mimalloc.h
+++ b/source/luametatex/source/libraries/mimalloc/include/mimalloc.h
@@ -8,7 +8,7 @@ terms of the MIT license. A copy of the license can be found in the file
#ifndef MIMALLOC_H
#define MIMALLOC_H
-#define MI_MALLOC_VERSION 207 // major + 2 digits minor
+#define MI_MALLOC_VERSION 209 // major + 2 digits minor
// ------------------------------------------------------
// Compiler specific attributes
@@ -28,6 +28,8 @@ terms of the MIT license. A copy of the license can be found in the file
#define mi_decl_nodiscard [[nodiscard]]
#elif (defined(__GNUC__) && (__GNUC__ >= 4)) || defined(__clang__) // includes clang, icc, and clang-cl
#define mi_decl_nodiscard __attribute__((warn_unused_result))
+#elif defined(_HAS_NODISCARD)
+ #define mi_decl_nodiscard _NODISCARD
#elif (_MSC_VER >= 1700)
#define mi_decl_nodiscard _Check_return_
#else
@@ -158,8 +160,8 @@ mi_decl_export void mi_thread_init(void) mi_attr_noexcept;
mi_decl_export void mi_thread_done(void) mi_attr_noexcept;
mi_decl_export void mi_thread_stats_print_out(mi_output_fun* out, void* arg) mi_attr_noexcept;
-mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
- size_t* current_rss, size_t* peak_rss,
+mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, size_t* system_msecs,
+ size_t* current_rss, size_t* peak_rss,
size_t* current_commit, size_t* peak_commit, size_t* page_faults) mi_attr_noexcept;
// -------------------------------------------------------------------------------------
@@ -167,11 +169,6 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s
// Note that `alignment` always follows `size` for consistency with unaligned
// allocation, but unfortunately this differs from `posix_memalign` and `aligned_alloc`.
// -------------------------------------------------------------------------------------
-#if (INTPTR_MAX > INT32_MAX)
-#define MI_ALIGNMENT_MAX (16*1024*1024UL) // maximum supported alignment is 16MiB
-#else
-#define MI_ALIGNMENT_MAX (1024*1024UL) // maximum supported alignment for 32-bit systems is 1MiB
-#endif
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned(size_t size, size_t alignment) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1) mi_attr_alloc_align(2);
mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_malloc_aligned_at(size_t size, size_t alignment, size_t offset) mi_attr_noexcept mi_attr_malloc mi_attr_alloc_size(1);
@@ -288,6 +285,7 @@ mi_decl_export int mi_reserve_os_memory_ex(size_t size, bool commit, bool allo
mi_decl_export bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is_large, bool is_zero, int numa_node, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept;
#if MI_MALLOC_VERSION >= 200
+// Create a heap that only allocates in the specified arena
mi_decl_nodiscard mi_decl_export mi_heap_t* mi_heap_new_in_arena(mi_arena_id_t arena_id);
#endif
@@ -347,6 +345,7 @@ typedef enum mi_option_e {
mi_option_allow_decommit,
mi_option_segment_decommit_delay,
mi_option_decommit_extend_delay,
+ mi_option_destroy_on_exit,
_mi_option_last
} mi_option_t;
@@ -405,6 +404,9 @@ mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_new_n(size_t count, s
mi_decl_nodiscard mi_decl_export void* mi_new_realloc(void* p, size_t newsize) mi_attr_alloc_size(2);
mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount, size_t size) mi_attr_alloc_size2(2, 3);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) mi_attr_malloc mi_attr_alloc_size(2);
+mi_decl_nodiscard mi_decl_export mi_decl_restrict void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) mi_attr_malloc mi_attr_alloc_size2(2, 3);
+
#ifdef __cplusplus
}
#endif
@@ -422,7 +424,7 @@ mi_decl_nodiscard mi_decl_export void* mi_new_reallocn(void* p, size_t newcount,
#include <utility> // std::forward
#endif
-template<class T> struct mi_stl_allocator {
+template<class T> struct _mi_stl_allocator_common {
typedef T value_type;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
@@ -430,6 +432,27 @@ template<class T> struct mi_stl_allocator {
typedef value_type const& const_reference;
typedef value_type* pointer;
typedef value_type const* const_pointer;
+
+ #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
+ using propagate_on_container_copy_assignment = std::true_type;
+ using propagate_on_container_move_assignment = std::true_type;
+ using propagate_on_container_swap = std::true_type;
+ template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); }
+ template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); }
+ #else
+ void construct(pointer p, value_type const& val) { ::new(p) value_type(val); }
+ void destroy(pointer p) { p->~value_type(); }
+ #endif
+
+ size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); }
+ pointer address(reference x) const { return &x; }
+ const_pointer address(const_reference x) const { return &x; }
+};
+
+template<class T> struct mi_stl_allocator : public _mi_stl_allocator_common<T> {
+ using typename _mi_stl_allocator_common<T>::size_type;
+ using typename _mi_stl_allocator_common<T>::value_type;
+ using typename _mi_stl_allocator_common<T>::pointer;
template <class U> struct rebind { typedef mi_stl_allocator<U> other; };
mi_stl_allocator() mi_attr_noexcept = default;
@@ -446,24 +469,89 @@ template<class T> struct mi_stl_allocator {
#endif
#if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
- using propagate_on_container_copy_assignment = std::true_type;
- using propagate_on_container_move_assignment = std::true_type;
- using propagate_on_container_swap = std::true_type;
- using is_always_equal = std::true_type;
- template <class U, class ...Args> void construct(U* p, Args&& ...args) { ::new(p) U(std::forward<Args>(args)...); }
- template <class U> void destroy(U* p) mi_attr_noexcept { p->~U(); }
- #else
- void construct(pointer p, value_type const& val) { ::new(p) value_type(val); }
- void destroy(pointer p) { p->~value_type(); }
+ using is_always_equal = std::true_type;
#endif
-
- size_type max_size() const mi_attr_noexcept { return (PTRDIFF_MAX/sizeof(value_type)); }
- pointer address(reference x) const { return &x; }
- const_pointer address(const_reference x) const { return &x; }
};
template<class T1,class T2> bool operator==(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return true; }
template<class T1,class T2> bool operator!=(const mi_stl_allocator<T1>& , const mi_stl_allocator<T2>& ) mi_attr_noexcept { return false; }
+
+
+#if (__cplusplus >= 201103L) || (_MSC_VER > 1900) // C++11
+#include <memory> // std::shared_ptr
+
+// Common base class for STL allocators that allocate in a specific heap; copies share the heap through a ref-counted `std::shared_ptr`.
+template<class T, bool destroy> struct _mi_heap_stl_allocator_common : public _mi_stl_allocator_common<T> {
+ using typename _mi_stl_allocator_common<T>::size_type;
+ using typename _mi_stl_allocator_common<T>::value_type;
+ using typename _mi_stl_allocator_common<T>::pointer;
+ // Wrap a caller-owned heap: the no-op deleter ensures the `shared_ptr` never calls `delete` on the (incomplete-typed) heap.
+ _mi_heap_stl_allocator_common(mi_heap_t* hp) : heap(hp, [](mi_heap_t*) {}) { } /* will not delete nor destroy the passed in heap */
+
+ #if (__cplusplus >= 201703L) // C++17
+ mi_decl_nodiscard T* allocate(size_type count) { return static_cast<T*>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(T))); }
+ mi_decl_nodiscard T* allocate(size_type count, const void*) { return allocate(count); }
+ #else
+ mi_decl_nodiscard pointer allocate(size_type count, const void* = 0) { return static_cast<pointer>(mi_heap_alloc_new_n(this->heap.get(), count, sizeof(value_type))); }
+ #endif
+
+ #if ((__cplusplus >= 201103L) || (_MSC_VER > 1900)) // C++11
+ using is_always_equal = std::false_type;
+ #endif
+ // `collect` forwards to `mi_heap_collect`; `is_equal` is true only when both allocators refer to the same heap.
+ void collect(bool force) { mi_heap_collect(this->heap.get(), force); }
+ template<class U> bool is_equal(const _mi_heap_stl_allocator_common<U, destroy>& x) const { return (this->heap == x.heap); }
+
+protected:
+ std::shared_ptr<mi_heap_t> heap;
+ template<class U, bool D> friend struct _mi_heap_stl_allocator_common;
+ // Default construction creates a fresh heap that is deleted (or destroyed when `destroy` is true) with the last copy.
+ _mi_heap_stl_allocator_common() {
+ mi_heap_t* hp = mi_heap_new();
+ this->heap.reset(hp, (destroy ? &heap_destroy : &heap_delete)); /* calls heap_delete/destroy when the refcount drops to zero */
+ }
+ _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common& x) mi_attr_noexcept : heap(x.heap) { }
+ template<class U> _mi_heap_stl_allocator_common(const _mi_heap_stl_allocator_common<U, destroy>& x) mi_attr_noexcept : heap(x.heap) { }
+
+private:
+ static void heap_delete(mi_heap_t* hp) { if (hp != NULL) { mi_heap_delete(hp); } }
+ static void heap_destroy(mi_heap_t* hp) { if (hp != NULL) { mi_heap_destroy(hp); } }
+};
+
+// STL allocator that allocates in a specific heap; `deallocate` forwards to `mi_free` and copies of the allocator share the same heap.
+template<class T> struct mi_heap_stl_allocator : public _mi_heap_stl_allocator_common<T, false> {
+ using typename _mi_heap_stl_allocator_common<T, false>::size_type;
+ mi_heap_stl_allocator() : _mi_heap_stl_allocator_common<T, false>() { } // creates fresh heap that is deleted when the destructor is called
+ mi_heap_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, false>(hp) { } // no delete nor destroy on the passed in heap
+ template<class U> mi_heap_stl_allocator(const mi_heap_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, false>(x) { }
+
+ mi_heap_stl_allocator select_on_container_copy_construction() const { return *this; }
+ void deallocate(T* p, size_type) { mi_free(p); }
+ template<class U> struct rebind { typedef mi_heap_stl_allocator<U> other; };
+};
+
+template<class T1, class T2> bool operator==(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); }
+template<class T1, class T2> bool operator!=(const mi_heap_stl_allocator<T1>& x, const mi_heap_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
+
+
+// STL allocator that allocates in a specific heap, where `deallocate` does nothing and a heap created by
+// the default constructor is destroyed in one go once the last allocator sharing it is gone -- use with care!
+template<class T> struct mi_heap_destroy_stl_allocator : public _mi_heap_stl_allocator_common<T, true> {
+ using typename _mi_heap_stl_allocator_common<T, true>::size_type;
+ mi_heap_destroy_stl_allocator() : _mi_heap_stl_allocator_common<T, true>() { } // creates fresh heap that is destroyed when the destructor is called
+ mi_heap_destroy_stl_allocator(mi_heap_t* hp) : _mi_heap_stl_allocator_common<T, true>(hp) { } // no delete nor destroy on the passed in heap
+ template<class U> mi_heap_destroy_stl_allocator(const mi_heap_destroy_stl_allocator<U>& x) mi_attr_noexcept : _mi_heap_stl_allocator_common<T, true>(x) { }
+
+ mi_heap_destroy_stl_allocator select_on_container_copy_construction() const { return *this; }
+ void deallocate(T*, size_type) { /* do nothing as we destroy the heap on destruct. */ }
+ template<class U> struct rebind { typedef mi_heap_destroy_stl_allocator<U> other; };
+};
+
+template<class T1, class T2> bool operator==(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (x.is_equal(y)); }
+template<class T1, class T2> bool operator!=(const mi_heap_destroy_stl_allocator<T1>& x, const mi_heap_destroy_stl_allocator<T2>& y) mi_attr_noexcept { return (!x.is_equal(y)); }
+
+#endif // C++11
+
#endif // __cplusplus
#endif
diff --git a/source/luametatex/source/libraries/mimalloc/readme.md b/source/luametatex/source/libraries/mimalloc/readme.md
index 588630992..10195b026 100644
--- a/source/luametatex/source/libraries/mimalloc/readme.md
+++ b/source/luametatex/source/libraries/mimalloc/readme.md
@@ -12,8 +12,8 @@ is a general purpose allocator with excellent [performance](#performance) charac
Initially developed by Daan Leijen for the run-time systems of the
[Koka](https://koka-lang.github.io) and [Lean](https://github.com/leanprover/lean) languages.
-Latest release tag: `v2.0.7` (2022-11-03).
-Latest stable tag: `v1.7.7` (2022-11-03).
+Latest release tag: `v2.0.9` (2022-12-23).
+Latest stable tag: `v1.7.9` (2022-12-23).
mimalloc is a drop-in replacement for `malloc` and can be used in other programs
without code changes, for example, on dynamically linked ELF-based systems (Linux, BSD, etc.) you can use it as:
@@ -27,6 +27,8 @@ It also has an easy way to override the default allocator in [Windows](#override
to integrate and adapt in other projects. For runtime systems it
provides hooks for a monotonic _heartbeat_ and deferred freeing (for
bounded worst-case times with reference counting).
+ Partly due to its simplicity, mimalloc has been ported to many systems (Windows, macOS,
+ Linux, WASM, various BSD's, Haiku, MUSL, etc) and has excellent support for dynamic overriding.
- __free list sharding__: instead of one big free list (per size class) we have
many smaller lists per "mimalloc page" which reduces fragmentation and
increases locality --
@@ -36,13 +38,13 @@ It also has an easy way to override the default allocator in [Windows](#override
per mimalloc page, but for each page we have multiple free lists. In particular, there
is one list for thread-local `free` operations, and another one for concurrent `free`
operations. Free-ing from another thread can now be a single CAS without needing
- sophisticated coordination between threads. Since there will be
+ sophisticated coordination between threads. Since there will be
thousands of separate free lists, contention is naturally distributed over the heap,
and the chance of contending on a single location will be low -- this is quite
similar to randomized algorithms like skip lists where adding
a random oracle removes the need for a more complex algorithm.
- __eager page reset__: when a "page" becomes empty (with increased chance
- due to free list sharding) the memory is marked to the OS as unused ("reset" or "purged")
+ due to free list sharding) the memory is marked to the OS as unused (reset or decommitted)
reducing (real) memory pressure and fragmentation, especially in long running
programs.
- __secure__: _mimalloc_ can be built in secure mode, adding guard pages,
@@ -50,20 +52,19 @@ It also has an easy way to override the default allocator in [Windows](#override
heap vulnerabilities. The performance penalty is usually around 10% on average
over our benchmarks.
- __first-class heaps__: efficiently create and use multiple heaps to allocate across different regions.
- A heap can be destroyed at once instead of deallocating each object separately.
+ A heap can be destroyed at once instead of deallocating each object separately.
- __bounded__: it does not suffer from _blowup_ \[1\], has bounded worst-case allocation
- times (_wcat_), bounded space overhead (~0.2% meta-data, with low internal fragmentation),
- and has no internal points of contention using only atomic operations.
+ times (_wcat_) (up to OS primitives), bounded space overhead (~0.2% meta-data, with low
+ internal fragmentation), and has no internal points of contention using only atomic operations.
- __fast__: In our benchmarks (see [below](#performance)),
_mimalloc_ outperforms other leading allocators (_jemalloc_, _tcmalloc_, _Hoard_, etc),
- and often uses less memory. A nice property
- is that it does consistently well over a wide range of benchmarks. There is also good huge OS page
- support for larger server programs.
+ and often uses less memory. A nice property is that it does consistently well over a wide range
+ of benchmarks. There is also good huge OS page support for larger server programs.
The [documentation](https://microsoft.github.io/mimalloc) gives a full overview of the API.
-You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results.
+You can read more on the design of _mimalloc_ in the [technical report](https://www.microsoft.com/en-us/research/publication/mimalloc-free-list-sharding-in-action) which also has detailed benchmark results.
-Enjoy!
+Enjoy!
### Branches
@@ -77,8 +78,13 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
and fragmentation compared to mimalloc `v1.x` (especially for large workloads). Should otherwise have similar performance
(see [below](#performance)); please report if you observe any significant performance regression.
+* 2022-12-23, `v1.7.9`, `v2.0.9`: Supports building with asan and improved [Valgrind](#valgrind) support.
+ Support arbitrarily large alignments (in particular for `std::pmr` pools).
+ Added C++ STL allocators attached to a specific heap (thanks @vmarkovtsev).
+ Heap walks now visit all objects (including huge objects). Support Windows nano server containers (by Johannes Schindelin, @dscho). Various small bug fixes.
+
* 2022-11-03, `v1.7.7`, `v2.0.7`: Initial support for [Valgrind](#valgrind) for leak testing and heap block overflow detection. Initial
- support for attaching heaps to a speficic memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`, .
+ support for attaching heaps to a specific memory area (only in v2). Fix `realloc` behavior for zero size blocks, remove restriction to integral multiple of the alignment in `alloc_align`, improved aligned allocation performance, reduced contention with many threads on few processors (thank you @dposluns!), vs2022 support, support `pkg-config`.
* 2022-04-14, `v1.7.6`, `v2.0.6`: fix fallback path for aligned OS allocation on Windows, improve Windows aligned allocation
even when compiling with older SDK's, fix dynamic overriding on macOS Monterey, fix MSVC C++ dynamic overriding, fix
@@ -87,7 +93,7 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
* 2022-02-14, `v1.7.5`, `v2.0.5` (alpha): fix malloc override on
Windows 11, fix compilation with musl, potentially reduced
- committed memory, add `bin/minject` for Windows,
+ committed memory, add `bin/minject` for Windows,
improved wasm support, faster aligned allocation,
various small fixes.
@@ -99,9 +105,9 @@ Note: the `v2.x` version has a new algorithm for managing internal mimalloc page
thread_id on Android, prefer 2-6TiB area for aligned allocation to work better on pre-windows 8, various small fixes.
* 2021-04-06, `v1.7.1`, `v2.0.1` (beta): fix bug in arena allocation for huge pages, improved aslr on large allocations, initial M1 support (still experimental).
-
+
* 2021-01-31, `v2.0.0`: beta release 2.0: new slice algorithm for managing internal mimalloc pages.
-
+
* 2021-01-31, `v1.7.0`: stable release 1.7: support explicit user provided memory regions, more precise statistics,
improve macOS overriding, initial support for Apple M1, improved DragonFly support, faster memcpy on Windows, various small fixes.
@@ -115,9 +121,9 @@ Special thanks to:
memory model bugs using the [genMC] model checker.
* Weipeng Liu (@pongba), Zhuowei Li, Junhua Wang, and Jakub Szymanski, for their early support of mimalloc and deployment
at large scale services, leading to many improvements in the mimalloc algorithms for large workloads.
-* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs
+* Jason Gibson (@jasongibson) for exhaustive testing on large scale workloads and server environments, and finding complex bugs
in (early versions of) `mimalloc`.
-* Manuel Pöter (@mpoeter) and Sam Gross(@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which
+* Manuel Pöter (@mpoeter) and Sam Gross (@colesbury) for finding an ABA concurrency issue in abandoned segment reclamation. Sam also created the [no GIL](https://github.com/colesbury/nogil) Python fork which
uses mimalloc internally.
@@ -304,8 +310,8 @@ or via environment variables:
of a thread to not allocate in the huge OS pages; this prevents threads that are short lived
and allocate just a little to take up space in the huge OS page area (which cannot be reset).
The huge pages are usually allocated evenly among NUMA nodes.
- We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
- the huge pages at a specific numa node instead.
+ We can use `MIMALLOC_RESERVE_HUGE_OS_PAGES_AT=N` where `N` is the numa node (starting at 0) to allocate all
+ the huge pages at a specific numa node instead.
Use caution when using `fork` in combination with either large or huge OS pages: on a fork, the OS uses copy-on-write
for all pages in the original process including the huge OS pages. When any memory is now written in that area, the
@@ -342,24 +348,24 @@ When _mimalloc_ is built using debug mode, various checks are done at runtime to
## Valgrind
-Generally, we recommend using the standard allocator with the amazing [Valgrind] tool (and
-also for other address sanitizers).
-However, it is possible to build mimalloc with Valgrind support. This has a small performance
-overhead but does allow detecting memory leaks and byte-precise buffer overflows directly on final
+Generally, we recommend using the standard allocator with the amazing [Valgrind] tool (and
+also for other address sanitizers).
+However, it is possible to build mimalloc with Valgrind support. This has a small performance
+overhead but does allow detecting memory leaks and byte-precise buffer overflows directly on final
executables. To build with valgrind support, use the `MI_VALGRIND=ON` cmake option:
```
> cmake ../.. -DMI_VALGRIND=ON
```
-This can also be combined with secure mode or debug mode.
+This can also be combined with secure mode or debug mode.
You can then run your programs directly under valgrind:
```
> valgrind <myprogram>
```
-If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly),
+If you rely on overriding `malloc`/`free` by mimalloc (instead of using the `mi_malloc`/`mi_free` API directly),
you also need to tell `valgrind` to not intercept those calls itself, and use:
```
@@ -573,7 +579,7 @@ The _alloc-test_, by
[OLogN Technologies AG](http://ithare.com/testing-memory-allocators-ptmalloc2-tcmalloc-hoard-jemalloc-while-trying-to-simulate-real-world-loads/), is a very allocation intensive benchmark doing millions of
allocations in various size classes. The test is scaled such that when an
allocator performs almost identically on _alloc-test1_ as _alloc-testN_ it
-means that it scales linearly.
+means that it scales linearly.
The _sh6bench_ and _sh8bench_ benchmarks are
developed by [MicroQuill](http://www.microquill.com/) as part of SmartHeap.
@@ -754,4 +760,3 @@ free list encoding](https://github.com/microsoft/mimalloc/blob/783e3377f79ee82af
* 2019-10-07, `v1.1.0`: stable release 1.1.
* 2019-09-01, `v1.0.8`: pre-release 8: more robust windows dynamic overriding, initial huge page support.
* 2019-08-10, `v1.0.6`: pre-release 6: various performance improvements.
-
diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c b/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c
index 9614aa092..9fe82890f 100644
--- a/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c
+++ b/source/luametatex/source/libraries/mimalloc/src/alloc-aligned.c
@@ -18,9 +18,9 @@ terms of the MIT license. A copy of the license can be found in the file
static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t* const heap, const size_t size, const size_t alignment, const size_t offset, const bool zero) mi_attr_noexcept
{
mi_assert_internal(size <= PTRDIFF_MAX);
- mi_assert_internal(alignment!=0 && _mi_is_power_of_two(alignment) && alignment <= MI_ALIGNMENT_MAX);
+ mi_assert_internal(alignment != 0 && _mi_is_power_of_two(alignment));
- const uintptr_t align_mask = alignment-1; // for any x, `(x & align_mask) == (x % alignment)`
+ const uintptr_t align_mask = alignment - 1; // for any x, `(x & align_mask) == (x % alignment)`
const size_t padsize = size + MI_PADDING_SIZE;
// use regular allocation if it is guaranteed to fit the alignment constraints
@@ -30,26 +30,59 @@ static mi_decl_noinline void* mi_heap_malloc_zero_aligned_at_fallback(mi_heap_t*
return p;
}
- // otherwise over-allocate
- const size_t oversize = size + alignment - 1;
- void* p = _mi_heap_malloc_zero(heap, oversize, zero);
- if (p == NULL) return NULL;
+ void* p;
+ size_t oversize;
+ if mi_unlikely(alignment > MI_ALIGNMENT_MAX) {
+ // use OS allocation for very large alignment and allocate inside a huge page (dedicated segment with 1 page)
+ // This can support alignments >= MI_SEGMENT_SIZE by ensuring the object can be aligned at a point in the
+ // first (and single) page such that the segment info is `MI_SEGMENT_SIZE` bytes before it (so it can be found by aligning the pointer down)
+ if mi_unlikely(offset != 0) {
+ // todo: cannot support offset alignment for very large alignments yet
+ #if MI_DEBUG > 0
+ _mi_error_message(EOVERFLOW, "aligned allocation with a very large alignment cannot be used with an alignment offset (size %zu, alignment %zu, offset %zu)\n", size, alignment, offset);
+ #endif
+ return NULL;
+ }
+ oversize = (size <= MI_SMALL_SIZE_MAX ? MI_SMALL_SIZE_MAX + 1 /* ensure we use generic malloc path */ : size);
+ p = _mi_heap_malloc_zero_ex(heap, oversize, false, alignment); // the page block size should be large enough to align in the single huge page block
+ // zero afterwards as only the area from the aligned_p may be committed!
+ if (p == NULL) return NULL;
+ }
+ else {
+ // otherwise over-allocate
+ oversize = size + alignment - 1;
+ p = _mi_heap_malloc_zero(heap, oversize, zero);
+ if (p == NULL) return NULL;
+ }
// .. and align within the allocation
- uintptr_t adjust = alignment - (((uintptr_t)p + offset) & align_mask);
- mi_assert_internal(adjust <= alignment);
- void* aligned_p = (adjust == alignment ? p : (void*)((uintptr_t)p + adjust));
- if (aligned_p != p) mi_page_set_has_aligned(_mi_ptr_page(p), true);
- mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
+ const uintptr_t poffset = ((uintptr_t)p + offset) & align_mask;
+ const uintptr_t adjust = (poffset == 0 ? 0 : alignment - poffset);
+ mi_assert_internal(adjust < alignment);
+ void* aligned_p = (void*)((uintptr_t)p + adjust);
+ if (aligned_p != p) {
+ mi_page_set_has_aligned(_mi_ptr_page(p), true);
+ }
+
+ mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size);
mi_assert_internal(p == _mi_page_ptr_unalign(_mi_ptr_segment(aligned_p), _mi_ptr_page(aligned_p), aligned_p));
+ mi_assert_internal(((uintptr_t)aligned_p + offset) % alignment == 0);
+ mi_assert_internal(mi_page_usable_block_size(_mi_ptr_page(p)) >= adjust + size);
+
+ // now zero the block if needed
+ if (zero && alignment > MI_ALIGNMENT_MAX) {
+ const ptrdiff_t diff = (uint8_t*)aligned_p - (uint8_t*)p;
+ const ptrdiff_t zsize = mi_page_usable_block_size(_mi_ptr_page(p)) - diff - MI_PADDING_SIZE;
+ if (zsize > 0) { _mi_memzero(aligned_p, zsize); }
+ }
#if MI_TRACK_ENABLED
if (p != aligned_p) {
- mi_track_free(p);
- mi_track_malloc(aligned_p,size,zero);
+ mi_track_free_size(p, oversize);
+ mi_track_malloc(aligned_p, size, zero);
}
else {
- mi_track_resize(aligned_p,oversize,size);
+ mi_track_resize(aligned_p, oversize, size);
}
#endif
return aligned_p;
@@ -60,19 +93,21 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
{
// note: we don't require `size > offset`, we just guarantee that the address at offset is aligned regardless of the allocated size.
mi_assert(alignment > 0);
- if mi_unlikely(alignment==0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>)
+ if mi_unlikely(alignment == 0 || !_mi_is_power_of_two(alignment)) { // require power-of-two (see <https://en.cppreference.com/w/c/memory/aligned_alloc>)
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation requires the alignment to be a power-of-two (size %zu, alignment %zu)\n", size, alignment);
#endif
return NULL;
}
+ /*
if mi_unlikely(alignment > MI_ALIGNMENT_MAX) { // we cannot align at a boundary larger than this (or otherwise we cannot find segment headers)
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation has a maximum alignment of %zu (size %zu, alignment %zu)\n", MI_ALIGNMENT_MAX, size, alignment);
#endif
return NULL;
}
- if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
+ */
+ if mi_unlikely(size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
#if MI_DEBUG > 0
_mi_error_message(EOVERFLOW, "aligned allocation request is too large (size %zu, alignment %zu)\n", size, alignment);
#endif
@@ -82,7 +117,7 @@ static void* mi_heap_malloc_zero_aligned_at(mi_heap_t* const heap, const size_t
const size_t padsize = size + MI_PADDING_SIZE; // note: cannot overflow due to earlier size > PTRDIFF_MAX check
// try first if there happens to be a small block available with just the right alignment
- if mi_likely(padsize <= MI_SMALL_SIZE_MAX) {
+ if mi_likely(padsize <= MI_SMALL_SIZE_MAX && alignment <= padsize) {
mi_page_t* page = _mi_heap_get_free_small_page(heap, padsize);
const bool is_aligned = (((uintptr_t)page->free+offset) & align_mask)==0;
if mi_likely(page->free != NULL && is_aligned)
@@ -269,4 +304,3 @@ mi_decl_nodiscard void* mi_recalloc_aligned_at(void* p, size_t newcount, size_t
mi_decl_nodiscard void* mi_recalloc_aligned(void* p, size_t newcount, size_t size, size_t alignment) mi_attr_noexcept {
return mi_heap_recalloc_aligned(mi_get_default_heap(), p, newcount, size, alignment);
}
-
diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c b/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c
index ba2313a2a..a2819a8bf 100644
--- a/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c
+++ b/source/luametatex/source/libraries/mimalloc/src/alloc-override-osx.c
@@ -19,8 +19,8 @@ terms of the MIT license. A copy of the license can be found in the file
This is done through the malloc zone interface.
It seems to be most robust in combination with interposing
though or otherwise we may get zone errors as there are could
- be allocations done by the time we take over the
- zone.
+ be allocations done by the time we take over the
+ zone.
------------------------------------------------------ */
#include <AvailabilityMacros.h>
@@ -215,7 +215,7 @@ static malloc_zone_t mi_malloc_zone = {
.zone_name = "mimalloc",
.batch_malloc = &zone_batch_malloc,
.batch_free = &zone_batch_free,
- .introspect = &mi_introspect,
+ .introspect = &mi_introspect,
#if defined(MAC_OS_X_VERSION_10_6) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_6)
#if defined(MAC_OS_X_VERSION_10_14) && (MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_14)
.version = 10,
@@ -242,7 +242,7 @@ static malloc_zone_t mi_malloc_zone = {
#if defined(MI_OSX_INTERPOSE) && defined(MI_SHARED_LIB_EXPORT)
// ------------------------------------------------------
-// Override malloc_xxx and malloc_zone_xxx api's to use only
+// Override malloc_xxx and malloc_zone_xxx api's to use only
// our mimalloc zone. Since even the loader uses malloc
// on macOS, this ensures that all allocations go through
// mimalloc (as all calls are interposed).
@@ -254,7 +254,7 @@ static malloc_zone_t mi_malloc_zone = {
static inline malloc_zone_t* mi_get_default_zone(void)
{
static bool init;
- if mi_unlikely(!init) {
+ if mi_unlikely(!init) {
init = true;
malloc_zone_register(&mi_malloc_zone); // by calling register we avoid a zone error on free (see <http://eatmyrandom.blogspot.com/2010/03/mallocfree-interception-on-mac-os-x.html>)
}
@@ -272,7 +272,7 @@ static malloc_zone_t* mi_malloc_create_zone(vm_size_t size, unsigned flags) {
return mi_get_default_zone();
}
-static malloc_zone_t* mi_malloc_default_zone (void) {
+static malloc_zone_t* mi_malloc_default_zone (void) {
return mi_get_default_zone();
}
@@ -292,11 +292,11 @@ static kern_return_t mi_malloc_get_all_zones (task_t task, memory_reader_t mr, v
return KERN_SUCCESS;
}
-static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) {
+static const char* mi_malloc_get_zone_name(malloc_zone_t* zone) {
return (zone == NULL ? mi_malloc_zone.zone_name : zone->zone_name);
}
-static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) {
+static void mi_malloc_set_zone_name(malloc_zone_t* zone, const char* name) {
MI_UNUSED(zone); MI_UNUSED(name);
}
@@ -306,7 +306,7 @@ static int mi_malloc_jumpstart(uintptr_t cookie) {
}
static void mi__malloc_fork_prepare(void) {
- // nothing
+ // nothing
}
static void mi__malloc_fork_parent(void) {
// nothing
@@ -367,13 +367,13 @@ __attribute__((used)) static const struct mi_interpose_s _mi_zone_interposes[]
MI_INTERPOSE_MI(malloc_destroy_zone),
MI_INTERPOSE_MI(malloc_get_all_zones),
MI_INTERPOSE_MI(malloc_get_zone_name),
- MI_INTERPOSE_MI(malloc_jumpstart),
+ MI_INTERPOSE_MI(malloc_jumpstart),
MI_INTERPOSE_MI(malloc_printf),
MI_INTERPOSE_MI(malloc_set_zone_name),
MI_INTERPOSE_MI(_malloc_fork_child),
MI_INTERPOSE_MI(_malloc_fork_parent),
MI_INTERPOSE_MI(_malloc_fork_prepare),
-
+
MI_INTERPOSE_ZONE(zone_batch_free),
MI_INTERPOSE_ZONE(zone_batch_malloc),
MI_INTERPOSE_ZONE(zone_calloc),
@@ -416,7 +416,7 @@ static inline malloc_zone_t* mi_get_default_zone(void)
}
#if defined(__clang__)
-__attribute__((constructor(0)))
+__attribute__((constructor(0)))
#else
__attribute__((constructor)) // seems not supported by g++-11 on the M1
#endif
diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-override.c b/source/luametatex/source/libraries/mimalloc/src/alloc-override.c
index 6b9845d39..84a0d19df 100644
--- a/source/luametatex/source/libraries/mimalloc/src/alloc-override.c
+++ b/source/luametatex/source/libraries/mimalloc/src/alloc-override.c
@@ -13,7 +13,7 @@ terms of the MIT license. A copy of the license can be found in the file
#error "It is only possible to override "malloc" on Windows when building as a DLL (and linking the C runtime as a DLL)"
#endif
-#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32))
+#if defined(MI_MALLOC_OVERRIDE) && !(defined(_WIN32))
#if defined(__APPLE__)
#include <AvailabilityMacros.h>
@@ -43,7 +43,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
#define MI_FORWARD0(fun,x) MI_FORWARD(fun)
#define MI_FORWARD02(fun,x,y) MI_FORWARD(fun)
#else
- // otherwise use forwarding by calling our `mi_` function
+ // otherwise use forwarding by calling our `mi_` function
#define MI_FORWARD1(fun,x) { return fun(x); }
#define MI_FORWARD2(fun,x,y) { return fun(x,y); }
#define MI_FORWARD3(fun,x,y,z) { return fun(x,y,z); }
@@ -51,11 +51,17 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
#define MI_FORWARD02(fun,x,y) { fun(x,y); }
#endif
-#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
- // define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for
+
+#if defined(__APPLE__) && defined(MI_SHARED_LIB_EXPORT) && defined(MI_OSX_INTERPOSE)
+ // define MI_OSX_IS_INTERPOSED as we should not provide forwarding definitions for
// functions that are interposed (or the interposing does not work)
#define MI_OSX_IS_INTERPOSED
+ mi_decl_externc static size_t mi_malloc_size_checked(void *p) {
+ if (!mi_is_in_heap_region(p)) return 0;
+ return mi_usable_size(p);
+ }
+
// use interposing so `DYLD_INSERT_LIBRARIES` works without `DYLD_FORCE_FLAT_NAMESPACE=1`
// See: <https://books.google.com/books?id=K8vUkpOXhN4C&pg=PA73>
struct mi_interpose_s {
@@ -64,7 +70,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
};
#define MI_INTERPOSE_FUN(oldfun,newfun) { (const void*)&newfun, (const void*)&oldfun }
#define MI_INTERPOSE_MI(fun) MI_INTERPOSE_FUN(fun,mi_##fun)
-
+
__attribute__((used)) static struct mi_interpose_s _mi_interposes[] __attribute__((section("__DATA, __interpose"))) =
{
MI_INTERPOSE_MI(malloc),
@@ -76,9 +82,9 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
MI_INTERPOSE_MI(posix_memalign),
MI_INTERPOSE_MI(reallocf),
MI_INTERPOSE_MI(valloc),
- MI_INTERPOSE_MI(malloc_size),
+ MI_INTERPOSE_FUN(malloc_size,mi_malloc_size_checked),
MI_INTERPOSE_MI(malloc_good_size),
- #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
+ #if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
MI_INTERPOSE_MI(aligned_alloc),
#endif
#ifdef MI_OSX_ZONE
@@ -122,11 +128,11 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
// cannot override malloc unless using a dll.
// we just override new/delete which does work in a static library.
#else
- // On all other systems forward to our API
+ // On all other systems forward to our API
mi_decl_export void* malloc(size_t size) MI_FORWARD1(mi_malloc, size)
mi_decl_export void* calloc(size_t size, size_t n) MI_FORWARD2(mi_calloc, size, n)
mi_decl_export void* realloc(void* p, size_t newsize) MI_FORWARD2(mi_realloc, p, newsize)
- mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p)
+ mi_decl_export void free(void* p) MI_FORWARD0(mi_free, p)
#endif
#if (defined(__GNUC__) || defined(__clang__)) && !defined(__APPLE__)
@@ -168,20 +174,20 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
void operator delete[](void* p, std::size_t n, std::align_val_t al) noexcept { mi_free_size_aligned(p, n, static_cast<size_t>(al)); };
void operator delete (void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
void operator delete[](void* p, std::align_val_t al, const std::nothrow_t&) noexcept { mi_free_aligned(p, static_cast<size_t>(al)); }
-
+
void* operator new( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
void* operator new[]( std::size_t n, std::align_val_t al) noexcept(false) { return mi_new_aligned(n, static_cast<size_t>(al)); }
void* operator new (std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
void* operator new[](std::size_t n, std::align_val_t al, const std::nothrow_t&) noexcept { return mi_new_aligned_nothrow(n, static_cast<size_t>(al)); }
#endif
-#elif (defined(__GNUC__) || defined(__clang__))
+#elif (defined(__GNUC__) || defined(__clang__))
// ------------------------------------------------------
// Override by defining the mangled C++ names of the operators (as
// used by GCC and CLang).
// See <https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling>
// ------------------------------------------------------
-
+
void _ZdlPv(void* p) MI_FORWARD0(mi_free,p) // delete
void _ZdaPv(void* p) MI_FORWARD0(mi_free,p) // delete[]
void _ZdlPvm(void* p, size_t n) MI_FORWARD02(mi_free_size,p,n)
@@ -190,12 +196,12 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
void _ZdaPvSt11align_val_t(void* p, size_t al) { mi_free_aligned(p,al); }
void _ZdlPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
void _ZdaPvmSt11align_val_t(void* p, size_t n, size_t al) { mi_free_size_aligned(p,n,al); }
-
+
#if (MI_INTPTR_SIZE==8)
void* _Znwm(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit
void* _Znam(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit
void* _ZnwmRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
- void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
+ void* _ZnamRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnwmSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
void* _ZnamSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
void* _ZnwmSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
@@ -204,7 +210,7 @@ typedef struct mi_nothrow_s { int _tag; } mi_nothrow_t;
void* _Znwj(size_t n) MI_FORWARD1(mi_new,n) // new 64-bit
void* _Znaj(size_t n) MI_FORWARD1(mi_new,n) // new[] 64-bit
void* _ZnwjRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
- void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
+ void* _ZnajRKSt9nothrow_t(size_t n, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_nothrow(n); }
void* _ZnwjSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
void* _ZnajSt11align_val_t(size_t n, size_t al) MI_FORWARD2(mi_new_aligned, n, al)
void* _ZnwjSt11align_val_tRKSt9nothrow_t(size_t n, size_t al, mi_nothrow_t tag) { MI_UNUSED(tag); return mi_new_aligned_nothrow(n,al); }
@@ -234,29 +240,37 @@ extern "C" {
// No forwarding here due to aliasing/name mangling issues
void* valloc(size_t size) { return mi_valloc(size); }
- void vfree(void* p) { mi_free(p); }
+ void vfree(void* p) { mi_free(p); }
size_t malloc_good_size(size_t size) { return mi_malloc_good_size(size); }
int posix_memalign(void** p, size_t alignment, size_t size) { return mi_posix_memalign(p, alignment, size); }
-
+
// `aligned_alloc` is only available when __USE_ISOC11 is defined.
// Note: Conda has a custom glibc where `aligned_alloc` is declared `static inline` and we cannot
// override it, but both _ISOC11_SOURCE and __USE_ISOC11 are undefined in Conda GCC7 or GCC9.
// Fortunately, in the case where `aligned_alloc` is declared as `static inline` it
// uses internally `memalign`, `posix_memalign`, or `_aligned_malloc` so we can avoid overriding it ourselves.
- #if __USE_ISOC11
+ #if __USE_ISOC11
void* aligned_alloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
#endif
#endif
// no forwarding here due to aliasing/name mangling issues
-void cfree(void* p) { mi_free(p); }
+void cfree(void* p) { mi_free(p); }
void* pvalloc(size_t size) { return mi_pvalloc(size); }
void* reallocarray(void* p, size_t count, size_t size) { return mi_reallocarray(p, count, size); }
int reallocarr(void* p, size_t count, size_t size) { return mi_reallocarr(p, count, size); }
void* memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
void* _aligned_malloc(size_t alignment, size_t size) { return mi_aligned_alloc(alignment, size); }
-#if defined(__GLIBC__) && defined(__linux__)
+#if defined(__wasi__)
+ // forward __libc interface (see PR #667)
+ void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc, size)
+ void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc, count, size)
+ void* __libc_realloc(void* p, size_t size) MI_FORWARD2(mi_realloc, p, size)
+ void __libc_free(void* p) MI_FORWARD0(mi_free, p)
+ void* __libc_memalign(size_t alignment, size_t size) { return mi_memalign(alignment, size); }
+
+#elif defined(__GLIBC__) && defined(__linux__)
// forward __libc interface (needed for glibc-based Linux distributions)
void* __libc_malloc(size_t size) MI_FORWARD1(mi_malloc,size)
void* __libc_calloc(size_t count, size_t size) MI_FORWARD2(mi_calloc,count,size)
diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c b/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c
index 214a97345..e6505f290 100644
--- a/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c
+++ b/source/luametatex/source/libraries/mimalloc/src/alloc-posix.c
@@ -33,12 +33,12 @@ terms of the MIT license. A copy of the license can be found in the file
mi_decl_nodiscard size_t mi_malloc_size(const void* p) mi_attr_noexcept {
- //if (!mi_is_in_heap_region(p)) return 0;
+ // if (!mi_is_in_heap_region(p)) return 0;
return mi_usable_size(p);
}
mi_decl_nodiscard size_t mi_malloc_usable_size(const void *p) mi_attr_noexcept {
- //if (!mi_is_in_heap_region(p)) return 0;
+ // if (!mi_is_in_heap_region(p)) return 0;
return mi_usable_size(p);
}
@@ -86,12 +86,12 @@ mi_decl_nodiscard mi_decl_restrict void* mi_aligned_alloc(size_t alignment, size
// C11 requires the size to be an integral multiple of the alignment, see <https://en.cppreference.com/w/c/memory/aligned_alloc>.
// unfortunately, it turns out quite some programs pass a size that is not an integral multiple so skip this check..
/* if mi_unlikely((size & (alignment - 1)) != 0) { // C11 requires alignment>0 && integral multiple, see <https://en.cppreference.com/w/c/memory/aligned_alloc>
- #if MI_DEBUG > 0
- _mi_error_message(EOVERFLOW, "(mi_)aligned_alloc requires the size to be an integral multiple of the alignment (size %zu, alignment %zu)\n", size, alignment);
- #endif
- return NULL;
- }
- */
+ #if MI_DEBUG > 0
+ _mi_error_message(EOVERFLOW, "(mi_)aligned_alloc requires the size to be an integral multiple of the alignment (size %zu, alignment %zu)\n", size, alignment);
+ #endif
+ return NULL;
+ }
+ */
// C11 also requires alignment to be a power-of-two (and > 0) which is checked in mi_malloc_aligned
void* p = mi_malloc_aligned(size, alignment);
mi_assert_internal(((uintptr_t)p % alignment) == 0);
@@ -110,7 +110,7 @@ mi_decl_nodiscard int mi_reallocarr( void* p, size_t count, size_t size ) mi_att
errno = EINVAL;
return EINVAL;
}
- void** op = (void**)p;
+ void** op = (void**)p;
void* newp = mi_reallocarray(*op, count, size);
if mi_unlikely(newp == NULL) { return errno; }
*op = newp;
diff --git a/source/luametatex/source/libraries/mimalloc/src/alloc.c b/source/luametatex/source/libraries/mimalloc/src/alloc.c
index 348218246..86453f152 100644
--- a/source/luametatex/source/libraries/mimalloc/src/alloc.c
+++ b/source/luametatex/source/libraries/mimalloc/src/alloc.c
@@ -30,7 +30,7 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
mi_assert_internal(page->xblock_size==0||mi_page_block_size(page) >= size);
mi_block_t* const block = page->free;
if mi_unlikely(block == NULL) {
- return _mi_malloc_generic(heap, size, zero);
+ return _mi_malloc_generic(heap, size, zero, 0);
}
mi_assert_internal(block != NULL && _mi_ptr_page(block) == page);
// pop from the free list
@@ -38,20 +38,22 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
page->free = mi_block_next(page, block);
mi_assert_internal(page->free == NULL || _mi_ptr_page(page->free) == page);
- // allow use of the block internally
+ // allow use of the block internally
// note: when tracking we need to avoid ever touching the MI_PADDING since
// that is tracked by valgrind etc. as non-accessible (through the red-zone, see `mimalloc-track.h`)
mi_track_mem_undefined(block, mi_page_usable_block_size(page));
-
+
// zero the block? note: we need to zero the full block size (issue #63)
if mi_unlikely(zero) {
mi_assert_internal(page->xblock_size != 0); // do not call with zero'ing for huge blocks (see _mi_malloc_generic)
const size_t zsize = (page->is_zero ? sizeof(block->next) + MI_PADDING_SIZE : page->xblock_size);
- _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE);
+ _mi_memzero_aligned(block, zsize - MI_PADDING_SIZE);
}
#if (MI_DEBUG>0) && !MI_TRACK_ENABLED
- if (!page->is_zero && !zero) { memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page)); }
+ if (!page->is_zero && !zero && !mi_page_is_huge(page)) {
+ memset(block, MI_DEBUG_UNINIT, mi_page_usable_block_size(page));
+ }
#elif (MI_SECURE!=0)
if (!zero) { block->next = 0; } // don't leak internal data
#endif
@@ -77,32 +79,37 @@ extern inline void* _mi_page_malloc(mi_heap_t* heap, mi_page_t* page, size_t siz
#endif
padding->canary = (uint32_t)(mi_ptr_encode(page,block,page->keys));
padding->delta = (uint32_t)(delta);
- uint8_t* fill = (uint8_t*)padding - delta;
- const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes
- for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; }
+ if (!mi_page_is_huge(page)) {
+ uint8_t* fill = (uint8_t*)padding - delta;
+ const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // set at most N initial padding bytes
+ for (size_t i = 0; i < maxpad; i++) { fill[i] = MI_DEBUG_PADDING; }
+ }
#endif
return block;
}
static inline mi_decl_restrict void* mi_heap_malloc_small_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
- mi_assert(heap!=NULL);
- mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
+ mi_assert(heap != NULL);
+ #if MI_DEBUG
+ const uintptr_t tid = _mi_thread_id();
+ mi_assert(heap->thread_id == 0 || heap->thread_id == tid); // heaps are thread local
+ #endif
mi_assert(size <= MI_SMALL_SIZE_MAX);
- #if (MI_PADDING)
+#if (MI_PADDING)
if (size == 0) {
size = sizeof(void*);
}
- #endif
- mi_page_t* page = _mi_heap_get_free_small_page(heap,size + MI_PADDING_SIZE);
+#endif
+ mi_page_t* page = _mi_heap_get_free_small_page(heap, size + MI_PADDING_SIZE);
void* p = _mi_page_malloc(heap, page, size + MI_PADDING_SIZE, zero);
- mi_assert_internal(p==NULL || mi_usable_size(p) >= size);
- #if MI_STAT>1
+ mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
+#if MI_STAT>1
if (p != NULL) {
if (!mi_heap_is_initialized(heap)) { heap = mi_get_default_heap(); }
mi_heap_stat_increase(heap, malloc, mi_usable_size(p));
}
- #endif
+#endif
mi_track_malloc(p,size,zero);
return p;
}
@@ -117,14 +124,15 @@ mi_decl_nodiscard extern inline mi_decl_restrict void* mi_malloc_small(size_t si
}
// The main allocation function
-extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
+extern inline void* _mi_heap_malloc_zero_ex(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept {
if mi_likely(size <= MI_SMALL_SIZE_MAX) {
+ mi_assert_internal(huge_alignment == 0);
return mi_heap_malloc_small_zero(heap, size, zero);
}
else {
mi_assert(heap!=NULL);
- mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
- void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero); // note: size can overflow but it is detected in malloc_generic
+ mi_assert(heap->thread_id == 0 || heap->thread_id == _mi_thread_id()); // heaps are thread local
+ void* const p = _mi_malloc_generic(heap, size + MI_PADDING_SIZE, zero, huge_alignment); // note: size can overflow but it is detected in malloc_generic
mi_assert_internal(p == NULL || mi_usable_size(p) >= size);
#if MI_STAT>1
if (p != NULL) {
@@ -137,6 +145,10 @@ extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero
}
}
+extern inline void* _mi_heap_malloc_zero(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept {
+ return _mi_heap_malloc_zero_ex(heap, size, zero, 0);
+}
+
mi_decl_nodiscard extern inline mi_decl_restrict void* mi_heap_malloc(mi_heap_t* heap, size_t size) mi_attr_noexcept {
return _mi_heap_malloc_zero(heap, size, false);
}
@@ -220,9 +232,9 @@ static bool mi_page_decode_padding(const mi_page_t* page, const mi_block_t* bloc
mi_track_mem_defined(padding,sizeof(mi_padding_t));
*delta = padding->delta;
uint32_t canary = padding->canary;
- uintptr_t keys[2];
+ uintptr_t keys[2];
keys[0] = page->keys[0];
- keys[1] = page->keys[1];
+ keys[1] = page->keys[1];
bool ok = ((uint32_t)mi_ptr_encode(page,block,keys) == canary && *delta <= *bsize);
mi_track_mem_noaccess(padding,sizeof(mi_padding_t));
return ok;
@@ -245,17 +257,19 @@ static bool mi_verify_padding(const mi_page_t* page, const mi_block_t* block, si
if (!ok) return false;
mi_assert_internal(bsize >= delta);
*size = bsize - delta;
- uint8_t* fill = (uint8_t*)block + bsize - delta;
- const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes
- mi_track_mem_defined(fill,maxpad);
- for (size_t i = 0; i < maxpad; i++) {
- if (fill[i] != MI_DEBUG_PADDING) {
- *wrong = bsize - delta + i;
- ok = false;
- break;
+ if (!mi_page_is_huge(page)) {
+ uint8_t* fill = (uint8_t*)block + bsize - delta;
+ const size_t maxpad = (delta > MI_MAX_ALIGN_SIZE ? MI_MAX_ALIGN_SIZE : delta); // check at most the first N padding bytes
+ mi_track_mem_defined(fill, maxpad);
+ for (size_t i = 0; i < maxpad; i++) {
+ if (fill[i] != MI_DEBUG_PADDING) {
+ *wrong = bsize - delta + i;
+ ok = false;
+ break;
+ }
}
+ mi_track_mem_noaccess(fill, maxpad);
}
- mi_track_mem_noaccess(fill,maxpad);
return ok;
}
@@ -325,7 +339,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
}
else {
mi_heap_stat_decrease(heap, huge, bsize);
- }
+ }
}
#else
static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
@@ -333,6 +347,7 @@ static void mi_stat_free(const mi_page_t* page, const mi_block_t* block) {
}
#endif
+#if MI_HUGE_PAGE_ABANDON
#if (MI_STAT>0)
// maintain stats for huge objects
static void mi_stat_huge_free(const mi_page_t* page) {
@@ -350,29 +365,41 @@ static void mi_stat_huge_free(const mi_page_t* page) {
MI_UNUSED(page);
}
#endif
+#endif
// ------------------------------------------------------
// Free
// ------------------------------------------------------
-// multi-threaded free (or free in huge block)
+// multi-threaded free (or free in huge block if compiled with MI_HUGE_PAGE_ABANDON)
static mi_decl_noinline void _mi_free_block_mt(mi_page_t* page, mi_block_t* block)
{
// The padding check may access the non-thread-owned page for the key values.
// that is safe as these are constant and the page won't be freed (as the block is not freed yet).
mi_check_padding(page, block);
- mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
- #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading
- memset(block, MI_DEBUG_FREED, mi_usable_size(block));
- #endif
-
+ mi_padding_shrink(page, block, sizeof(mi_block_t)); // for small size, ensure we can fit the delayed thread pointers without triggering overflow detection
+
// huge page segments are always abandoned and can be freed immediately
mi_segment_t* segment = _mi_page_segment(page);
- if (segment->kind==MI_SEGMENT_HUGE) {
+ if (segment->kind == MI_SEGMENT_HUGE) {
+ #if MI_HUGE_PAGE_ABANDON
+ // huge page segments are always abandoned and can be freed immediately
mi_stat_huge_free(page);
_mi_segment_huge_page_free(segment, page, block);
return;
+ #else
+ // huge pages are special as they occupy the entire segment
+ // as these are large we reset the memory occupied by the page so it is available to other threads
+ // (as the owning thread needs to actually free the memory later).
+ _mi_segment_huge_page_reset(segment, page, block);
+ #endif
+ }
+
+ #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED // note: when tracking, cannot use mi_usable_size with multi-threading
+ if (segment->kind != MI_SEGMENT_HUGE) { // not for huge segments as we just reset the content
+ memset(block, MI_DEBUG_FREED, mi_usable_size(block));
}
+ #endif
// Try to put the block on either the page-local thread free list, or the heap delayed free list.
mi_thread_free_t tfreex;
@@ -423,7 +450,9 @@ static inline void _mi_free_block(mi_page_t* page, bool local, mi_block_t* block
if mi_unlikely(mi_check_is_double_free(page, block)) return;
mi_check_padding(page, block);
#if (MI_DEBUG!=0) && !MI_TRACK_ENABLED
- memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
+ if (!mi_page_is_huge(page)) { // huge page content may be already decommitted
+ memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
+ }
#endif
mi_block_set_next(page, block, page->local_free);
page->local_free = block;
@@ -450,20 +479,21 @@ mi_block_t* _mi_page_ptr_unalign(const mi_segment_t* segment, const mi_page_t* p
}
-static void mi_decl_noinline mi_free_generic(const mi_segment_t* segment, bool local, void* p) mi_attr_noexcept {
- mi_page_t* const page = _mi_segment_page_of(segment, p);
+void mi_decl_noinline _mi_free_generic(const mi_segment_t* segment, mi_page_t* page, bool is_local, void* p) mi_attr_noexcept { // non-static; `page` is supplied by the caller instead of being looked up here
mi_block_t* const block = (mi_page_has_aligned(page) ? _mi_page_ptr_unalign(segment, page, p) : (mi_block_t*)p);
mi_stat_free(page, block); // stat_free may access the padding
mi_track_free(p);
- _mi_free_block(page, local, block);
+ _mi_free_block(page, is_local, block); // `is_local` is forwarded unchanged as the `local` argument of _mi_free_block
}
// Get the segment data belonging to a pointer
// This is just a single `and` in assembly but does further checks in debug mode
// (and secure mode) if this was a valid pointer.
-static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
+static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* msg)
{
MI_UNUSED(msg);
+ mi_assert(p != NULL);
+
#if (MI_DEBUG>0)
if mi_unlikely(((uintptr_t)p & (MI_INTPTR_SIZE - 1)) != 0) {
_mi_error_message(EINVAL, "%s: invalid (unaligned) pointer: %p\n", msg, p);
@@ -472,14 +502,20 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
#endif
mi_segment_t* const segment = _mi_ptr_segment(p);
- if mi_unlikely(segment == NULL) return NULL; // checks also for (p==NULL)
+ mi_assert_internal(segment != NULL);
#if (MI_DEBUG>0)
if mi_unlikely(!mi_is_in_heap_region(p)) {
- _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
- "(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
- if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
- _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
+ #if (MI_INTPTR_SIZE == 8 && defined(__linux__))
+ if (((uintptr_t)p >> 40) != 0x7F) { // linux tends to align large blocks above 0x7F000000000 (issue #640)
+ #else
+ {
+ #endif
+ _mi_warning_message("%s: pointer might not point to a valid heap region: %p\n"
+ "(this may still be a valid very large allocation (over 64MiB))\n", msg, p);
+ if mi_likely(_mi_ptr_cookie(segment) == segment->cookie) {
+ _mi_warning_message("(yes, the previous pointer %p was valid after all)\n", p);
+ }
}
}
#endif
@@ -489,38 +525,44 @@ static inline mi_segment_t* mi_checked_ptr_segment(const void* p, const char* ms
return NULL;
}
#endif
+
return segment;
}
-// Free a block
+// Free a block
+// fast path written carefully to prevent spilling on the stack
+// `p == NULL` is a no-op. A pointer that `mi_checked_ptr_segment` rejects (it returns NULL
+// from its conditionally compiled validity check) is ignored as well instead of being dereferenced.
void mi_free(void* p) mi_attr_noexcept
{
+ if mi_unlikely(p == NULL) return;
mi_segment_t* const segment = mi_checked_ptr_segment(p,"mi_free");
- if mi_unlikely(segment == NULL) return;
-
- mi_threadid_t tid = _mi_thread_id();
- mi_page_t* const page = _mi_segment_page_of(segment, p);
-
- if mi_likely(tid == mi_atomic_load_relaxed(&segment->thread_id) && page->flags.full_aligned == 0) { // the thread id matches and it is not a full page, nor has aligned blocks
- // local, and not full or aligned
- mi_block_t* block = (mi_block_t*)(p);
- if mi_unlikely(mi_check_is_double_free(page,block)) return;
- mi_check_padding(page, block);
- mi_stat_free(page, block);
- #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED
- memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
- #endif
- mi_track_free(p);
- mi_block_set_next(page, block, page->local_free);
- page->local_free = block;
- if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page))
- _mi_page_retire(page);
+ if mi_unlikely(segment == NULL) return; // mi_checked_ptr_segment can still return NULL for a pointer it rejects; `segment` is dereferenced on the next line
+ const bool is_local= (_mi_thread_id() == mi_atomic_load_relaxed(&segment->thread_id));
+ mi_page_t* const page = _mi_segment_page_of(segment, p);
+
+ if mi_likely(is_local) { // thread-local free?
+ if mi_likely(page->flags.full_aligned == 0) // and it is not a full page (full pages need to move from the full bin), nor has aligned blocks (aligned blocks need to be unaligned)
+ {
+ mi_block_t* const block = (mi_block_t*)p;
+ if mi_unlikely(mi_check_is_double_free(page, block)) return;
+ mi_check_padding(page, block);
+ mi_stat_free(page, block);
+ #if (MI_DEBUG!=0) && !MI_TRACK_ENABLED
+ memset(block, MI_DEBUG_FREED, mi_page_block_size(page));
+ #endif
+ mi_track_free(p);
+ mi_block_set_next(page, block, page->local_free);
+ page->local_free = block;
+ if mi_unlikely(--page->used == 0) { // using this expression generates better code than: page->used--; if (mi_page_all_free(page))
+ _mi_page_retire(page);
+ }
+ }
+ else {
+ // page is full or contains (inner) aligned blocks; use generic path
+ _mi_free_generic(segment, page, true, p);
}
}
else {
- // non-local, aligned blocks, or a full page; use the more generic path
- // note: recalc page in generic to improve code generation
- mi_free_generic(segment, tid == segment->thread_id, p);
+ // not thread-local; use generic path
+ _mi_free_generic(segment, page, false, p);
}
}
@@ -559,9 +601,9 @@ mi_decl_noinline static size_t mi_page_usable_aligned_size_of(const mi_segment_t
}
static inline size_t _mi_usable_size(const void* p, const char* msg) mi_attr_noexcept {
+ if (p == NULL) return 0;
const mi_segment_t* const segment = mi_checked_ptr_segment(p, msg);
- if (segment==NULL) return 0; // also returns 0 if `p == NULL`
- const mi_page_t* const page = _mi_segment_page_of(segment, p);
+ const mi_page_t* const page = _mi_segment_page_of(segment, p);
if mi_likely(!mi_page_has_aligned(page)) {
const mi_block_t* block = (const mi_block_t*)p;
return mi_page_usable_size_of(page, block);
@@ -578,24 +620,6 @@ mi_decl_nodiscard size_t mi_usable_size(const void* p) mi_attr_noexcept {
// ------------------------------------------------------
-// ensure explicit external inline definitions are emitted!
-// ------------------------------------------------------
-
-#ifdef __cplusplus
-void* _mi_externs[] = {
- (void*)&_mi_page_malloc,
- (void*)&_mi_heap_malloc_zero,
- (void*)&mi_malloc,
- (void*)&mi_malloc_small,
- (void*)&mi_zalloc_small,
- (void*)&mi_heap_malloc,
- (void*)&mi_heap_zalloc,
- (void*)&mi_heap_malloc_small
-};
-#endif
-
-
-// ------------------------------------------------------
// Allocation extensions
// ------------------------------------------------------
@@ -641,7 +665,7 @@ mi_decl_nodiscard mi_decl_restrict void* mi_mallocn(size_t count, size_t size) m
// Expand (or shrink) in place (or fail)
void* mi_expand(void* p, size_t newsize) mi_attr_noexcept {
#if MI_PADDING
- // we do not shrink/expand with padding enabled
+ // we do not shrink/expand with padding enabled
MI_UNUSED(p); MI_UNUSED(newsize);
return NULL;
#else
@@ -659,7 +683,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
const size_t size = _mi_usable_size(p,"mi_realloc"); // also works if p == NULL (with size 0)
if mi_unlikely(newsize <= size && newsize >= (size / 2) && newsize > 0) { // note: newsize must be > 0 or otherwise we return NULL for realloc(NULL,0)
// todo: adjust potential padding to reflect the new size?
- mi_track_free(p);
+ mi_track_free_size(p, size);
mi_track_malloc(p,newsize,true);
return p; // reallocation still fits and not more than 50% waste
}
@@ -683,7 +707,7 @@ void* _mi_heap_realloc_zero(mi_heap_t* heap, void* p, size_t newsize, bool zero)
}
mi_decl_nodiscard void* mi_heap_realloc(mi_heap_t* heap, void* p, size_t newsize) mi_attr_noexcept {
- return _mi_heap_realloc_zero(heap, p, newsize, false);
+ return _mi_heap_realloc_zero(heap, p, newsize, false);
}
mi_decl_nodiscard void* mi_heap_reallocn(mi_heap_t* heap, void* p, size_t count, size_t size) mi_attr_noexcept {
@@ -743,7 +767,9 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_strdup(mi_heap_t* heap, const c
if (s == NULL) return NULL;
size_t n = strlen(s);
char* t = (char*)mi_heap_malloc(heap,n+1);
- if (t != NULL) _mi_memcpy(t, s, n + 1);
+ if (t == NULL) return NULL;
+ _mi_memcpy(t, s, n);
+ t[n] = 0;
return t;
}
@@ -793,6 +819,7 @@ mi_decl_nodiscard mi_decl_restrict char* mi_heap_realpath(mi_heap_t* heap, const
}
}
#else
+/*
#include <unistd.h> // pathconf
static size_t mi_path_max(void) {
static size_t path_max = 0;
@@ -804,20 +831,31 @@ static size_t mi_path_max(void) {
}
return path_max;
}
-
+*/
char* mi_heap_realpath(mi_heap_t* heap, const char* fname, char* resolved_name) mi_attr_noexcept {
if (resolved_name != NULL) {
return realpath(fname,resolved_name);
}
else {
- size_t n = mi_path_max();
+ char* rname = realpath(fname, NULL);
+ if (rname == NULL) return NULL;
+ char* result = mi_heap_strdup(heap, rname);
+ free(rname); // use regular free! (which may be redirected to our free but that's ok)
+ return result;
+ }
+ /*
+ const size_t n = mi_path_max();
char* buf = (char*)mi_malloc(n+1);
- if (buf==NULL) return NULL;
+ if (buf == NULL) {
+ errno = ENOMEM;
+ return NULL;
+ }
char* rname = realpath(fname,buf);
char* result = mi_heap_strndup(heap,rname,n); // ok if `rname==NULL`
mi_free(buf);
return result;
}
+ */
}
#endif
@@ -843,9 +881,9 @@ static bool mi_try_new_handler(bool nothrow) {
#else
std::new_handler h = std::set_new_handler();
std::set_new_handler(h);
- #endif
+ #endif
if (h==NULL) {
- _mi_error_message(ENOMEM, "out of memory in 'new'");
+ _mi_error_message(ENOMEM, "out of memory in 'new'");
if (!nothrow) {
throw std::bad_alloc();
}
@@ -876,7 +914,7 @@ static std_new_handler_t mi_get_new_handler() {
static bool mi_try_new_handler(bool nothrow) {
std_new_handler_t h = mi_get_new_handler();
if (h==NULL) {
- _mi_error_message(ENOMEM, "out of memory in 'new'");
+ _mi_error_message(ENOMEM, "out of memory in 'new'");
if (!nothrow) {
abort(); // cannot throw in plain C, use abort
}
@@ -889,20 +927,46 @@ static bool mi_try_new_handler(bool nothrow) {
}
#endif
-static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow ) {
+static mi_decl_noinline void* mi_heap_try_new(mi_heap_t* heap, size_t size, bool nothrow ) {
void* p = NULL;
while(p == NULL && mi_try_new_handler(nothrow)) {
- p = mi_malloc(size);
+ p = mi_heap_malloc(heap,size); // retried from `heap` for as long as the allocation fails and mi_try_new_handler returns true
}
return p;
}
-mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) {
- void* p = mi_malloc(size);
- if mi_unlikely(p == NULL) return mi_try_new(size,false);
+static mi_decl_noinline void* mi_try_new(size_t size, bool nothrow) {
+ return mi_heap_try_new(mi_get_default_heap(), size, nothrow); // default-heap wrapper around mi_heap_try_new
+}
+
+
+mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new(mi_heap_t* heap, size_t size) {
+ void* p = mi_heap_malloc(heap,size); // first attempt: ordinary allocation from `heap`
+ if mi_unlikely(p == NULL) return mi_heap_try_new(heap, size, false); // failed: hand over to the new-handler retry loop with nothrow=false
return p;
}
+mi_decl_nodiscard mi_decl_restrict void* mi_new(size_t size) {
+ return mi_heap_alloc_new(mi_get_default_heap(), size); // same as mi_heap_alloc_new, applied to the default heap
+}
+
+
+// Array form of `mi_heap_alloc_new`: `count` and `size` are combined into a single byte total
+// by `mi_count_size_overflow`, and that total is allocated from `heap`.
+// When the combination overflows, the new-handler is invoked once and NULL is returned.
+mi_decl_nodiscard mi_decl_restrict extern inline void* mi_heap_alloc_new_n(mi_heap_t* heap, size_t count, size_t size) {
+  size_t nbytes;
+  const bool overflowed = mi_count_size_overflow(count, size, &nbytes);
+  if mi_unlikely(overflowed) {
+    // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc
+    mi_try_new_handler(false);
+    return NULL;
+  }
+  return mi_heap_alloc_new(heap, nbytes);
+}
+
+// Default-heap form of `mi_heap_alloc_new_n`.
+// The arguments are forwarded in the callee's declared order: element `count` first, element `size` second.
+mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) {
+  return mi_heap_alloc_new_n(mi_get_default_heap(), count, size);
+}
+
+
mi_decl_nodiscard mi_decl_restrict void* mi_new_nothrow(size_t size) mi_attr_noexcept {
void* p = mi_malloc(size);
if mi_unlikely(p == NULL) return mi_try_new(size, true);
@@ -927,17 +991,6 @@ mi_decl_nodiscard mi_decl_restrict void* mi_new_aligned_nothrow(size_t size, siz
return p;
}
-mi_decl_nodiscard mi_decl_restrict void* mi_new_n(size_t count, size_t size) {
- size_t total;
- if mi_unlikely(mi_count_size_overflow(count, size, &total)) {
- mi_try_new_handler(false); // on overflow we invoke the try_new_handler once to potentially throw std::bad_alloc
- return NULL;
- }
- else {
- return mi_new(total);
- }
-}
-
mi_decl_nodiscard void* mi_new_realloc(void* p, size_t newsize) {
void* q;
do {
@@ -956,3 +1009,23 @@ mi_decl_nodiscard void* mi_new_reallocn(void* p, size_t newcount, size_t size) {
return mi_new_realloc(p, total);
}
}
+
+// ------------------------------------------------------
+// ensure explicit external inline definitions are emitted!
+// ------------------------------------------------------
+
+#ifdef __cplusplus
+void* _mi_externs[] = { // C++ builds only: takes the address of each `extern inline` entry point listed below (per the banner above, so that their definitions are emitted)
+ (void*)&_mi_page_malloc,
+ (void*)&_mi_heap_malloc_zero,
+ (void*)&_mi_heap_malloc_zero_ex,
+ (void*)&mi_malloc,
+ (void*)&mi_malloc_small,
+ (void*)&mi_zalloc_small,
+ (void*)&mi_heap_malloc,
+ (void*)&mi_heap_zalloc,
+ (void*)&mi_heap_malloc_small,
+ (void*)&mi_heap_alloc_new,
+ (void*)&mi_heap_alloc_new_n
+};
+#endif
diff --git a/source/luametatex/source/libraries/mimalloc/src/arena.c b/source/luametatex/source/libraries/mimalloc/src/arena.c
index 56b87d083..80dd47869 100644
--- a/source/luametatex/source/libraries/mimalloc/src/arena.c
+++ b/source/luametatex/source/libraries/mimalloc/src/arena.c
@@ -48,8 +48,8 @@ bool _mi_os_decommit(void* addr, size_t size, mi_stats_t* stats);
// Block info: bit 0 contains the `in_use` bit, the upper bits the
// size in count of arena blocks.
typedef uintptr_t mi_block_info_t;
-#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 8MiB (must be at least MI_SEGMENT_ALIGN)
-#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 4MiB
+#define MI_ARENA_BLOCK_SIZE (MI_SEGMENT_SIZE) // 64MiB (must be at least MI_SEGMENT_ALIGN)
+#define MI_ARENA_MIN_OBJ_SIZE (MI_ARENA_BLOCK_SIZE/2) // 32MiB
#define MI_MAX_ARENAS (64) // not more than 126 (since we use 7 bits in the memid and an arena index + 1)
// A memory arena descriptor
@@ -190,22 +190,23 @@ static mi_decl_noinline void* mi_arena_alloc_from(mi_arena_t* arena, size_t aren
return p;
}
-static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large,
- bool* is_pinned, bool* is_zero,
- mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
-{
+// allocate from an arena with fallback to the OS
+static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size_t alignment, bool* commit, bool* large,
+ bool* is_pinned, bool* is_zero,
+ mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld )
+{
MI_UNUSED_RELEASE(alignment);
mi_assert_internal(alignment <= MI_SEGMENT_ALIGN);
- const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
+ const size_t max_arena = mi_atomic_load_relaxed(&mi_arena_count);
const size_t bcount = mi_block_count_of_size(size);
if mi_likely(max_arena == 0) return NULL;
- mi_assert_internal(size <= bcount*MI_ARENA_BLOCK_SIZE);
+ mi_assert_internal(size <= bcount * MI_ARENA_BLOCK_SIZE);
size_t arena_index = mi_arena_id_index(req_arena_id);
if (arena_index < MI_MAX_ARENAS) {
// try a specific arena if requested
mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[arena_index]);
- if (arena != NULL &&
+ if ((arena != NULL) &&
(arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
(*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
{
@@ -215,28 +216,28 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
}
}
else {
- // try numa affine allocation
- for (size_t i = 0; i < max_arena; i++) {
- mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (arena==NULL) break; // end reached
- if ((arena->numa_node<0 || arena->numa_node==numa_node) && // numa local?
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
- {
+ // try numa affine allocation
+ for (size_t i = 0; i < max_arena; i++) {
+ mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+ if (arena == NULL) break; // end reached
+ if ((arena->numa_node < 0 || arena->numa_node == numa_node) && // numa local?
+ (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
+ {
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
- mi_assert_internal((uintptr_t)p % alignment == 0);
+ mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) return p;
+ }
}
- }
- // try from another numa node instead..
- for (size_t i = 0; i < max_arena; i++) {
- mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
- if (arena==NULL) break; // end reached
- if ((arena->numa_node>=0 && arena->numa_node!=numa_node) && // not numa local!
- (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
- {
+ // try from another numa node instead..
+ for (size_t i = 0; i < max_arena; i++) {
+ mi_arena_t* arena = mi_atomic_load_ptr_relaxed(mi_arena_t, &mi_arenas[i]);
+ if (arena == NULL) break; // end reached
+ if ((arena->numa_node >= 0 && arena->numa_node != numa_node) && // not numa local!
+ (*large || !arena->is_large)) // large OS pages allowed, or arena is not large OS pages
+ {
void* p = mi_arena_alloc_from(arena, i, bcount, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
- mi_assert_internal((uintptr_t)p % alignment == 0);
+ mi_assert_internal((uintptr_t)p % alignment == 0);
if (p != NULL) return p;
}
}
@@ -244,8 +245,7 @@ static mi_decl_noinline void* mi_arena_allocate(int numa_node, size_t size, size
return NULL;
}
-
-void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero,
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero,
mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(commit != NULL && is_pinned != NULL && is_zero != NULL && memid != NULL && tld != NULL);
@@ -255,11 +255,11 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool*
*is_pinned = false;
bool default_large = false;
- if (large==NULL) large = &default_large; // ensure `large != NULL`
+ if (large == NULL) large = &default_large; // ensure `large != NULL`
const int numa_node = _mi_os_numa_node(tld); // current numa node
// try to allocate in an arena if the alignment is small enough and the object is not too small (as for heap meta data)
- if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN) {
+ if (size >= MI_ARENA_MIN_OBJ_SIZE && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
void* p = mi_arena_allocate(numa_node, size, alignment, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
if (p != NULL) return p;
}
@@ -270,15 +270,15 @@ void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool*
return NULL;
}
*is_zero = true;
- *memid = MI_MEMID_OS;
- void* p = _mi_os_alloc_aligned(size, alignment, *commit, large, tld->stats);
- if (p != NULL) *is_pinned = *large;
+ *memid = MI_MEMID_OS;
+ void* p = _mi_os_alloc_aligned_offset(size, alignment, align_offset, *commit, large, tld->stats);
+ if (p != NULL) { *is_pinned = *large; }
return p;
}
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld)
{
- return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, commit, large, is_pinned, is_zero, req_arena_id, memid, tld);
+ return _mi_arena_alloc_aligned(size, MI_ARENA_BLOCK_SIZE, 0, commit, large, is_pinned, is_zero, req_arena_id, memid, tld); // alignment = MI_ARENA_BLOCK_SIZE, align_offset = 0
}
void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
@@ -295,17 +295,18 @@ void* mi_arena_area(mi_arena_id_t arena_id, size_t* size) {
Arena free
----------------------------------------------------------- */
-void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_os_tld_t* tld) {
- mi_assert_internal(size > 0 && tld->stats != NULL);
+void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats) {
+ mi_assert_internal(size > 0 && stats != NULL);
if (p==NULL) return;
if (size==0) return;
if (memid == MI_MEMID_OS) {
// was a direct OS allocation, pass through
- _mi_os_free_ex(p, size, all_committed, tld->stats);
+ _mi_os_free_aligned(p, size, alignment, align_offset, all_committed, stats);
}
else {
// allocated in an arena
+ mi_assert_internal(align_offset == 0);
size_t arena_idx;
size_t bitmap_idx;
mi_arena_memid_indices(memid, &arena_idx, &bitmap_idx);
@@ -329,10 +330,10 @@ void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_o
}
else {
mi_assert_internal(arena->blocks_committed != NULL);
- _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, tld->stats); // ok if this fails
+ _mi_os_decommit(p, blocks * MI_ARENA_BLOCK_SIZE, stats); // ok if this fails
_mi_bitmap_unclaim_across(arena->blocks_committed, arena->field_count, blocks, bitmap_idx);
}
- // and make it available to others again
+ // and make it available to others again
bool all_inuse = _mi_bitmap_unclaim_across(arena->blocks_inuse, arena->field_count, blocks, bitmap_idx);
if (!all_inuse) {
_mi_error_message(EAGAIN, "trying to free an already freed block: %p, size %zu\n", p, size);
@@ -371,8 +372,8 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is
mi_assert_internal(is_committed);
is_committed = true;
}
-
- const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
+
+ const size_t bcount = size / MI_ARENA_BLOCK_SIZE;
const size_t fields = _mi_divide_up(bcount, MI_BITMAP_FIELD_BITS);
const size_t bitmaps = (is_committed ? 2 : 3);
const size_t asize = sizeof(mi_arena_t) + (bitmaps*fields*sizeof(mi_bitmap_field_t));
@@ -410,7 +411,7 @@ bool mi_manage_os_memory_ex(void* start, size_t size, bool is_committed, bool is
}
// Reserve a range of regular OS memory
-int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept
+int mi_reserve_os_memory_ex(size_t size, bool commit, bool allow_large, bool exclusive, mi_arena_id_t* arena_id) mi_attr_noexcept
{
if (arena_id != NULL) *arena_id = _mi_arena_id_none();
size = _mi_align_up(size, MI_ARENA_BLOCK_SIZE); // at least one block
diff --git a/source/luametatex/source/libraries/mimalloc/src/bitmap.c b/source/luametatex/source/libraries/mimalloc/src/bitmap.c
index 4fc7a1f3d..4ea9f4afa 100644
--- a/source/luametatex/source/libraries/mimalloc/src/bitmap.c
+++ b/source/luametatex/source/libraries/mimalloc/src/bitmap.c
@@ -188,15 +188,15 @@ bool _mi_bitmap_is_any_claimed(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
// between the fields. This is used in arena allocation
//--------------------------------------------------------------------------
-// Try to atomically claim a sequence of `count` bits starting from the field
+// Try to atomically claim a sequence of `count` bits starting from the field
// at `idx` in `bitmap` and crossing into subsequent fields. Returns `true` on success.
static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t idx, const size_t count, const size_t retries, mi_bitmap_index_t* bitmap_idx)
{
mi_assert_internal(bitmap_idx != NULL);
-
+
// check initial trailing zeros
mi_bitmap_field_t* field = &bitmap[idx];
- size_t map = mi_atomic_load_relaxed(field);
+ size_t map = mi_atomic_load_relaxed(field);
const size_t initial = mi_clz(map); // count of initial zeros starting at idx
mi_assert_internal(initial <= MI_BITMAP_FIELD_BITS);
if (initial == 0) return false;
@@ -231,14 +231,14 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
newmap = map | initial_mask;
if ((map & initial_mask) != 0) { goto rollback; };
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
-
+
// intermediate fields
while (++field < final_field) {
newmap = MI_BITMAP_FIELD_FULL;
map = 0;
if (!mi_atomic_cas_strong_acq_rel(field, &map, newmap)) { goto rollback; }
}
-
+
// final field
mi_assert_internal(field == final_field);
map = mi_atomic_load_relaxed(field);
@@ -251,7 +251,7 @@ static bool mi_bitmap_try_find_claim_field_across(mi_bitmap_t bitmap, size_t bit
*bitmap_idx = mi_bitmap_index_create(idx, MI_BITMAP_FIELD_BITS - initial);
return true;
-rollback:
+rollback:
// roll back intermediate fields
while (--field > initial_field) {
newmap = 0;
@@ -265,7 +265,7 @@ rollback:
mi_assert_internal((map & initial_mask) == initial_mask);
newmap = map & ~initial_mask;
} while (!mi_atomic_cas_strong_acq_rel(field, &map, newmap));
- }
+ }
// retry? (we make a recursive call instead of goto to be able to use const declarations)
if (retries < 4) {
return mi_bitmap_try_find_claim_field_across(bitmap, bitmap_fields, idx, count, retries+1, bitmap_idx);
@@ -330,7 +330,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
size_t pre_mask;
size_t mid_mask;
size_t post_mask;
- size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
+ size_t mid_count = mi_bitmap_mask_across(bitmap_idx, bitmap_fields, count, &pre_mask, &mid_mask, &post_mask);
bool all_one = true;
mi_bitmap_field_t* field = &bitmap[idx];
size_t prev = mi_atomic_and_acq_rel(field++, ~pre_mask);
@@ -343,7 +343,7 @@ bool _mi_bitmap_unclaim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t
prev = mi_atomic_and_acq_rel(field, ~post_mask);
if ((prev & post_mask) != post_mask) all_one = false;
}
- return all_one;
+ return all_one;
}
// Set `count` bits at `bitmap_idx` to 1 atomically
@@ -375,7 +375,7 @@ bool _mi_bitmap_claim_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t co
}
-// Returns `true` if all `count` bits were 1.
+// Returns `true` if all `count` bits were 1.
// `any_ones` is `true` if there was at least one bit set to one.
static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_fields, size_t count, mi_bitmap_index_t bitmap_idx, bool* pany_ones) {
size_t idx = mi_bitmap_index_field(bitmap_idx);
@@ -398,7 +398,7 @@ static bool mi_bitmap_is_claimedx_across(mi_bitmap_t bitmap, size_t bitmap_field
prev = mi_atomic_load_relaxed(field);
if ((prev & post_mask) != post_mask) all_ones = false;
if ((prev & post_mask) != 0) any_ones = true;
- }
+ }
if (pany_ones != NULL) *pany_ones = any_ones;
return all_ones;
}
diff --git a/source/luametatex/source/libraries/mimalloc/src/heap.c b/source/luametatex/source/libraries/mimalloc/src/heap.c
index 15ca36031..ac2d042bf 100644
--- a/source/luametatex/source/libraries/mimalloc/src/heap.c
+++ b/source/luametatex/source/libraries/mimalloc/src/heap.c
@@ -92,7 +92,7 @@ static bool mi_heap_page_collect(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_t
mi_collect_t collect = *((mi_collect_t*)arg_collect);
_mi_page_free_collect(page, collect >= MI_FORCE);
if (mi_page_all_free(page)) {
- // no more used blocks, free the page.
+ // no more used blocks, free the page.
// note: this will free retired pages as well.
_mi_page_free(page, pq, collect >= MI_FORCE);
}
@@ -133,7 +133,7 @@ static void mi_heap_collect_ex(mi_heap_t* heap, mi_collect_t collect)
// if all memory is freed by now, all segments should be freed.
_mi_abandoned_reclaim_all(heap, &heap->tld->segments);
}
-
+
// if abandoning, mark all pages to no longer add to delayed_free
if (collect == MI_ABANDON) {
mi_heap_visit_pages(heap, &mi_heap_page_never_delayed_free, NULL, NULL);
@@ -260,7 +260,7 @@ static void mi_heap_free(mi_heap_t* heap) {
// remove ourselves from the thread local heaps list
// linear search but we expect the number of heaps to be relatively small
mi_heap_t* prev = NULL;
- mi_heap_t* curr = heap->tld->heaps;
+ mi_heap_t* curr = heap->tld->heaps;
while (curr != heap && curr != NULL) {
prev = curr;
curr = curr->next;
@@ -347,7 +347,20 @@ void mi_heap_destroy(mi_heap_t* heap) {
}
}
-
+// Walk the heap list hanging off the backing heap's thread-local data and tear down every entry.
+// Heaps flagged `no_reclaim` go through `mi_heap_destroy`; all other heaps only have their
+// pages destroyed via `_mi_heap_destroy_pages`.
+void _mi_heap_destroy_all(void) {
+  mi_heap_t* const backing = mi_heap_get_backing();
+  mi_heap_t* following = NULL;
+  for (mi_heap_t* heap = backing->tld->heaps; heap != NULL; heap = following) {
+    following = heap->next;  // fetch the link first; `heap` is not read again once it has been torn down
+    if (!heap->no_reclaim) {
+      _mi_heap_destroy_pages(heap);
+    }
+    else {
+      mi_heap_destroy(heap);
+    }
+  }
+}
/* -----------------------------------------------------------
Safe Heap delete
@@ -360,8 +373,8 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
// reduce the size of the delayed frees
_mi_heap_delayed_free_partial(from);
-
- // transfer all pages by appending the queues; this will set a new heap field
+
+ // transfer all pages by appending the queues; this will set a new heap field
// so threads may do delayed frees in either heap for a while.
// note: appending waits for each page to not be in the `MI_DELAYED_FREEING` state
// so after this only the new heap will get delayed frees
@@ -374,17 +387,17 @@ static void mi_heap_absorb(mi_heap_t* heap, mi_heap_t* from) {
}
mi_assert_internal(from->page_count == 0);
- // and do outstanding delayed frees in the `from` heap
+ // and do outstanding delayed frees in the `from` heap
// note: be careful here as the `heap` field in all those pages no longer point to `from`,
- // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a
+ // turns out to be ok as `_mi_heap_delayed_free` only visits the list and calls a
// the regular `_mi_free_delayed_block` which is safe.
- _mi_heap_delayed_free_all(from);
+ _mi_heap_delayed_free_all(from);
#if !defined(_MSC_VER) || (_MSC_VER > 1900) // somehow the following line gives an error in VS2015, issue #353
mi_assert_internal(mi_atomic_load_ptr_relaxed(mi_block_t,&from->thread_delayed_free) == NULL);
#endif
// and reset the `from` heap
- mi_heap_reset_pages(from);
+ mi_heap_reset_pages(from);
}
// Safe delete a heap without freeing any still allocated blocks in that heap.
diff --git a/source/luametatex/source/libraries/mimalloc/src/init.c b/source/luametatex/source/libraries/mimalloc/src/init.c
index 4f37b7176..c416208cf 100644
--- a/source/luametatex/source/libraries/mimalloc/src/init.c
+++ b/source/luametatex/source/libraries/mimalloc/src/init.c
@@ -19,12 +19,12 @@ const mi_page_t _mi_page_empty = {
false, // is_zero
0, // retire_expire
NULL, // free
- #if MI_ENCODE_FREELIST
- { 0, 0 },
- #endif
0, // used
0, // xblock_size
NULL, // local_free
+ #if MI_ENCODE_FREELIST
+ { 0, 0 },
+ #endif
MI_ATOMIC_VAR_INIT(0), // xthread_free
MI_ATOMIC_VAR_INIT(0), // xheap
NULL, NULL
@@ -111,7 +111,7 @@ mi_decl_cache_align const mi_heap_t _mi_heap_empty = {
0, // cookie
0, // arena id
{ 0, 0 }, // keys
- { {0}, {0}, 0 },
+ { {0}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
NULL, // next
@@ -152,7 +152,7 @@ mi_heap_t _mi_heap_main = {
0, // initial cookie
0, // arena id
{ 0, 0 }, // the key of the main heap can be fixed (unlike page keys that need to be secure!)
- { {0x846ca68b}, {0}, 0 }, // random
+ { {0x846ca68b}, {0}, 0, true }, // random
0, // page count
MI_BIN_FULL, 0, // page retired min/max
NULL, // next heap
@@ -167,8 +167,13 @@ mi_stats_t _mi_stats_main = { MI_STATS_NULL };
static void mi_heap_main_init(void) {
if (_mi_heap_main.cookie == 0) {
_mi_heap_main.thread_id = _mi_thread_id();
- _mi_heap_main.cookie = _mi_os_random_weak((uintptr_t)&mi_heap_main_init);
- _mi_random_init(&_mi_heap_main.random);
+ _mi_heap_main.cookie = 1;
+ #if defined(_WIN32) && !defined(MI_SHARED_LIB)
+ _mi_random_init_weak(&_mi_heap_main.random); // prevent allocation failure during bcrypt dll initialization with static linking
+ #else
+ _mi_random_init(&_mi_heap_main.random);
+ #endif
+ _mi_heap_main.cookie = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[0] = _mi_heap_random_next(&_mi_heap_main);
_mi_heap_main.keys[1] = _mi_heap_random_next(&_mi_heap_main);
}
@@ -193,7 +198,7 @@ typedef struct mi_thread_data_s {
// Thread meta-data is allocated directly from the OS. For
// some programs that do not use thread pools and allocate and
-// destroy many OS threads, this may causes too much overhead
+// destroy many OS threads, this may cause too much overhead
// per thread so we maintain a small cache of recently freed metadata.
#define TD_CACHE_SIZE (8)
@@ -205,7 +210,7 @@ static mi_thread_data_t* mi_thread_data_alloc(void) {
for (int i = 0; i < TD_CACHE_SIZE; i++) {
td = mi_atomic_load_ptr_relaxed(mi_thread_data_t, &td_cache[i]);
if (td != NULL) {
- td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
+ td = mi_atomic_exchange_ptr_acq_rel(mi_thread_data_t, &td_cache[i], NULL);
if (td != NULL) {
return td;
}
@@ -283,7 +288,7 @@ static bool _mi_heap_init(void) {
tld->segments.stats = &tld->stats;
tld->segments.os = &tld->os;
tld->os.stats = &tld->stats;
- _mi_heap_set_default_direct(heap);
+ _mi_heap_set_default_direct(heap);
}
return false;
}
@@ -316,9 +321,9 @@ static bool _mi_heap_done(mi_heap_t* heap) {
if (heap != &_mi_heap_main) {
_mi_heap_collect_abandon(heap);
}
-
+
// merge stats
- _mi_stats_done(&heap->tld->stats);
+ _mi_stats_done(&heap->tld->stats);
// free if not the main thread
if (heap != &_mi_heap_main) {
@@ -329,8 +334,8 @@ static bool _mi_heap_done(mi_heap_t* heap) {
mi_thread_data_free((mi_thread_data_t*)heap);
}
else {
- mi_thread_data_collect(); // free cached thread metadata
- #if 0
+ mi_thread_data_collect(); // free cached thread metadata
+ #if 0
// never free the main thread even in debug mode; if a dll is linked statically with mimalloc,
// there may still be delete/free calls after the mi_fls_done is called. Issue #207
_mi_heap_destroy_pages(heap);
@@ -366,7 +371,7 @@ static void _mi_thread_done(mi_heap_t* default_heap);
// use thread local storage keys to detect thread ending
#include <windows.h>
#include <fibersapi.h>
- #if (_WIN32_WINNT < 0x600) // before Windows Vista
+ #if (_WIN32_WINNT < 0x600) // before Windows Vista
WINBASEAPI DWORD WINAPI FlsAlloc( _In_opt_ PFLS_CALLBACK_FUNCTION lpCallback );
WINBASEAPI PVOID WINAPI FlsGetValue( _In_ DWORD dwFlsIndex );
WINBASEAPI BOOL WINAPI FlsSetValue( _In_ DWORD dwFlsIndex, _In_opt_ PVOID lpFlsData );
@@ -374,7 +379,11 @@ static void _mi_thread_done(mi_heap_t* default_heap);
#endif
static DWORD mi_fls_key = (DWORD)(-1);
static void NTAPI mi_fls_done(PVOID value) {
- if (value!=NULL) _mi_thread_done((mi_heap_t*)value);
+ mi_heap_t* heap = (mi_heap_t*)value;
+ if (heap != NULL) {
+ _mi_thread_done(heap);
+ FlsSetValue(mi_fls_key, NULL); // prevent recursion as _mi_thread_done may set it back to the main heap, issue #672
+ }
}
#elif defined(MI_USE_PTHREADS)
// use pthread local storage keys to detect thread ending
@@ -421,7 +430,7 @@ void mi_thread_init(void) mi_attr_noexcept
{
// ensure our process has started already
mi_process_init();
-
+
// initialize the thread local default heap
// (this will call `_mi_heap_set_default_direct` and thus set the
// fiber/pthread key to a non-zero value, ensuring `_mi_thread_done` is called)
@@ -442,7 +451,7 @@ static void _mi_thread_done(mi_heap_t* heap) {
// check thread-id as on Windows shutdown with FLS the main (exit) thread may call this on thread-local heaps...
if (heap->thread_id != _mi_thread_id()) return;
-
+
// abandon the thread local heap
if (_mi_heap_done(heap)) return; // returns true if already ran
}
@@ -531,12 +540,13 @@ static void mi_process_load(void) {
MI_UNUSED(dummy);
#endif
os_preloading = false;
+ mi_assert_internal(_mi_is_main_thread());
#if !(defined(_WIN32) && defined(MI_SHARED_LIB)) // use Dll process detach (see below) instead of atexit (issue #521)
- atexit(&mi_process_done);
+ atexit(&mi_process_done);
#endif
_mi_options_init();
+ mi_process_setup_auto_thread_done();
mi_process_init();
- //mi_stats_reset();-
if (mi_redirected) _mi_verbose_message("malloc is redirected.\n");
// show message from the redirector (if present)
@@ -545,6 +555,9 @@ static void mi_process_load(void) {
if (msg != NULL && (mi_option_is_enabled(mi_option_verbose) || mi_option_is_enabled(mi_option_show_errors))) {
_mi_fputs(NULL,NULL,NULL,msg);
}
+
+ // reseed random
+ _mi_random_reinit_if_weak(&_mi_heap_main.random);
}
#if defined(_WIN32) && (defined(_M_IX86) || defined(_M_X64))
@@ -571,7 +584,6 @@ void mi_process_init(void) mi_attr_noexcept {
_mi_process_is_initialized = true;
mi_process_setup_auto_thread_done();
-
mi_detect_cpu_features();
_mi_os_init();
mi_heap_main_init();
@@ -579,6 +591,7 @@ void mi_process_init(void) mi_attr_noexcept {
_mi_verbose_message("debug level : %d\n", MI_DEBUG);
#endif
_mi_verbose_message("secure level: %d\n", MI_SECURE);
+ _mi_verbose_message("mem tracking: %s\n", MI_TRACK_TOOL);
mi_thread_init();
#if defined(_WIN32) && !defined(MI_SHARED_LIB)
@@ -598,7 +611,7 @@ void mi_process_init(void) mi_attr_noexcept {
} else {
mi_reserve_huge_os_pages_interleave(pages, 0, pages*500);
}
- }
+ }
if (mi_option_is_enabled(mi_option_reserve_os_memory)) {
long ksize = mi_option_get(mi_option_reserve_os_memory);
if (ksize > 0) {
@@ -619,9 +632,9 @@ static void mi_cdecl mi_process_done(void) {
#if defined(_WIN32) && !defined(MI_SHARED_LIB)
FlsFree(mi_fls_key); // call thread-done on all threads (except the main thread) to prevent dangling callback pointer if statically linked with a DLL; Issue #208
#endif
-
+
#ifndef MI_SKIP_COLLECT_ON_EXIT
- #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)
+ #if (MI_DEBUG != 0) || !defined(MI_SHARED_LIB)
// free all memory if possible on process exit. This is not needed for a stand-alone process
// but should be done if mimalloc is statically linked into another shared library which
// is repeatedly loaded/unloaded, see issue #281.
@@ -629,10 +642,18 @@ static void mi_cdecl mi_process_done(void) {
#endif
#endif
+ // Forcefully release all retained memory; this can be dangerous in general if overriding regular malloc/free
+ // since after process_done there might still be other code running that calls `free` (like `atexit` routines,
+ // or C-runtime termination code).
+ if (mi_option_is_enabled(mi_option_destroy_on_exit)) {
+ _mi_heap_destroy_all(); // forcefully release all memory held by all heaps (of this thread only!)
+ _mi_segment_cache_free_all(&_mi_heap_main_get()->tld->os); // release all cached segments
+ }
+
if (mi_option_is_enabled(mi_option_show_stats) || mi_option_is_enabled(mi_option_verbose)) {
mi_stats_print(NULL);
}
- mi_allocator_done();
+ mi_allocator_done();
_mi_verbose_message("process done: 0x%zx\n", _mi_heap_main.thread_id);
os_preloading = true; // don't call the C runtime anymore
}
@@ -654,7 +675,7 @@ static void mi_cdecl mi_process_done(void) {
if (!mi_is_redirected()) {
mi_thread_done();
}
- }
+ }
return TRUE;
}
diff --git a/source/luametatex/source/libraries/mimalloc/src/options.c b/source/luametatex/source/libraries/mimalloc/src/options.c
index 0182671ce..e53538f5f 100644
--- a/source/luametatex/source/libraries/mimalloc/src/options.c
+++ b/source/luametatex/source/libraries/mimalloc/src/options.c
@@ -76,7 +76,7 @@ static mi_option_desc_t options[_mi_option_last] =
{ 0, UNINIT, MI_OPTION(reserve_os_memory) },
{ 0, UNINIT, MI_OPTION(deprecated_segment_cache) }, // cache N segments per thread
{ 0, UNINIT, MI_OPTION(page_reset) }, // reset page memory on free
- { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates
+ { 0, UNINIT, MI_OPTION_LEGACY(abandoned_page_decommit, abandoned_page_reset) },// decommit free page memory when a thread terminates
{ 0, UNINIT, MI_OPTION(deprecated_segment_reset) },
#if defined(__NetBSD__)
{ 0, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed
@@ -86,15 +86,16 @@ static mi_option_desc_t options[_mi_option_last] =
{ 1, UNINIT, MI_OPTION(eager_commit_delay) }, // the first N segments per thread are not eagerly committed (but per page in the segment on demand)
#endif
{ 25, UNINIT, MI_OPTION_LEGACY(decommit_delay, reset_delay) }, // page decommit delay in milli-seconds
- { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
+ { 0, UNINIT, MI_OPTION(use_numa_nodes) }, // 0 = use available numa nodes, otherwise use at most N nodes.
{ 0, UNINIT, MI_OPTION(limit_os_alloc) }, // 1 = do not use OS memory for allocation (but only reserved arenas)
{ 100, UNINIT, MI_OPTION(os_tag) }, // only apple specific for now but might serve more or less related purpose
{ 16, UNINIT, MI_OPTION(max_errors) }, // maximum errors that are output
{ 16, UNINIT, MI_OPTION(max_warnings) }, // maximum warnings that are output
- { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try.
+ { 8, UNINIT, MI_OPTION(max_segment_reclaim)},// max. number of segment reclaims from the abandoned segments per try.
{ 1, UNINIT, MI_OPTION(allow_decommit) }, // decommit slices when no longer used (after decommit_delay milli-seconds)
{ 500, UNINIT, MI_OPTION(segment_decommit_delay) }, // decommit delay in milli-seconds for freed segments
- { 2, UNINIT, MI_OPTION(decommit_extend_delay) }
+ { 1, UNINIT, MI_OPTION(decommit_extend_delay) },
+ { 0, UNINIT, MI_OPTION(destroy_on_exit)} // release all OS memory on process exit; careful with dangling pointer or after-exit frees!
};
static void mi_option_init(mi_option_desc_t* desc);
@@ -106,7 +107,8 @@ void _mi_options_init(void) {
for(int i = 0; i < _mi_option_last; i++ ) {
mi_option_t option = (mi_option_t)i;
long l = mi_option_get(option); MI_UNUSED(l); // initialize
- if (option != mi_option_verbose) {
+ // if (option != mi_option_verbose)
+ {
mi_option_desc_t* desc = &options[option];
_mi_verbose_message("option '%s': %ld\n", desc->name, desc->value);
}
@@ -179,13 +181,26 @@ static void mi_cdecl mi_out_stderr(const char* msg, void* arg) {
if (!_mi_preloading()) {
// _cputs(msg); // _cputs cannot be used at is aborts if it fails to lock the console
static HANDLE hcon = INVALID_HANDLE_VALUE;
+ static bool hconIsConsole;
if (hcon == INVALID_HANDLE_VALUE) {
+ CONSOLE_SCREEN_BUFFER_INFO sbi;
hcon = GetStdHandle(STD_ERROR_HANDLE);
+ hconIsConsole = ((hcon != INVALID_HANDLE_VALUE) && GetConsoleScreenBufferInfo(hcon, &sbi));
}
const size_t len = strlen(msg);
- if (hcon != INVALID_HANDLE_VALUE && len > 0 && len < UINT32_MAX) {
+ if (len > 0 && len < UINT32_MAX) {
DWORD written = 0;
- WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL);
+ if (hconIsConsole) {
+ WriteConsoleA(hcon, msg, (DWORD)len, &written, NULL);
+ }
+ else if (hcon != INVALID_HANDLE_VALUE) {
+ // use direct write if stderr was redirected
+ WriteFile(hcon, msg, (DWORD)len, &written, NULL);
+ }
+ else {
+ // finally fall back to fputs after all
+ fputs(msg, stderr);
+ }
}
}
#else
@@ -480,13 +495,6 @@ static bool mi_getenv(const char* name, char* result, size_t result_size) {
return false;
}
#else
-static inline int mi_strnicmp(const char* s, const char* t, size_t n) {
- if (n==0) return 0;
- for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) {
- if (toupper(*s) != toupper(*t)) break;
- }
- return (n==0 ? 0 : *s - *t);
-}
#if defined _WIN32
// On Windows use GetEnvironmentVariable instead of getenv to work
// reliably even when this is invoked before the C runtime is initialized.
@@ -512,6 +520,13 @@ static char** mi_get_environ(void) {
return environ;
}
#endif
+static int mi_strnicmp(const char* s, const char* t, size_t n) {  // compare at most `n` chars ignoring case; returns 0 if equal, non-zero otherwise
+ if (n == 0) return 0;
+ for (; *s != 0 && *t != 0 && n > 0; s++, t++, n--) {
+ if (toupper((unsigned char)*s) != toupper((unsigned char)*t)) break;  // cast: passing a negative `char` to `toupper` is undefined behavior
+ }
+ return (n == 0 ? 0 : *s - *t);  // n > 0 here means a mismatch or one string ended early
+}
static bool mi_getenv(const char* name, char* result, size_t result_size) {
if (name==NULL) return false;
const size_t len = strlen(name);
@@ -570,7 +585,7 @@ static void mi_option_init(mi_option_desc_t* desc) {
found = mi_getenv(buf,s,sizeof(s));
if (found) {
_mi_warning_message("environment option \"mimalloc_%s\" is deprecated -- use \"mimalloc_%s\" instead.\n", desc->legacy_name, desc->name );
- }
+ }
}
if (found) {
diff --git a/source/luametatex/source/libraries/mimalloc/src/os.c b/source/luametatex/source/libraries/mimalloc/src/os.c
index 6d7249873..0f9847417 100644
--- a/source/luametatex/source/libraries/mimalloc/src/os.c
+++ b/source/luametatex/source/libraries/mimalloc/src/os.c
@@ -88,7 +88,7 @@ static size_t os_alloc_granularity = 4096;
// if non-zero, use large page allocation
static size_t large_os_page_size = 0;
-// is memory overcommit allowed?
+// is memory overcommit allowed?
// set dynamically in _mi_os_init (and if true we use MAP_NORESERVE)
static bool os_overcommit = true;
@@ -139,7 +139,7 @@ typedef enum MI_MEM_EXTENDED_PARAMETER_TYPE_E {
MiMemExtendedParameterUserPhysicalHandle,
MiMemExtendedParameterAttributeFlags,
MiMemExtendedParameterMax
-} MI_MEM_EXTENDED_PARAMETER_TYPE;
+} MI_MEM_EXTENDED_PARAMETER_TYPE;
typedef struct DECLSPEC_ALIGN(8) MI_MEM_EXTENDED_PARAMETER_S {
struct { DWORD64 Type : 8; DWORD64 Reserved : 56; } Type;
@@ -166,9 +166,11 @@ typedef struct MI_PROCESSOR_NUMBER_S { WORD Group; BYTE Number; BYTE Reserved; }
typedef VOID (__stdcall *PGetCurrentProcessorNumberEx)(MI_PROCESSOR_NUMBER* ProcNumber);
typedef BOOL (__stdcall *PGetNumaProcessorNodeEx)(MI_PROCESSOR_NUMBER* Processor, PUSHORT NodeNumber);
typedef BOOL (__stdcall* PGetNumaNodeProcessorMaskEx)(USHORT Node, PGROUP_AFFINITY ProcessorMask);
+typedef BOOL (__stdcall *PGetNumaProcessorNode)(UCHAR Processor, PUCHAR NodeNumber);
static PGetCurrentProcessorNumberEx pGetCurrentProcessorNumberEx = NULL;
static PGetNumaProcessorNodeEx pGetNumaProcessorNodeEx = NULL;
static PGetNumaNodeProcessorMaskEx pGetNumaNodeProcessorMaskEx = NULL;
+static PGetNumaProcessorNode pGetNumaProcessorNode = NULL;
static bool mi_win_enable_large_os_pages(void)
{
@@ -205,7 +207,7 @@ static bool mi_win_enable_large_os_pages(void)
return (ok!=0);
}
-void _mi_os_init(void)
+void _mi_os_init(void)
{
os_overcommit = false;
// get the page size
@@ -234,6 +236,7 @@ void _mi_os_init(void)
pGetCurrentProcessorNumberEx = (PGetCurrentProcessorNumberEx)(void (*)(void))GetProcAddress(hDll, "GetCurrentProcessorNumberEx");
pGetNumaProcessorNodeEx = (PGetNumaProcessorNodeEx)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNodeEx");
pGetNumaNodeProcessorMaskEx = (PGetNumaNodeProcessorMaskEx)(void (*)(void))GetProcAddress(hDll, "GetNumaNodeProcessorMaskEx");
+ pGetNumaProcessorNode = (PGetNumaProcessorNode)(void (*)(void))GetProcAddress(hDll, "GetNumaProcessorNode");
FreeLibrary(hDll);
}
if (mi_option_is_enabled(mi_option_large_os_pages) || mi_option_is_enabled(mi_option_reserve_huge_os_pages)) {
@@ -266,9 +269,9 @@ static void os_detect_overcommit(void) {
size_t olen = sizeof(val);
if (sysctlbyname("vm.overcommit", &val, &olen, NULL, 0) == 0) {
os_overcommit = (val != 0);
- }
+ }
#else
- // default: overcommit is true
+ // default: overcommit is true
#endif
}
@@ -306,10 +309,10 @@ static int mi_madvise(void* addr, size_t length, int advice) {
static mi_decl_cache_align _Atomic(uintptr_t)aligned_base;
// Return a MI_SEGMENT_SIZE aligned address that is probably available.
-// If this returns NULL, the OS will determine the address but on some OS's that may not be
+// If this returns NULL, the OS will determine the address but on some OS's that may not be
// properly aligned which can be more costly as it needs to be adjusted afterwards.
-// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
-// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
+// For a size > 1GiB this always returns NULL in order to guarantee good ASLR randomization;
+// (otherwise an initial large allocation of say 2TiB has a 50% chance to include (known) addresses
// in the middle of the 2TiB - 6TiB address range (see issue #372))
#define MI_HINT_BASE ((uintptr_t)2 << 40) // 2TiB start
@@ -383,12 +386,12 @@ static bool mi_os_mem_free(void* addr, size_t size, bool was_committed, mi_stats
#endif
if (was_committed) { _mi_stat_decrease(&stats->committed, size); }
_mi_stat_decrease(&stats->reserved, size);
- return !err;
+ return !err;
}
/* -----------------------------------------------------------
- Raw allocation on Windows (VirtualAlloc)
+ Raw allocation on Windows (VirtualAlloc)
-------------------------------------------------------------- */
#ifdef _WIN32
@@ -406,7 +409,7 @@ static void* mi_win_virtual_allocx(void* addr, size_t size, size_t try_alignment
_mi_verbose_message("warning: unable to allocate hinted aligned OS memory (%zu bytes, error code: 0x%x, address: %p, alignment: %zu, flags: 0x%x)\n", size, GetLastError(), hint, try_alignment, flags);
// fall through on error
}
- }
+ }
#endif
// on modern Windows try use VirtualAlloc2 for aligned allocation
if (try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0 && pVirtualAlloc2 != NULL) {
@@ -464,12 +467,12 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
-------------------------------------------------------------- */
#elif defined(MI_USE_SBRK) || defined(__wasi__)
-#if defined(MI_USE_SBRK)
+#if defined(MI_USE_SBRK)
static void* mi_memory_grow( size_t size ) {
void* p = sbrk(size);
if (p == (void*)(-1)) return NULL;
#if !defined(__wasi__) // on wasi this is always zero initialized already (?)
- memset(p,0,size);
+ memset(p,0,size);
#endif
return p;
}
@@ -477,8 +480,8 @@ static void* mi_win_virtual_alloc(void* addr, size_t size, size_t try_alignment,
static void* mi_memory_grow( size_t size ) {
size_t base = (size > 0 ? __builtin_wasm_memory_grow(0,_mi_divide_up(size, _mi_os_page_size()))
: __builtin_wasm_memory_size(0));
- if (base == SIZE_MAX) return NULL;
- return (void*)(base * _mi_os_page_size());
+ if (base == SIZE_MAX) return NULL;
+ return (void*)(base * _mi_os_page_size());
}
#endif
@@ -490,7 +493,7 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) {
void* p = NULL;
if (try_alignment <= 1) {
// `sbrk` is not thread safe in general so try to protect it (we could skip this on WASM but leave it in for now)
- #if defined(MI_USE_PTHREADS)
+ #if defined(MI_USE_PTHREADS)
pthread_mutex_lock(&mi_heap_grow_mutex);
#endif
p = mi_memory_grow(size);
@@ -512,7 +515,7 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) {
if (current != NULL) {
void* aligned_current = mi_align_up_ptr(current, try_alignment); // and align from there to minimize wasted space
alloc_size = _mi_align_up( ((uint8_t*)aligned_current - (uint8_t*)current) + size, _mi_os_page_size());
- base = mi_memory_grow(alloc_size);
+ base = mi_memory_grow(alloc_size);
}
}
#if defined(MI_USE_PTHREADS)
@@ -529,7 +532,7 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) {
}
}
if (p == NULL) {
- _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment);
+ _mi_warning_message("unable to allocate sbrk/wasm_memory_grow OS memory (%zu bytes, %zu alignment)\n", size, try_alignment);
errno = ENOMEM;
return NULL;
}
@@ -540,10 +543,10 @@ static void* mi_heap_grow(size_t size, size_t try_alignment) {
/* -----------------------------------------------------------
Raw allocation on Unix's (mmap)
-------------------------------------------------------------- */
-#else
+#else
#define MI_OS_USE_MMAP
static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int protect_flags, int flags, int fd) {
- MI_UNUSED(try_alignment);
+ MI_UNUSED(try_alignment);
#if defined(MAP_ALIGNED) // BSD
if (addr == NULL && try_alignment > 1 && (try_alignment % _mi_os_page_size()) == 0) {
size_t n = mi_bsr(try_alignment);
@@ -574,7 +577,7 @@ static void* mi_unix_mmapx(void* addr, size_t size, size_t try_alignment, int pr
#endif
// regular mmap
void* p = mmap(addr, size, protect_flags, flags, fd, 0);
- if (p!=MAP_FAILED) return p;
+ if (p!=MAP_FAILED) return p;
// failed to allocate
return NULL;
}
@@ -602,7 +605,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
int flags = MAP_PRIVATE | MAP_ANONYMOUS;
if (_mi_os_has_overcommit()) {
flags |= MAP_NORESERVE;
- }
+ }
#if defined(PROT_MAX)
protect_flags |= PROT_MAX(PROT_READ | PROT_WRITE); // BSD
#endif
@@ -685,7 +688,7 @@ static void* mi_unix_mmap(void* addr, size_t size, size_t try_alignment, int pro
if (memcntl((caddr_t)p, size, MC_HAT_ADVISE, (caddr_t)&cmd, 0, 0) == 0) {
*is_large = true;
}
- }
+ }
#endif
}
}
@@ -753,7 +756,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
// try first with a hint (this will be aligned directly on Win 10+ or BSD)
void* p = mi_os_mem_alloc(size, alignment, commit, allow_large, is_large, stats);
if (p == NULL) return NULL;
-
+
// if not aligned, free it, overallocate, and unmap around it
if (((uintptr_t)p % alignment != 0)) {
mi_os_mem_free(p, size, commit, stats);
@@ -765,7 +768,7 @@ static void* mi_os_mem_alloc_aligned(size_t size, size_t alignment, bool commit,
// over-allocate uncommitted (virtual) memory
p = mi_os_mem_alloc(over_size, 0 /*alignment*/, false /* commit? */, false /* allow_large */, is_large, stats);
if (p == NULL) return NULL;
-
+
// set p to the aligned part in the full region
// note: this is dangerous on Windows as VirtualFree needs the actual region pointer
// but in mi_os_mem_free we handle this (hopefully exceptional) situation.
@@ -837,7 +840,45 @@ void* _mi_os_alloc_aligned(size_t size, size_t alignment, bool commit, bool* lar
return mi_os_mem_alloc_aligned(size, alignment, commit, allow_large, (large!=NULL?large:&allow_large), &_mi_stats_main /*tld->stats*/ );
}
+/* -----------------------------------------------------------
+ OS aligned allocation with an offset. This is used
+ for large alignments > MI_ALIGNMENT_MAX. We use a large mimalloc
+ page where the object can be aligned at an offset from the start of the segment.
+ As we may need to overallocate, we need to free such pointers using `_mi_os_free_aligned`
+ to use the actual start of the memory region.
+----------------------------------------------------------- */
+void* _mi_os_alloc_aligned_offset(size_t size, size_t alignment, size_t offset, bool commit, bool* large, mi_stats_t* tld_stats) {  // returns `p` such that `p + offset` is `alignment`-aligned, or NULL on failure
+ mi_assert(offset <= MI_SEGMENT_SIZE);
+ mi_assert(offset <= size);
+ mi_assert((alignment % _mi_os_page_size()) == 0);
+ if (offset > MI_SEGMENT_SIZE) return NULL;  // run-time guard mirroring the first assert
+ if (offset == 0) {
+ // regular aligned allocation
+ return _mi_os_alloc_aligned(size, alignment, commit, large, tld_stats);
+ }
+ else {
+ // overallocate to align at an offset
+ const size_t extra = _mi_align_up(offset, alignment) - offset;  // padding in front so that `extra + offset` is a multiple of `alignment`
+ const size_t oversize = size + extra;
+ void* start = _mi_os_alloc_aligned(oversize, alignment, commit, large, tld_stats);
+ if (start == NULL) return NULL;
+ void* p = (uint8_t*)start + extra;  // pointer handed to the caller; the region itself begins at `start`
+ mi_assert(_mi_is_aligned((uint8_t*)p + offset, alignment));
+ // decommit the overallocation at the start
+ if (commit && extra > _mi_os_page_size()) {  // only if committed and the padding exceeds one OS page
+ _mi_os_decommit(start, extra, tld_stats);
+ }
+ return p;
+ }
+}
+
+void _mi_os_free_aligned(void* p, size_t size, size_t alignment, size_t align_offset, bool was_committed, mi_stats_t* tld_stats) {  // frees a region whose start lies `extra` bytes before `p`
+ mi_assert(align_offset <= MI_SEGMENT_SIZE);
+ const size_t extra = _mi_align_up(align_offset, alignment) - align_offset;  // same padding formula as in `_mi_os_alloc_aligned_offset`
+ void* start = (uint8_t*)p - extra;  // step back over the padding to the start of the region
+ _mi_os_free_ex(start, size + extra, was_committed, tld_stats);  // free padding and payload together
+}
/* -----------------------------------------------------------
OS memory API: reset, commit, decommit, protect, unprotect.
@@ -916,7 +957,7 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
// commit: just change the protection
err = mprotect(start, csize, (PROT_READ | PROT_WRITE));
if (err != 0) { err = errno; }
- }
+ }
else {
// decommit: use mmap with MAP_FIXED to discard the existing memory (and reduce rss)
const int fd = mi_unix_mmap_fd();
@@ -926,10 +967,10 @@ static bool mi_os_commitx(void* addr, size_t size, bool commit, bool conservativ
#else
// Linux, macOSX and others.
if (commit) {
- // commit: ensure we can access the area
+ // commit: ensure we can access the area
err = mprotect(start, csize, (PROT_READ | PROT_WRITE));
if (err != 0) { err = errno; }
- }
+ }
else {
#if defined(MADV_DONTNEED) && MI_DEBUG == 0 && MI_SECURE == 0
// decommit: use MADV_DONTNEED as it decreases rss immediately (unlike MADV_FREE)
@@ -1008,7 +1049,7 @@ static bool mi_os_resetx(void* addr, size_t size, bool reset, mi_stats_t* stats)
int oadvice = (int)mi_atomic_load_relaxed(&advice);
int err;
while ((err = mi_madvise(start, csize, oadvice)) != 0 && errno == EAGAIN) { errno = 0; };
- if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) {
+ if (err != 0 && errno == EINVAL && oadvice == MADV_FREE) {
// if MADV_FREE is not supported, fall back to MADV_DONTNEED from now on
mi_atomic_store_release(&advice, (size_t)MADV_DONTNEED);
err = mi_madvise(start, csize, MADV_DONTNEED);
@@ -1041,13 +1082,8 @@ bool _mi_os_reset(void* addr, size_t size, mi_stats_t* tld_stats) {
bool _mi_os_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* tld_stats) {
MI_UNUSED(tld_stats);
mi_stats_t* stats = &_mi_stats_main;
- if (mi_option_is_enabled(mi_option_reset_decommits)) {
- return mi_os_commit_unreset(addr, size, is_zero, stats); // re-commit it (conservatively!)
- }
- else {
- *is_zero = false;
- return mi_os_resetx(addr, size, false, stats);
- }
+ *is_zero = false;
+ return mi_os_resetx(addr, size, false, stats);
}
*/
@@ -1156,7 +1192,7 @@ static void* mi_os_alloc_huge_os_pagesx(void* addr, size_t size, int numa_node)
params[0].Arg.ULong = (unsigned)numa_node;
return (*pVirtualAlloc2)(GetCurrentProcess(), addr, size, flags, PAGE_READWRITE, params, 1);
}
-
+
// otherwise use regular virtual alloc on older windows
return VirtualAlloc(addr, size, flags, PAGE_READWRITE);
}
@@ -1305,7 +1341,7 @@ void _mi_os_free_huge_pages(void* p, size_t size, mi_stats_t* stats) {
/* ----------------------------------------------------------------------------
Support NUMA aware allocation
-----------------------------------------------------------------------------*/
-#ifdef _WIN32
+#ifdef _WIN32
static size_t mi_os_numa_nodex(void) {
USHORT numa_node = 0;
if (pGetCurrentProcessorNumberEx != NULL && pGetNumaProcessorNodeEx != NULL) {
@@ -1314,14 +1350,14 @@ static size_t mi_os_numa_nodex(void) {
(*pGetCurrentProcessorNumberEx)(&pnum);
USHORT nnode = 0;
BOOL ok = (*pGetNumaProcessorNodeEx)(&pnum, &nnode);
- if (ok) numa_node = nnode;
+ if (ok) { numa_node = nnode; }
}
- else {
+ else if (pGetNumaProcessorNode != NULL) {
// Vista or earlier, use older API that is limited to 64 processors. Issue #277
DWORD pnum = GetCurrentProcessorNumber();
UCHAR nnode = 0;
- BOOL ok = GetNumaProcessorNode((UCHAR)pnum, &nnode);
- if (ok) numa_node = nnode;
+ BOOL ok = pGetNumaProcessorNode((UCHAR)pnum, &nnode);
+ if (ok) { numa_node = nnode; }
}
return numa_node;
}
@@ -1425,7 +1461,7 @@ size_t _mi_os_numa_node_count_get(void) {
else {
count = mi_os_numa_node_countx(); // or detect dynamically
if (count == 0) count = 1;
- }
+ }
mi_atomic_store_release(&_mi_numa_node_count, count); // save it
_mi_verbose_message("using %zd numa regions\n", count);
}
diff --git a/source/luametatex/source/libraries/mimalloc/src/page-queue.c b/source/luametatex/source/libraries/mimalloc/src/page-queue.c
index 92f933c2a..cb54b3740 100644
--- a/source/luametatex/source/libraries/mimalloc/src/page-queue.c
+++ b/source/luametatex/source/libraries/mimalloc/src/page-queue.c
@@ -229,8 +229,9 @@ static void mi_page_queue_remove(mi_page_queue_t* queue, mi_page_t* page) {
static void mi_page_queue_push(mi_heap_t* heap, mi_page_queue_t* queue, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(!mi_page_queue_contains(queue, page));
-
+ #if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
+ #endif
mi_assert_internal(page->xblock_size == queue->block_size ||
(page->xblock_size > MI_MEDIUM_OBJ_SIZE_MAX) ||
(mi_page_is_in_full(page) && mi_page_queue_is_full(queue)));
@@ -304,7 +305,7 @@ size_t _mi_page_queue_append(mi_heap_t* heap, mi_page_queue_t* pq, mi_page_queue
for (mi_page_t* page = append->first; page != NULL; page = page->next) {
// inline `mi_page_set_heap` to avoid wrong assertion during absorption;
// in this case it is ok to be delayed freeing since both "to" and "from" heap are still alive.
- mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
+ mi_atomic_store_release(&page->xheap, (uintptr_t)heap);
// set the flag to delayed free (not overriding NEVER_DELAYED_FREE) which has as a
// side effect that it spins until any DELAYED_FREEING is finished. This ensures
// that after appending only the new heap will be used for delayed free operations.
diff --git a/source/luametatex/source/libraries/mimalloc/src/page.c b/source/luametatex/source/libraries/mimalloc/src/page.c
index 4b321156c..4250ff358 100644
--- a/source/luametatex/source/libraries/mimalloc/src/page.c
+++ b/source/luametatex/source/libraries/mimalloc/src/page.c
@@ -112,7 +112,10 @@ bool _mi_page_is_valid(mi_page_t* page) {
mi_segment_t* segment = _mi_page_segment(page);
mi_assert_internal(!_mi_process_is_initialized || segment->thread_id==0 || segment->thread_id == mi_page_heap(page)->thread_id);
- if (segment->kind != MI_SEGMENT_HUGE) {
+ #if MI_HUGE_PAGE_ABANDON
+ if (segment->kind != MI_SEGMENT_HUGE)
+ #endif
+ {
mi_page_queue_t* pq = mi_page_queue_of(page);
mi_assert_internal(mi_page_queue_contains(pq, page));
mi_assert_internal(pq->block_size==mi_page_block_size(page) || mi_page_block_size(page) > MI_MEDIUM_OBJ_SIZE_MAX || mi_page_is_in_full(page));
@@ -132,7 +135,7 @@ void _mi_page_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool overrid
bool _mi_page_try_use_delayed_free(mi_page_t* page, mi_delayed_t delay, bool override_never) {
mi_thread_free_t tfreex;
mi_delayed_t old_delay;
- mi_thread_free_t tfree;
+ mi_thread_free_t tfree;
size_t yield_count = 0;
do {
tfree = mi_atomic_load_acquire(&page->xthread_free); // note: must acquire as we can break/repeat this loop and not do a CAS;
@@ -245,7 +248,9 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
mi_assert_internal(mi_page_heap(page) == heap);
mi_assert_internal(mi_page_thread_free_flag(page) != MI_NEVER_DELAYED_FREE);
+ #if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
+ #endif
mi_assert_internal(!page->is_reset);
// TODO: push on full queue immediately if it is full?
mi_page_queue_t* pq = mi_page_queue(heap, mi_page_block_size(page));
@@ -254,17 +259,26 @@ void _mi_page_reclaim(mi_heap_t* heap, mi_page_t* page) {
}
// allocate a fresh page from a segment
-static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size) {
- mi_assert_internal(pq==NULL||mi_heap_contains_queue(heap, pq));
- mi_page_t* page = _mi_segment_page_alloc(heap, block_size, &heap->tld->segments, &heap->tld->os);
+static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size_t block_size, size_t page_alignment) {
+ #if !MI_HUGE_PAGE_ABANDON
+ mi_assert_internal(pq != NULL);
+ mi_assert_internal(mi_heap_contains_queue(heap, pq));
+ mi_assert_internal(page_alignment > 0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || block_size == pq->block_size);
+ #endif
+ mi_page_t* page = _mi_segment_page_alloc(heap, block_size, page_alignment, &heap->tld->segments, &heap->tld->os);
if (page == NULL) {
// this may be out-of-memory, or an abandoned page was reclaimed (and in our queue)
return NULL;
}
- mi_assert_internal(pq==NULL || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
- mi_page_init(heap, page, block_size, heap->tld);
+ mi_assert_internal(page_alignment >0 || block_size > MI_MEDIUM_OBJ_SIZE_MAX || _mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
+ mi_assert_internal(pq!=NULL || page->xblock_size != 0);
+ mi_assert_internal(pq!=NULL || mi_page_block_size(page) >= block_size);
+ // a fresh page was found, initialize it
+ const size_t full_block_size = ((pq == NULL || mi_page_queue_is_huge(pq)) ? mi_page_block_size(page) : block_size); // see also: mi_segment_huge_page_alloc
+ mi_assert_internal(full_block_size >= block_size);
+ mi_page_init(heap, page, full_block_size, heap->tld);
mi_heap_stat_increase(heap, pages, 1);
- if (pq!=NULL) mi_page_queue_push(heap, pq, page); // huge pages use pq==NULL
+ if (pq != NULL) { mi_page_queue_push(heap, pq, page); }
mi_assert_expensive(_mi_page_is_valid(page));
return page;
}
@@ -272,7 +286,7 @@ static mi_page_t* mi_page_fresh_alloc(mi_heap_t* heap, mi_page_queue_t* pq, size
// Get a fresh page to use
static mi_page_t* mi_page_fresh(mi_heap_t* heap, mi_page_queue_t* pq) {
mi_assert_internal(mi_heap_contains_queue(heap, pq));
- mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size);
+ mi_page_t* page = mi_page_fresh_alloc(heap, pq, pq->block_size, 0);
if (page==NULL) return NULL;
mi_assert_internal(pq->block_size==mi_page_block_size(page));
mi_assert_internal(pq==mi_page_queue(heap, mi_page_block_size(page)));
@@ -402,7 +416,7 @@ void _mi_page_free(mi_page_t* page, mi_page_queue_t* pq, bool force) {
}
// Retire parameters
-#define MI_MAX_RETIRE_SIZE MI_MEDIUM_OBJ_SIZE_MAX
+#define MI_MAX_RETIRE_SIZE (MI_MEDIUM_OBJ_SIZE_MAX)
#define MI_RETIRE_CYCLES (8)
// Retire a page with no more used blocks
@@ -425,7 +439,7 @@ void _mi_page_retire(mi_page_t* page) mi_attr_noexcept {
// how to check this efficiently though...
// for now, we don't retire if it is the only page left of this size class.
mi_page_queue_t* pq = mi_page_queue_of(page);
- if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_is_in_full(page)) {
+ if mi_likely(page->xblock_size <= MI_MAX_RETIRE_SIZE && !mi_page_queue_is_special(pq)) { // not too large && not full or huge queue?
if (pq->last==page && pq->first==page) { // the only page in the queue?
mi_stat_counter_increase(_mi_stats_main.page_no_retire,1);
page->retire_expire = 1 + (page->xblock_size <= MI_SMALL_OBJ_SIZE_MAX ? MI_RETIRE_CYCLES : MI_RETIRE_CYCLES/4);
@@ -573,7 +587,7 @@ static mi_decl_noinline void mi_page_free_list_extend( mi_page_t* const page, co
#if (MI_SECURE>0)
#define MI_MIN_EXTEND (8*MI_SECURE) // extend at least by this many
#else
-#define MI_MIN_EXTEND (1)
+#define MI_MIN_EXTEND (4)
#endif
// Extend the capacity (up to reserved) by initializing a free list
@@ -603,7 +617,7 @@ static void mi_page_extend_free(mi_heap_t* heap, mi_page_t* page, mi_tld_t* tld)
size_t max_extend = (bsize >= MI_MAX_EXTEND_SIZE ? MI_MIN_EXTEND : MI_MAX_EXTEND_SIZE/(uint32_t)bsize);
if (max_extend < MI_MIN_EXTEND) { max_extend = MI_MIN_EXTEND; }
mi_assert_internal(max_extend > 0);
-
+
if (extend > max_extend) {
// ensure we don't touch memory beyond the page to reduce page commit.
// the `lean` benchmark tests this. Going from 1 to 8 increases rss by 50%.
@@ -648,6 +662,7 @@ static void mi_page_init(mi_heap_t* heap, mi_page_t* page, size_t block_size, mi
mi_assert_internal(page_size <= page->slice_count*MI_SEGMENT_SLICE_SIZE);
mi_assert_internal(page_size / block_size < (1L<<16));
page->reserved = (uint16_t)(page_size / block_size);
+ mi_assert_internal(page->reserved > 0);
#ifdef MI_ENCODE_FREELIST
page->keys[0] = _mi_heap_random_next(heap);
page->keys[1] = _mi_heap_random_next(heap);
@@ -725,7 +740,7 @@ static mi_page_t* mi_page_queue_find_free_ex(mi_heap_t* heap, mi_page_queue_t* p
page = mi_page_fresh(heap, pq);
if (page == NULL && first_try) {
// out-of-memory _or_ an abandoned page with free blocks was reclaimed, try once again
- page = mi_page_queue_find_free_ex(heap, pq, false);
+ page = mi_page_queue_find_free_ex(heap, pq, false);
}
}
else {
@@ -743,17 +758,17 @@ static inline mi_page_t* mi_find_free_page(mi_heap_t* heap, size_t size) {
mi_page_queue_t* pq = mi_page_queue(heap,size);
mi_page_t* page = pq->first;
if (page != NULL) {
- #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness
+ #if (MI_SECURE>=3) // in secure mode, we extend half the time to increase randomness
if (page->capacity < page->reserved && ((_mi_heap_random_next(heap) & 1) == 1)) {
mi_page_extend_free(heap, page, heap->tld);
mi_assert_internal(mi_page_immediate_available(page));
}
- else
+ else
#endif
{
_mi_page_free_collect(page,false);
}
-
+
if (mi_page_immediate_available(page)) {
page->retire_expire = 0;
return page; // fast path
@@ -797,21 +812,28 @@ void mi_register_deferred_free(mi_deferred_free_fun* fn, void* arg) mi_attr_noex
// Because huge pages contain just one block, and the segment contains
// just that page, we always treat them as abandoned and any thread
// that frees the block can free the whole page and segment directly.
-static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
+// Huge pages are also used if the requested alignment is very large (> MI_ALIGNMENT_MAX).
+static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size, size_t page_alignment) {
size_t block_size = _mi_os_good_alloc_size(size);
- mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE);
- bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX);
+ mi_assert_internal(mi_bin(block_size) == MI_BIN_HUGE || page_alignment > 0);
+ bool is_huge = (block_size > MI_LARGE_OBJ_SIZE_MAX || page_alignment > 0);
+ #if MI_HUGE_PAGE_ABANDON
mi_page_queue_t* pq = (is_huge ? NULL : mi_page_queue(heap, block_size));
- mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size);
+ #else
+ mi_page_queue_t* pq = mi_page_queue(heap, is_huge ? MI_HUGE_BLOCK_SIZE : block_size); // not block_size as that can be low if the page_alignment > 0
+ mi_assert_internal(!is_huge || mi_page_queue_is_huge(pq));
+ #endif
+ mi_page_t* page = mi_page_fresh_alloc(heap, pq, block_size, page_alignment);
if (page != NULL) {
mi_assert_internal(mi_page_immediate_available(page));
- if (pq == NULL) {
- // huge pages are directly abandoned
+ if (is_huge) {
mi_assert_internal(_mi_page_segment(page)->kind == MI_SEGMENT_HUGE);
mi_assert_internal(_mi_page_segment(page)->used==1);
+ #if MI_HUGE_PAGE_ABANDON
mi_assert_internal(_mi_page_segment(page)->thread_id==0); // abandoned, not in the huge queue
mi_page_set_heap(page, NULL);
+ #endif
}
else {
mi_assert_internal(_mi_page_segment(page)->kind != MI_SEGMENT_HUGE);
@@ -833,16 +855,16 @@ static mi_page_t* mi_large_huge_page_alloc(mi_heap_t* heap, size_t size) {
// Allocate a page
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
-static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept {
+static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size, size_t huge_alignment) mi_attr_noexcept {
// huge allocation?
const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
- if mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE)) {
+ if mi_unlikely(req_size > (MI_MEDIUM_OBJ_SIZE_MAX - MI_PADDING_SIZE) || huge_alignment > 0) {
if mi_unlikely(req_size > PTRDIFF_MAX) { // we don't allocate more than PTRDIFF_MAX (see <https://sourceware.org/ml/libc-announce/2019/msg00001.html>)
_mi_error_message(EOVERFLOW, "allocation request is too large (%zu bytes)\n", req_size);
return NULL;
}
else {
- return mi_large_huge_page_alloc(heap,size);
+ return mi_large_huge_page_alloc(heap,size,huge_alignment);
}
}
else {
@@ -854,7 +876,9 @@ static mi_page_t* mi_find_page(mi_heap_t* heap, size_t size) mi_attr_noexcept {
// Generic allocation routine if the fast path (`alloc.c:mi_page_malloc`) does not succeed.
// Note: in debug mode the size includes MI_PADDING_SIZE and might have overflowed.
-void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexcept
+// The `huge_alignment` is normally 0 but is set to a multiple of MI_SEGMENT_SIZE for
+// very large requested alignments in which case we use a huge segment.
+void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero, size_t huge_alignment) mi_attr_noexcept
{
mi_assert_internal(heap != NULL);
@@ -873,14 +897,14 @@ void* _mi_malloc_generic(mi_heap_t* heap, size_t size, bool zero) mi_attr_noexce
_mi_heap_delayed_free_partial(heap);
// find (or allocate) a page of the right size
- mi_page_t* page = mi_find_page(heap, size);
+ mi_page_t* page = mi_find_page(heap, size, huge_alignment);
if mi_unlikely(page == NULL) { // first time out of memory, try to collect and retry the allocation once more
mi_heap_collect(heap, true /* force */);
- page = mi_find_page(heap, size);
+ page = mi_find_page(heap, size, huge_alignment);
}
if mi_unlikely(page == NULL) { // out of memory
- const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
+ const size_t req_size = size - MI_PADDING_SIZE; // correct for padding_size in case of an overflow on `size`
_mi_error_message(ENOMEM, "unable to allocate memory (%zu bytes)\n", req_size);
return NULL;
}
diff --git a/source/luametatex/source/libraries/mimalloc/src/random.c b/source/luametatex/source/libraries/mimalloc/src/random.c
index a5f5e6b82..06d4ba4ad 100644
--- a/source/luametatex/source/libraries/mimalloc/src/random.c
+++ b/source/luametatex/source/libraries/mimalloc/src/random.c
@@ -168,9 +168,9 @@ If we cannot get good randomness, we fall back to weak randomness based on a tim
#if defined(_WIN32)
-#if defined(MI_USE_RTLGENRANDOM) || defined(__cplusplus)
-// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using
-// dynamic overriding, we observed it can raise an exception when compiled with C++, and
+#if defined(MI_USE_RTLGENRANDOM) // || defined(__cplusplus)
+// We prefer to use BCryptGenRandom instead of (the unofficial) RtlGenRandom but when using
+// dynamic overriding, we observed it can raise an exception when compiled with C++, and
// sometimes deadlocks when also running under the VS debugger.
// In contrast, issue #623 implies that on Windows Server 2019 we need to use BCryptGenRandom.
// To be continued..
@@ -187,10 +187,27 @@ static bool os_random_buf(void* buf, size_t buf_len) {
return (RtlGenRandom(buf, (ULONG)buf_len) != 0);
}
#else
-#pragma comment (lib,"bcrypt.lib")
-#include <bcrypt.h>
+
+#ifndef BCRYPT_USE_SYSTEM_PREFERRED_RNG
+#define BCRYPT_USE_SYSTEM_PREFERRED_RNG 0x00000002
+#endif
+
+typedef LONG (NTAPI *PBCryptGenRandom)(HANDLE, PUCHAR, ULONG, ULONG);
+static PBCryptGenRandom pBCryptGenRandom = NULL;
+
static bool os_random_buf(void* buf, size_t buf_len) {
- return (BCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
+ if (pBCryptGenRandom == NULL) {
+ HINSTANCE hDll = LoadLibrary(TEXT("bcrypt.dll"));
+ if (hDll != NULL) {
+ pBCryptGenRandom = (PBCryptGenRandom)(void (*)(void))GetProcAddress(hDll, "BCryptGenRandom");
+ }
+ }
+ if (pBCryptGenRandom == NULL) {
+ return false;
+ }
+ else {
+ return (pBCryptGenRandom(NULL, (PUCHAR)buf, (ULONG)buf_len, BCRYPT_USE_SYSTEM_PREFERRED_RNG) >= 0);
+ }
}
#endif
@@ -203,7 +220,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
static bool os_random_buf(void* buf, size_t buf_len) {
#if defined(MAC_OS_X_VERSION_10_15) && MAC_OS_X_VERSION_MAX_ALLOWED >= MAC_OS_X_VERSION_10_15
// We prefere CCRandomGenerateBytes as it returns an error code while arc4random_buf
- // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
+ // may fail silently on macOS. See PR #390, and <https://opensource.apple.com/source/Libc/Libc-1439.40.11/gen/FreeBSD/arc4random.c.auto.html>
return (CCRandomGenerateBytes(buf, buf_len) == kCCSuccess);
#else
// fall back on older macOS
@@ -281,7 +298,7 @@ static bool os_random_buf(void* buf, size_t buf_len) {
uintptr_t _mi_os_random_weak(uintptr_t extra_seed) {
uintptr_t x = (uintptr_t)&_mi_os_random_weak ^ extra_seed; // ASLR makes the address random
-
+
#if defined(_WIN32)
LARGE_INTEGER pcount;
QueryPerformanceCounter(&pcount);
@@ -303,23 +320,41 @@ uintptr_t _mi_os_random_weak(uintptr_t extra_seed) {
return x;
}
-void _mi_random_init(mi_random_ctx_t* ctx) {
+static void mi_random_init_ex(mi_random_ctx_t* ctx, bool use_weak) {
uint8_t key[32];
- if (!os_random_buf(key, sizeof(key))) {
+ if (use_weak || !os_random_buf(key, sizeof(key))) {
// if we fail to get random data from the OS, we fall back to a
// weak random source based on the current time
#if !defined(__wasi__)
- _mi_warning_message("unable to use secure randomness\n");
+ if (!use_weak) { _mi_warning_message("unable to use secure randomness\n"); }
#endif
uintptr_t x = _mi_os_random_weak(0);
for (size_t i = 0; i < 8; i++) { // key is eight 32-bit words.
x = _mi_random_shuffle(x);
((uint32_t*)key)[i] = (uint32_t)x;
}
+ ctx->weak = true;
+ }
+ else {
+ ctx->weak = false;
}
chacha_init(ctx, key, (uintptr_t)ctx /*nonce*/ );
}
+void _mi_random_init(mi_random_ctx_t* ctx) {
+ mi_random_init_ex(ctx, false);
+}
+
+void _mi_random_init_weak(mi_random_ctx_t * ctx) {
+ mi_random_init_ex(ctx, true);
+}
+
+void _mi_random_reinit_if_weak(mi_random_ctx_t * ctx) {
+ if (ctx->weak) {
+ _mi_random_init(ctx);
+ }
+}
+
/* --------------------------------------------------------
test vectors from <https://tools.ietf.org/html/rfc8439>
----------------------------------------------------------- */
diff --git a/source/luametatex/source/libraries/mimalloc/src/region.c b/source/luametatex/source/libraries/mimalloc/src/region.c
index 57d11fe8d..3571abb60 100644
--- a/source/luametatex/source/libraries/mimalloc/src/region.c
+++ b/source/luametatex/source/libraries/mimalloc/src/region.c
@@ -16,8 +16,8 @@ We need this memory layer between the raw OS calls because of:
1. on `sbrk` like systems (like WebAssembly) we need our own memory maps in order
to reuse memory effectively.
2. It turns out that for large objects, between 1MiB and 32MiB (?), the cost of
- an OS allocation/free is still (much) too expensive relative to the accesses
- in that object :-( (`malloc-large` tests this). This means we need a cheaper
+ an OS allocation/free is still (much) too expensive relative to the accesses
+ in that object :-( (`malloc-large` tests this). This means we need a cheaper
way to reuse memory.
3. This layer allows for NUMA aware allocation.
@@ -47,35 +47,34 @@ bool _mi_os_commit(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
bool _mi_os_decommit(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_reset(void* p, size_t size, mi_stats_t* stats);
bool _mi_os_unreset(void* p, size_t size, bool* is_zero, mi_stats_t* stats);
+bool _mi_os_commit_unreset(void* addr, size_t size, bool* is_zero, mi_stats_t* stats);
// arena.c
mi_arena_id_t _mi_arena_id_none(void);
-void _mi_arena_free(void* p, size_t size, size_t memid, bool all_committed, mi_stats_t* stats);
+void _mi_arena_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t memid, bool all_committed, mi_stats_t* stats);
void* _mi_arena_alloc(size_t size, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
-void* _mi_arena_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
+void* _mi_arena_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t req_arena_id, size_t* memid, mi_os_tld_t* tld);
// Constants
#if (MI_INTPTR_SIZE==8)
-#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map
+#define MI_HEAP_REGION_MAX_SIZE (256 * MI_GiB) // 64KiB for the region map
#elif (MI_INTPTR_SIZE==4)
#define MI_HEAP_REGION_MAX_SIZE (3 * MI_GiB) // ~ KiB for the region map
#else
#error "define the maximum heap space allowed for regions on this platform"
#endif
-#define MI_SEGMENT_ALIGN MI_SEGMENT_SIZE
-
#define MI_REGION_MAX_BLOCKS MI_BITMAP_FIELD_BITS
#define MI_REGION_SIZE (MI_SEGMENT_SIZE * MI_BITMAP_FIELD_BITS) // 256MiB (64MiB on 32 bits)
#define MI_REGION_MAX (MI_HEAP_REGION_MAX_SIZE / MI_REGION_SIZE) // 1024 (48 on 32 bits)
#define MI_REGION_MAX_OBJ_BLOCKS (MI_REGION_MAX_BLOCKS/4) // 64MiB
-#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
+#define MI_REGION_MAX_OBJ_SIZE (MI_REGION_MAX_OBJ_BLOCKS*MI_SEGMENT_SIZE)
-// Region info
+// Region info
typedef union mi_region_info_u {
- size_t value;
+ size_t value;
struct {
bool valid; // initialized?
bool is_large:1; // allocated in fixed large/huge OS pages
@@ -89,7 +88,7 @@ typedef union mi_region_info_u {
// a bit map with one bit per MI_SEGMENT_SIZE (4MiB) block.
typedef struct mem_region_s {
_Atomic(size_t) info; // mi_region_info_t.value
- _Atomic(void*) start; // start of the memory area
+ _Atomic(void*) start; // start of the memory area
mi_bitmap_field_t in_use; // bit per in-use block
mi_bitmap_field_t dirty; // track if non-zero per block
mi_bitmap_field_t commit; // track if committed per block
@@ -102,7 +101,7 @@ typedef struct mem_region_s {
static mem_region_t regions[MI_REGION_MAX];
// Allocated regions
-static _Atomic(size_t) regions_count; // = 0;
+static _Atomic(size_t) regions_count; // = 0;
/* ----------------------------------------------------------------------------
@@ -137,7 +136,7 @@ mi_decl_nodiscard bool mi_is_in_heap_region(const void* p) mi_attr_noexcept {
static void* mi_region_blocks_start(const mem_region_t* region, mi_bitmap_index_t bit_idx) {
uint8_t* start = (uint8_t*)mi_atomic_load_ptr_acquire(uint8_t, &((mem_region_t*)region)->start);
mi_assert_internal(start != NULL);
- return (start + (bit_idx * MI_SEGMENT_SIZE));
+ return (start + (bit_idx * MI_SEGMENT_SIZE));
}
static size_t mi_memid_create(mem_region_t* region, mi_bitmap_index_t bit_idx) {
@@ -181,7 +180,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
bool is_zero = false;
bool is_pinned = false;
size_t arena_memid = 0;
- void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, &region_commit, &region_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld);
+ void* const start = _mi_arena_alloc_aligned(MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, &region_commit, &region_large, &is_pinned, &is_zero, _mi_arena_id_none(), & arena_memid, tld);
if (start == NULL) return false;
mi_assert_internal(!(region_large && !allow_large));
mi_assert_internal(!region_large || region_commit);
@@ -190,7 +189,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
const size_t idx = mi_atomic_increment_acq_rel(&regions_count);
if (idx >= MI_REGION_MAX) {
mi_atomic_decrement_acq_rel(&regions_count);
- _mi_arena_free(start, MI_REGION_SIZE, arena_memid, region_commit, tld->stats);
+ _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, region_commit, tld->stats);
_mi_warning_message("maximum regions used: %zu GiB (perhaps recompile with a larger setting for MI_HEAP_REGION_MAX_SIZE)", _mi_divide_up(MI_HEAP_REGION_MAX_SIZE, MI_GiB));
return false;
}
@@ -206,7 +205,7 @@ static bool mi_region_try_alloc_os(size_t blocks, bool commit, bool allow_large,
_mi_bitmap_claim(&r->in_use, 1, blocks, *bit_idx, NULL);
mi_atomic_store_ptr_release(void,&r->start, start);
- // and share it
+ // and share it
mi_region_info_t info;
info.value = 0; // initialize the full union to zero
info.x.valid = true;
@@ -243,7 +242,7 @@ static bool mi_region_is_suitable(const mem_region_t* region, int numa_node, boo
static bool mi_region_try_claim(int numa_node, size_t blocks, bool allow_large, mem_region_t** region, mi_bitmap_index_t* bit_idx, mi_os_tld_t* tld)
{
- // try all regions for a free slot
+ // try all regions for a free slot
const size_t count = mi_atomic_load_relaxed(&regions_count); // monotonic, so ok to be relaxed
size_t idx = tld->region_idx; // Or start at 0 to reuse low addresses? Starting at 0 seems to increase latency though
for (size_t visited = 0; visited < count; visited++, idx++) {
@@ -277,7 +276,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
return NULL;
}
}
-
+
// ------------------------------------------------
// found a region and claimed `blocks` at `bit_idx`, initialize them now
mi_assert_internal(region != NULL);
@@ -289,7 +288,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
mi_assert_internal(!(info.x.is_large && !*large));
mi_assert_internal(start != NULL);
- *is_zero = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL);
+ *is_zero = _mi_bitmap_claim(&region->dirty, 1, blocks, bit_idx, NULL);
*large = info.x.is_large;
*is_pinned = info.x.is_pinned;
*memid = mi_memid_create(region, bit_idx);
@@ -308,20 +307,20 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
return NULL;
}
- if (commit_zero) *is_zero = true;
+ if (commit_zero) *is_zero = true;
}
}
else {
// no need to commit, but check if already fully committed
*commit = _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx);
- }
+ }
mi_assert_internal(!*commit || _mi_bitmap_is_claimed(&region->commit, 1, blocks, bit_idx));
// unreset reset blocks
if (_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx)) {
// some blocks are still reset
mi_assert_internal(!info.x.is_large && !info.x.is_pinned);
- mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
+ mi_assert_internal(!mi_option_is_enabled(mi_option_eager_commit) || *commit || mi_option_get(mi_option_eager_commit_delay) > 0);
mi_bitmap_unclaim(&region->reset, 1, blocks, bit_idx);
if (*commit || !mi_option_is_enabled(mi_option_reset_decommits)) { // only if needed
bool reset_zero = false;
@@ -330,13 +329,13 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
}
}
mi_assert_internal(!_mi_bitmap_is_any_claimed(&region->reset, 1, blocks, bit_idx));
-
+
#if (MI_DEBUG>=2) && !MI_TRACK_ENABLED
if (*commit) { ((uint8_t*)p)[0] = 0; }
#endif
-
- // and return the allocation
- mi_assert_internal(p != NULL);
+
+ // and return the allocation
+ mi_assert_internal(p != NULL);
return p;
}
@@ -347,7 +346,7 @@ static void* mi_region_try_alloc(size_t blocks, bool* commit, bool* large, bool*
// Allocate `size` memory aligned at `alignment`. Return non NULL on success, with a given memory `id`.
// (`id` is abstract, but `id = idx*MI_REGION_MAP_BITS + bitidx`)
-void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
+void* _mi_mem_alloc_aligned(size_t size, size_t alignment, size_t align_offset, bool* commit, bool* large, bool* is_pinned, bool* is_zero, size_t* memid, mi_os_tld_t* tld)
{
mi_assert_internal(memid != NULL && tld != NULL);
mi_assert_internal(size > 0);
@@ -355,7 +354,7 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
*is_zero = false;
*is_pinned = false;
bool default_large = false;
- if (large==NULL) large = &default_large; // ensure `large != NULL`
+ if (large==NULL) large = &default_large; // ensure `large != NULL`
if (size == 0) return NULL;
size = _mi_align_up(size, _mi_os_page_size());
@@ -363,23 +362,23 @@ void* _mi_mem_alloc_aligned(size_t size, size_t alignment, bool* commit, bool* l
void* p = NULL;
size_t arena_memid;
const size_t blocks = mi_region_block_count(size);
- if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN) {
- p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld);
+ if (blocks <= MI_REGION_MAX_OBJ_BLOCKS && alignment <= MI_SEGMENT_ALIGN && align_offset == 0) {
+ p = mi_region_try_alloc(blocks, commit, large, is_pinned, is_zero, memid, tld);
if (p == NULL) {
_mi_warning_message("unable to allocate from region: size %zu\n", size);
}
}
if (p == NULL) {
// and otherwise fall back to the OS
- p = _mi_arena_alloc_aligned(size, alignment, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld);
+ p = _mi_arena_alloc_aligned(size, alignment, align_offset, commit, large, is_pinned, is_zero, _mi_arena_id_none(), & arena_memid, tld);
*memid = mi_memid_create_from_arena(arena_memid);
}
if (p != NULL) {
- mi_assert_internal((uintptr_t)p % alignment == 0);
+ mi_assert_internal(((uintptr_t)p + align_offset) % alignment == 0);
#if (MI_DEBUG>=2) && !MI_TRACK_ENABLED
if (*commit) { ((uint8_t*)p)[0] = 0; } // ensure the memory is committed
-#endif
+ #endif
}
return p;
}
@@ -391,21 +390,22 @@ Free
-----------------------------------------------------------------------------*/
// Free previously allocated memory with a given id.
-void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
+void _mi_mem_free(void* p, size_t size, size_t alignment, size_t align_offset, size_t id, bool full_commit, bool any_reset, mi_os_tld_t* tld) {
mi_assert_internal(size > 0 && tld != NULL);
if (p==NULL) return;
if (size==0) return;
size = _mi_align_up(size, _mi_os_page_size());
-
+
size_t arena_memid = 0;
mi_bitmap_index_t bit_idx;
mem_region_t* region;
if (mi_memid_is_arena(id,&region,&bit_idx,&arena_memid)) {
// was a direct arena allocation, pass through
- _mi_arena_free(p, size, arena_memid, full_commit, tld->stats);
+ _mi_arena_free(p, size, alignment, align_offset, arena_memid, full_commit, tld->stats);
}
else {
// allocated in a region
+ mi_assert_internal(align_offset == 0);
mi_assert_internal(size <= MI_REGION_MAX_OBJ_SIZE); if (size > MI_REGION_MAX_OBJ_SIZE) return;
const size_t blocks = mi_region_block_count(size);
mi_assert_internal(blocks + bit_idx <= MI_BITMAP_FIELD_BITS);
@@ -428,9 +428,9 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
}
// reset the blocks to reduce the working set.
- if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset)
+ if (!info.x.is_large && !info.x.is_pinned && mi_option_is_enabled(mi_option_segment_reset)
&& (mi_option_is_enabled(mi_option_eager_commit) ||
- mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
+ mi_option_is_enabled(mi_option_reset_decommits))) // cannot reset halfway committed segments, use only `option_page_reset` instead
{
bool any_unreset;
_mi_bitmap_claim(&region->reset, 1, blocks, bit_idx, &any_unreset);
@@ -438,7 +438,7 @@ void _mi_mem_free(void* p, size_t size, size_t id, bool full_commit, bool any_re
_mi_abandoned_await_readers(); // ensure no more pending write (in case reset = decommit)
_mi_mem_reset(p, blocks * MI_SEGMENT_SIZE, tld);
}
- }
+ }
// and unclaim
bool all_unclaimed = mi_bitmap_unclaim(&region->in_use, 1, blocks, bit_idx);
@@ -467,9 +467,9 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
memset((void*)&regions[i], 0, sizeof(mem_region_t)); // cast to void* to avoid atomic warning
// and release the whole region
mi_atomic_store_release(&region->info, (size_t)0);
- if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
+ if (start != NULL) { // && !_mi_os_is_huge_reserved(start)) {
_mi_abandoned_await_readers(); // ensure no pending reads
- _mi_arena_free(start, MI_REGION_SIZE, arena_memid, (~commit == 0), tld->stats);
+ _mi_arena_free(start, MI_REGION_SIZE, MI_SEGMENT_ALIGN, 0, arena_memid, (~commit == 0), tld->stats);
}
}
}
@@ -482,11 +482,21 @@ void _mi_mem_collect(mi_os_tld_t* tld) {
-----------------------------------------------------------------------------*/
bool _mi_mem_reset(void* p, size_t size, mi_os_tld_t* tld) {
- return _mi_os_reset(p, size, tld->stats);
+ if (mi_option_is_enabled(mi_option_reset_decommits)) {
+ return _mi_os_decommit(p, size, tld->stats);
+ }
+ else {
+ return _mi_os_reset(p, size, tld->stats);
+ }
}
bool _mi_mem_unreset(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
- return _mi_os_unreset(p, size, is_zero, tld->stats);
+ if (mi_option_is_enabled(mi_option_reset_decommits)) {
+ return _mi_os_commit(p, size, is_zero, tld->stats);
+ }
+ else {
+ return _mi_os_unreset(p, size, is_zero, tld->stats);
+ }
}
bool _mi_mem_commit(void* p, size_t size, bool* is_zero, mi_os_tld_t* tld) {
diff --git a/source/luametatex/source/libraries/mimalloc/src/segment-cache.c b/source/luametatex/source/libraries/mimalloc/src/segment-cache.c
index da726716a..d93fd6441 100644
--- a/source/luametatex/source/libraries/mimalloc/src/segment-cache.c
+++ b/source/luametatex/source/libraries/mimalloc/src/segment-cache.c
@@ -45,7 +45,11 @@ static bool mi_cdecl mi_segment_cache_is_suitable(mi_bitmap_index_t bitidx, void
return _mi_arena_memid_is_suitable(slot->memid, req_arena_id);
}
-mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld)
+mi_decl_noinline static void* mi_segment_cache_pop_ex(
+ bool all_suitable,
+ size_t size, mi_commit_mask_t* commit_mask,
+ mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero,
+ mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld)
{
#ifdef MI_CACHE_DISABLE
return NULL;
@@ -66,8 +70,8 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm
mi_bitmap_index_t bitidx = 0;
bool claimed = false;
mi_arena_id_t req_arena_id = _req_arena_id;
- mi_bitmap_pred_fun_t pred_fun = &mi_segment_cache_is_suitable; // cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache?
-
+ mi_bitmap_pred_fun_t pred_fun = (all_suitable ? NULL : &mi_segment_cache_is_suitable); // only pass NULL if `all_suitable`; otherwise we cannot pass NULL as the arena may be exclusive itself; todo: do not put exclusive arenas in the cache?
+
if (*large) { // large allowed?
claimed = _mi_bitmap_try_find_from_claim_pred(cache_available_large, MI_CACHE_FIELDS, start_field, 1, pred_fun, &req_arena_id, &bitidx);
if (claimed) *large = true;
@@ -97,6 +101,12 @@ mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* comm
#endif
}
+
+// Pop a cached segment region for regular allocation. This forwards to
+// `mi_segment_cache_pop_ex` with `all_suitable == false`, so the arena
+// suitability predicate is still applied against `_req_arena_id`.
+mi_decl_noinline void* _mi_segment_cache_pop(size_t size, mi_commit_mask_t* commit_mask, mi_commit_mask_t* decommit_mask, bool* large, bool* is_pinned, bool* is_zero, mi_arena_id_t _req_arena_id, size_t* memid, mi_os_tld_t* tld)
+{
+  const bool all_suitable = false;  // only accept slots that match the requested arena
+  void* const cached = mi_segment_cache_pop_ex(all_suitable, size, commit_mask, decommit_mask, large, is_pinned, is_zero, _req_arena_id, memid, tld);
+  return cached;
+}
+
static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, void* p, size_t total, mi_stats_t* stats)
{
if (mi_commit_mask_is_empty(cmask)) {
@@ -123,14 +133,14 @@ static mi_decl_noinline void mi_commit_mask_decommit(mi_commit_mask_t* cmask, vo
#define MI_MAX_PURGE_PER_PUSH (4)
-static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld)
+static mi_decl_noinline void mi_segment_cache_purge(bool visit_all, bool force, mi_os_tld_t* tld)
{
MI_UNUSED(tld);
if (!mi_option_is_enabled(mi_option_allow_decommit)) return;
mi_msecs_t now = _mi_clock_now();
size_t purged = 0;
- const size_t max_visits = (force ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */);
- size_t idx = (force ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ );
+ const size_t max_visits = (visit_all ? MI_CACHE_MAX /* visit all */ : MI_CACHE_FIELDS /* probe at most N (=16) slots */);
+ size_t idx = (visit_all ? 0 : _mi_random_shuffle((uintptr_t)now) % MI_CACHE_MAX /* random start */ );
for (size_t visited = 0; visited < max_visits; visited++,idx++) { // visit N slots
if (idx >= MI_CACHE_MAX) idx = 0; // wrap
mi_cache_slot_t* slot = &cache[idx];
@@ -154,13 +164,43 @@ static mi_decl_noinline void mi_segment_cache_purge(bool force, mi_os_tld_t* tld
}
_mi_bitmap_unclaim(cache_available, MI_CACHE_FIELDS, 1, bitidx); // make it available again for a pop
}
- if (!force && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push
+ if (!visit_all && purged > MI_MAX_PURGE_PER_PUSH) break; // bound to no more than N purge tries per push
}
}
}
void _mi_segment_cache_collect(bool force, mi_os_tld_t* tld) {
- mi_segment_cache_purge(force, tld );
+ if (force) {
+ // called on `mi_collect(true)` but not on thread termination
+ _mi_segment_cache_free_all(tld);
+ }
+ else {
+ mi_segment_cache_purge(true /* visit all */, false /* don't force unexpired */, tld);
+ }
+}
+
+// Drain the segment cache: keep popping cached regions (ignoring arena
+// suitability) and return each one through `_mi_arena_free`.
+void _mi_segment_cache_free_all(mi_os_tld_t* tld) {
+  const size_t segment_size = MI_SEGMENT_SIZE;
+  mi_commit_mask_t commit_mask;
+  mi_commit_mask_t decommit_mask;
+  bool is_pinned;
+  bool is_zero;
+  size_t memid;
+  // two passes: the first one allows large pages, the second one only regular memory
+  for (int pass = 0; pass < 2; pass++) {
+    for (;;) {
+      // `large` is in/out for the pop, so it is re-initialized on every attempt
+      bool large = (pass == 0);
+      void* const p = mi_segment_cache_pop_ex(true /* all */, segment_size, &commit_mask, &decommit_mask,
+                                              &large, &is_pinned, &is_zero, _mi_arena_id_none(), &memid, tld);
+      if (p == NULL) break;  // nothing left to pop in this pass
+      // un-count the committed part before releasing the region
+      const size_t committed = _mi_commit_mask_committed_size(&commit_mask, segment_size);
+      if (committed > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, committed); }
+      _mi_arena_free(p, segment_size, MI_SEGMENT_ALIGN, 0, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats);
+    }
+  }
 }
mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t memid, const mi_commit_mask_t* commit_mask, const mi_commit_mask_t* decommit_mask, bool is_large, bool is_pinned, mi_os_tld_t* tld)
@@ -181,7 +221,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me
}
// purge expired entries
- mi_segment_cache_purge(false /* force? */, tld);
+ mi_segment_cache_purge(false /* limit purges to a constant N */, false /* don't force unexpired */, tld);
// find an available slot
mi_bitmap_index_t bitidx;
@@ -245,7 +285,7 @@ mi_decl_noinline bool _mi_segment_cache_push(void* start, size_t size, size_t me
static _Atomic(uintptr_t) mi_segment_map[MI_SEGMENT_MAP_WSIZE + 1]; // 2KiB per TB with 64MiB segments
static size_t mi_segment_map_index_of(const mi_segment_t* segment, size_t* bitidx) {
- mi_assert_internal(_mi_ptr_segment(segment) == segment); // is it aligned on MI_SEGMENT_SIZE?
+ mi_assert_internal(_mi_ptr_segment(segment + 1) == segment); // is it aligned on MI_SEGMENT_SIZE?
if ((uintptr_t)segment >= MI_MAX_ADDRESS) {
*bitidx = 0;
return MI_SEGMENT_MAP_WSIZE;
@@ -285,8 +325,9 @@ void _mi_segment_map_freed_at(const mi_segment_t* segment) {
// Determine the segment belonging to a pointer or NULL if it is not in a valid segment.
static mi_segment_t* _mi_segment_of(const void* p) {
+ if (p == NULL) return NULL;
mi_segment_t* segment = _mi_ptr_segment(p);
- if (segment == NULL) return NULL;
+ mi_assert_internal(segment != NULL);
size_t bitidx;
size_t index = mi_segment_map_index_of(segment, &bitidx);
// fast path: for any pointer to valid small/medium/large object or first MI_SEGMENT_SIZE in huge
diff --git a/source/luametatex/source/libraries/mimalloc/src/segment.c b/source/luametatex/source/libraries/mimalloc/src/segment.c
index c76c2259e..dc98e3e7b 100644
--- a/source/luametatex/source/libraries/mimalloc/src/segment.c
+++ b/source/luametatex/source/libraries/mimalloc/src/segment.c
@@ -316,7 +316,7 @@ static uint8_t* _mi_segment_page_start_from_slice(const mi_segment_t* segment, c
ptrdiff_t idx = slice - segment->slices;
size_t psize = (size_t)slice->slice_count * MI_SEGMENT_SLICE_SIZE;
// make the start not OS page aligned for smaller blocks to avoid page/cache effects
- size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? MI_MAX_ALIGN_GUARANTEE : 0);
+ size_t start_offset = (xblock_size >= MI_INTPTR_SIZE && xblock_size <= 1024 ? 3*MI_MAX_ALIGN_GUARANTEE : 0);
if (page_size != NULL) { *page_size = psize - start_offset; }
return (uint8_t*)segment + ((idx*MI_SEGMENT_SLICE_SIZE) + start_offset);
}
@@ -336,12 +336,14 @@ static size_t mi_segment_calculate_slices(size_t required, size_t* pre_size, siz
size_t page_size = _mi_os_page_size();
size_t isize = _mi_align_up(sizeof(mi_segment_t), page_size);
size_t guardsize = 0;
-
+
if (MI_SECURE>0) {
// in secure mode, we set up a protected page in between the segment info
// and the page data (and one at the end of the segment)
- guardsize = page_size;
- required = _mi_align_up(required, page_size);
+ guardsize = page_size;
+ if (required > 0) {
+ required = _mi_align_up(required, MI_SEGMENT_SLICE_SIZE) + page_size;
+ }
}
if (pre_size != NULL) *pre_size = isize;
@@ -386,11 +388,13 @@ static void mi_segment_os_free(mi_segment_t* segment, mi_segments_tld_t* tld) {
// _mi_os_free(segment, mi_segment_size(segment), /*segment->memid,*/ tld->stats);
const size_t size = mi_segment_size(segment);
- if (size != MI_SEGMENT_SIZE || !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os)) {
+ if (size != MI_SEGMENT_SIZE || segment->mem_align_offset != 0 || segment->kind == MI_SEGMENT_HUGE || // only push regular segments on the cache
+ !_mi_segment_cache_push(segment, size, segment->memid, &segment->commit_mask, &segment->decommit_mask, segment->mem_is_large, segment->mem_is_pinned, tld->os))
+ {
const size_t csize = _mi_commit_mask_committed_size(&segment->commit_mask, size);
if (csize > 0 && !segment->mem_is_pinned) _mi_stat_decrease(&_mi_stats_main.committed, csize);
_mi_abandoned_await_readers(); // wait until safe to free
- _mi_arena_free(segment, mi_segment_size(segment), segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->os);
+ _mi_arena_free(segment, mi_segment_size(segment), segment->mem_alignment, segment->mem_align_offset, segment->memid, segment->mem_is_pinned /* pretend not committed to not double count decommits */, tld->stats);
}
}
@@ -402,11 +406,11 @@ void _mi_segment_thread_collect(mi_segments_tld_t* tld) {
/* -----------------------------------------------------------
- Span management
+ Commit/Decommit ranges
----------------------------------------------------------- */
static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uint8_t* p, size_t size, uint8_t** start_p, size_t* full_size, mi_commit_mask_t* cm) {
- mi_assert_internal(_mi_ptr_segment(p) == segment);
+ mi_assert_internal(_mi_ptr_segment(p + 1) == segment);
mi_assert_internal(segment->kind != MI_SEGMENT_HUGE);
mi_commit_mask_create_empty(cm);
if (size == 0 || size > MI_SEGMENT_SIZE || segment->kind == MI_SEGMENT_HUGE) return;
@@ -459,15 +463,6 @@ static void mi_segment_commit_mask(mi_segment_t* segment, bool conservative, uin
static bool mi_segment_commitx(mi_segment_t* segment, bool commit, uint8_t* p, size_t size, mi_stats_t* stats) {
mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask));
- // try to commit in at least MI_MINIMAL_COMMIT_SIZE sizes.
- /*
- if (commit && size > 0) {
- const size_t csize = _mi_align_up(size, MI_MINIMAL_COMMIT_SIZE);
- if (p + csize <= mi_segment_end(segment)) {
- size = csize;
- }
- }
- */
// commit liberal, but decommit conservative
uint8_t* start = NULL;
size_t full_size = 0;
@@ -536,8 +531,12 @@ static void mi_segment_perhaps_decommit(mi_segment_t* segment, uint8_t* p, size_
}
else if (segment->decommit_expire <= now) {
// previous decommit mask already expired
- // mi_segment_delayed_decommit(segment, true, stats);
- segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's
+ if (segment->decommit_expire + mi_option_get(mi_option_decommit_extend_delay) <= now) {
+ mi_segment_delayed_decommit(segment, true, stats);
+ }
+ else {
+ segment->decommit_expire = now + mi_option_get(mi_option_decommit_extend_delay); // (mi_option_get(mi_option_decommit_delay) / 8); // wait a tiny bit longer in case there is a series of free's
+ }
}
else {
// previous decommit mask is not yet expired, increase the expiration by a bit.
@@ -570,12 +569,16 @@ static void mi_segment_delayed_decommit(mi_segment_t* segment, bool force, mi_st
}
+/* -----------------------------------------------------------
+ Span free
+----------------------------------------------------------- */
+
// A segment is treated as abandoned when its recorded `thread_id` is 0.
static bool mi_segment_is_abandoned(mi_segment_t* segment) {
  const bool has_no_owner = (segment->thread_id == 0);
  return has_no_owner;
}
// note: can be called on abandoned segments
-static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
+static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size_t slice_count, bool allow_decommit, mi_segments_tld_t* tld) {
mi_assert_internal(slice_index < segment->slice_entries);
mi_span_queue_t* sq = (segment->kind == MI_SEGMENT_HUGE || mi_segment_is_abandoned(segment)
? NULL : mi_span_queue_for(slice_count,tld));
@@ -595,7 +598,9 @@ static void mi_segment_span_free(mi_segment_t* segment, size_t slice_index, size
}
// perhaps decommit
- mi_segment_perhaps_decommit(segment,mi_slice_start(slice),slice_count*MI_SEGMENT_SLICE_SIZE,tld->stats);
+ if (allow_decommit) {
+ mi_segment_perhaps_decommit(segment, mi_slice_start(slice), slice_count * MI_SEGMENT_SLICE_SIZE, tld->stats);
+ }
// and push it on the free page queue (if it was not a huge page)
if (sq != NULL) mi_span_queue_push( sq, slice );
@@ -657,27 +662,20 @@ static mi_slice_t* mi_segment_span_free_coalesce(mi_slice_t* slice, mi_segments_
}
// and add the new free page
- mi_segment_span_free(segment, mi_slice_index(slice), slice_count, tld);
+ mi_segment_span_free(segment, mi_slice_index(slice), slice_count, true, tld);
return slice;
}
-static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) {
- mi_assert_internal(_mi_ptr_segment(slice)==segment);
- mi_assert_internal(slice->slice_count >= slice_count);
- mi_assert_internal(slice->xblock_size > 0); // no more in free queue
- if (slice->slice_count <= slice_count) return;
- mi_assert_internal(segment->kind != MI_SEGMENT_HUGE);
- size_t next_index = mi_slice_index(slice) + slice_count;
- size_t next_count = slice->slice_count - slice_count;
- mi_segment_span_free(segment, next_index, next_count, tld);
- slice->slice_count = (uint32_t)slice_count;
-}
+
+/* -----------------------------------------------------------
+ Page allocation
+----------------------------------------------------------- */
// Note: may still return NULL if committing the memory failed
static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_index, size_t slice_count, mi_segments_tld_t* tld) {
mi_assert_internal(slice_index < segment->slice_entries);
- mi_slice_t* slice = &segment->slices[slice_index];
+ mi_slice_t* const slice = &segment->slices[slice_index];
mi_assert_internal(slice->xblock_size==0 || slice->xblock_size==1);
// commit before changing the slice data
@@ -698,18 +696,21 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i
size_t extra = slice_count-1;
if (extra > MI_MAX_SLICE_OFFSET) extra = MI_MAX_SLICE_OFFSET;
if (slice_index + extra >= segment->slice_entries) extra = segment->slice_entries - slice_index - 1; // huge objects may have more slices than avaiable entries in the segment->slices
- slice++;
- for (size_t i = 1; i <= extra; i++, slice++) {
- slice->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i);
- slice->slice_count = 0;
- slice->xblock_size = 1;
+
+ mi_slice_t* slice_next = slice + 1;
+ for (size_t i = 1; i <= extra; i++, slice_next++) {
+ slice_next->slice_offset = (uint32_t)(sizeof(mi_slice_t)*i);
+ slice_next->slice_count = 0;
+ slice_next->xblock_size = 1;
}
- // and also for the last one (if not set already) (the last one is needed for coalescing)
+ // and also for the last one (if not set already) (the last one is needed for coalescing and for large alignments)
// note: the cast is needed for ubsan since the index can be larger than MI_SLICES_PER_SEGMENT for huge allocations (see #543)
- mi_slice_t* last = &((mi_slice_t*)segment->slices)[slice_index + slice_count - 1];
- if (last < mi_segment_slices_end(segment) && last >= slice) {
- last->slice_offset = (uint32_t)(sizeof(mi_slice_t)*(slice_count-1));
+ mi_slice_t* last = slice + slice_count - 1;
+ mi_slice_t* end = (mi_slice_t*)mi_segment_slices_end(segment);
+ if (last > end) last = end;
+ if (last > slice) {
+ last->slice_offset = (uint32_t)(sizeof(mi_slice_t) * (last - slice));
last->slice_count = 0;
last->xblock_size = 1;
}
@@ -721,6 +722,18 @@ static mi_page_t* mi_segment_span_allocate(mi_segment_t* segment, size_t slice_i
return page;
}
+// Shrink the span starting at `slice` to exactly `slice_count` slices and hand
+// the remaining tail back as a free span (without scheduling a decommit for it).
+// Does nothing when the span is not larger than `slice_count`.
+static void mi_segment_slice_split(mi_segment_t* segment, mi_slice_t* slice, size_t slice_count, mi_segments_tld_t* tld) {
+  mi_assert_internal(_mi_ptr_segment(slice) == segment);
+  mi_assert_internal(slice->slice_count >= slice_count);
+  mi_assert_internal(slice->xblock_size > 0); // no more in free queue
+  if (slice->slice_count > slice_count) {
+    mi_assert_internal(segment->kind != MI_SEGMENT_HUGE);
+    const size_t tail_index = mi_slice_index(slice) + slice_count;
+    const size_t tail_count = slice->slice_count - slice_count;
+    mi_segment_span_free(segment, tail_index, tail_count, false /* don't decommit left-over part */, tld);
+    slice->slice_count = (uint32_t)slice_count;
+  }
+}
+
static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld) {
mi_assert_internal(slice_count*MI_SEGMENT_SLICE_SIZE <= MI_LARGE_OBJ_SIZE_MAX);
// search from best fit up
@@ -735,20 +748,20 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren
// found a suitable page span
mi_span_queue_delete(sq, slice);
- if (slice->slice_count > slice_count) {
- mi_segment_slice_split(segment, slice, slice_count, tld);
- }
- mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0);
- mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld);
- if (page == NULL) {
- // commit failed; return NULL but first restore the slice
- mi_segment_span_free_coalesce(slice, tld);
- return NULL;
+ if (slice->slice_count > slice_count) {
+ mi_segment_slice_split(segment, slice, slice_count, tld);
+ }
+ mi_assert_internal(slice != NULL && slice->slice_count == slice_count && slice->xblock_size > 0);
+ mi_page_t* page = mi_segment_span_allocate(segment, mi_slice_index(slice), slice->slice_count, tld);
+ if (page == NULL) {
+ // commit failed; return NULL but first restore the slice
+ mi_segment_span_free_coalesce(slice, tld);
+ return NULL;
+ }
+ return page;
}
- return page;
}
}
- }
sq++;
}
// could not find a page..
@@ -760,108 +773,129 @@ static mi_page_t* mi_segments_page_find_and_allocate(size_t slice_count, mi_aren
Segment allocation
----------------------------------------------------------- */
+// Obtain the backing memory for a new segment, either from the segment cache
+// or from the arena/OS layer, and make sure the segment-info slices are committed.
+//
+//   required        requested size passed on to `mi_segment_calculate_slices`
+//   page_alignment  0 for regular segments; otherwise the (>= MI_SEGMENT_ALIGN)
+//                   alignment wanted for the page data, which bypasses the cache
+//   eager_delay     when true, large OS pages are not allowed for this segment
+//   req_arena_id    arena the memory must be suitable for
+//   psegment_slices, ppre_size, pinfo_slices
+//                   in/out slice counts; recalculated when `page_alignment > 0`
+//   pcommit_mask, pdecommit_mask
+//                   out: commit state of the returned memory
+//   is_zero         out: passed through to the cache/arena/commit calls
+//   pcommit         in/out: commit request handed to the arena allocator
+//
+// Returns the segment with its `memid`, `mem_*` fields set and registered in
+// the segment map, or NULL if the allocation or the commit of the info slices
+// failed. On commit failure the memory is released again (it used to be leaked).
+static mi_segment_t* mi_segment_os_alloc( size_t required, size_t page_alignment, bool eager_delay, mi_arena_id_t req_arena_id,
+                                          size_t* psegment_slices, size_t* ppre_size, size_t* pinfo_slices,
+                                          mi_commit_mask_t* pcommit_mask, mi_commit_mask_t* pdecommit_mask,
+                                          bool* is_zero, bool* pcommit, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
+
+{
+  // Allocate the segment from the OS
+  bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
+  bool is_pinned = false;
+  size_t memid = 0;
+  size_t align_offset = 0;
+  size_t alignment = MI_SEGMENT_ALIGN;
+
+  if (page_alignment > 0) {
+    // mi_assert_internal(huge_page != NULL);
+    mi_assert_internal(page_alignment >= MI_SEGMENT_ALIGN);
+    alignment = page_alignment;
+    const size_t info_size = (*pinfo_slices) * MI_SEGMENT_SLICE_SIZE;
+    align_offset = _mi_align_up( info_size, MI_SEGMENT_ALIGN );
+    const size_t extra = align_offset - info_size;
+    // recalculate due to potential guard pages
+    *psegment_slices = mi_segment_calculate_slices(required + extra, ppre_size, pinfo_slices);
+    //segment_size += _mi_align_up(align_offset - info_size, MI_SEGMENT_SLICE_SIZE);
+    //segment_slices = segment_size / MI_SEGMENT_SLICE_SIZE;
+  }
+  const size_t segment_size = (*psegment_slices) * MI_SEGMENT_SLICE_SIZE;
+  mi_segment_t* segment = NULL;
+
+  // get from cache? (only for regular, non over-aligned segments)
+  if (page_alignment == 0) {
+    segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, pcommit_mask, pdecommit_mask, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld);
+  }
+
+  // get from OS
+  if (segment==NULL) {
+    segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, alignment, align_offset, pcommit, &mem_large, &is_pinned, is_zero, req_arena_id, &memid, os_tld);
+    if (segment == NULL) return NULL; // failed to allocate
+    if (*pcommit) {
+      mi_commit_mask_create_full(pcommit_mask);
+    }
+    else {
+      mi_commit_mask_create_empty(pcommit_mask);
+    }
+  }
+  mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
+
+  const size_t commit_needed = _mi_divide_up((*pinfo_slices)*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
+  mi_assert_internal(commit_needed>0);
+  mi_commit_mask_t commit_needed_mask;
+  mi_commit_mask_create(0, commit_needed, &commit_needed_mask);
+  if (!mi_commit_mask_all_set(pcommit_mask, &commit_needed_mask)) {
+    // at least commit the info slices
+    mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= (*pinfo_slices)*MI_SEGMENT_SLICE_SIZE);
+    bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, is_zero, tld->stats);
+    if (!ok) {
+      // failed to commit: give the memory back instead of leaking it.
+      // The segment header was never written, so free from the local values;
+      // the accounting mirrors what is done when a cached region is freed.
+      const size_t csize = _mi_commit_mask_committed_size(pcommit_mask, segment_size);
+      if (csize > 0 && !is_pinned) { _mi_stat_decrease(&_mi_stats_main.committed, csize); }
+      _mi_arena_free(segment, segment_size, alignment, align_offset, memid, is_pinned /* pretend not committed to not double count decommits */, tld->stats);
+      return NULL;
+    }
+    mi_commit_mask_set(pcommit_mask, &commit_needed_mask);
+  }
+  mi_track_mem_undefined(segment,commit_needed);
+  // the info slices are committed now, so the header fields can be written
+  segment->memid = memid;
+  segment->mem_is_pinned = is_pinned;
+  segment->mem_is_large = mem_large;
+  segment->mem_is_committed = mi_commit_mask_is_full(pcommit_mask);
+  segment->mem_alignment = alignment;
+  segment->mem_align_offset = align_offset;
+  mi_segments_track_size((long)(segment_size), tld);
+  _mi_segment_map_allocated_at(segment);
+  return segment;
+}
+
+
// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
-static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
+static mi_segment_t* mi_segment_alloc(size_t required, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page)
{
mi_assert_internal((required==0 && huge_page==NULL) || (required>0 && huge_page != NULL));
- mi_assert_internal((segment==NULL) || (segment!=NULL && required==0));
+
// calculate needed sizes first
size_t info_slices;
size_t pre_size;
- const size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);
- const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
- const size_t segment_size = segment_slices * MI_SEGMENT_SLICE_SIZE;
-
+ size_t segment_slices = mi_segment_calculate_slices(required, &pre_size, &info_slices);
+
// Commit eagerly only if not the first N lazy segments (to reduce impact of many threads that allocate just a little)
const bool eager_delay = (// !_mi_os_has_overcommit() && // never delay on overcommit systems
_mi_current_thread_count() > 1 && // do not delay for the first N threads
tld->count < (size_t)mi_option_get(mi_option_eager_commit_delay));
const bool eager = !eager_delay && mi_option_is_enabled(mi_option_eager_commit);
- bool commit = eager || (required > 0);
-
- // Try to get from our cache first
- bool is_zero = false;
- const bool commit_info_still_good = (segment != NULL);
+ bool commit = eager || (required > 0);
+ bool is_zero = false;
+
mi_commit_mask_t commit_mask;
mi_commit_mask_t decommit_mask;
- if (segment != NULL) {
- commit_mask = segment->commit_mask;
- decommit_mask = segment->decommit_mask;
- }
- else {
- mi_commit_mask_create_empty(&commit_mask);
- mi_commit_mask_create_empty(&decommit_mask);
- }
- if (segment==NULL) {
- // Allocate the segment from the OS
- bool mem_large = (!eager_delay && (MI_SECURE==0)); // only allow large OS pages once we are no longer lazy
- bool is_pinned = false;
- size_t memid = 0;
- segment = (mi_segment_t*)_mi_segment_cache_pop(segment_size, &commit_mask, &decommit_mask, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld);
- if (segment==NULL) {
- segment = (mi_segment_t*)_mi_arena_alloc_aligned(segment_size, MI_SEGMENT_SIZE, &commit, &mem_large, &is_pinned, &is_zero, req_arena_id, &memid, os_tld);
- if (segment == NULL) return NULL; // failed to allocate
- if (commit) {
- mi_commit_mask_create_full(&commit_mask);
- }
- else {
- mi_commit_mask_create_empty(&commit_mask);
- }
- }
- mi_assert_internal(segment != NULL && (uintptr_t)segment % MI_SEGMENT_SIZE == 0);
-
- const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
- mi_assert_internal(commit_needed>0);
- mi_commit_mask_t commit_needed_mask;
- mi_commit_mask_create(0, commit_needed, &commit_needed_mask);
- if (!mi_commit_mask_all_set(&commit_mask, &commit_needed_mask)) {
- // at least commit the info slices
- mi_assert_internal(commit_needed*MI_COMMIT_SIZE >= info_slices*MI_SEGMENT_SLICE_SIZE);
- bool ok = _mi_os_commit(segment, commit_needed*MI_COMMIT_SIZE, &is_zero, tld->stats);
- if (!ok) return NULL; // failed to commit
- mi_commit_mask_set(&commit_mask, &commit_needed_mask);
- }
- mi_track_mem_undefined(segment,commit_needed);
- segment->memid = memid;
- segment->mem_is_pinned = is_pinned;
- segment->mem_is_large = mem_large;
- segment->mem_is_committed = mi_commit_mask_is_full(&commit_mask);
- mi_segments_track_size((long)(segment_size), tld);
- _mi_segment_map_allocated_at(segment);
- }
-
- // zero the segment info? -- not always needed as it is zero initialized from the OS
+ mi_commit_mask_create_empty(&commit_mask);
+ mi_commit_mask_create_empty(&decommit_mask);
+
+ // Allocate the segment from the OS
+ mi_segment_t* segment = mi_segment_os_alloc(required, page_alignment, eager_delay, req_arena_id,
+ &segment_slices, &pre_size, &info_slices, &commit_mask, &decommit_mask,
+ &is_zero, &commit, tld, os_tld);
+ if (segment == NULL) return NULL;
+
+ // zero the segment info? -- not always needed as it may be zero initialized from the OS
mi_atomic_store_ptr_release(mi_segment_t, &segment->abandoned_next, NULL); // tsan
if (!is_zero) {
ptrdiff_t ofs = offsetof(mi_segment_t, next);
size_t prefix = offsetof(mi_segment_t, slices) - ofs;
- memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*segment_slices);
- }
-
- if (!commit_info_still_good) {
- segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed
- segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large);
- if (segment->allow_decommit) {
- segment->decommit_expire = _mi_clock_now() + mi_option_get(mi_option_decommit_delay);
- segment->decommit_mask = decommit_mask;
- mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask));
- #if MI_DEBUG>2
- const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
- mi_commit_mask_t commit_needed_mask;
- mi_commit_mask_create(0, commit_needed, &commit_needed_mask);
- mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask));
- #endif
- }
- else {
- mi_assert_internal(mi_commit_mask_is_empty(&decommit_mask));
- segment->decommit_expire = 0;
- mi_commit_mask_create_empty( &segment->decommit_mask );
- mi_assert_internal(mi_commit_mask_is_empty(&segment->decommit_mask));
- }
+ memset((uint8_t*)segment+ofs, 0, prefix + sizeof(mi_slice_t)*(segment_slices+1)); // one more
}
-
+ segment->commit_mask = commit_mask; // on lazy commit, the initial part is always committed
+ segment->allow_decommit = (mi_option_is_enabled(mi_option_allow_decommit) && !segment->mem_is_pinned && !segment->mem_is_large);
+ if (segment->allow_decommit) {
+ segment->decommit_expire = 0; // don't decommit just committed memory // _mi_clock_now() + mi_option_get(mi_option_decommit_delay);
+ segment->decommit_mask = decommit_mask;
+ mi_assert_internal(mi_commit_mask_all_set(&segment->commit_mask, &segment->decommit_mask));
+ #if MI_DEBUG>2
+ const size_t commit_needed = _mi_divide_up(info_slices*MI_SEGMENT_SLICE_SIZE, MI_COMMIT_SIZE);
+ mi_commit_mask_t commit_needed_mask;
+ mi_commit_mask_create(0, commit_needed, &commit_needed_mask);
+ mi_assert_internal(!mi_commit_mask_any_set(&segment->decommit_mask, &commit_needed_mask));
+ #endif
+ }
+
// initialize segment info
+ const size_t slice_entries = (segment_slices > MI_SLICES_PER_SEGMENT ? MI_SLICES_PER_SEGMENT : segment_slices);
segment->segment_slices = segment_slices;
segment->segment_info_slices = info_slices;
segment->thread_id = _mi_thread_id();
@@ -896,7 +930,7 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
// initialize initial free pages
if (segment->kind == MI_SEGMENT_NORMAL) { // not a huge page
mi_assert_internal(huge_page==NULL);
- mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, tld);
+ mi_segment_span_free(segment, info_slices, segment->slice_entries - info_slices, false /* don't decommit */, tld);
}
else {
mi_assert_internal(huge_page!=NULL);
@@ -911,12 +945,6 @@ static mi_segment_t* mi_segment_init(mi_segment_t* segment, size_t required, mi_
}
-// Allocate a segment from the OS aligned to `MI_SEGMENT_SIZE` .
-static mi_segment_t* mi_segment_alloc(size_t required, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld, mi_page_t** huge_page) {
- return mi_segment_init(NULL, required, req_arena_id, tld, os_tld, huge_page);
-}
-
-
static void mi_segment_free(mi_segment_t* segment, bool force, mi_segments_tld_t* tld) {
MI_UNUSED(force);
mi_assert_internal(segment != NULL);
@@ -1058,7 +1086,7 @@ static mi_decl_cache_align _Atomic(mi_segment_t*) abandoned_visited; // =
static mi_decl_cache_align _Atomic(mi_tagged_segment_t) abandoned; // = NULL
// Maintain these for debug purposes (these counts may be a bit off)
-static mi_decl_cache_align _Atomic(size_t) abandoned_count;
+static mi_decl_cache_align _Atomic(size_t) abandoned_count;
static mi_decl_cache_align _Atomic(size_t) abandoned_visited_count;
// We also maintain a count of current readers of the abandoned list
@@ -1369,7 +1397,7 @@ static mi_segment_t* mi_segment_try_reclaim(mi_heap_t* heap, size_t needed_slice
{
*reclaimed = false;
mi_segment_t* segment;
- long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times
+ long max_tries = mi_option_get_clamp(mi_option_max_segment_reclaim, 8, 1024); // limit the work to bound allocation times
while ((max_tries-- > 0) && ((segment = mi_abandoned_pop()) != NULL)) {
segment->abandoned_visits++;
// todo: an arena exclusive heap will potentially visit many abandoned unsuitable segments
@@ -1432,7 +1460,7 @@ void _mi_abandoned_collect(mi_heap_t* heap, bool force, mi_segments_tld_t* tld)
Reclaim or allocate
----------------------------------------------------------- */
-static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
+static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_slices, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_assert_internal(block_size < MI_HUGE_BLOCK_SIZE);
mi_assert_internal(block_size <= MI_LARGE_OBJ_SIZE_MAX);
@@ -1450,7 +1478,7 @@ static mi_segment_t* mi_segment_reclaim_or_alloc(mi_heap_t* heap, size_t needed_
return segment;
}
// 2. otherwise allocate a fresh segment
- return mi_segment_alloc(0, heap->arena_id, tld, os_tld, NULL);
+ return mi_segment_alloc(0, 0, heap->arena_id, tld, os_tld, NULL);
}
@@ -1490,17 +1518,37 @@ static mi_page_t* mi_segments_page_alloc(mi_heap_t* heap, mi_page_kind_t page_ki
Huge page allocation
----------------------------------------------------------- */
-static mi_page_t* mi_segment_huge_page_alloc(size_t size, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
+static mi_page_t* mi_segment_huge_page_alloc(size_t size, size_t page_alignment, mi_arena_id_t req_arena_id, mi_segments_tld_t* tld, mi_os_tld_t* os_tld)
{
mi_page_t* page = NULL;
- mi_segment_t* segment = mi_segment_alloc(size,req_arena_id,tld,os_tld,&page);
+ mi_segment_t* segment = mi_segment_alloc(size,page_alignment,req_arena_id,tld,os_tld,&page);
if (segment == NULL || page==NULL) return NULL;
mi_assert_internal(segment->used==1);
mi_assert_internal(mi_page_block_size(page) >= size);
+ #if MI_HUGE_PAGE_ABANDON
segment->thread_id = 0; // huge segments are immediately abandoned
+ #endif
+
+ // for huge pages we initialize the xblock_size as we may
+ // overallocate to accommodate large alignments.
+ size_t psize;
+ uint8_t* start = _mi_segment_page_start(segment, page, &psize);
+ page->xblock_size = (psize > MI_HUGE_BLOCK_SIZE ? MI_HUGE_BLOCK_SIZE : (uint32_t)psize);
+
+ // decommit the part of the prefix of a page that will not be used; this can be quite large (close to MI_SEGMENT_SIZE)
+ if (page_alignment > 0 && segment->allow_decommit) {
+ uint8_t* aligned_p = (uint8_t*)_mi_align_up((uintptr_t)start, page_alignment);
+ mi_assert_internal(_mi_is_aligned(aligned_p, page_alignment));
+ mi_assert_internal(psize - (aligned_p - start) >= size);
+ uint8_t* decommit_start = start + sizeof(mi_block_t); // for the free list
+ ptrdiff_t decommit_size = aligned_p - decommit_start;
+ _mi_os_decommit(decommit_start, decommit_size, &_mi_stats_main); // note: cannot use segment_decommit on huge segments
+ }
+
return page;
}
+#if MI_HUGE_PAGE_ABANDON
// free huge block from another thread
void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) {
// huge page segments are always abandoned and can be freed immediately by any thread
@@ -1528,12 +1576,34 @@ void _mi_segment_huge_page_free(mi_segment_t* segment, mi_page_t* page, mi_block
#endif
}
+#else
+// reset memory of a huge block from another thread
+void _mi_segment_huge_page_reset(mi_segment_t* segment, mi_page_t* page, mi_block_t* block) {
+ MI_UNUSED(page);
+ mi_assert_internal(segment->kind == MI_SEGMENT_HUGE);
+ mi_assert_internal(segment == _mi_page_segment(page));
+ mi_assert_internal(page->used == 1); // this is called just before the free
+ mi_assert_internal(page->free == NULL);
+ if (segment->allow_decommit) {
+ const size_t csize = mi_usable_size(block) - sizeof(mi_block_t);
+ uint8_t* p = (uint8_t*)block + sizeof(mi_block_t);
+ _mi_os_decommit(p, csize, &_mi_stats_main); // note: cannot use segment_decommit on huge segments
+ }
+}
+#endif
+
/* -----------------------------------------------------------
Page allocation and free
----------------------------------------------------------- */
-mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
+mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, size_t page_alignment, mi_segments_tld_t* tld, mi_os_tld_t* os_tld) {
mi_page_t* page;
- if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
+ if mi_unlikely(page_alignment > MI_ALIGNMENT_MAX) {
+ mi_assert_internal(_mi_is_power_of_two(page_alignment));
+ mi_assert_internal(page_alignment >= MI_SEGMENT_SIZE);
+ if (page_alignment < MI_SEGMENT_SIZE) { page_alignment = MI_SEGMENT_SIZE; }
+ page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld);
+ }
+ else if (block_size <= MI_SMALL_OBJ_SIZE_MAX) {
page = mi_segments_page_alloc(heap,MI_PAGE_SMALL,block_size,block_size,tld,os_tld);
}
else if (block_size <= MI_MEDIUM_OBJ_SIZE_MAX) {
@@ -1543,7 +1613,7 @@ mi_page_t* _mi_segment_page_alloc(mi_heap_t* heap, size_t block_size, mi_segment
page = mi_segments_page_alloc(heap,MI_PAGE_LARGE,block_size,block_size,tld, os_tld);
}
else {
- page = mi_segment_huge_page_alloc(block_size,heap->arena_id,tld,os_tld);
+ page = mi_segment_huge_page_alloc(block_size,page_alignment,heap->arena_id,tld,os_tld);
}
mi_assert_internal(page == NULL || _mi_heap_memid_is_suitable(heap, _mi_page_segment(page)->memid));
mi_assert_expensive(page == NULL || mi_segment_is_valid(_mi_page_segment(page),tld));
diff --git a/source/luametatex/source/libraries/mimalloc/src/stats.c b/source/luametatex/source/libraries/mimalloc/src/stats.c
index f82c7c67f..2a8b9404f 100644
--- a/source/luametatex/source/libraries/mimalloc/src/stats.c
+++ b/source/luametatex/source/libraries/mimalloc/src/stats.c
@@ -21,7 +21,7 @@ terms of the MIT license. A copy of the license can be found in the file
static bool mi_is_in_main(void* stat) {
return ((uint8_t*)stat >= (uint8_t*)&_mi_stats_main
- && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t)));
+ && (uint8_t*)stat < ((uint8_t*)&_mi_stats_main + sizeof(mi_stats_t)));
}
static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
@@ -51,7 +51,7 @@ static void mi_stat_update(mi_stat_count_t* stat, int64_t amount) {
}
}
-void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
+void _mi_stat_counter_increase(mi_stat_counter_t* stat, size_t amount) {
if (mi_is_in_main(stat)) {
mi_atomic_addi64_relaxed( &stat->count, 1 );
mi_atomic_addi64_relaxed( &stat->total, (int64_t)amount );
@@ -77,7 +77,7 @@ static void mi_stat_add(mi_stat_count_t* stat, const mi_stat_count_t* src, int64
mi_atomic_addi64_relaxed( &stat->allocated, src->allocated * unit);
mi_atomic_addi64_relaxed( &stat->current, src->current * unit);
mi_atomic_addi64_relaxed( &stat->freed, src->freed * unit);
- // peak scores do not work across threads..
+ // peak scores do not work across threads..
mi_atomic_addi64_relaxed( &stat->peak, src->peak * unit);
}
@@ -129,11 +129,11 @@ static void mi_stats_add(mi_stats_t* stats, const mi_stats_t* src) {
Display statistics
----------------------------------------------------------- */
-// unit > 0 : size in binary bytes
+// unit > 0 : size in binary bytes
// unit == 0: count as decimal
// unit < 0 : count in binary
static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void* arg, const char* fmt) {
- char buf[32]; buf[0] = 0;
+ char buf[32]; buf[0] = 0;
int len = 32;
const char* suffix = (unit <= 0 ? " " : "B");
const int64_t base = (unit == 0 ? 1000 : 1024);
@@ -146,7 +146,7 @@ static void mi_printf_amount(int64_t n, int64_t unit, mi_output_fun* out, void*
}
}
else {
- int64_t divider = base;
+ int64_t divider = base;
const char* magnitude = "K";
if (pos >= divider*base) { divider *= base; magnitude = "M"; }
if (pos >= divider*base) { divider *= base; magnitude = "G"; }
@@ -170,19 +170,23 @@ static void mi_print_count(int64_t n, int64_t unit, mi_output_fun* out, void* ar
else mi_print_amount(n,0,out,arg);
}
-static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg ) {
+static void mi_stat_print_ex(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg, const char* notok ) {
_mi_fprintf(out, arg,"%10s:", msg);
- if (unit>0) {
+ if (unit > 0) {
mi_print_amount(stat->peak, unit, out, arg);
mi_print_amount(stat->allocated, unit, out, arg);
mi_print_amount(stat->freed, unit, out, arg);
mi_print_amount(stat->current, unit, out, arg);
mi_print_amount(unit, 1, out, arg);
mi_print_count(stat->allocated, unit, out, arg);
- if (stat->allocated > stat->freed)
- _mi_fprintf(out, arg, " not all freed!\n");
- else
+ if (stat->allocated > stat->freed) {
+ _mi_fprintf(out, arg, " ");
+ _mi_fprintf(out, arg, (notok == NULL ? "not all freed!" : notok));
+ _mi_fprintf(out, arg, "\n");
+ }
+ else {
_mi_fprintf(out, arg, " ok\n");
+ }
}
else if (unit<0) {
mi_print_amount(stat->peak, -1, out, arg);
@@ -204,12 +208,16 @@ static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t
else {
mi_print_amount(stat->peak, 1, out, arg);
mi_print_amount(stat->allocated, 1, out, arg);
- _mi_fprintf(out, arg, "%11s", " "); // no freed
+ _mi_fprintf(out, arg, "%11s", " "); // no freed
mi_print_amount(stat->current, 1, out, arg);
_mi_fprintf(out, arg, "\n");
}
}
+static void mi_stat_print(const mi_stat_count_t* stat, const char* msg, int64_t unit, mi_output_fun* out, void* arg) {
+ mi_stat_print_ex(stat, msg, unit, out, arg, NULL);
+}
+
static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg ) {
_mi_fprintf(out, arg, "%10s:", msg);
mi_print_amount(stat->total, -1, out, arg);
@@ -217,7 +225,7 @@ static void mi_stat_counter_print(const mi_stat_counter_t* stat, const char* msg
}
static void mi_stat_counter_print_avg(const mi_stat_counter_t* stat, const char* msg, mi_output_fun* out, void* arg) {
- const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count));
+ const int64_t avg_tens = (stat->count == 0 ? 0 : (stat->total*10 / stat->count));
const long avg_whole = (long)(avg_tens/10);
const long avg_frac1 = (long)(avg_tens%10);
_mi_fprintf(out, arg, "%10s: %5ld.%ld avg\n", msg, avg_whole, avg_frac1);
@@ -257,7 +265,7 @@ typedef struct buffered_s {
mi_output_fun* out; // original output function
void* arg; // and state
char* buf; // local buffer of at least size `count+1`
- size_t used; // currently used chars `used <= count`
+ size_t used; // currently used chars `used <= count`
size_t count; // total chars available for output
} buffered_t;
@@ -312,8 +320,8 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
mi_stat_print(&stats->malloc, "malloc req", 1, out, arg);
_mi_fprintf(out, arg, "\n");
#endif
- mi_stat_print(&stats->reserved, "reserved", 1, out, arg);
- mi_stat_print(&stats->committed, "committed", 1, out, arg);
+ mi_stat_print_ex(&stats->reserved, "reserved", 1, out, arg, "");
+ mi_stat_print_ex(&stats->committed, "committed", 1, out, arg, "");
mi_stat_print(&stats->reset, "reset", 1, out, arg);
mi_stat_print(&stats->page_committed, "touched", 1, out, arg);
mi_stat_print(&stats->segments, "segments", -1, out, arg);
@@ -328,7 +336,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
mi_stat_print(&stats->threads, "threads", -1, out, arg);
mi_stat_counter_print_avg(&stats->searches, "searches", out, arg);
_mi_fprintf(out, arg, "%10s: %7zu\n", "numa nodes", _mi_os_numa_node_count());
-
+
mi_msecs_t elapsed;
mi_msecs_t user_time;
mi_msecs_t sys_time;
@@ -346,7 +354,7 @@ static void _mi_stats_print(mi_stats_t* stats, mi_output_fun* out0, void* arg0)
_mi_fprintf(out, arg, ", commit: ");
mi_printf_amount((int64_t)peak_commit, 1, out, arg, "%s");
}
- _mi_fprintf(out, arg, "\n");
+ _mi_fprintf(out, arg, "\n");
}
static mi_msecs_t mi_process_start; // = 0
@@ -406,7 +414,7 @@ static mi_msecs_t mi_to_msecs(LARGE_INTEGER t) {
mfreq.QuadPart = f.QuadPart/1000LL;
if (mfreq.QuadPart == 0) mfreq.QuadPart = 1;
}
- return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart);
+ return (mi_msecs_t)(t.QuadPart / mfreq.QuadPart);
}
mi_msecs_t _mi_clock_now(void) {
@@ -421,7 +429,7 @@ mi_msecs_t _mi_clock_now(void) {
struct timespec t;
#ifdef CLOCK_MONOTONIC
clock_gettime(CLOCK_MONOTONIC, &t);
- #else
+ #else
clock_gettime(CLOCK_REALTIME, &t);
#endif
return ((mi_msecs_t)t.tv_sec * 1000) + ((mi_msecs_t)t.tv_nsec / 1000000);
@@ -457,8 +465,6 @@ mi_msecs_t _mi_clock_end(mi_msecs_t start) {
#if defined(_WIN32)
#include <windows.h>
-#include <psapi.h>
-#pragma comment(lib,"psapi.lib")
static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
ULARGE_INTEGER i;
@@ -468,7 +474,23 @@ static mi_msecs_t filetime_msecs(const FILETIME* ftime) {
return msecs;
}
-static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
+typedef struct _PROCESS_MEMORY_COUNTERS {
+ DWORD cb;
+ DWORD PageFaultCount;
+ SIZE_T PeakWorkingSetSize;
+ SIZE_T WorkingSetSize;
+ SIZE_T QuotaPeakPagedPoolUsage;
+ SIZE_T QuotaPagedPoolUsage;
+ SIZE_T QuotaPeakNonPagedPoolUsage;
+ SIZE_T QuotaNonPagedPoolUsage;
+ SIZE_T PagefileUsage;
+ SIZE_T PeakPagefileUsage;
+} PROCESS_MEMORY_COUNTERS;
+typedef PROCESS_MEMORY_COUNTERS* PPROCESS_MEMORY_COUNTERS;
+typedef BOOL (WINAPI *PGetProcessMemoryInfo)(HANDLE, PPROCESS_MEMORY_COUNTERS, DWORD);
+static PGetProcessMemoryInfo pGetProcessMemoryInfo = NULL;
+
+static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msecs_t* stime, size_t* current_rss, size_t* peak_rss, size_t* current_commit, size_t* peak_commit, size_t* page_faults)
{
*elapsed = _mi_clock_end(mi_process_start);
FILETIME ct;
@@ -478,13 +500,26 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec
GetProcessTimes(GetCurrentProcess(), &ct, &et, &st, &ut);
*utime = filetime_msecs(&ut);
*stime = filetime_msecs(&st);
+
+ // load psapi on demand
+ if (pGetProcessMemoryInfo == NULL) {
+ HINSTANCE hDll = LoadLibrary(TEXT("psapi.dll"));
+ if (hDll != NULL) {
+ pGetProcessMemoryInfo = (PGetProcessMemoryInfo)(void (*)(void))GetProcAddress(hDll, "GetProcessMemoryInfo");
+ }
+ }
+
+ // get process info
PROCESS_MEMORY_COUNTERS info;
- GetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
+ memset(&info, 0, sizeof(info));
+ if (pGetProcessMemoryInfo != NULL) {
+ pGetProcessMemoryInfo(GetCurrentProcess(), &info, sizeof(info));
+ }
*current_rss = (size_t)info.WorkingSetSize;
*peak_rss = (size_t)info.PeakWorkingSetSize;
*current_commit = (size_t)info.PagefileUsage;
*peak_commit = (size_t)info.PeakPagefileUsage;
- *page_faults = (size_t)info.PageFaultCount;
+ *page_faults = (size_t)info.PageFaultCount;
}
#elif !defined(__wasi__) && (defined(__unix__) || defined(__unix) || defined(unix) || defined(__APPLE__) || defined(__HAIKU__))
@@ -517,7 +552,7 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec
// estimate commit using our stats
*peak_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.peak));
*current_commit = (size_t)(mi_atomic_loadi64_relaxed((_Atomic(int64_t)*)&_mi_stats_main.committed.current));
- *current_rss = *current_commit; // estimate
+ *current_rss = *current_commit; // estimate
#if defined(__HAIKU__)
// Haiku does not have (yet?) a way to
// get these stats per process
@@ -538,7 +573,7 @@ static void mi_stat_process_info(mi_msecs_t* elapsed, mi_msecs_t* utime, mi_msec
}
#else
*peak_rss = rusage.ru_maxrss * 1024; // Linux reports in KiB
-#endif
+#endif
}
#else
@@ -570,7 +605,7 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s
size_t peak_rss0 = 0;
size_t current_commit0 = 0;
size_t peak_commit0 = 0;
- size_t page_faults0 = 0;
+ size_t page_faults0 = 0;
mi_stat_process_info(&elapsed,&utime, &stime, &current_rss0, &peak_rss0, &current_commit0, &peak_commit0, &page_faults0);
if (elapsed_msecs!=NULL) *elapsed_msecs = (elapsed < 0 ? 0 : (elapsed < (mi_msecs_t)PTRDIFF_MAX ? (size_t)elapsed : PTRDIFF_MAX));
if (user_msecs!=NULL) *user_msecs = (utime < 0 ? 0 : (utime < (mi_msecs_t)PTRDIFF_MAX ? (size_t)utime : PTRDIFF_MAX));
@@ -581,4 +616,3 @@ mi_decl_export void mi_process_info(size_t* elapsed_msecs, size_t* user_msecs, s
if (peak_commit!=NULL) *peak_commit = peak_commit0;
if (page_faults!=NULL) *page_faults = page_faults0;
}
-
diff --git a/source/luametatex/source/libraries/readme.txt b/source/luametatex/source/libraries/readme.txt
index 8af76f93a..d41ecb58a 100644
--- a/source/luametatex/source/libraries/readme.txt
+++ b/source/luametatex/source/libraries/readme.txt
@@ -14,7 +14,9 @@ The avl and hnj libraries are adapted to Lua(Meta)TeX and might get some more ad
on our needs. The decnumber library that is also used in mplib is unchanged.
In mimalloc we need to patch init.c: #if defined(_M_X64) || defined(_M_ARM64) to get rid of a link
-error.
+error, and to patch options.c to work around an snprintf issue with the mingw64 cross compiler:
+
+/* HH */ snprintf(tprefix, sizeof(tprefix), "%sthread 0x%x: ", prefix, (unsigned) _mi_thread_id()); /* HH: %z is unknown */
In decNumber.c this got added:
diff --git a/source/luametatex/source/luametatex.h b/source/luametatex/source/luametatex.h
index b46922a11..d0e6ad7e4 100644
--- a/source/luametatex/source/luametatex.h
+++ b/source/luametatex/source/luametatex.h
@@ -87,9 +87,9 @@
# include "tex/textypes.h"
# define luametatex_version 210
-# define luametatex_revision 04
-# define luametatex_version_string "2.10.04"
-# define luametatex_development_id 20221222
+# define luametatex_revision 05
+# define luametatex_version_string "2.10.05"
+# define luametatex_development_id 20221228
# define luametatex_name_camelcase "LuaMetaTeX"
# define luametatex_name_lowercase "luametatex"
diff --git a/source/luametatex/source/tex/texalign.c b/source/luametatex/source/tex/texalign.c
index f35bb50b5..46d3153cf 100644
--- a/source/luametatex/source/tex/texalign.c
+++ b/source/luametatex/source/tex/texalign.c
@@ -287,7 +287,6 @@ static alignment_state_info lmt_alignment_state = {
static void tex_aux_wipe_row_state(void)
{
- delete_attribute_reference(lmt_alignment_state.row_state.attrlist);
lmt_alignment_state.row_state.attrlist = null;
lmt_alignment_state.row_state.orientation = 0;
lmt_alignment_state.row_state.xoffset = 0;
@@ -619,7 +618,7 @@ static void tex_aux_scan_align_spec(quarterword c)
if (! attrlist) {
/* this also sets the reference when not yet set */
attrlist = tex_current_attribute_list();
- }
+ }
/*tex Now we're referenced. We need to preserve this over the group. */
add_attribute_reference(attrlist);
tex_set_saved_record(saved_align_specification, box_spec_save_type, mode, amount);
@@ -693,6 +692,8 @@ static void tex_aux_run_no_align(void)
if (eq_value(register_attribute_location(i)) != v) {
if (lmt_alignment_state.row_state.attrlist) {
lmt_alignment_state.row_state.attrlist = tex_patch_attribute_list(lmt_alignment_state.row_state.attrlist, i, v);
+ } else if (lmt_alignment_state.attr_list) {
+ lmt_alignment_state.row_state.attrlist = tex_copy_attribute_list_set(lmt_alignment_state.attr_list, i, v);
} else {
lmt_alignment_state.row_state.attrlist = tex_copy_attribute_list_set(tex_current_attribute_list(), i, v);
}
@@ -802,11 +803,6 @@ static void tex_aux_run_no_align(void)
}
DONE:
lmt_alignment_state.row_state_set = done;
- if (! lmt_alignment_state.row_state.attrlist) {
- /* this alse sets the reference when not yet set */
- lmt_alignment_state.row_state.attrlist = tex_current_attribute_list();
- }
- add_attribute_reference(lmt_alignment_state.row_state.attrlist);
/* */
if (! brace) {
tex_scan_left_brace();
@@ -2051,6 +2047,8 @@ void tex_cleanup_alignments(void)
tex_put_available_token(lmt_alignment_state.omit_template);
lmt_alignment_state.hold_token_head = null;
lmt_alignment_state.omit_template = null;
+ delete_attribute_reference(lmt_alignment_state.attr_list);
+ lmt_alignment_state.attr_list = null;
}
/*tex
diff --git a/source/luametatex/source/tex/texcommands.c b/source/luametatex/source/tex/texcommands.c
index 615db76c9..5c8328d52 100644
--- a/source/luametatex/source/tex/texcommands.c
+++ b/source/luametatex/source/tex/texcommands.c
@@ -879,7 +879,9 @@ void tex_initialize_commands(void)
tex_primitive(tex_command, "box", make_box_cmd, box_code, 0);
tex_primitive(tex_command, "copy", make_box_cmd, copy_code, 0);
tex_primitive(tex_command, "lastbox", make_box_cmd, last_box_code, 0);
+ tex_primitive(luatex_command, "tsplit", make_box_cmd, tsplit_code, 0);
tex_primitive(tex_command, "vsplit", make_box_cmd, vsplit_code, 0);
+ tex_primitive(luatex_command, "dsplit", make_box_cmd, dsplit_code, 0);
tex_primitive(luatex_command, "tpack", make_box_cmd, tpack_code, 0);
tex_primitive(luatex_command, "vpack", make_box_cmd, vpack_code, 0);
tex_primitive(luatex_command, "hpack", make_box_cmd, hpack_code, 0);
diff --git a/source/luametatex/source/tex/texdumpdata.h b/source/luametatex/source/tex/texdumpdata.h
index 31d422784..a386ca3ba 100644
--- a/source/luametatex/source/tex/texdumpdata.h
+++ b/source/luametatex/source/tex/texdumpdata.h
@@ -55,7 +55,7 @@
*/
-# define luametatex_format_fingerprint 681
+# define luametatex_format_fingerprint 682
/* These end up in the string pool. */
diff --git a/source/luametatex/source/tex/texmainbody.c b/source/luametatex/source/tex/texmainbody.c
index 57b7d34be..4dd9c37a0 100644
--- a/source/luametatex/source/tex/texmainbody.c
+++ b/source/luametatex/source/tex/texmainbody.c
@@ -374,6 +374,13 @@ void tex_main_body(void)
}
}
+ /*tex
+ We assume that |ignore_depth_criterium_par| is unchanged. If needed we can always do
+ this:
+ */
+
+ /* cur_list.prev_depth = ignore_depth_criterium_par; */
+
/*tex Ready to go, so come to life. */
lmt_error_state.history = spotless;
diff --git a/source/luametatex/source/tex/texnesting.c b/source/luametatex/source/tex/texnesting.c
index f29bfe0a2..4ec68b28b 100644
--- a/source/luametatex/source/tex/texnesting.c
+++ b/source/luametatex/source/tex/texnesting.c
@@ -247,7 +247,7 @@ void tex_initialize_nesting(void)
cur_list.delim = null;
cur_list.prev_graf = 0;
cur_list.mode_line = 0;
- cur_list.prev_depth = ignore_depth_criterium_par;
+ cur_list.prev_depth = ignore_depth; /*tex |ignore_depth_criterium_par| is not yet available! */
cur_list.space_factor = 1000;
cur_list.incomplete_noad = null;
cur_list.direction_stack = null;
diff --git a/source/luametatex/source/tex/texpackaging.c b/source/luametatex/source/tex/texpackaging.c
index 3cea2b7b8..6ad1f5718 100644
--- a/source/luametatex/source/tex/texpackaging.c
+++ b/source/luametatex/source/tex/texpackaging.c
@@ -2312,6 +2312,52 @@ halfword tex_filtered_vpack(halfword p, scaled h, int m, scaled maxdepth, int gr
return q;
}
+static scaled tex_aux_first_height(halfword boxnode)
+{
+ halfword list = box_list(boxnode);
+ if (list) {
+ switch (node_type(list)) {
+ case hlist_node:
+ case vlist_node:
+ return box_height(list);
+ case rule_node:
+ return rule_height(list);
+ }
+ }
+ return 0;
+}
+
+static void tex_aux_set_vnature(halfword boxnode, int nature)
+{
+ switch (nature) {
+ case vtop_code:
+ case tsplit_code:
+ {
+ /*tex
+
+ Readjust the height and depth of |boxnode| for |\vtop|. The height of
+ a |\vtop| box is inherited from the first item on its list, if that item is an
+ |hlist_node|, |vlist_node|, or |rule_node|; otherwise the |\vtop| height is zero.
+
+ */
+ scaled height = tex_aux_first_height(boxnode);
+ box_depth(boxnode) = box_total(boxnode) - height;
+ box_height(boxnode) = height;
+ box_package_state(boxnode) = vtop_package_state;
+ }
+ break;
+ case vbox_code:
+ case vsplit_code:
+ box_package_state(boxnode) = vbox_package_state;
+ break;
+ case dbox_code:
+ case dsplit_code:
+ box_package_state(boxnode) = dbox_package_state;
+ break;
+ }
+}
+
+
/*tex
Here we always start out in l2r mode and without shift. After all we need to be compatible with
how it was before.
@@ -2358,21 +2404,6 @@ void tex_finish_vcenter_group(void)
}
}
-static scaled tex_aux_first_height(halfword boxnode)
-{
- halfword list = box_list(boxnode);
- if (list) {
- switch (node_type(list)) {
- case hlist_node:
- case vlist_node:
- return box_height(list);
- case rule_node:
- return rule_height(list);
- }
- }
- return 0;
-}
-
void tex_package(singleword nature)
{
halfword slot, context, spec, dirptr, attrlist, justpack, orientation, anchor, geometry, source, target, axis, mainclass, state, retain;
@@ -2409,29 +2440,7 @@ void tex_package(singleword nature)
} else {
boxnode = tex_filtered_vpack(node_next(cur_list.head), spec, saved_level(saved_full_spec_item_packaging),
maxdepth, grp, saved_level(saved_full_spec_item_direction), justpack, attrlist, state, retain);
- switch (nature) {
- case vtop_code:
- {
- /*tex
-
- Read just the height and depth of |boxnode| (|boxnode|), for |\vtop|. The height of
- a |\vtop| box is inherited from the first item on its list, if that item is an
- |hlist_node|, |vlist_node|, or |rule_node|; otherwise the |\vtop| height is zero.
-
- */
- scaled height = tex_aux_first_height(boxnode);
- box_depth(boxnode) = box_total(boxnode) - height;
- box_height(boxnode) = height;
- box_package_state(boxnode) = vtop_package_state;
- }
- break;
- case vbox_code:
- box_package_state(boxnode) = vbox_package_state;
- break;
- case dbox_code:
- box_package_state(boxnode) = dbox_package_state;
- break;
- }
+ tex_aux_set_vnature(boxnode, nature);
}
if (dirptr) {
/*tex Adjust back |text_dir_ptr| for |scan_spec| */
@@ -2679,7 +2688,9 @@ void tex_run_unpackage(void)
cur_list.tail = tex_tail_of_node_list(tail);
break;
}
+ case tsplit_code:
case vsplit_code:
+ case dsplit_code:
{
tex_try_couple_nodes(tail, lmt_packaging_state.split_discards_head);
lmt_packaging_state.split_discards_head = null;
@@ -3264,7 +3275,9 @@ void tex_begin_box(int boxcontext, scaled shift, halfword slot)
}
}
break;
+ case tsplit_code:
case vsplit_code:
+ case dsplit_code:
{
/*tex
Split off part of a vertical box, make |boxnode| point to it. Here we deal with
@@ -3274,24 +3287,47 @@ void tex_begin_box(int boxcontext, scaled shift, halfword slot)
halfword mode = packing_exactly ;
halfword index = tex_scan_box_register_number();
halfword size = 0;
- switch (tex_scan_character("utUT", 0, 1, 0)) {
- case 'u': case 'U':
- if (tex_scan_mandate_keyword("upto", 1)) {
- mode = packing_additional;
- size = tex_scan_dimen(0, 0, 0, 0, NULL);
- }
- break;
- case 't': case 'T':
- if (tex_scan_mandate_keyword("to", 1)) {
- mode = packing_exactly ;
- size = tex_scan_dimen(0, 0, 0, 0, NULL);
- }
- break;
- default:
- tex_aux_show_keyword_error("upto|to");
- break;
+ halfword attrlist = null;
+ while (1) {
+ switch (tex_scan_character("adtuvADTUV", 0, 1, 0)) {
+ case 0:
+ goto DONE;
+ case 'a': case 'A':
+ if (tex_scan_mandate_keyword("attr", 1)) {
+ halfword i = tex_scan_attribute_register_number();
+ halfword v = tex_scan_int(1, NULL);
+ if (eq_value(register_attribute_location(i)) != v) {
+ if (attrlist) {
+ attrlist = tex_patch_attribute_list(attrlist, i, v);
+ } else {
+ attrlist = tex_copy_attribute_list_set(tex_current_attribute_list(), i, v);
+ }
+ }
+ }
+ break;
+ case 't': case 'T':
+ if (tex_scan_mandate_keyword("to", 1)) {
+ mode = packing_exactly ;
+ size = tex_scan_dimen(0, 0, 0, 0, NULL);
+ }
+ break;
+ case 'u': case 'U':
+ if (tex_scan_mandate_keyword("upto", 1)) {
+ mode = packing_additional;
+ size = tex_scan_dimen(0, 0, 0, 0, NULL);
+ }
+ break;
+ default:
+ tex_aux_show_keyword_error("attr|upto|to|vbox|vtop|dbox");
+ goto DONE;
+ }
}
+ DONE:
boxnode = tex_vsplit(index, size, mode);
+ tex_aux_set_vnature(boxnode, code);
+ if (attrlist) {
+ tex_attach_attribute_list_attribute(boxnode, attrlist);
+ }
}
break;
case insert_box_code:
@@ -3337,65 +3373,74 @@ void tex_begin_box(int boxcontext, scaled shift, halfword slot)
box. The juggling with codes and addition or subtraction was somewhat messy.
*/
- /* case tpack_code: */
- /* case vpack_code: */
- /* case hpack_code: */
- /* case dpack_code: */
- /* case vtop_code: */
- /* case vbox_code: */
- /* case hbox_code: */
- /* case dbox_code: */
default:
{
- int just_pack = 0;
- quarterword spec_direction = direction_unknown;
- /*tex 0 or |vmode| or |hmode| */
- halfword mode; /* todo */
+ quarterword direction;
+ int justpack = 0;
+ int group = vbox_group;
+ int mode = vmode;
+ int adjusted = 0;
+ switch (abs(cur_list.mode)) {
+ case vmode:
+ direction = dir_lefttoright;
+ if (boxcontext == direct_box_flag) {
+ adjusted = 1;
+ }
+ break;
+ case hmode:
+ direction = (singleword) text_direction_par;
+ break;
+ case mmode:
+ direction = (singleword) math_direction_par;
+ break;
+ default:
+ direction = direction_unknown;
+ break;
+ }
switch (code) {
case tpack_code:
- code = vtop_code;
- just_pack = 1;
+ // mode = vmode;
+ justpack = 1;
+ group = vtop_group;
break;
case vpack_code:
- code = vtop_code + vmode;
- just_pack = 1;
+ // mode = vmode;
+ justpack = 1;
+ // group = vbox_group;
break;
case hpack_code:
- code = vtop_code + hmode;
- just_pack = 1;
+ mode = hmode;
+ justpack = 1;
+ group = adjusted ? adjusted_hbox_group : hbox_group;
break;
case dpack_code:
- code = dbox_code + hmode;
- just_pack = 1;
+ // mode = vmode;
+ justpack = 1;
+ group = dbox_group;
break;
- }
- mode = code - vtop_code;
- switch (abs(cur_list.mode)) {
- case vmode:
- spec_direction = dir_lefttoright;
+ case vtop_code:
+ // mode = vmode;
+ // justpack = 0;
+ group = vtop_group;
break;
- case hmode:
- spec_direction = (singleword) text_direction_par;
+ case vbox_code:
+ // mode = vmode;
+ // justpack = 0;
+ // group = vbox_group;
break;
- case mmode:
- spec_direction = (singleword) math_direction_par;
+ case hbox_code:
+ mode = hmode;
+ // justpack = 0;
+ group = adjusted ? adjusted_hbox_group : hbox_group;
+ break;
+ case dbox_code:
+ // mode = vmode;
+ // justpack = 0;
+ group = dbox_group;
break;
}
- if (mode == hmode) {
- if ((boxcontext == direct_box_flag) && (abs(cur_list.mode) == vmode)) {
- tex_aux_scan_full_spec(boxcontext, adjusted_hbox_group, spec_direction, just_pack, shift, slot);
- } else {
- tex_aux_scan_full_spec(boxcontext, hbox_group, spec_direction, just_pack, shift, slot);
- }
- } else {
- if (mode == vmode) {
- tex_aux_scan_full_spec(boxcontext, vbox_group, spec_direction, just_pack, shift, slot);
- } else {
- tex_aux_scan_full_spec(boxcontext, (code == dbox_code || code == dpack_code) ? dbox_group : vtop_group, spec_direction, just_pack, shift, slot);
- mode = vmode;
- }
- tex_normal_paragraph(vmode_par_context);
- }
+ tex_aux_scan_full_spec(boxcontext, group, direction, justpack, shift, slot);
+ tex_normal_paragraph(vmode_par_context);
tex_push_nest();
update_tex_internal_dir_state(0);
cur_list.mode = - mode;
diff --git a/source/luametatex/source/tex/texpackaging.h b/source/luametatex/source/tex/texpackaging.h
index 8cef36619..dea884d0f 100644
--- a/source/luametatex/source/tex/texpackaging.h
+++ b/source/luametatex/source/tex/texpackaging.h
@@ -23,15 +23,18 @@ typedef enum box_codes {
copy_code, /*tex |chr_code| for |\copy| */
unpack_code,
last_box_code, /*tex |chr_code| for |\lastbox| */
+ tsplit_code,
vsplit_code, /*tex |chr_code| for |\vsplit| */
- tpack_code,
- vpack_code,
- hpack_code,
+ dsplit_code,
+ /* hsplit_code, */
+ tpack_code,
+ vpack_code, /*tex |chr_code| for |\vpack| */
dpack_code,
- vtop_code, /*tex |chr_code| for |\vtop| */
- vbox_code,
- hbox_code,
+ hpack_code,
+ vtop_code,
+ vbox_code, /*tex |chr_code| for |\vbox| */
dbox_code,
+ hbox_code,
insert_box_code,
insert_copy_code,
local_left_box_box_code,
diff --git a/source/luametatex/source/tex/textoken.c b/source/luametatex/source/tex/textoken.c
index 7a30b97eb..79e2e5e64 100644
--- a/source/luametatex/source/tex/textoken.c
+++ b/source/luametatex/source/tex/textoken.c
@@ -480,15 +480,19 @@ void tex_print_meaning(halfword code)
tex_print_cs(cur_cs);
return;
} else {
- if (cur_chr && get_token_reference(cur_chr) == max_token_reference) {
- tex_print_str("constant ");
- }
+ int constant = (cur_chr && get_token_reference(cur_chr) == max_token_reference);
switch (code) {
case meaning_code:
case meaning_full_code:
+ if (constant) {
+ tex_print_str("constant ");
+ }
tex_print_str("macro");
goto FOLLOWUP;
case meaning_asis_code:
+ if (constant) {
+ tex_print_str_esc("constant ");
+ }
// tex_print_format("%e%C %S ", def_cmd, def_code, cur_cs);
tex_print_cmd_chr(def_cmd, def_code);
tex_print_char(' ');