summaryrefslogtreecommitdiff
path: root/source/luametatex/source/utilities
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/utilities')
-rw-r--r--source/luametatex/source/utilities/auxarithmetic.h61
-rw-r--r--source/luametatex/source/utilities/auxfile.c294
-rw-r--r--source/luametatex/source/utilities/auxfile.h166
-rw-r--r--source/luametatex/source/utilities/auxmemory.c25
-rw-r--r--source/luametatex/source/utilities/auxmemory.h54
-rw-r--r--source/luametatex/source/utilities/auxsparsearray.c623
-rw-r--r--source/luametatex/source/utilities/auxsparsearray.h212
-rw-r--r--source/luametatex/source/utilities/auxsystem.c155
-rw-r--r--source/luametatex/source/utilities/auxsystem.h17
-rw-r--r--source/luametatex/source/utilities/auxunistring.c158
-rw-r--r--source/luametatex/source/utilities/auxunistring.h19
-rw-r--r--source/luametatex/source/utilities/auxzlib.c18
-rw-r--r--source/luametatex/source/utilities/auxzlib.h24
13 files changed, 1826 insertions, 0 deletions
diff --git a/source/luametatex/source/utilities/auxarithmetic.h b/source/luametatex/source/utilities/auxarithmetic.h
new file mode 100644
index 000000000..8daf6f29a
--- /dev/null
+++ b/source/luametatex/source/utilities/auxarithmetic.h
@@ -0,0 +1,61 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# ifndef LMT_UTILITIES_ARITHMETIC_H
+# define LMT_UTILITIES_ARITHMETIC_H
+
+/* The |fabs| macro is used in mp. */
+
+/*tex
+
+There has always been much attention on accuracy in \TEX, especially in the perspective of portability.
+Keep in mind that \TEX\ was written when there was no IEEE floating point defined so all happens in
+16.16, or actually in 14.16 precision. We could actually consider going 16.16 if we use long integers
+in some places but it needs some checking first. We could just accept wrapping around as that already
+happens in some places anyway (not all dimension calculation are checked).
+
+In \LUATEX\ and \LUAMETATEX\ we have the \LUA\ engine and that one was exclusively using doubles till
+5.3 when it went for a more hybrid approach. Because we go a lot between \TEX\ and \LUA\ (in \CONTEXT)
+that had some consequences and rounding happens all over the place. It is also for that reason that
+we now use doubles and rounding in some more places in the \TEX\ part: it is more consistent with what
+happens at the \LUA\ end. And, because IEEE is common now, we are (afaik) portable enough.
+
+We don't use round but lround as that one rounds away from zero. In a few places we use llround. Also
+in some places we clip to the official maxima but not always.
+
+*/
+
+
+/*
+# undef abs
+# undef fabs
+
+# define abs(x) ((int)(x) >= 0 ? (int)(x) : (int)-(x))
+# define fabs(x) ((x) >= 0.0 ? (x) : -(x))
+*/
+
+/*tex Yields a non-zero value when the lowest bit of |x| is set. */
+
+# define odd(x) ((x) & 1)
+
+/*tex
+ These convert |x| to a double, apply |floor| and cast the result to |lua_Integer|, |size_t| or
+ |int|. This header includes nothing itself, so |floor| and |lua_Integer| have to be in scope
+ where the macros are used.
+*/
+
+# define lfloor(x) ( (lua_Integer)(floor((double)(x))) )
+# define tfloor(x) ( (size_t) (floor((double)(x))) )
+# define ifloor(x) ( (int) (floor((double)(x))) )
+
+//define lround(x) ( ((double) x >= 0.0) ? (lua_Integer) ((double) x + 0.5) : (lua_Integer) ((double) x - 0.5) )
+//define tround(x) ( ((double) x >= 0.0) ? (size_t) ((double) x + 0.5) : (size_t) ((double) x - 0.5) )
+//define iround(x) ( ((double) x >= 0.0) ? (int) ((double) x + 0.5) : (int) ((double) x - 0.5) )
+//define sround(x) ( ((double) x >= 0.0) ? (int) ((double) x + 0.5) : (int) ((double) x - 0.5) )
+
+//define lround(x) ( ((double) x >= 0.0) ? (lua_Integer) ((double) x + 0.5) : (lua_Integer) ((double) x - 0.5) )
+//define tround(x) ( ((double) x >= 0.0) ? (size_t) ((double) x + 0.5) : (size_t) ((double) x - 0.5) )
+//define iround(x) ( (int) lround((double) x) )
+
+//define zround(r) ((r>2147483647.0) ? 2147483647 : ((r<-2147483647.0) ? -2147483647 : ((r >= 0.0) ? (int)(r + 0.5) : ((int)(r-0.5)))))
+//define zround(r) ((r>2147483647.0) ? 2147483647 : ((r<-2147483647.0) ? -2147483647 : (int) lround(r)))
+
+/*tex
+ Rounding helpers on top of |lround| and |llround|. The arguments are fully parenthesized so
+ that any expression (for instance a conditional one) can be passed. Mind that |clippedround|
+ evaluates its argument more than once, so don't pass something with side effects. Clipping
+ happens at plus and minus 2147483647.
+*/
+
+# define scaledround(x) ((scaled) lround((double) (x)))
+# define longlonground llround
+# define clippedround(r) (((r) > 2147483647.0) ? 2147483647 : (((r) < -2147483647.0) ? -2147483647 : (int) lround(r)))
+# define glueround(x) clippedround((double) (x))
+
+# endif
diff --git a/source/luametatex/source/utilities/auxfile.c b/source/luametatex/source/utilities/auxfile.c
new file mode 100644
index 000000000..1aae0e691
--- /dev/null
+++ b/source/luametatex/source/utilities/auxfile.c
@@ -0,0 +1,294 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include <stdio.h>
+# include <sys/stat.h>
+
+# include "auxfile.h"
+# include "auxmemory.h"
+
+# ifdef _WIN32
+
+ # include <windows.h>
+ # include <ctype.h>
+ # include <io.h>
+ # include <shellapi.h>
+
+ /*tex
+     Converts a zero terminated \UTF-8 string into a freshly allocated wide string. The caller
+     owns the result and releases it with |lmt_memory_free|. We return |NULL| when the input is
+     |NULL|, when the conversion fails or when we run out of memory.
+ */
+
+ LPWSTR aux_utf8_to_wide(const char *utf8str) {
+     LPWSTR wide = NULL;
+     if (utf8str) {
+         /*tex The preroll gives the number of wide characters, terminator included. */
+         int length = MultiByteToWideChar(CP_UTF8, 0, utf8str, -1, NULL, 0);
+         if (length > 0) {
+             wide = (LPWSTR) lmt_memory_malloc(sizeof(WCHAR) * (size_t) length);
+             if (wide && ! MultiByteToWideChar(CP_UTF8, 0, utf8str, -1, wide, length)) {
+                 /*tex Don't hand back a buffer with undefined content. */
+                 lmt_memory_free(wide);
+                 wide = NULL;
+             }
+         }
+     }
+     return wide;
+ }
+
+ /*tex
+     Converts a zero terminated wide string into a freshly allocated \UTF-8 string. The caller
+     owns the result and releases it with |lmt_memory_free|. We return |NULL| when the input is
+     |NULL|, when the conversion fails or when we run out of memory.
+ */
+
+ char *aux_utf8_from_wide(LPWSTR widestr) {
+     char *utf8str = NULL;
+     if (widestr) {
+         /*tex The preroll gives the number of bytes, terminator included. */
+         int length = WideCharToMultiByte(CP_UTF8, 0, widestr, -1, NULL, 0, NULL, NULL);
+         if (length > 0) {
+             utf8str = (char *) lmt_memory_malloc((size_t) length);
+             if (utf8str && ! WideCharToMultiByte(CP_UTF8, 0, widestr, -1, utf8str, length, NULL, NULL)) {
+                 /*tex Don't hand back a buffer with undefined content. */
+                 lmt_memory_free(utf8str);
+                 utf8str = NULL;
+             }
+         }
+     }
+     return utf8str;
+ }
+
+ /*tex
+     Opens a file whose name and mode are given in \UTF-8 by going via |_wfopen|. We return
+     |NULL| when an argument is |NULL|, when a conversion to wide characters fails or when
+     |_wfopen| itself fails.
+ */
+
+ FILE *aux_utf8_fopen(const char *path, const char *mode) {
+     FILE *f = NULL;
+     if (path && mode) {
+         LPWSTR wpath = aux_utf8_to_wide(path);
+         LPWSTR wmode = aux_utf8_to_wide(mode);
+         /*tex A failed conversion gives |NULL| and that should not reach the runtime. */
+         if (wpath && wmode) {
+             f = _wfopen(wpath, wmode);
+         }
+         lmt_memory_free(wpath);
+         lmt_memory_free(wmode);
+     }
+     return f;
+ }
+
+ /*tex
+     Starts a command given in \UTF-8 and returns the pipe that |_wpopen| provides. We return
+     |NULL| when an argument is |NULL|, when a conversion to wide characters fails or when
+     |_wpopen| itself fails.
+ */
+
+ FILE *aux_utf8_popen(const char *path, const char *mode) {
+     FILE *f = NULL;
+     if (path && mode) {
+         LPWSTR wpath = aux_utf8_to_wide(path);
+         LPWSTR wmode = aux_utf8_to_wide(mode);
+         /*tex A failed conversion gives |NULL| and that should not reach the runtime. */
+         if (wpath && wmode) {
+             f = _wpopen(wpath, wmode);
+         }
+         lmt_memory_free(wpath);
+         lmt_memory_free(wmode);
+     }
+     return f;
+ }
+
+ /*tex
+     Runs a \UTF-8 encoded command via |_wsystem| and passes on its return value. The wide copy
+     of the command is released afterwards.
+ */
+
+ int aux_utf8_system(const char *cmd)
+ {
+     LPWSTR widecommand = aux_utf8_to_wide(cmd);
+     int status = _wsystem(widecommand);
+     lmt_memory_free(widecommand);
+     return status;
+ }
+
+ /*tex
+     Removes the file with the given \UTF-8 name via |_wremove| and passes on its return value.
+     When there is no name or when the name can't be converted we return |-1| without calling
+     the runtime.
+ */
+
+ int aux_utf8_remove(const char *name)
+ {
+     int result = -1;
+     LPWSTR wname = aux_utf8_to_wide(name);
+     if (wname) {
+         result = _wremove(wname);
+         lmt_memory_free(wname);
+     }
+     return result;
+ }
+
+ /*tex
+     Renames a file, both names being \UTF-8, via |_wrename| and passes on its return value.
+     When a name is missing or can't be converted we return |-1| without calling the runtime.
+ */
+
+ int aux_utf8_rename(const char *oldname, const char *newname)
+ {
+     int result = -1;
+     LPWSTR woldname = aux_utf8_to_wide(oldname);
+     LPWSTR wnewname = aux_utf8_to_wide(newname);
+     if (woldname && wnewname) {
+         result = _wrename(woldname, wnewname);
+     }
+     lmt_memory_free(woldname);
+     lmt_memory_free(wnewname);
+     return result;
+ }
+
+ /*tex
+     Rebuilds the argument vector from the wide command line so that the arguments end up as
+     \UTF-8 strings. The new vector is stored in |*av| and its length is returned. When |argv|
+     is |NULL| we set |*av| to |NULL| and return |argc|. When the command line can't be split
+     or the vector can't be allocated we fall back on the |argv| and |argc| that were passed.
+ */
+
+ int aux_utf8_setargv(char * **av, char **argv, int argc)
+ {
+     if (argv) {
+         int c = 0;
+         LPWSTR *l = CommandLineToArgvW(GetCommandLineW(), &c);
+         char **v = (l && c > 0) ? lmt_memory_malloc(sizeof(char *) * (size_t) c) : NULL;
+         if (! v) {
+             if (l) {
+                 LocalFree(l);
+             }
+             *av = argv;
+             return argc;
+         }
+         for (int i = 0; i < c; i++) {
+             v[i] = aux_utf8_from_wide(l[i]);
+         }
+         /*tex The block that |CommandLineToArgvW| returns has to be released with |LocalFree|. */
+         LocalFree(l);
+         *av = v;
+         /*tex Let's be nice with path names: |c:\\foo\\etc| */
+         if (c > 1 && v[c-1]) {
+             char *last = v[c-1];
+             if ((strlen(last) > 2) && isalpha((unsigned char) last[0]) && (last[1] == ':') && (last[2] == '\\')) {
+                 for (char *p = last + 2; *p; p++) {
+                     if (*p == '\\') {
+                         *p = '/';
+                     }
+                 }
+             }
+         }
+         return c;
+     } else {
+         *av = NULL;
+         return argc;
+     }
+ }
+
+ /*tex
+     Returns the full name of the running executable as an allocated \UTF-8 string with forward
+     slashes; the caller releases it with |lmt_memory_free|. The |file| argument is only checked
+     for being set. We ask for the wide name so that non \ASCII\ characters survive. When the
+     name can't be determined (or doesn't fit in |MAX_PATH|) we return an allocated |"."|.
+ */
+
+ char *aux_utf8_getownpath(const char *file)
+ {
+     if (file) {
+         WCHAR buffer[MAX_PATH];
+         /*tex Zero means failure and |MAX_PATH| means that the name got truncated. */
+         DWORD length = GetModuleFileNameW(NULL, buffer, MAX_PATH);
+         if (length > 0 && length < MAX_PATH) {
+             char *path = aux_utf8_from_wide(buffer);
+             if (path) {
+                 if (path[0] != '\0') {
+                     for (char *p = path; *p; p++) {
+                         if (*p == '\\') {
+                             *p = '/';
+                         }
+                     }
+                     return path;
+                 }
+                 lmt_memory_free(path);
+             }
+         }
+     }
+     return lmt_memory_strdup(".");
+ }
+
+# else
+
+ # include <string.h>
+ # include <stdlib.h>
+ # include <unistd.h>
+
+ /*tex
+     There is nothing to convert here: the given vector is handed back via |av| and the given
+     count is returned.
+ */
+
+ int aux_utf8_setargv(char * **av, char **argv, int argc)
+ {
+     char **unchanged = argv;
+     (*av) = unchanged;
+     return argc;
+ }
+
+ /*tex
+     Locates the program |file|. A name with a slash in it is returned as is (duplicated).
+     Otherwise each entry of |PATH| is tried and the first candidate that passes
+     |access(..., X_OK)| is returned. An empty entry means that |file| itself is the candidate.
+     When nothing is found, when |PATH| is not set or when |file| is |NULL| we return |"."|. The
+     result is always allocated and has to be released with |lmt_memory_free|.
+ */
+
+ char *aux_utf8_getownpath(const char *file)
+ {
+     if (! file) {
+         return lmt_memory_strdup(".");
+     } else if (strchr(file, '/')) {
+         return lmt_memory_strdup(file);
+     } else {
+         /*tex Duplicating a |NULL| is undefined so we check the environment first. */
+         const char *environment = getenv("PATH");
+         char *searchpath = environment ? lmt_memory_strdup(environment) : NULL;
+         char *path = NULL;
+         if (searchpath) {
+             size_t filelen = strlen(file);
+             const char *index = searchpath;
+             while (index) {
+                 const char *esp = strchr(index, ':');
+                 size_t prefixlen = esp ? (size_t) (esp - index) : strlen(index);
+                 /*tex We only add a separator when the entry doesn't end with one. */
+                 size_t slash = (prefixlen > 0 && index[prefixlen - 1] != '/') ? 1 : 0;
+                 size_t totallen = prefixlen + slash + filelen;
+                 int usable = 1;
+# ifdef PATH_MAX
+                 if (totallen >= PATH_MAX) {
+                     /*tex Too long, skip this entry but do move on to the next one. */
+                     usable = 0;
+                 }
+# endif
+                 if (usable) {
+                     path = lmt_memory_malloc(totallen + 1);
+                     if (path) {
+                         memcpy(path, index, prefixlen);
+                         if (slash) {
+                             path[prefixlen] = '/';
+                         }
+                         memcpy(path + prefixlen + slash, file, filelen);
+                         path[totallen] = '\0';
+                         if (access(path, X_OK) >= 0) {
+                             break;
+                         }
+                         lmt_memory_free(path);
+                         path = NULL;
+                     }
+                 }
+                 index = esp ? esp + 1 : NULL;
+             }
+             lmt_memory_free(searchpath);
+         }
+         if (path) {
+             return path;
+         } else {
+             return lmt_memory_strdup("."); /* ok? */
+         }
+     }
+ }
+
+# endif
+
+/*tex
+ A fallback for when the system headers don't provide |S_ISREG|. We compare the file type
+ bits instead of testing a single bit, and the argument is parenthesized.
+*/
+
+# ifndef S_ISREG
+ # define S_ISREG(mode) (((mode) & _S_IFMT) == _S_IFREG)
+# endif
+
+# ifdef _WIN32
+
+ /*tex
+     Returns the file name part (base plus suffix, as split off by |_splitpath|) of |name| in a
+     freshly allocated string, or |NULL| when the allocation fails.
+ */
+
+ char *aux_basename(const char *name) {
+     char stem[256+1];
+     char extension[256+1];
+     size_t stemlength;
+     size_t extensionlength;
+     char *joined;
+     _splitpath(name, NULL, NULL, stem, extension);
+     stemlength = strlen(stem);
+     extensionlength = strlen(extension);
+     joined = (char *) lmt_memory_malloc(stemlength + extensionlength + 1);
+     if (! joined) {
+         return NULL;
+     }
+     memcpy(joined, stem, stemlength);
+     memcpy(joined + stemlength, extension, extensionlength);
+     joined[stemlength + extensionlength] = '\0';
+     return joined;
+ }
+
+ /*tex
+     Returns the drive plus directory part of |name| (as split off by |_splitpath|) in a freshly
+     allocated string, without the trailing separator. When |name| has no directory part the
+     result is just the drive, which can be an empty string. We return |NULL| when the
+     allocation fails.
+ */
+
+ char *aux_dirname(const char *name) {
+     char driv[256 + 1];
+     char path[256 + 1];
+     _splitpath(name, driv, path, NULL, NULL);
+     {
+         size_t d = strlen(driv);
+         size_t p = strlen(path);
+         char *result = (char *) lmt_memory_malloc(d + p + 1);
+         if (result) {
+             /*tex An empty directory part has no last character to look at. */
+             if (p > 0 && (path[p - 1] == '/' || path[p - 1] == '\\')) {
+                 --p;
+             }
+             memcpy(&result[0], &driv[0], d);
+             memcpy(&result[d], &path[0], p);
+             result[d + p] = '\0';
+         }
+         return result;
+     }
+ }
+
+ // int aux_is_readable(const char *filename)
+ // {
+ // struct stat finfo;
+ // FILE *f;
+ // return (stat(filename, &finfo) == 0)
+ // && S_ISREG(finfo.st_mode)
+ // && ((f = aux_utf8_fopen(filename, "r")) != NULL)
+ // && ! fclose(f);
+ // }
+
+ /*tex
+     Checks if |filename| (in \UTF-8) is a regular file that can be opened for reading. The
+     file is opened and closed again; we return 1 when all that succeeds and 0 otherwise.
+ */
+
+ int aux_is_readable(const char *filename)
+ {
+     struct _stati64 info;
+     FILE *handle;
+     LPWSTR widename = aux_utf8_to_wide(filename);
+     int status = _wstati64(widename, &info);
+     lmt_memory_free(widename);
+     if (status != 0) {
+         return 0;
+     }
+     if (! (S_ISREG(info.st_mode))) {
+         return 0;
+     }
+     handle = aux_utf8_fopen(filename, "r");
+     if (handle == NULL) {
+         return 0;
+     }
+     return ! fclose(handle);
+ }
+
+# else
+
+ # include <libgen.h>
+
+ /*tex
+     Checks if |filename| is a regular file that can be opened for reading. The file is opened
+     and closed again; we return 1 when all that succeeds and 0 otherwise.
+ */
+
+ int aux_is_readable(const char *filename)
+ {
+     struct stat info;
+     FILE *handle;
+     if (stat(filename, &info) != 0) {
+         return 0;
+     }
+     if (! S_ISREG(info.st_mode)) {
+         return 0;
+     }
+     handle = fopen(filename, "r");
+     if (handle == NULL) {
+         return 0;
+     }
+     return ! fclose(handle);
+ }
+
+# endif
diff --git a/source/luametatex/source/utilities/auxfile.h b/source/luametatex/source/utilities/auxfile.h
new file mode 100644
index 000000000..19a4815c2
--- /dev/null
+++ b/source/luametatex/source/utilities/auxfile.h
@@ -0,0 +1,166 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# ifndef LMT_UTILITIES_FILE_H
+# define LMT_UTILITIES_FILE_H
+
+/*tex
+
+ We have to deal with wide characters on windows when it comes to filenames. The same is true for
+ the commandline and environment variables. Basically we go from utf8 to wide and back.
+
+ \starttyping
+ libraries/zlib/crc32.c : fopen -> minimalistic, goes via lua anyway
+ libraries/zlib/trees.c : fopen -> minimalistic, goes via lua anyway
+ libraries/zlib/zutil.h : fopen -> minimalistic, goes via lua anyway
+
+ lua/llualib.c : fopen -> utf8_fopen
+ lua/lenginelib.c : fopen -> utf8_fopen
+
+ luacore/lua54/src/lauxlib.c : fopen -> see below
+ luacore/lua54/src/liolib.c : fopen -> see below
+ luacore/lua54/src/loadlib.c : fopen -> see below
+
+ luaffi/call.c : fopen -> not used
+
+ mp/mpw/mp.w : fopen -> overloaded by callback
+
+ libraries/pplib/ppload.c : fopen -> will be abstraction (next pplib)
+
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+ libraries/pplib/util/utiliof.c : fopen -> not used
+
+ tex/texfileio.c 12: : fopen -> utf8_fopen
+ \stoptyping
+
+ Furthermore:
+
+ \starttyping
+ - system commands (execute) : done
+ - popen : done
+
+ - lua rename : done
+ - lua remove : done
+
+ - command line argv : done
+ - lua setenv : done
+ - lua getenv : done
+
+ - lfs attributes : done
+ - lfs chdir : done
+ - lfs currentdir : done
+ - lfs dir : done
+ - lfs mkdir : done
+ - lfs rmdir : done
+ - lfs touch : done
+ - lfs link : done
+ - lfs symlink : done
+ - lfs setexecutable : done (needs testing)
+ - lfs isdir : done
+ - lfs isfile : done
+ - lfs iswriteabledir : done
+ - lfs iswriteablefile : done
+ - lfs isreadabledir : done
+ - lfs isreadablefile : done
+ \stoptyping
+
+ Kind of tricky because quite some code (indirectness):
+
+ \starttyping
+ - lua load : via overload ?
+ - lua dofile : via overload -> loadstring
+ - lua require : via overload ?
+ \stoptyping
+
+ So: do we patch lua (fopen) or just copy? We can actually assume flat ascii files for libraries
+ and such so there is no real need unless we load job related files.
+
+ I will probably reshuffle some code and maybe move some more here once I'm sure all works out
+ well.
+
+*/
+
+# ifdef _WIN32
+
+ # include <windows.h>
+ # include <ctype.h>
+ # include <stdio.h>
+
+ /*tex
+     The two converters return strings that are allocated with |lmt_memory_malloc|, so the
+     caller has to free them. A |NULL| input gives a |NULL| result.
+ */
+
+ extern LPWSTR aux_utf8_to_wide (const char *utf8str);
+ extern char *aux_utf8_from_wide (LPWSTR widestr);
+
+ /*tex
+     These take \UTF-8 arguments and go via the wide character variants of the runtime
+     functions. The last two deal with the command line arguments and the location of the
+     running binary.
+ */
+
+ extern FILE *aux_utf8_fopen (const char *path, const char *mode);
+ extern FILE *aux_utf8_popen (const char *path, const char *mode);
+ extern int aux_utf8_system (const char *cmd);
+ extern int aux_utf8_remove (const char *name);
+ extern int aux_utf8_rename (const char *oldname, const char *newname);
+ extern int aux_utf8_setargv (char * **av, char **argv, int argc);
+ extern char *aux_utf8_getownpath (const char *file);
+
+# else
+
+ /*tex Outside windows the regular library functions are used directly. */
+
+ # define aux_utf8_fopen fopen
+ # define aux_utf8_popen popen
+ # define aux_utf8_system system
+ # define aux_utf8_remove remove
+ # define aux_utf8_rename rename
+
+ /*tex These two do have their own implementation in |auxfile.c|. */
+
+ extern int aux_utf8_setargv (char * **av, char **argv, int argc);
+ extern char *aux_utf8_getownpath (const char *file);
+
+ # include <libgen.h>
+
+# endif
+
+/*tex
+ On windows we have our own |aux_basename| and |aux_dirname|; they return an allocated string.
+ Elsewhere the names map onto |basename| and |dirname| from |libgen.h|. NOTE(review): the
+ ownership of the result presumably differs between the two variants -- confirm that callers
+ take that into account.
+*/
+
+# ifdef _WIN32
+
+ extern char *aux_basename (const char *name);
+ extern char *aux_dirname (const char *name);
+
+# else
+
+ # define aux_basename basename
+ # define aux_dirname dirname
+
+# endif
+
+/*tex Returns non-zero when |filename| is a regular file that can be opened for reading. */
+
+extern int aux_is_readable (const char *filename);
+
+/*tex
+
+ We support unix and windows. In fact, we could stick to |/| only. When
+ scanning filenames entered in \TEX\ we can actually enforce a |/| as
+ convention.
+
+*/
+
+/*tex True when |ch| is a directory separator: on windows that is a slash or a backslash. */
+
+# ifndef IS_DIR_SEP
+ # ifdef _WIN32
+ # define IS_DIR_SEP(ch) ((ch) == '/' || (ch) == '\\')
+ # else
+ # define IS_DIR_SEP(ch) ((ch) == '/')
+ # endif
+# endif
+
+/*tex
+ Fallbacks for the access mode constants, only defined when |R_OK| is not known yet at this
+ point. Mind that there is no |X_OK| here.
+*/
+
+# ifndef R_OK
+ # define F_OK 0x0
+ # define W_OK 0x2
+ # define R_OK 0x4
+# endif
+
+/*tex
+ A fallback for when the system headers don't provide |S_ISREG|. We compare the file type
+ bits instead of testing a single bit, and the argument is parenthesized.
+*/
+
+# ifndef S_ISREG
+ # define S_ISREG(mode) (((mode) & _S_IFMT) == _S_IFREG)
+# endif
+
+# endif
diff --git a/source/luametatex/source/utilities/auxmemory.c b/source/luametatex/source/utilities/auxmemory.c
new file mode 100644
index 000000000..9ba02f946
--- /dev/null
+++ b/source/luametatex/source/utilities/auxmemory.c
@@ -0,0 +1,25 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include "auxmemory.h"
+
+/*tex
+    Allocates room for |size| plus |reserved| plus one records of |recordsize| bytes each. The
+    memory is not initialized.
+*/
+
+void *aux_allocate_array(int recordsize, int size, int reserved)
+{
+    size_t slots = (size_t) size + reserved + 1;
+    return lmt_memory_malloc(recordsize * slots);
+}
+
+/*tex
+    Resizes |p| so that it can hold |size| plus |reserved| plus one records of |recordsize|
+    bytes each and returns what the reallocator gives us.
+*/
+
+void *aux_reallocate_array(void *p, int recordsize, int size, int reserved)
+{
+    size_t slots = (size_t) size + reserved + 1;
+    return lmt_memory_realloc(p, recordsize * slots);
+}
+
+/*tex
+    Like |aux_allocate_array| but it goes via the |calloc| variant of the allocator: we ask for
+    |size| plus |reserved| plus one records of |recordsize| bytes.
+*/
+
+void *aux_allocate_clear_array(int recordsize, int size, int reserved)
+{
+    size_t slots = (size_t) size + reserved + 1;
+    return lmt_memory_calloc(slots, recordsize);
+}
+
+/*tex Releases an array that came from one of the allocators above. */
+
+void aux_deallocate_array(void *p)
+{
+    void *block = p;
+    lmt_memory_free(block);
+}
diff --git a/source/luametatex/source/utilities/auxmemory.h b/source/luametatex/source/utilities/auxmemory.h
new file mode 100644
index 000000000..4f040eafd
--- /dev/null
+++ b/source/luametatex/source/utilities/auxmemory.h
@@ -0,0 +1,54 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+/*
+ Some operating systems come with |allocarray| so we use more verbose names. We cannot define
+ them because on some bsd/apple platforms |CLANG| cannot resolve them.
+
+*/
+
+# ifndef LMT_UTILITIES_MEMORY_H
+# define LMT_UTILITIES_MEMORY_H
+
+/*tex
+ This is an experiment. The impact of using an alternative allocator on native Windows makes a
+ native version some 5% faster than a cross compiled one. Otherwise the cross compiled version
+ outperforms the native one a bit. In \TEX\ and \METAPOST\ we already do something like this
+ but there we don't reclaim memory.
+
+*/
+
+# include <stdlib.h>
+# include <string.h>
+
+/*tex
+ The |lmt_memory_*| names map onto mimalloc when |LUAMETATEX_USE_MIMALLOC| is defined and onto
+ the regular \CCODE\ library otherwise. Memory has to be released with the |free| of the same
+ family that allocated it.
+*/
+
+# if defined(LUAMETATEX_USE_MIMALLOC)
+ # include "libraries/mimalloc/include/mimalloc.h"
+ # define lmt_memory_malloc mi_malloc
+ # define lmt_memory_calloc mi_calloc
+ # define lmt_memory_realloc mi_realloc
+ # define lmt_memory_free mi_free
+ # define lmt_memory_strdup mi_strdup
+
+ // # include "libraries/mimalloc/include/mimalloc-override.h"
+
+# else
+ # define lmt_memory_malloc malloc
+ # define lmt_memory_calloc calloc
+ # define lmt_memory_realloc realloc
+ # define lmt_memory_free free
+ # define lmt_memory_strdup strdup
+# endif
+
+/*tex The |lmt_generic_*| names always map onto the regular \CCODE\ library. */
+
+# define lmt_generic_malloc malloc
+# define lmt_generic_calloc calloc
+# define lmt_generic_realloc realloc
+# define lmt_generic_free free
+# define lmt_generic_strdup strdup
+
+/*tex
+ Array helpers: the allocating ones ask for |size| plus |reserved| plus one records of
+ |recordsize| bytes. Only the |clear| variant goes via |calloc|.
+*/
+
+extern void *aux_allocate_array (int recordsize, int size, int reserved);
+extern void *aux_reallocate_array (void *p, int recordsize, int size, int reserved);
+extern void *aux_allocate_clear_array (int recordsize, int size, int reserved);
+extern void aux_deallocate_array (void *p);
+
+# endif
diff --git a/source/luametatex/source/utilities/auxsparsearray.c b/source/luametatex/source/utilities/auxsparsearray.c
new file mode 100644
index 000000000..d9fa5e453
--- /dev/null
+++ b/source/luametatex/source/utilities/auxsparsearray.c
@@ -0,0 +1,623 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+/*tex
+
+ Here we implement sparse arrays with an embedded save stack. These functions are called very
+ often but a few days of experimenting proved that there is not much to gain (if at all) from
+ using macros or optimizations like preallocating and fast access to the first 128 entries. In
+ practice the overhead is mostly in accessing memory and not in (probably inlined) calls. So, we
+ should accept fate and wait for faster memory. It's the price we pay for being unicode on the
+ one hand and sparse on the other.
+
+*/
+
+# include "luametatex.h"
+
+/*tex
+    The bookkeeping for the sparse arrays. Only the |allocated| field is updated in this file
+    (by the |sa_*_array| allocators); most other fields start out as unset.
+*/
+
+sparse_state_info lmt_sparse_state = {
+    .sparse_data = {
+        .minimum   = memory_data_unset,
+        .maximum   = memory_data_unset,
+        .size      = memory_data_unset,
+        .step      = memory_data_unset,
+        .allocated = 0,
+        .itemsize  = 1,
+        .top       = memory_data_unset,
+        .ptr       = memory_data_unset,
+        .initial   = memory_data_unset,
+        .offset    = 0,
+    },
+};
+
+/*tex
+    Allocates |size| records of |recordsize| bytes (not initialized) and adds the number of
+    bytes to the sparse array statistics.
+*/
+
+void *sa_malloc_array(int recordsize, int size)
+{
+    int bytes = recordsize * size;
+    void *block = lmt_memory_malloc((size_t) bytes);
+    lmt_sparse_state.sparse_data.allocated += bytes;
+    return block;
+}
+
+/*tex
+    Grows an array of |size| records with |step| more records and adds the difference in bytes
+    to the sparse array statistics.
+*/
+
+void *sa_realloc_array(void *p, int recordsize, int size, int step)
+{
+    int before = recordsize * size;
+    int after = recordsize * (size + step);
+    void *block = lmt_memory_realloc(p, (size_t) after);
+    lmt_sparse_state.sparse_data.allocated += (after - before);
+    return block;
+}
+
+/*tex
+    Allocates |size| records of |recordsize| bytes via the |calloc| variant of the allocator
+    and adds the number of bytes to the sparse array statistics.
+*/
+
+void *sa_calloc_array(int recordsize, int size)
+{
+    int bytes = recordsize * size;
+    void *block = lmt_memory_calloc((size_t) size, recordsize);
+    lmt_sparse_state.sparse_data.allocated += bytes;
+    return block;
+}
+
+/*tex Sets the first |size| records of |recordsize| bytes, starting at |head|, to zero. */
+
+void sa_wipe_array(void *head, int recordsize, int size)
+{
+    size_t bytes = recordsize * ((size_t) size);
+    memset(head, 0, bytes);
+}
+
+/*tex
+    Releases |p| and always returns |NULL| so that the caller can reset its pointer in the same
+    statement.
+*/
+
+void *sa_free_array(void *p)
+{
+    void *nothing = NULL;
+    lmt_memory_free(p);
+    return nothing;
+}
+
+/*tex
+
+ Once we have two variants allocated we can dump and undump a |LOWPART| array in one go. But
+ not yet. Currently the waste of one extra dummy int is cheaper than multiple functions.
+
+*/
+
+/*tex
+    Pushes the old values |v1| and |v2| of entry |n| on the save stack of |a|, tagged with level
+    |gl|. The stack is allocated on first use and grows with |sa_stack_step| entries when the
+    next slot doesn't fit. Slot zero is never used: we increment first. The capacity check also
+    happens right after the first allocation, so a very small initial size can't make us write
+    beyond the array.
+*/
+
+static void sa_aux_store_stack(sa_tree a, int n, sa_tree_item v1, sa_tree_item v2, int gl)
+{
+    sa_stack_item st;
+    st.code = n;
+    st.value_1 = v1;
+    st.value_2 = v2;
+    st.level = gl;
+    if (! a->stack) {
+        a->stack = sa_malloc_array(sizeof(sa_stack_item), a->sa_stack_size);
+    }
+    if (((a->sa_stack_ptr) + 1) >= a->sa_stack_size) {
+        a->stack = sa_realloc_array(a->stack, sizeof(sa_stack_item), a->sa_stack_size, a->sa_stack_step);
+        a->sa_stack_size += a->sa_stack_step;
+    }
+    (a->sa_stack_ptr)++;
+    a->stack[a->sa_stack_ptr] = st;
+}
+
+/*tex
+    Marks all saved values of entry |n| as to be skipped by negating their (positive) level.
+    Slot zero of the stack is not looked at.
+*/
+
+static void sa_aux_skip_in_stack(sa_tree a, int n)
+{
+    if (! a->stack) {
+        return;
+    }
+    for (int slot = a->sa_stack_ptr; slot > 0; slot--) {
+        sa_stack_item *saved = &a->stack[slot];
+        if (saved->code == n && saved->level > 0) {
+            saved->level = -(saved->level);
+        }
+    }
+}
+
+/*tex
+    Fetches the one byte value of entry |n|: four values are packed in one tree item. When the
+    branch for |n| doesn't exist the matching byte of the default item is returned.
+*/
+
+int sa_get_item_1(const sa_tree head, int n)
+{
+    sa_tree_item ***tree = head->tree;
+    if (tree) {
+        sa_tree_item **high = tree[LMT_SA_H_PART(n)];
+        if (high) {
+            sa_tree_item *low = high[LMT_SA_M_PART(n)];
+            if (low) {
+                return low[LMT_SA_L_PART(n)/4].uchar_value[n%4];
+            }
+        }
+    }
+    return (int) head->dflt.uchar_value[n%4];
+}
+
+/*tex
+    Fetches the two byte value of entry |n|: two values are packed in one tree item. When the
+    branch for |n| doesn't exist the matching half of the default item is returned.
+*/
+
+int sa_get_item_2(const sa_tree head, int n)
+{
+    sa_tree_item ***tree = head->tree;
+    if (tree) {
+        sa_tree_item **high = tree[LMT_SA_H_PART(n)];
+        if (high) {
+            sa_tree_item *low = high[LMT_SA_M_PART(n)];
+            if (low) {
+                return low[LMT_SA_L_PART(n)/2].ushort_value[n%2];
+            }
+        }
+    }
+    return (int) head->dflt.ushort_value[n%2];
+}
+
+/*tex
+    Fetches the item of entry |n|. When the branch for |n| doesn't exist the default item is
+    returned.
+*/
+
+sa_tree_item sa_get_item_4(const sa_tree head, int n)
+{
+    sa_tree_item ***tree = head->tree;
+    if (tree) {
+        sa_tree_item **high = tree[LMT_SA_H_PART(n)];
+        if (high) {
+            sa_tree_item *low = high[LMT_SA_M_PART(n)];
+            if (low) {
+                return low[LMT_SA_L_PART(n)];
+            }
+        }
+    }
+    return head->dflt;
+}
+
+/*tex
+    Fetches the pair of items of entry |n|: the first one is returned and the second one ends
+    up in |v2|. When the branch for |n| doesn't exist both become the default item.
+*/
+
+sa_tree_item sa_get_item_8(const sa_tree head, int n, sa_tree_item *v2)
+{
+    sa_tree_item ***tree = head->tree;
+    if (tree != NULL) {
+        sa_tree_item **high = tree[LMT_SA_H_PART(n)];
+        if (high) {
+            sa_tree_item *low = high[LMT_SA_M_PART(n)];
+            if (low) {
+                int first = 2*LMT_SA_L_PART(n);
+                *v2 = low[first+1];
+                return low[first];
+            }
+        }
+    }
+    *v2 = head->dflt;
+    return head->dflt;
+}
+
+/*tex
+    Stores the one byte value |v| for entry |n|. Missing branches are created; a new lowest
+    branch is filled with the default item. With |gl| at most one the saved values of |n| are
+    marked as skipped, otherwise the old (complete) item is pushed on the save stack first.
+*/
+
+void sa_set_item_1(sa_tree head, int n, int v, int gl)
+{
+    const int h = LMT_SA_H_PART(n);
+    const int m = LMT_SA_M_PART(n);
+    const int slot = LMT_SA_L_PART(n)/4;
+    sa_tree_item *low;
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    low = head->tree[h][m];
+    if (! low) {
+        low = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/4);
+        head->tree[h][m] = low;
+        for (int i = 0; i < LMT_SA_LOWPART/4; i++) {
+            low[i] = head->dflt;
+        }
+    }
+    if (gl > 1) {
+        sa_aux_store_stack(head, n, low[slot], (sa_tree_item) { 0 }, gl);
+    } else {
+        sa_aux_skip_in_stack(head, n);
+    }
+    low[slot].uchar_value[n%4] = (unsigned char) v;
+}
+
+/*tex
+    Stores the two byte value |v| for entry |n|. Missing branches are created; a new lowest
+    branch is filled with the default item. With |gl| at most one the saved values of |n| are
+    marked as skipped, otherwise the old (complete) item is pushed on the save stack first.
+*/
+
+void sa_set_item_2(sa_tree head, int n, int v, int gl)
+{
+    const int h = LMT_SA_H_PART(n);
+    const int m = LMT_SA_M_PART(n);
+    const int slot = LMT_SA_L_PART(n)/2;
+    sa_tree_item *low;
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    low = head->tree[h][m];
+    if (! low) {
+        low = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/2);
+        head->tree[h][m] = low;
+        for (int i = 0; i < LMT_SA_LOWPART/2; i++) {
+            low[i] = head->dflt;
+        }
+    }
+    if (gl > 1) {
+        sa_aux_store_stack(head, n, low[slot], (sa_tree_item) { 0 }, gl);
+    } else {
+        sa_aux_skip_in_stack(head, n);
+    }
+    low[slot].ushort_value[n%2] = (unsigned short) v;
+}
+
+/*tex
+    Stores item |v| for entry |n|. Missing branches are created; a new lowest branch is filled
+    with the default item. With |gl| at most one the saved values of |n| are marked as skipped,
+    otherwise the old item is pushed on the save stack first.
+*/
+
+void sa_set_item_4(sa_tree head, int n, sa_tree_item v, int gl)
+{
+    const int h = LMT_SA_H_PART(n);
+    const int m = LMT_SA_M_PART(n);
+    const int slot = LMT_SA_L_PART(n);
+    sa_tree_item *low;
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    low = head->tree[h][m];
+    if (! low) {
+        low = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART);
+        head->tree[h][m] = low;
+        for (int i = 0; i < LMT_SA_LOWPART; i++) {
+            low[i] = head->dflt;
+        }
+    }
+    if (gl > 1) {
+        sa_aux_store_stack(head, n, low[slot], (sa_tree_item) { 0 }, gl);
+    } else {
+        sa_aux_skip_in_stack(head, n);
+    }
+    low[slot] = v;
+}
+
+/*tex
+    Stores the pair |v1| and |v2| for entry |n|; here the lowest branch has two items per
+    entry. Missing branches are created; a new lowest branch is filled with the default item.
+    With |gl| at most one the saved values of |n| are marked as skipped, otherwise the old pair
+    is pushed on the save stack first.
+*/
+
+void sa_set_item_8(sa_tree head, int n, sa_tree_item v1, sa_tree_item v2, int gl)
+{
+    const int h = LMT_SA_H_PART(n);
+    const int m = LMT_SA_M_PART(n);
+    const int first = 2*LMT_SA_L_PART(n);
+    sa_tree_item *low;
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    low = head->tree[h][m];
+    if (! low) {
+        low = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), 2 * LMT_SA_LOWPART);
+        head->tree[h][m] = low;
+        for (int i = 0; i < 2 * LMT_SA_LOWPART; i++) {
+            low[i] = head->dflt;
+        }
+    }
+    if (gl > 1) {
+        sa_aux_store_stack(head, n, low[first], low[first+1], gl);
+    } else {
+        sa_aux_skip_in_stack(head, n);
+    }
+    low[first] = v1;
+    low[first+1] = v2;
+}
+
+/*tex
+    Stores the integer |v| for entry |n| in a tree with one, two or four byte values, depending
+    on |head->bytes|. One and two byte values are clipped to the range that fits. Missing
+    branches are created; a new lowest branch is filled with the default item. With |gl| at
+    most one the saved values of |n| are marked as skipped, otherwise the old (complete) item
+    is pushed on the save stack first. Nothing is stored for other values of |head->bytes|.
+*/
+
+void sa_set_item_n(sa_tree head, int n, int v, int gl)
+{
+    int h = LMT_SA_H_PART(n);
+    int m = LMT_SA_M_PART(n);
+    int l = LMT_SA_L_PART(n);
+    /*tex The number of values that go in one item. */
+    int d = head->bytes == 1 ? 4 : (head->bytes == 2 ? 2 : 1);
+    if (! head->tree) {
+        head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    }
+    if (! head->tree[h]) {
+        head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    }
+    if (! head->tree[h][m]) {
+        head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/d);
+        for (int i = 0; i < LMT_SA_LOWPART/d; i++) {
+            head->tree[h][m][i] = head->dflt;
+        }
+    }
+    if (gl <= 1) {
+        sa_aux_skip_in_stack(head, n);
+    } else {
+        sa_aux_store_stack(head, n, head->tree[h][m][l/d], (sa_tree_item) { 0 }, gl);
+    }
+    switch (head->bytes) {
+        case 1:
+            {
+                head->tree[h][m][l/4].uchar_value[n%4] = (unsigned char) (v < 0 ? 0 : (v > 0xFF ? 0xFF : v));
+                break;
+            }
+        case 2:
+            {
+                /*tex The cast has to match the field, otherwise we only keep the low byte. */
+                head->tree[h][m][l/2].ushort_value[n%2] = (unsigned short) (v < 0 ? 0 : (v > 0xFFFF ? 0xFFFF : v));
+                break;
+            }
+        case 4:
+            {
+                head->tree[h][m][l].int_value = v;
+                break;
+            }
+    }
+}
+
+/*tex
+    Fetches the integer value of entry |n| from a tree with one, two or four byte values,
+    depending on |head->bytes|. When the branch for |n| doesn't exist (or when |head->bytes| is
+    something else) we fall back on the default item, and on zero when the size is not known.
+*/
+
+int sa_get_item_n(const sa_tree head, int n)
+{
+    sa_tree_item *low = NULL;
+    if (head->tree) {
+        sa_tree_item **high = head->tree[LMT_SA_H_PART(n)];
+        if (high) {
+            low = high[LMT_SA_M_PART(n)];
+        }
+    }
+    if (low) {
+        switch (head->bytes) {
+            case 1 : return (int) low[LMT_SA_L_PART(n)/4].uchar_value[n%4];
+            case 2 : return (int) low[LMT_SA_L_PART(n)/2].ushort_value[n%2];
+            case 4 : return (int) low[LMT_SA_L_PART(n)  ].int_value;
+        }
+    }
+    switch (head->bytes) {
+        case 1 : return (int) head->dflt.uchar_value[n%4];
+        case 2 : return (int) head->dflt.ushort_value[n%2];
+        case 4 : return (int) head->dflt.int_value;
+        default: return 0;
+    }
+}
+
+/*
+void rawset_sa_item_4(sa_tree head, int n, sa_tree_item v)
+{
+ head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)][LMT_SA_L_PART(n)] = v;
+}
+*/
+
+/*tex
+    Frees the save stack of |a| and resets the pointer and size so that a next store starts
+    with a fresh stack of |sa_stack_step| entries. A |NULL| tree is ignored.
+*/
+
+void sa_clear_stack(sa_tree a)
+{
+    if (! a) {
+        return;
+    }
+    a->stack = sa_free_array(a->stack);
+    a->sa_stack_size = a->sa_stack_step;
+    a->sa_stack_ptr = 0;
+}
+
+/*tex
+    Frees all branches of |a|, its save stack and finally the head itself. A |NULL| tree is
+    ignored.
+*/
+
+void sa_destroy_tree(sa_tree a)
+{
+    if (! a) {
+        return;
+    }
+    if (a->tree) {
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            sa_tree_item **high = a->tree[h];
+            if (! high) {
+                continue;
+            }
+            for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                high[m] = sa_free_array(high[m]);
+            }
+            a->tree[h] = sa_free_array(high);
+        }
+        a->tree = sa_free_array(a->tree);
+    }
+    a->stack = sa_free_array(a->stack);
+    sa_free_array(a);
+}
+
+/*tex
+    Makes a copy of tree |b|: the settings and all existing branches are duplicated. The copy
+    starts with an empty save stack; the stack of |b| is not copied.
+*/
+
+sa_tree sa_copy_tree(sa_tree b)
+{
+    sa_tree a = (sa_tree) sa_malloc_array(sizeof(sa_tree_head), 1);
+    a->sa_stack_step = b->sa_stack_step;
+    a->sa_stack_size = b->sa_stack_size;
+    a->bytes = b->bytes;
+    a->dflt = b->dflt;
+    a->stack = NULL;
+    a->sa_stack_ptr = 0;
+    a->tree = NULL;
+    if (b->tree) {
+        /*tex The number of items in a lowest branch depends on the value size. */
+        int slide = LMT_SA_LOWPART;
+        switch (b->bytes) {
+            case 1: slide = LMT_SA_LOWPART/4; break;
+            case 2: slide = LMT_SA_LOWPART/2; break;
+            case 4: slide = LMT_SA_LOWPART  ; break;
+            case 8: slide = 2*LMT_SA_LOWPART; break;
+        }
+        a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(void *), LMT_SA_HIGHPART);
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            sa_tree_item **source = b->tree[h];
+            if (! source) {
+                continue;
+            }
+            a->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(void *), LMT_SA_MIDPART);
+            for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                if (source[m]) {
+                    a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), slide);
+                    memcpy(a->tree[h][m], source[m], sizeof(sa_tree_item) * slide);
+                }
+            }
+        }
+    }
+    return a;
+}
+
+/*tex
+
+ The main reason to fill in the lowest entry branches here immediately is that most of the sparse
+ arrays have a bias toward \ASCII\ values. Allocating those here immediately improves the chance
+ of the structure |a->tree[0][0][x]| being close together in actual memory locations. We could
+ save less for type 0 stacks.
+
+*/
+
+/*tex
+    Creates a tree for values of |bytes| bytes with |dflt| as default item. The save stack is
+    not allocated yet; it will start with |size| entries and grow in steps of |size|. The top
+    level and the first middle level are allocated right away.
+*/
+
+sa_tree sa_new_tree(int size, int bytes, sa_tree_item dflt)
+{
+    sa_tree_head *fresh = (sa_tree_head *) lmt_memory_malloc(sizeof(sa_tree_head));
+    fresh->bytes = bytes;
+    fresh->dflt = dflt;
+    fresh->stack = NULL;
+    fresh->sa_stack_ptr = 0;
+    fresh->sa_stack_size = size;
+    fresh->sa_stack_step = size;
+    fresh->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART);
+    fresh->tree[0] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART);
+    return (sa_tree) fresh;
+}
+
+/*tex
+    Pops all save stack entries whose level (in absolute terms) is at least |gl|. Entries with
+    a positive level are written back in the tree, entries with a negative level were marked as
+    skipped and are just dropped.
+*/
+
+void sa_restore_stack(sa_tree head, int gl)
+{
+    if (! head->stack) {
+        return;
+    }
+    for (; head->sa_stack_ptr > 0 && abs(head->stack[head->sa_stack_ptr].level) >= gl; (head->sa_stack_ptr)--) {
+        sa_stack_item saved = head->stack[head->sa_stack_ptr];
+        if (saved.level > 0) {
+            int code = saved.code;
+            int h = LMT_SA_H_PART(code);
+            int m = LMT_SA_M_PART(code);
+            int l = LMT_SA_L_PART(code);
+            switch (head->bytes) {
+                case 1:
+                    /*tex Only the byte that belongs to |code| is restored. */
+                    head->tree[h][m][l/4].uchar_value[code % 4] = saved.value_1.uchar_value[code % 4];
+                    break;
+                case 2:
+                    /*tex Only the half that belongs to |code| is restored. */
+                    head->tree[h][m][l/2].ushort_value[code % 2] = saved.value_1.ushort_value[code % 2];
+                    break;
+                case 4:
+                    head->tree[h][m][l] = saved.value_1;
+                    break;
+                case 8:
+                    head->tree[h][m][2*l] = saved.value_1;
+                    head->tree[h][m][2*l+1] = saved.value_2;
+                    break;
+            }
+        }
+    }
+}
+
+/*tex
+    Writes tree |a| to the dump stream |f|: first the stack step and the default value, then a
+    marker that tells if there is a tree at all, then the value size and per branch a trigger
+    or mode. The save stack is not dumped.
+
+    For the lowest branches we distinguish three modes:
+
+    1 : values are kind of unique, the items follow
+    2 : for all values : value == self (only checked for four byte values)
+    3 : for all values : value == default (not checked for eight byte values)
+
+    In mode 2 and 3 only the mode is dumped. The index of an entry is |h * 128 * 128 + m * 128
+    + l|, so that is what a self value has to match; it is also what |sa_undump_tree| rebuilds.
+*/
+
+void sa_dump_tree(dumpstream f, sa_tree a)
+{
+    dump_int(f, a->sa_stack_step);
+    dump_int(f, a->dflt.int_value);
+    if (a->tree) {
+        int bytes = a->bytes;
+        /*tex The number of items in a lowest branch depends on the value size. */
+        int slide = LMT_SA_LOWPART;
+        switch (bytes) {
+            case 1: slide = LMT_SA_LOWPART/4; break;
+            case 2: slide = LMT_SA_LOWPART/2; break;
+            case 4: slide = LMT_SA_LOWPART  ; break;
+            case 8: slide = 2*LMT_SA_LOWPART; break;
+        }
+        /*tex A marker: */
+        dump_via_int(f, 1);
+        dump_int(f, bytes);
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            if (a->tree[h]) {
+                dump_via_int(f, 1);
+                for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                    if (a->tree[h][m]) {
+                        /*tex
+                            It happens a lot that the value is the same as the index, for
+                            instance with case mappings. Using mode 3 for the case where all
+                            values are the default value saves in \CONTEXT\ some 128 * 5 dumps
+                            which is not worth the trouble but it is neat anyway. Actually, we
+                            could decide not to save at all in the third mode because unset
+                            equals default.
+                        */
+                        int mode = 1;
+                        if (bytes != 8) {
+                            /*tex Check for default values. */
+                            mode = 3;
+                            for (int l = 0; l < slide; l++) {
+                                if (a->tree[h][m][l].uint_value != a->dflt.uint_value) {
+                                    mode = 1;
+                                    break;
+                                }
+                            }
+                        }
+                        if (mode == 1 && bytes == 4) {
+                            /*tex Check for identity values, starting at the first index of this branch. */
+                            unsigned int hm = h * LMT_SA_MIDPART * LMT_SA_LOWPART + m * LMT_SA_LOWPART;
+                            mode = 2;
+                            for (int l = 0; l < LMT_SA_LOWPART; l++) {
+                                if (a->tree[h][m][l].uint_value == hm) {
+                                    hm++;
+                                } else {
+                                    mode = 1;
+                                    break;
+                                }
+                            }
+                        }
+                        dump_int(f, mode);
+                        if (mode == 1) {
+                            /*tex
+                                We have unique values. By avoiding this branch we save some 85 Kb
+                                on the \CONTEXT\ format. We could actually save this property in
+                                the tree but there is not that much to gain.
+                            */
+                            dump_items(f, &a->tree[h][m][0], sizeof(sa_tree_item), slide);
+                        } else {
+                            /*tex We have a self value or defaults. */
+                        }
+                    } else {
+                        dump_via_int(f, 0);
+                    }
+                }
+            } else {
+                dump_via_int(f, 0);
+            }
+        }
+    } else {
+        /*tex A marker: */
+        dump_via_int(f, 0);
+    }
+}
+
+/*tex
+    Reads back a tree that was written by |sa_dump_tree| and returns it. The save stack is
+    allocated (cleared) with |sa_stack_step| entries. Branches that were dumped as default
+    values or not dumped at all stay unset, which boils down to the same. When the dump has no
+    tree the value size is not part of the dump either; in that case |bytes| is set to zero so
+    that we at least don't end up with a random value.
+*/
+
+sa_tree sa_undump_tree(dumpstream f)
+{
+    int x;
+    sa_tree a = (sa_tree) sa_malloc_array(sizeof(sa_tree_head), 1);
+    undump_int(f,a->sa_stack_step);
+    undump_int(f,a->dflt.int_value);
+    a->sa_stack_size = a->sa_stack_step;
+    a->stack = sa_calloc_array(sizeof(sa_stack_item), a->sa_stack_size);
+    a->sa_stack_ptr = 0;
+    a->tree = NULL;
+    a->bytes = 0;
+    /*tex The marker: */
+    undump_int(f, x);
+    if (x != 0) {
+        int bytes, mode;
+        a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(void *), LMT_SA_HIGHPART);
+        undump_int(f, bytes);
+        a->bytes = bytes;
+        for (int h = 0; h < LMT_SA_HIGHPART; h++) {
+            undump_int(f, mode); /* more a trigger */
+            if (mode > 0) {
+                a->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(void *), LMT_SA_MIDPART);
+                for (int m = 0; m < LMT_SA_MIDPART; m++) {
+                    undump_int(f, mode);
+                    switch (mode) {
+                        case 1:
+                            /*tex
+                                We have unique values.
+                            */
+                            {
+                                int slide = LMT_SA_LOWPART;
+                                switch (bytes) {
+                                    case 1: slide = LMT_SA_LOWPART/4; break;
+                                    case 2: slide = LMT_SA_LOWPART/2; break;
+                                    case 4: slide = LMT_SA_LOWPART  ; break;
+                                    case 8: slide = 2*LMT_SA_LOWPART; break;
+                                }
+                                a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), slide);
+                                undump_items(f, &a->tree[h][m][0], sizeof(sa_tree_item), slide);
+                            }
+                            break;
+                        case 2:
+                            /*tex
+                                We have a self value. We only have this when we have integers. Other
+                                cases are math anyway, so not much to gain.
+                            */
+                            {
+                                if (bytes == 4) {
+                                    /*tex The first index of this branch. */
+                                    int hm = h * LMT_SA_MIDPART * LMT_SA_LOWPART + m * LMT_SA_LOWPART;
+                                    a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART);
+                                    for (int l = 0; l < LMT_SA_LOWPART; l++) {
+                                        a->tree[h][m][l].int_value = hm;
+                                        hm++;
+                                    }
+                                } else {
+                                    printf("\nfatal format error, mode %i, bytes %i\n", mode, bytes);
+                                }
+                            }
+                            break;
+                        case 3:
+                            /*tex
+                                We have all default values, so there is no need to set them: an
+                                unset branch gives the default too.
+                            */
+                            break;
+                        default:
+                            /*tex
+                                We have no values set.
+                            */
+                            break;
+                    }
+                }
+            }
+        }
+    }
+    return a;
+}
diff --git a/source/luametatex/source/utilities/auxsparsearray.h b/source/luametatex/source/utilities/auxsparsearray.h
new file mode 100644
index 000000000..0a4ce20f1
--- /dev/null
+++ b/source/luametatex/source/utilities/auxsparsearray.h
@@ -0,0 +1,212 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# ifndef LMT_UTILITIES_SPARSEARRAY_H
+# define LMT_UTILITIES_SPARSEARRAY_H
+
+/*tex
+
+ This file originally was called |managed-sa| but because it is kind of a library and also used in
+ \LUATEX\ it's better to use a different name. In this variant dumping is more sparse (resulting
+ in somewhat smaller format files). This might be backported but only after testing it here for a
+ long time. Of course the principles are the same, it's just extended.
+
+*/
+
+/*tex
+
+ The next two sets of three had better match up exactly, but using bare numbers is easier on the
+ \CCODE\ compiler. Here are some format sizes (for ConTeXt) with different values:
+
+ 64 : 17562942
+ 128 : 17548150 <= best value
+ 256 : 17681398
+
+*/
+
+/*tex The number of slots at each of the three levels of the sparse tree. */
+
+# define LMT_SA_HIGHPART 128
+# define LMT_SA_MIDPART 128
+# define LMT_SA_LOWPART 128
+
+/*tex
+    Split an index into three seven bit parts: bits 14..20 select the high slot, bits 7..13 the
+    middle slot and bits 0..6 the low slot. The mask 127 and the shifts 7 and 14 are the bare
+    number counterparts of the value 128 used above, so these have to be changed together.
+*/
+
+# define LMT_SA_H_PART(a) (((a)>>14)&127)
+# define LMT_SA_M_PART(a) (((a)>> 7)&127)
+# define LMT_SA_L_PART(a) ( (a) &127)
+
+/*tex
+
+ In the early days of \LUATEX\ we had just simple items, all 32 bit values. Then we put the
+ delcodes in trees too which saved memory and format size but it introduced 32 bit slack in all
+ the other code arrays. We then also had to dump selectively, but it was no big deal. Eventually,
+ once it became clear that the concepts would not change a variant was made for \LUAMETATEX: we
+ just use a two times larger lower array when we have delimiters. This saves some memory. The
+ price we pay is that a stack entry now has two values but that is not really an issue.
+
+ By packing the math code values we lose the option to store an active state but that's no big
+ deal.
+
+ todo: consider simple char array for catcodes.
+
+ The code here is somewhat messy because we generalized it a bit. Maybe I'll redo it some day.
+
+ */
+
+/*tex
+    The module level state of the sparse arrays: a single |memory_data| record. NOTE(review):
+    this presumably carries the allocation bookkeeping shared by all sparse arrays; confirm
+    against the definition of |memory_data| and the allocators in the source file.
+*/
+
+typedef struct sparse_state_info {
+ memory_data sparse_data;
+} sparse_state_info;
+
+/*tex The one instance; it is only declared here. */
+
+extern sparse_state_info lmt_sparse_state;
+
+/*
+typedef struct sa_mathblob {
+ unsigned int character_value:21;
+ unsigned int class_value:3;
+ unsigned int family_value:8;
+} sa_mathblob;
+*/
+
+/*tex
+    A math code packed in bit fields: class, family and character, in that order. The widths are
+    the |math_*_bits| constants, which are not defined in this header.
+*/
+
+typedef struct sa_mathblob {
+ unsigned int class_value:math_class_bits;
+ unsigned int family_value:math_family_bits;
+ unsigned int character_value:math_character_bits;
+} sa_mathblob;
+
+/*tex
+    A math specification record: two short fields and one integer. NOTE(review): the meaning of
+    |properties|, |group| and |index| cannot be derived from this header; see the math code.
+*/
+
+typedef struct sa_mathspec {
+ unsigned short properties;
+ unsigned short group;
+ unsigned int index;
+} sa_mathspec;
+
+/*tex
+    An overlay that gives access to a packed math code either field by field (|sa_value|) or as
+    one unsigned number (|ui_value|). The union is anonymous so both members are reached directly.
+*/
+
+typedef struct packed_math_character {
+ union {
+ sa_mathblob sa_value;
+ unsigned ui_value;
+ };
+} packed_math_character;
+
+/*tex
+    One slot in the lowest level of the tree. The same storage is seen in different ways that
+    depend on the kind of array: a plain (unsigned) integer, a math code, a math specification,
+    two short items (see |sa_return_item_2|) or four byte items (see |sa_return_item_1|).
+*/
+
+typedef union sa_tree_item {
+ unsigned int uint_value;
+ int int_value;
+ sa_mathblob math_code_value;
+ sa_mathspec math_spec_value;
+ unsigned short ushort_value[2];
+ unsigned char uchar_value[4];
+} sa_tree_item;
+
+/*tex
+    An entry on the stack of a sparse array. It has room for two slot values. NOTE(review):
+    presumably |code| is the index that was changed, |level| the level at which that happened
+    and |value_2| is only relevant for the eight byte arrays; confirm in the source file.
+*/
+
+typedef struct sa_stack_item {
+ int code;
+ int level;
+ sa_tree_item value_1;
+ sa_tree_item value_2;
+} sa_stack_item;
+
+/*tex
+    The head of a sparse array: the three level |tree| with the values, the |stack| with its
+    bookkeeping and the default value that is returned for slots that are not allocated.
+*/
+
+typedef struct sa_tree_head {
+ int sa_stack_size; /*tex allocated stack size (undumping starts out with one step) */
+ int sa_stack_step; /*tex increment stack step */
+ int sa_stack_ptr; /*tex current stack point */
+ sa_tree_item dflt; /*tex default item value */
+ sa_tree_item ***tree; /*tex item tree head, |NULL| as long as nothing is set */
+ sa_stack_item *stack; /*tex stack tree head */
+ int bytes; /*tex the width of one item in bytes: 1, 2, 4 or 8 */
+ int padding; /*tex filler, not used in this header */
+} sa_tree_head;
+
+/*tex Trees are passed around as pointers to their head. */
+
+typedef sa_tree_head *sa_tree;
+
+/*tex
+    The public interface. The numeric suffix of the getters and setters is the item width in
+    bytes. The eight byte getter takes an extra pointer |v2|, apparently for the second value of
+    the pair. NOTE(review): |gl| is presumably the group level that decides if a change goes on
+    the stack; confirm in the source file. The raw setters that are commented out below are
+    provided as inline functions further down in this header.
+*/
+
+extern int sa_get_item_1 (const sa_tree head, int n);
+extern int sa_get_item_2 (const sa_tree head, int n);
+extern sa_tree_item sa_get_item_4 (const sa_tree head, int n);
+extern sa_tree_item sa_get_item_8 (const sa_tree head, int n, sa_tree_item * v2);
+extern void sa_set_item_1 (sa_tree head, int n, int v, int gl);
+extern void sa_set_item_2 (sa_tree head, int n, int v, int gl);
+extern void sa_set_item_4 (sa_tree head, int n, sa_tree_item v, int gl);
+extern void sa_set_item_8 (sa_tree head, int n, sa_tree_item v1, sa_tree_item v2, int gl);
+/* void sa_rawset_item_1 (sa_tree head, int n, sa_tree_item v); */
+/* void sa_rawset_item_2 (sa_tree head, int n, sa_tree_item v); */
+/* void sa_rawset_item_4 (sa_tree head, int n, sa_tree_item v); */
+/* void sa_rawset_item_8 (sa_tree head, int n, sa_tree_item v1, sa_tree_item v2); */
+extern sa_tree sa_new_tree (int size, int bytes, sa_tree_item dflt);
+extern sa_tree sa_copy_tree (sa_tree head);
+extern void sa_destroy_tree (sa_tree head);
+extern void sa_dump_tree (dumpstream f, sa_tree a);
+extern sa_tree sa_undump_tree (dumpstream f);
+extern void sa_restore_stack (sa_tree a, int gl);
+extern void sa_clear_stack (sa_tree a);
+
+/*tex Width independent variants; NOTE(review): presumably these dispatch on |head->bytes|. */
+
+extern void sa_set_item_n (const sa_tree head, int n, int v, int gl);
+extern int sa_get_item_n (const sa_tree head, int n);
+
+/*tex
+    Fetch the one byte item with index |n|. Four such items share a slot, so the slot index is
+    the low part divided by four and |n%4| selects the byte. When one of the three levels is
+    not allocated we return (the first byte of) the default value.
+*/
+
+inline static halfword sa_return_item_1(sa_tree head, halfword n)
+{
+    const int h = LMT_SA_H_PART(n);
+    const int m = LMT_SA_M_PART(n);
+    if (head->tree && head->tree[h] && head->tree[h][m]) {
+        return (halfword) head->tree[h][m][LMT_SA_L_PART(n)/4].uchar_value[n%4];
+    }
+    return (halfword) head->dflt.uchar_value[0];
+}
+
+/*tex
+    Fetch the two byte item with index |n|. Two such items share a slot, so the slot index is
+    the low part divided by two and |n%2| selects the half. When one of the three levels is not
+    allocated we return (the first half of) the default value.
+*/
+
+inline static halfword sa_return_item_2(sa_tree head, halfword n)
+{
+    const int h = LMT_SA_H_PART(n);
+    const int m = LMT_SA_M_PART(n);
+    if (head->tree && head->tree[h] && head->tree[h][m]) {
+        return (halfword) head->tree[h][m][LMT_SA_L_PART(n)/2].ushort_value[n%2];
+    }
+    return (halfword) head->dflt.ushort_value[0];
+}
+
+/*tex
+    Fetch the integer item with index |n|: here every item has its own slot. When one of the
+    three levels is not allocated we return the default value.
+*/
+
+inline static halfword sa_return_item_4(sa_tree head, halfword n)
+{
+    const int h = LMT_SA_H_PART(n);
+    const int m = LMT_SA_M_PART(n);
+    if (head->tree && head->tree[h] && head->tree[h][m]) {
+        return (halfword) head->tree[h][m][LMT_SA_L_PART(n)].int_value;
+    }
+    return (halfword) head->dflt.int_value;
+}
+
+/*tex
+    Store a one byte item without any checking or stack handling: all three levels that |n|
+    points into must have been allocated already.
+*/
+
+inline static void sa_rawset_item_1(sa_tree head, halfword n, unsigned char v)
+{
+    sa_tree_item *low = head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)];
+    low[LMT_SA_L_PART(n)/4].uchar_value[n%4] = v;
+}
+
+/*tex
+    Store a two byte item without any checking or stack handling: all three levels that |n|
+    points into must have been allocated already.
+*/
+
+inline static void sa_rawset_item_2(sa_tree head, halfword n, unsigned short v)
+{
+    sa_tree_item *low = head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)];
+    low[LMT_SA_L_PART(n)/2].ushort_value[n%2] = v;
+}
+
+/*tex
+    Store a complete slot without any checking or stack handling: all three levels that |n|
+    points into must have been allocated already.
+*/
+
+inline static void sa_rawset_item_4(sa_tree head, halfword n, sa_tree_item v)
+{
+    sa_tree_item *low = head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)];
+    low[LMT_SA_L_PART(n)] = v;
+}
+
+/*tex
+    Store a pair of slots without any checking or stack handling. In this variant an item
+    takes two adjacent slots, so the pair starts at twice the low part of |n|. All three levels
+    that |n| points into must have been allocated already.
+*/
+
+inline static void sa_rawset_item_8(sa_tree head, halfword n, sa_tree_item v1, sa_tree_item v2)
+{
+    sa_tree_item *pair = &head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)][2*LMT_SA_L_PART(n)];
+    pair[0] = v1;
+    pair[1] = v2;
+}
+
+/*tex
+    The allocation helpers of the sparse arrays. The sizes are given as a record size and a
+    number of records. NOTE(review): |sa_free_array| returns a pointer, presumably |NULL| so
+    that a caller can reset its variable in one go; confirm in the source file.
+*/
+
+// inline them
+
+extern void *sa_malloc_array (int recordsize, int size);
+extern void *sa_realloc_array (void *p, int recordsize, int size, int step);
+extern void *sa_calloc_array (int recordsize, int size);
+extern void *sa_free_array (void *p);
+extern void sa_wipe_array (void *head, int recordsize, int size);
+
+# endif
diff --git a/source/luametatex/source/utilities/auxsystem.c b/source/luametatex/source/utilities/auxsystem.c
new file mode 100644
index 000000000..d3d818a85
--- /dev/null
+++ b/source/luametatex/source/utilities/auxsystem.c
@@ -0,0 +1,155 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+/*tex This code is taken from the \LUA\ socket library: |timeout.c|. */
+
+# ifdef _WIN32
+
+    /*tex
+        Return the current time in seconds since the Unix epoch, with a fractional part. The
+        system file time comes in two 32 bit halves and counts from January 1, 1601 (UTC) in
+        units of a ten millionth of a second, so we scale and then shift the origin.
+    */
+
+    double aux_get_current_time(void) {
+        FILETIME filetime;
+        GetSystemTimeAsFileTime(&filetime);
+        double seconds = filetime.dwLowDateTime/1.0e7 + filetime.dwHighDateTime*(4294967296.0/1.0e7);
+        /* from the Windows origin to January 1, 1970 (UTC) */
+        return (seconds - 11644473600.0);
+    }
+
+# else
+
+    /*tex
+        Return the current time in seconds since the Unix epoch, January 1, 1970 (UTC), with
+        the microseconds as fractional part.
+    */
+
+    double aux_get_current_time(void) {
+        struct timeval now;
+        gettimeofday(&now, (struct timezone *) NULL);
+        return now.tv_sec + now.tv_usec/1.0e6;
+    }
+
+# endif
+
+/*tex Remember the current time as the moment the run started. */
+
+void aux_set_run_time(void)
+{
+ lmt_main_state.start_time = aux_get_current_time();
+}
+
+/*tex
+    Return the number of seconds that passed since |aux_set_run_time| was called. The result is
+    only meaningful after that call.
+*/
+
+double aux_get_run_time(void)
+{
+ return aux_get_current_time() - lmt_main_state.start_time;
+}
+
+/*tex
+
+ In order to avoid all kind of time code in the backend code we use a function. The start time
+ can be overloaded in several ways:
+
+ \startitemize[n]
+ \startitem
+ By setting the environment variable |SOURCE_DATE_EPOCH|. This will influence the \PDF\
+ timestamp and \PDF\ id that is derived from the time. This variable is consulted when
+ the kpse library is enabled which is analogue to other properties.
+ \stopitem
+ \startitem
+ By setting the |texconfig.start_time| variable (as with other variables we use the
+ internal name there). This has the same effect as (1) and is provided for when kpse is
+ not used to set these variables or when an overload is wanted. This is analogue to
+ other properties.
+ \stopitem
+ \stopitemize
+
+ To some extent a cleaner solution would be to have a flag that disables all variable data in
+ one go (like filenames and so) but we just follow the method implemented in pdftex where
+ primitives are used to disable it.
+
+*/
+
+/*tex
+    The start time in seconds; a negative value means that it has not been set yet. This will
+    move to one of the structs.
+*/
+
+static int start_time = -1;
+
+/*tex
+    Return the start time. When it has not been set (or overloaded) before, the current
+    calendar time is taken and remembered, so later calls give the same value.
+*/
+
+static int aux_get_start_time(void)
+{
+    if (start_time >= 0) {
+        return start_time;
+    }
+    start_time = (int) time((time_t *) NULL);
+    return start_time;
+}
+
+/*tex
+
+ This one is used to fetch a value from texconfig which can also be used to set properties.
+ This might come in handy when one has other ways to get date info in the \PDF\ file.
+
+*/
+
+/*tex Overload the start time with |s| seconds; negative values are ignored. */
+
+void aux_set_start_time(int s)
+{
+    if (s < 0) {
+        return;
+    }
+    start_time = s;
+}
+
+/*tex
+
+ All our interrupt handler has to do is set \TEX's global variable |interrupt|; then they
+ will do everything needed.
+
+*/
+
+# ifdef _WIN32
+
+    /*tex
+        Win32 doesn't set |SIGINT|, so we install a console control handler instead. On
+        ctrl-c and ctrl-break we quit the program and report the event as handled; any other
+        event is reported as not handled.
+    */
+
+    static BOOL WINAPI catch_interrupt(DWORD arg)
+    {
+        if (arg == CTRL_C_EVENT || arg == CTRL_BREAK_EVENT) {
+            aux_quit_the_program();
+            return 1;
+        }
+        return 0;
+    }
+
+    void aux_set_interrupt_handler(void)
+    {
+        SetConsoleCtrlHandler(catch_interrupt, TRUE);
+    }
+
+# else
+
+    /*tex
+        The signal handler: we quit the program and, should that return, install ourselves
+        again for the next |SIGINT|.
+    */
+
+    static void catch_interrupt(int arg)
+    {
+        (void) arg;
+        aux_quit_the_program();
+        (void) signal(SIGINT, catch_interrupt);
+    }
+
+    /*tex
+        Install the handler, but only keep it when |SIGINT| had its default disposition: any
+        other handler that was in place before is put back.
+    */
+
+    void aux_set_interrupt_handler(void)
+    {
+        void (*previous) (int) = signal(SIGINT, catch_interrupt);
+        if (previous != SIG_DFL) {
+            signal(SIGINT, previous);
+        }
+    }
+
+# endif
+
+/*tex
+    Break the start time down into the minutes since midnight, the day of the month, the month
+    (1 upto 12) and the full year. When |*utc| is nonzero we use universal time, otherwise local
+    time. The |utc| parameter is only read.
+*/
+
+void aux_get_date_and_time(int *minutes, int *day, int *month, int *year, int *utc)
+{
+    time_t myclock = aux_get_start_time();
+    struct tm *broken = *utc ? gmtime(&myclock) : localtime(&myclock);
+    *minutes = broken->tm_hour * 60 + broken->tm_min;
+    *day = broken->tm_mday;
+    /*tex The library counts months from zero and years from 1900. */
+    *month = broken->tm_mon + 1;
+    *year = broken->tm_year + 1900;
+}
diff --git a/source/luametatex/source/utilities/auxsystem.h b/source/luametatex/source/utilities/auxsystem.h
new file mode 100644
index 000000000..5b9a5bad0
--- /dev/null
+++ b/source/luametatex/source/utilities/auxsystem.h
@@ -0,0 +1,17 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# ifndef LMT_UTILITIES_SYSTEM_H
+# define LMT_UTILITIES_SYSTEM_H
+
+/*tex This one is called by the interrupt handler but it is not defined in |auxsystem.c|. */
+
+extern void aux_quit_the_program (void);
+
+/*tex
+    The start time is in seconds and can be overloaded (negative values are ignored). It is
+    what |aux_get_date_and_time| splits into its components; there |utc| is an input that
+    selects universal instead of local time. The current time and run time are in (fractional)
+    seconds, the run time being relative to the last |aux_set_run_time| call.
+*/
+
+extern void aux_set_start_time (int);
+extern void aux_set_interrupt_handler (void);
+extern void aux_get_date_and_time (int *minutes, int *day, int *month, int *year, int *utc);
+extern double aux_get_current_time (void);
+extern void aux_set_run_time (void);
+extern double aux_get_run_time (void);
+
+# endif
diff --git a/source/luametatex/source/utilities/auxunistring.c b/source/luametatex/source/utilities/auxunistring.c
new file mode 100644
index 000000000..e95854a93
--- /dev/null
+++ b/source/luametatex/source/utilities/auxunistring.c
@@ -0,0 +1,158 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+/*tex
+
+ The 5- and 6-byte UTF-8 sequences generate integers that are outside of the valid UCS range,
+ and therefore unsupported. We recover from an error with |0xFFFD|.
+
+*/
+
+/*tex
+
+    Decode the \UTF-8 sequence that starts at |k| and return its code point. The input has to
+    be zero terminated: a follow up byte is only looked at when all bytes before it are valid,
+    so we never read beyond the terminator. We return |0xFFFD| when the first byte is a
+    continuation byte, when it announces a five or six byte sequence, or when one of the follow
+    up bytes is not a continuation byte. Overlong forms and values beyond |0x10FFFF| are not
+    rejected.
+
+    The four byte case used to go via a surrogate pair like calculation that subtracted one
+    from the upper bits and then shifted the result. For an overlong lead that meant shifting
+    a negative number, which is undefined in \CCODE. Composing the value directly in unsigned
+    arithmetic gives the same result for all other input and is well defined for every input.
+
+*/
+
+unsigned aux_str2uni(const unsigned char *k)
+{
+    const unsigned lead = k[0];
+    if (lead < 0x80) {
+        return lead;
+    } else if (lead <= 0xbf) {
+        /*tex A continuation byte cannot start a sequence. */
+        return 0xFFFD;
+    } else if (lead <= 0xdf) {
+        if ((k[1] & 0xc0) == 0x80) {
+            return ((lead & 0x1f) << 6) | (k[1] & 0x3fu);
+        }
+    } else if (lead <= 0xef) {
+        if ((k[1] & 0xc0) == 0x80 && (k[2] & 0xc0) == 0x80) {
+            return ((lead & 0x0f) << 12) | ((k[1] & 0x3fu) << 6) | (k[2] & 0x3fu);
+        }
+    } else if (lead <= 0xf7) {
+        if ((k[1] & 0xc0) == 0x80 && (k[2] & 0xc0) == 0x80 && (k[3] & 0xc0) == 0x80) {
+            return ((lead & 0x07) << 18) | ((k[1] & 0x3fu) << 12) | ((k[2] & 0x3fu) << 6) | (k[3] & 0x3fu);
+        }
+    }
+    return 0xFFFD;
+}
+
+/*tex
+
+    Return a freshly allocated, zero terminated string (five bytes are reserved) with the
+    \UTF-8 encoding of |unic|. Values from |0x110000| on are not \UNICODE: these give a string
+    with one byte, namely the value minus |0x110000|. The caller owns the result. When the
+    allocation fails we just pass on what the allocator returned.
+
+*/
+
+unsigned char *aux_uni2str(unsigned unic)
+{
+    unsigned char *result = lmt_memory_malloc(5);
+    if (! result) {
+        return result;
+    }
+    if (unic < 0x80) {
+        result[0] = (unsigned char) unic;
+        result[1] = '\0';
+    } else if (unic < 0x800) {
+        result[0] = (unsigned char) (0xc0 | (unic >> 6));
+        result[1] = (unsigned char) (0x80 | (unic & 0x3f));
+        result[2] = '\0';
+    } else if (unic < 0x10000) {
+        result[0] = (unsigned char) (0xe0 | (unic >> 12));
+        result[1] = (unsigned char) (0x80 | ((unic >> 6) & 0x3f));
+        result[2] = (unsigned char) (0x80 | (unic & 0x3f));
+        result[3] = '\0';
+    } else if (unic < 0x110000) {
+        result[0] = (unsigned char) (0xf0 | (unic >> 18));
+        result[1] = (unsigned char) (0x80 | ((unic >> 12) & 0x3f));
+        result[2] = (unsigned char) (0x80 | ((unic >> 6) & 0x3f));
+        result[3] = (unsigned char) (0x80 | (unic & 0x3f));
+        result[4] = '\0';
+    } else {
+        /*tex A single byte that is kept as it is. */
+        result[0] = (unsigned char) (unic - 0x110000);
+        result[1] = '\0';
+    }
+    return result;
+}
+
+/*tex
+
+ Function |buffer_to_unichar| converts a sequence of bytes in the |buffer| into a \UNICODE\
+ character value. It does not check for overflow of the |buffer|, but it is careful to check
+ the validity of the \UTF-8 encoding. For historical reasons all these small helpers look a bit
+ different but that has a certain charm so we keep it.
+
+*/
+
+/*tex
+
+    Write the \UTF-8 encoding of |unic| at |utf8_text| and return the position after the last
+    byte written. There is no terminating zero and there must be room for up to four bytes.
+    Values from |0x110000| on write nothing, so then the pointer is returned as it came in.
+
+*/
+
+char *aux_uni2string(char *utf8_text, unsigned unic)
+{
+    char *p = utf8_text;
+    if (unic < 0x80) {
+        p[0] = (char) unic;
+        return p + 1;
+    }
+    if (unic < 0x800) {
+        p[0] = (char) (0xc0 | (unic >> 6));
+        p[1] = (char) (0x80 | (unic & 0x3f));
+        return p + 2;
+    }
+    if (unic < 0x10000) {
+        p[0] = (char) (0xe0 | (unic >> 12));
+        p[1] = (char) (0x80 | ((unic >> 6) & 0x3f));
+        p[2] = (char) (0x80 | (unic & 0x3f));
+        return p + 3;
+    }
+    if (unic < 0x110000) {
+        p[0] = (char) (0xf0 | (unic >> 18));
+        p[1] = (char) (0x80 | ((unic >> 12) & 0x3f));
+        p[2] = (char) (0x80 | ((unic >> 6) & 0x3f));
+        p[3] = (char) (0x80 | (unic & 0x3f));
+        return p + 4;
+    }
+    return p;
+}
+
+/*tex
+
+    Convert the zero terminated \UTF-8 string |utf8buf| into code points that are stored in
+    |ubuf|, followed by a terminating zero. The number of code points (without that zero) is
+    returned. Because every input byte can become a code point, |ubuf| must have room for the
+    byte length of the input plus one.
+
+    The length of a sequence is derived from its first byte only; the follow up bytes are not
+    checked for being continuation bytes and a stray continuation byte is taken for the start
+    of a two byte sequence. However, a sequence that is cut off by the end of the string is
+    never read beyond that end: it results in one |0xFFFD| after which we're done. Before, such
+    a sequence made us look at bytes after the terminating zero. The four byte case composes
+    its value directly, which avoids shifting a negative intermediate result.
+
+*/
+
+unsigned aux_splitutf2uni(unsigned int *ubuf, const char *utf8buf)
+{
+    const unsigned char *pt = (const unsigned char *) utf8buf;
+    const unsigned char *end = pt + strlen(utf8buf);
+    unsigned int *upt = ubuf;
+    while (pt < end) {
+        const unsigned lead = pt[0];
+        const size_t left = (size_t) (end - pt);
+        const size_t need = lead <= 0x7f ? 1 : (lead <= 0xdf ? 2 : (lead <= 0xef ? 3 : 4));
+        if (need > left) {
+            /*tex The sequence is truncated. */
+            *upt++ = 0xFFFD;
+            break;
+        }
+        switch (need) {
+            case 1:
+                *upt = lead;
+                break;
+            case 2:
+                *upt = ((lead & 0x1f) << 6) | (pt[1] & 0x3fu);
+                break;
+            case 3:
+                *upt = ((lead & 0x0f) << 12) | ((pt[1] & 0x3fu) << 6) | (pt[2] & 0x3fu);
+                break;
+            default:
+                *upt = ((lead & 0x07) << 18) | ((pt[1] & 0x3fu) << 12) | ((pt[2] & 0x3fu) << 6) | (pt[3] & 0x3fu);
+                break;
+        }
+        pt += need;
+        ++upt;
+    }
+    *upt = 0;
+    return (unsigned int) (upt - ubuf);
+}
+
+/*tex
+
+    Count the characters in the first |size| bytes of |text|. Only the first byte of a
+    sequence is looked at: it tells how many bytes to skip. Bytes below |0xC0|, so also stray
+    continuation bytes, count as one character each. A sequence that is cut off at the end
+    still counts as one character; the bytes that would follow are never read.
+
+*/
+
+size_t aux_utf8len(const char *text, size_t size)
+{
+    size_t count = 0;
+    size_t pos = 0;
+    while (pos < size) {
+        const unsigned char lead = (unsigned char) text[pos];
+        size_t step = 1;
+        if (lead >= 0xF0) {
+            step = 4;
+        } else if (lead >= 0xE0) {
+            step = 3;
+        } else if (lead >= 0xC0) {
+            step = 2;
+        }
+        pos += step;
+        ++count;
+    }
+    return count;
+}
diff --git a/source/luametatex/source/utilities/auxunistring.h b/source/luametatex/source/utilities/auxunistring.h
new file mode 100644
index 000000000..1e6a997b9
--- /dev/null
+++ b/source/luametatex/source/utilities/auxunistring.h
@@ -0,0 +1,19 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# ifndef LMT_UTILITIES_UNISTRING_H
+# define LMT_UTILITIES_UNISTRING_H
+
+/*tex
+    Conversion between \UTF-8 and code points. |aux_uni2str| returns an allocated string that
+    the caller owns, |aux_uni2string| writes into a given buffer and returns the position after
+    the last byte written, |aux_str2uni| decodes one sequence and |aux_splitutf2uni| a whole
+    string. |aux_utf8len| counts the characters in |size| bytes.
+*/
+
+extern unsigned char *aux_uni2str (unsigned);
+extern unsigned aux_str2uni (const unsigned char *);
+extern char *aux_uni2string (char *utf8_text, unsigned ch);
+extern unsigned aux_splitutf2uni (unsigned int *ubuf, const char *utf8buf);
+extern size_t aux_utf8len (const char *text, size_t size);
+
+/*tex
+    Some helpers: a test for a continuation byte, the number of bytes that the encoding of a
+    code point takes, and the decoder applied at offset |k| in the input buffer. The arguments
+    are wrapped in parentheses so that expressions can be passed safely. Keep in mind that the
+    first two evaluate their argument more than once, so it should have no side effects.
+*/
+
+# define is_utf8_follow(a) ((a) >= 0x80 && (a) < 0xC0)
+# define utf8_size(a) ((a) > 0xFFFF ? 4 : ((a) > 0x7FF ? 3 : ((a) > 0x7F ? 2 : 1)))
+# define buffer_to_unichar(k) aux_str2uni((const unsigned char *)(lmt_fileio_state.io_buffer+(k)))
+
+# endif
+
diff --git a/source/luametatex/source/utilities/auxzlib.c b/source/luametatex/source/utilities/auxzlib.c
new file mode 100644
index 000000000..7444b5944
--- /dev/null
+++ b/source/luametatex/source/utilities/auxzlib.c
@@ -0,0 +1,18 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include "auxzlib.h"
+# include "auxmemory.h"
+
+/*tex
+    The allocator that we plug into the zipper: it gets a number of items and the size of an
+    item and has to return a block of at least their product, or |NULL| when it can't. We check
+    that the product fits in a |size_t|, because otherwise it wraps around and we would hand
+    out a block that is too small. The |opaque| pointer is not used.
+*/
+
+void *lmt_zlib_alloc(void *opaque, size_t items, size_t size)
+{
+    (void) opaque;
+    if (size != 0 && items > ((size_t) -1) / size) {
+        return NULL;
+    }
+    return lmt_memory_malloc(items * size);
+}
+
+/*tex
+    The counterpart of |lmt_zlib_alloc|: the block goes back via our own memory manager. The
+    |opaque| pointer is not used.
+*/
+
+void lmt_zlib_free(void *opaque, void *p)
+{
+ (void) opaque;
+ lmt_memory_free(p);
+}
diff --git a/source/luametatex/source/utilities/auxzlib.h b/source/luametatex/source/utilities/auxzlib.h
new file mode 100644
index 000000000..7dfaa058a
--- /dev/null
+++ b/source/luametatex/source/utilities/auxzlib.h
@@ -0,0 +1,24 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+/*tex
+
+ This module deals with the memory allocation that plugs in the zipper. Although we could just
+ use the default malloc, it's nicer to use the replacement, when it is enabled. A previous
+ version had the option to choose between zlib and miniz but in 2021 we switched to the latter
+ so the former is now in the attic.
+
+*/
+
+# ifndef LMT_UTILITIES_ZLIB_H
+# define LMT_UTILITIES_ZLIB_H
+
+# include "../libraries/miniz/miniz.h"
+
+/*tex These plug in the lua library as well as pplib's flate handler. */
+
+/*tex
+    The allocator gets a number of items and the size of one item, the deallocator gets a
+    block that came from the allocator. In both cases |opaque| is ignored.
+*/
+
+extern void *lmt_zlib_alloc (void *opaque, size_t items, size_t size);
+extern void lmt_zlib_free (void *opaque, void *p);
+
+# endif