diff options
Diffstat (limited to 'source/luametatex/source/utilities')
-rw-r--r-- | source/luametatex/source/utilities/auxarithmetic.h | 61 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxfile.c | 294 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxfile.h | 166 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxmemory.c | 25 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxmemory.h | 54 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxsparsearray.c | 623 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxsparsearray.h | 212 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxsystem.c | 155 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxsystem.h | 17 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxunistring.c | 158 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxunistring.h | 19 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxzlib.c | 18 | ||||
-rw-r--r-- | source/luametatex/source/utilities/auxzlib.h | 24 |
13 files changed, 1826 insertions, 0 deletions
diff --git a/source/luametatex/source/utilities/auxarithmetic.h b/source/luametatex/source/utilities/auxarithmetic.h new file mode 100644 index 000000000..8daf6f29a --- /dev/null +++ b/source/luametatex/source/utilities/auxarithmetic.h @@ -0,0 +1,61 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_UTILITIES_ARITHMETIC_H +# define LMT_UTILITIES_ARITHMETIC_H + +/* The |fabs| macro is used in mp. */ + +/*tex + +There has always be much attention on accuracy in \TEX, especially in the perspective of portability. +Keep in mind that \TEX\ was written when there was no IEEE floating point defined so all happens in +16.16, or actually in 14.16 precission. We could actually consider going 16.16 if we use long integers +in some places but it needs some checking first. We could just accept wrapping around as that already +happens in some places anyway (not all dimension calculation are checked). + +In \LUATEX\ and \LUAMETATEX\ we have the \LUA\ engine and that one was exclusively using doubles till +5.3 when it went for a more hybrid approach. Because we go a lot between \TEX\ and \LUA\ (in \CONTEXT) +that had some consequences and rounding happens all over the place. It is also for that reason that +we now use doubles and rounding in some more places in the \TEX\ part: it is more consistent with what +happens at the \LUA\ end. And, because IEEE is common now, we are (afaiks) portable enough. + +We don't use round but lround as that one rounds away from zero. In a few places we use llround. Also +in some places we clip to the official maxima but not always. + +*/ + + +/* +# undef abs +# undef fabs + +# define abs(x) ((int)(x) >= 0 ? (int)(x) : (int)-(x)) +# define fabs(x) ((x) >= 0.0 ? (x) : -(x)) +*/ + +# define odd(x) ((x) & 1) + +# define lfloor(x) ( (lua_Integer)(floor((double)(x))) ) +# define tfloor(x) ( (size_t) (floor((double)(x))) ) +# define ifloor(x) ( (int) (floor((double)(x))) ) + +//define lround(x) ( ((double) x >= 0.0) ? (lua_Integer) ((double) x + 0.5) : (lua_Integer) ((double) x - 0.5) ) +//define tround(x) ( ((double) x >= 0.0) ? (size_t) ((double) x + 0.5) : (size_t) ((double) x - 0.5) ) +//define iround(x) ( ((double) x >= 0.0) ? (int) ((double) x + 0.5) : (int) ((double) x - 0.5) ) +//define sround(x) ( ((double) x >= 0.0) ? (int) ((double) x + 0.5) : (int) ((double) x - 0.5) ) + +//define lround(x) ( ((double) x >= 0.0) ? (lua_Integer) ((double) x + 0.5) : (lua_Integer) ((double) x - 0.5) ) +//define tround(x) ( ((double) x >= 0.0) ? (size_t) ((double) x + 0.5) : (size_t) ((double) x - 0.5) ) +//define iround(x) ( (int) lround((double) x) ) + +//define zround(r) ((r>2147483647.0) ? 2147483647 : ((r<-2147483647.0) ? -2147483647 : ((r >= 0.0) ? (int)(r + 0.5) : ((int)(r-0.5))))) +//define zround(r) ((r>2147483647.0) ? 2147483647 : ((r<-2147483647.0) ? -2147483647 : (int) lround(r))) + +# define scaledround(x) ((scaled) lround((double) x)) +# define longlonground llround +# define clippedround(r) ((r>2147483647.0) ? 2147483647 : ((r<-2147483647.0) ? -2147483647 : (int) lround(r))) +# define glueround(x) clippedround((double) (x)) + +# endif diff --git a/source/luametatex/source/utilities/auxfile.c b/source/luametatex/source/utilities/auxfile.c new file mode 100644 index 000000000..1aae0e691 --- /dev/null +++ b/source/luametatex/source/utilities/auxfile.c @@ -0,0 +1,294 @@ +/* + See license.txt in the root of this project. +*/ + +# include <stdio.h> +# include <sys/stat.h> + +# include "auxfile.h" +# include "auxmemory.h" + +# ifdef _WIN32 + + # include <windows.h> + # include <ctype.h> + # include <io.h> + # include <shellapi.h> + + LPWSTR aux_utf8_to_wide(const char *utf8str) { + if (utf8str) { + int length = MultiByteToWideChar(CP_UTF8, 0, utf8str, -1, NULL, 0); /* preroll */ + LPWSTR wide = (LPWSTR) lmt_memory_malloc(sizeof(WCHAR) * length); + MultiByteToWideChar(CP_UTF8, 0, utf8str, -1, wide, length); + return wide; + } else { + return NULL; + } + } + + char *aux_utf8_from_wide(LPWSTR widestr) { + if (widestr) { + int length = WideCharToMultiByte(CP_UTF8, 0, widestr, -1, NULL, 0, NULL, NULL); + char * utf8str = (char *) lmt_memory_malloc(sizeof(char) * length); + WideCharToMultiByte(CP_UTF8, 0, widestr, -1, utf8str, length, NULL, NULL); + return (char *) utf8str; + } else { + return NULL; + } + } + + FILE *aux_utf8_fopen(const char *path, const char *mode) { + if (path && mode) { + LPWSTR wpath = aux_utf8_to_wide(path); + LPWSTR wmode = aux_utf8_to_wide(mode); + FILE *f = _wfopen(wpath,wmode); + lmt_memory_free(wpath); + lmt_memory_free(wmode); + return f; + } else { + return NULL; + } + } + + FILE *aux_utf8_popen(const char *path, const char *mode) { + if (path && mode) { + LPWSTR wpath = aux_utf8_to_wide(path); + LPWSTR wmode = aux_utf8_to_wide(mode); + FILE *f = _wpopen(wpath,wmode); + lmt_memory_free(wpath); + lmt_memory_free(wmode); + return f; + } else { + return NULL; + } + } + + int aux_utf8_system(const char *cmd) + { + LPWSTR wcmd = aux_utf8_to_wide(cmd); + int result = _wsystem(wcmd); + lmt_memory_free(wcmd); + return result; + } + + int aux_utf8_remove(const char *name) + { + LPWSTR wname = aux_utf8_to_wide(name); + int result = _wremove(wname); + lmt_memory_free(wname); + return result; + } + + int aux_utf8_rename(const char *oldname, const char *newname) + { + LPWSTR woldname = aux_utf8_to_wide(oldname); + LPWSTR wnewname = aux_utf8_to_wide(newname); + int result = _wrename(woldname, wnewname); + lmt_memory_free(woldname); + lmt_memory_free(wnewname); + return result; + } + + int aux_utf8_setargv(char * **av, char **argv, int argc) + { + if (argv) { + int c = 0; + LPWSTR *l = CommandLineToArgvW(GetCommandLineW(), &c); + if (l != NULL) { + char **v = lmt_memory_malloc(sizeof(char *) * c); + for (int i = 0; i < c; i++) { + v[i] = aux_utf8_from_wide(l[i]); + } + *av = v; + /*tex Let's be nice with path names: |c:\\foo\\etc| */ + if (c > 1) { + if ((strlen(v[c-1]) > 2) && isalpha(v[c-1][0]) && (v[c-1][1] == ':') && (v[c-1][2] == '\\')) { + for (char *p = v[c-1]+2; *p; p++) { + if (*p == '\\') { + *p = '/'; + } + } + } + } + } + return c; + } else { + *av = NULL; + return argc; + } + } + + char *aux_utf8_getownpath(const char *file) + { + if (file) { + char *path = NULL; + char buffer[MAX_PATH]; + GetModuleFileName(NULL,buffer,sizeof(buffer)); + path = lmt_memory_strdup(buffer); + if (strlen(path) > 0) { + for (size_t i = 0; i < strlen(path); i++) { + if (path[i] == '\\') { + path[i] = '/'; + } + } + return path; + } + } + return lmt_memory_strdup("."); + } + +# else + + # include <string.h> + # include <stdlib.h> + # include <unistd.h> + + int aux_utf8_setargv(char * **av, char **argv, int argc) + { + *av = argv; + return argc; + } + + char *aux_utf8_getownpath(const char *file) + { + if (strchr(file, '/')) { + return lmt_memory_strdup(file); + } else { + const char *esp; + size_t prefixlen = 0; + size_t totallen = 0; + size_t filelen = strlen(file); + char *path = NULL; + char *searchpath = lmt_memory_strdup(getenv("PATH")); + const char *index = searchpath; + if (index) { + do { + esp = strchr(index, ':'); + if (esp) { + prefixlen = (size_t) (esp - index); + } else { + prefixlen = strlen(index); + } + if (prefixlen == 0 || index[prefixlen - 1] == '/') { + totallen = prefixlen + filelen; +# ifdef PATH_MAX + if (totallen >= PATH_MAX) { + continue; + } +# endif + path = lmt_memory_malloc(totallen + 1); + memcpy(path, index, prefixlen); + memcpy(path + prefixlen, file, filelen); + } else { + totallen = prefixlen + filelen + 1; +# ifdef PATH_MAX + if (totallen >= PATH_MAX) { + continue; + } +# endif + path = lmt_memory_malloc(totallen + 1); + memcpy(path, index, prefixlen); + path[prefixlen] = '/'; + memcpy(path + prefixlen + 1, file, filelen); + } + path[totallen] = '\0'; + if (access(path, X_OK) >= 0) { + break; + } + lmt_memory_free(path); + path = NULL; + index = esp + 1; + } while (esp); + } + lmt_memory_free(searchpath); + if (path) { + return path; + } else { + return lmt_memory_strdup("."); /* ok? */ + } + } + } + +# endif + +# ifndef S_ISREG + # define S_ISREG(mode) (mode & _S_IFREG) +# endif + +# ifdef _WIN32 + + char *aux_basename(const char *name) { + char base[256+1]; + char suff[256+1]; + _splitpath(name,NULL,NULL,base,suff); + { + size_t b = strlen((const char*)base); + size_t s = strlen((const char*)suff); + char *result = (char *) lmt_memory_malloc(sizeof(char) * (b+s+1)); + if (result) { + memcpy(&result[0], &base[0], b); + memcpy(&result[b], &suff[0], s); + result[b + s] = '\0'; + } + return result; + } + } + + char *aux_dirname(const char *name) { + char driv[256 + 1]; + char path[256 + 1]; + _splitpath(name,driv,path,NULL,NULL); + { + size_t d = strlen((const char*)driv); + size_t p = strlen((const char*)path); + char *result = (char *) lmt_memory_malloc(sizeof(char) * (d+p+1)); + if (result) { + if (path[p - 1] == '/' || path[p - 1] == '\\') { + --p; + } + memcpy(&result[0], &driv[0], d); + memcpy(&result[d], &path[0], p); + result[d + p] = '\0'; + } + return result; + } + } + + // int aux_is_readable(const char *filename) + // { + // struct stat finfo; + // FILE *f; + // return (stat(filename, &finfo) == 0) + // && S_ISREG(finfo.st_mode) + // && ((f = aux_utf8_fopen(filename, "r")) != NULL) + // && ! fclose(f); + // } + + int aux_is_readable(const char *filename) + { + struct _stati64 info; + LPWSTR w = aux_utf8_to_wide(filename); + int r = _wstati64(w, &info); + FILE *f; + lmt_memory_free(w); + return (r == 0) + && (S_ISREG(info.st_mode)) + && ((f = aux_utf8_fopen(filename, "r")) != NULL) + && ! fclose(f); + } + +# else + + # include <libgen.h> + + int aux_is_readable(const char *filename) + { + struct stat finfo; + FILE *f; + return (stat(filename, &finfo) == 0) + && S_ISREG(finfo.st_mode) + && ((f = fopen(filename, "r")) != NULL) + && ! fclose(f); + } + +# endif diff --git a/source/luametatex/source/utilities/auxfile.h b/source/luametatex/source/utilities/auxfile.h new file mode 100644 index 000000000..19a4815c2 --- /dev/null +++ b/source/luametatex/source/utilities/auxfile.h @@ -0,0 +1,166 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_UTILITIES_FILE_H +# define LMT_UTILITIES_FILE_H + +/*tex + + We have to deal with wide characters on windows when it comes to filenames. The same is true for + the commandline and environment variables. Basically we go from utf8 to wide and back. + + \starttyping + libraries/zlib/crc32.c : fopen -> minimalistic, goes via lua anyway + libraries/zlib/trees.c : fopen -> minimalistic, goes via lua anyway + libraries/zlib/zutil.h : fopen -> minimalistic, goes via lua anyway + + lua/llualib.c : fopen -> utf8_fopen + lua/lenginelib.c : fopen -> utf8_fopen + + luacore/lua54/src/lauxlib.c : fopen -> see below + luacore/lua54/src/liolib.c : fopen -> see below + luacore/lua54/src/loadlib.c : fopen -> see below + + luaffi/call.c : fopen -> not used + + mp/mpw/mp.w : fopen -> overloaded by callback + + libraries/pplib/ppload.c : fopen -> will be abstraction (next pplib) + + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + libraries/pplib/util/utiliof.c : fopen -> not used + + tex/texfileio.c 12: : fopen -> utf8_fopen + \stoptyping + + Furthermore: + + \starttyping + - system commands (execute) : done + - popen : done + + - lua rename : done + - lua remove : done + + - command line argv : done + - lua setenv : done + - lua getenv : done + + - lfs attributes : done + - lfs chdir : done + - lfs currentdir : done + - lfs dir : done + - lfs mkdir : done + - lfs rmdir : done + - lfs touch : done + - lfs link : done + - lfs symlink : done + - lfs setexecutable : done (needs testing) + - lfs isdir : done + - lfs isfile : done + - lfs iswriteabledir : done + - lfs iswriteablefile : done + - lfs isreadabledir : done + - lfs isreadablefile : done + \stoptyping + + Kind of tricky because quite some code (indirectness): + + \starttyping + - lua load : via overload ? + - lua dofile : via overload -> loadstring + - lua require : via overload ? + \stoptyping + + So: do we patch lua (fopen) or just copy? We can actually assume flat ascii files for libraries + and such so there is no real need unless we load job related files. + + I will probably reshuffle some code and maybe more some more here; once I'm sure all works out + well. + +*/ + +# ifdef _WIN32 + + # include <windows.h> + # include <ctype.h> + # include <stdio.h> + + extern LPWSTR aux_utf8_to_wide (const char *utf8str); + extern char *aux_utf8_from_wide (LPWSTR widestr); + + extern FILE *aux_utf8_fopen (const char *path, const char *mode); + extern FILE *aux_utf8_popen (const char *path, const char *mode); + extern int aux_utf8_system (const char *cmd); + extern int aux_utf8_remove (const char *name); + extern int aux_utf8_rename (const char *oldname, const char *newname); + extern int aux_utf8_setargv (char * **av, char **argv, int argc); + extern char *aux_utf8_getownpath (const char *file); + +# else + + # define aux_utf8_fopen fopen + # define aux_utf8_popen popen + # define aux_utf8_system system + # define aux_utf8_remove remove + # define aux_utf8_rename rename + + extern int aux_utf8_setargv (char * **av, char **argv, int argc); + extern char *aux_utf8_getownpath (const char *file); + + # include <libgen.h> + +# endif + +# ifdef _WIN32 + + extern char *aux_basename (const char *name); + extern char *aux_dirname (const char *name); + +# else + + # define aux_basename basename + # define aux_dirname dirname + +# endif + +extern int aux_is_readable (const char *filename); + +/*tex + + We support unix and windows. In fact, we could stick to |/| only. When + scanning filenames entered in \TEX\ we can actually enforce a |/| as + convention. + +*/ + +# ifndef IS_DIR_SEP + # ifdef _WIN32 + # define IS_DIR_SEP(ch) ((ch) == '/' || (ch) == '\\') + # else + # define IS_DIR_SEP(ch) ((ch) == '/') + # endif +# endif + +# ifndef R_OK + # define F_OK 0x0 + # define W_OK 0x2 + # define R_OK 0x4 +# endif + +# ifndef S_ISREG + # define S_ISREG(mode) (mode & _S_IFREG) +# endif + +# endif diff --git a/source/luametatex/source/utilities/auxmemory.c b/source/luametatex/source/utilities/auxmemory.c new file mode 100644 index 000000000..9ba02f946 --- /dev/null +++ b/source/luametatex/source/utilities/auxmemory.c @@ -0,0 +1,25 @@ +/* + See license.txt in the root of this project. +*/ + +# include "auxmemory.h" + +void *aux_allocate_array(int recordsize, int size, int reserved) +{ + return lmt_memory_malloc(recordsize * ((size_t) size + reserved + 1)); +} + +void *aux_reallocate_array(void *p, int recordsize, int size, int reserved) +{ + return lmt_memory_realloc(p, recordsize * ((size_t) size + reserved + 1)); +} + +void *aux_allocate_clear_array(int recordsize, int size, int reserved) +{ + return lmt_memory_calloc((size_t) size + reserved + 1, recordsize); +} + +void aux_deallocate_array(void *p) +{ + lmt_memory_free(p); +} diff --git a/source/luametatex/source/utilities/auxmemory.h b/source/luametatex/source/utilities/auxmemory.h new file mode 100644 index 000000000..4f040eafd --- /dev/null +++ b/source/luametatex/source/utilities/auxmemory.h @@ -0,0 +1,54 @@ +/* + See license.txt in the root of this project. +*/ + +/* + Some operating systems come with |allocarray| so we use more verbose names. We cannot define + them because on some bsd/apple platforms |CLANG| cannot resolve them. + +*/ + +# ifndef LMT_UTILITIES_MEMORY_H +# define LMT_UTILITIES_MEMORY_H + +/*tex + This is an experiment. The impact of using an alternative allocator on native Windows makes a + native version some 5% faster than a cross compiled one. Otherwise the cross compiled version + outperforms the native one a bit. In \TEX\ and \METAPOST\ we already do something like this + but there we don't reclaim memory. + +*/ + +# include <stdlib.h> +# include <string.h> + +# if defined(LUAMETATEX_USE_MIMALLOC) + # include "libraries/mimalloc/include/mimalloc.h" + # define lmt_memory_malloc mi_malloc + # define lmt_memory_calloc mi_calloc + # define lmt_memory_realloc mi_realloc + # define lmt_memory_free mi_free + # define lmt_memory_strdup mi_strdup + + // # include "libraries/mimalloc/include/mimalloc-override.h" + +# else + # define lmt_memory_malloc malloc + # define lmt_memory_calloc calloc + # define lmt_memory_realloc realloc + # define lmt_memory_free free + # define lmt_memory_strdup strdup +# endif + +# define lmt_generic_malloc malloc +# define lmt_generic_calloc calloc +# define lmt_generic_realloc realloc +# define lmt_generic_free free +# define lmt_generic_strdup strdup + +extern void *aux_allocate_array (int recordsize, int size, int reserved); +extern void *aux_reallocate_array (void *p, int recordsize, int size, int reserved); +extern void *aux_allocate_clear_array (int recordsize, int size, int reserved); +extern void aux_deallocate_array (void *p); + +# endif diff --git a/source/luametatex/source/utilities/auxsparsearray.c b/source/luametatex/source/utilities/auxsparsearray.c new file mode 100644 index 000000000..d9fa5e453 --- /dev/null +++ b/source/luametatex/source/utilities/auxsparsearray.c @@ -0,0 +1,623 @@ +/* + See license.txt in the root of this project. +*/ + +/*tex + + Here we implement sparse arrays with an embedded save stack. These functions are called very + often but a few days of experimenting proved that there is not much to gain (if at all) from + using macros or optimizations like preallocating and fast access to the first 128 entries. In + practice the overhead is mostly in accessing memory and not in (probably inlined) calls. So, we + should accept fate and wait for faster memory. It's the price we pay for being unicode on the + one hand and sparse on the other. + +*/ + +# include "luametatex.h" + +sparse_state_info lmt_sparse_state = { + .sparse_data = { + .minimum = memory_data_unset, + .maximum = memory_data_unset, + .size = memory_data_unset, + .step = memory_data_unset, + .allocated = 0, + .itemsize = 1, + .top = memory_data_unset, + .ptr = memory_data_unset, + .initial = memory_data_unset, + .offset = 0, +} +}; + +void *sa_malloc_array(int recordsize, int size) +{ + int allocated = recordsize * size; + lmt_sparse_state.sparse_data.allocated += allocated; + return lmt_memory_malloc((size_t) allocated); +} + +void *sa_realloc_array(void *p, int recordsize, int size, int step) +{ + int deallocated = recordsize * size; + int allocated = recordsize * (size + step); + lmt_sparse_state.sparse_data.allocated += (allocated - deallocated); + return lmt_memory_realloc(p, (size_t) allocated); +} + +void *sa_calloc_array(int recordsize, int size) +{ + int allocated = recordsize * size; + lmt_sparse_state.sparse_data.allocated += allocated; + return lmt_memory_calloc((size_t) size, recordsize); +} + +void sa_wipe_array(void *head, int recordsize, int size) +{ + memset(head, 0, recordsize * ((size_t) size)); +} + +void *sa_free_array(void *p) +{ + lmt_memory_free(p); + return NULL; +} + +/*tex + + Once we have two variants allocated we can dump and undump a |LOWPART| array in one go. But + not yet. Currently the waste of one extra dummy int is cheaper than multiple functions. + +*/ + +static void sa_aux_store_stack(sa_tree a, int n, sa_tree_item v1, sa_tree_item v2, int gl) +{ + sa_stack_item st; + st.code = n; + st.value_1 = v1; + st.value_2 = v2; + st.level = gl; + if (! a->stack) { + a->stack = sa_malloc_array(sizeof(sa_stack_item), a->sa_stack_size); + } else if (((a->sa_stack_ptr) + 1) >= a->sa_stack_size) { + a->stack = sa_realloc_array(a->stack, sizeof(sa_stack_item), a->sa_stack_size, a->sa_stack_step); + a->sa_stack_size += a->sa_stack_step; + } + (a->sa_stack_ptr)++; + a->stack[a->sa_stack_ptr] = st; +} + +static void sa_aux_skip_in_stack(sa_tree a, int n) +{ + if (a->stack) { + int p = a->sa_stack_ptr; + while (p > 0) { + if (a->stack[p].code == n && a->stack[p].level > 0) { + a->stack[p].level = -(a->stack[p].level); + } + p--; + } + } +} + +int sa_get_item_1(const sa_tree head, int n) +{ + if (head->tree) { + int h = LMT_SA_H_PART(n); + if (head->tree[h]) { + int m = LMT_SA_M_PART(n); + if (head->tree[h][m]) { + return head->tree[h][m][LMT_SA_L_PART(n)/4].uchar_value[n%4]; + } + } + } + return (int) head->dflt.uchar_value[n%4]; +} + +int sa_get_item_2(const sa_tree head, int n) +{ + if (head->tree) { + int h = LMT_SA_H_PART(n); + if (head->tree[h]) { + int m = LMT_SA_M_PART(n); + if (head->tree[h][m]) { + return head->tree[h][m][LMT_SA_L_PART(n)/2].ushort_value[n%2]; + } + } + } + return (int) head->dflt.ushort_value[n%2]; +} + +sa_tree_item sa_get_item_4(const sa_tree head, int n) +{ + if (head->tree) { + int h = LMT_SA_H_PART(n); + if (head->tree[h]) { + int m = LMT_SA_M_PART(n); + if (head->tree[h][m]) { + return head->tree[h][m][LMT_SA_L_PART(n)]; + } + } + } + return head->dflt; +} + +sa_tree_item sa_get_item_8(const sa_tree head, int n, sa_tree_item *v2) +{ + if (head->tree != NULL) { + int h = LMT_SA_H_PART(n); + if (head->tree[h]) { + int m = LMT_SA_M_PART(n); + if (head->tree[h][m]) { + int l = 2*LMT_SA_L_PART(n); + *v2 = head->tree[h][m][l+1]; + return head->tree[h][m][l]; + } + } + } + *v2 = head->dflt; + return head->dflt; +} + +void sa_set_item_1(sa_tree head, int n, int v, int gl) +{ + int h = LMT_SA_H_PART(n); + int m = LMT_SA_M_PART(n); + int l = LMT_SA_L_PART(n); + if (! head->tree) { + head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART); + } + if (! head->tree[h]) { + head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART); + } + if (! head->tree[h][m]) { + head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/4); + for (int i = 0; i < LMT_SA_LOWPART/4; i++) { + head->tree[h][m][i] = head->dflt; + } + } + if (gl <= 1) { + sa_aux_skip_in_stack(head, n); + } else { + sa_aux_store_stack(head, n, head->tree[h][m][l/4], (sa_tree_item) { 0 }, gl); + } + head->tree[h][m][l/4].uchar_value[n%4] = (unsigned char) v; +} + +void sa_set_item_2(sa_tree head, int n, int v, int gl) +{ + int h = LMT_SA_H_PART(n); + int m = LMT_SA_M_PART(n); + int l = LMT_SA_L_PART(n); + if (! head->tree) { + head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART); + } + if (! head->tree[h]) { + head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART); + } + if (! head->tree[h][m]) { + head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/2); + for (int i = 0; i < LMT_SA_LOWPART/2; i++) { + head->tree[h][m][i] = head->dflt; + } + } + if (gl <= 1) { + sa_aux_skip_in_stack(head, n); + } else { + sa_aux_store_stack(head, n, head->tree[h][m][l/2], (sa_tree_item) { 0 }, gl); + } + head->tree[h][m][l/2].ushort_value[n%2] = (unsigned short) v; +} + +void sa_set_item_4(sa_tree head, int n, sa_tree_item v, int gl) +{ + int h = LMT_SA_H_PART(n); + int m = LMT_SA_M_PART(n); + int l = LMT_SA_L_PART(n); + if (! head->tree) { + head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART); + } + if (! head->tree[h]) { + head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART); + } + if (! head->tree[h][m]) { + head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART); + for (int i = 0; i < LMT_SA_LOWPART; i++) { + head->tree[h][m][i] = head->dflt; + } + } + if (gl <= 1) { + sa_aux_skip_in_stack(head, n); + } else { + sa_aux_store_stack(head, n, head->tree[h][m][l], (sa_tree_item) { 0 }, gl); + } + head->tree[h][m][l] = v; +} + +void sa_set_item_8(sa_tree head, int n, sa_tree_item v1, sa_tree_item v2, int gl) +{ + int h = LMT_SA_H_PART(n); + int m = LMT_SA_M_PART(n); + int l = 2*LMT_SA_L_PART(n); + if (! head->tree) { + head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART); + } + if (! head->tree[h]) { + head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART); + } + if (! head->tree[h][m]) { + head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), 2 * LMT_SA_LOWPART); + for (int i = 0; i < 2 * LMT_SA_LOWPART; i++) { + head->tree[h][m][i] = head->dflt; + } + } + if (gl <= 1) { + sa_aux_skip_in_stack(head, n); + } else { + sa_aux_store_stack(head, n, head->tree[h][m][l], head->tree[h][m][l+1], gl); + } + head->tree[h][m][l] = v1; + head->tree[h][m][l+1] = v2; +} + +void sa_set_item_n(sa_tree head, int n, int v, int gl) +{ + int h = LMT_SA_H_PART(n); + int m = LMT_SA_M_PART(n); + int l = LMT_SA_L_PART(n); + int d = head->bytes == 1 ? 4 : (head->bytes == 2 ? 2 : 1); + if (! head->tree) { + head->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART); + } + if (! head->tree[h]) { + head->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART); + } + if (! head->tree[h][m]) { + head->tree[h][m] = (sa_tree_item *) sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART/d); + for (int i = 0; i < LMT_SA_LOWPART/d; i++) { + head->tree[h][m][i] = head->dflt; + } + } + if (gl <= 1) { + sa_aux_skip_in_stack(head, n); + } else { + sa_aux_store_stack(head, n, head->tree[h][m][l/d], (sa_tree_item) { 0 }, gl); + } + switch (head->bytes) { + case 1: + { + head->tree[h][m][l/4].uchar_value[n%4] = (unsigned char) (v < 0 ? 0 : (v > 0xFF ? 0xFF : v)); + break; + } + case 2: + { + head->tree[h][m][l/2].ushort_value[n%2] = (unsigned char) (v < 0 ? 0 : (v > 0xFFFF ? 0xFFFF : v)); + break; + } + case 4: + { + head->tree[h][m][l].int_value = v; + break; + } + } +} + +int sa_get_item_n(const sa_tree head, int n) +{ + if (head->tree) { + int h = LMT_SA_H_PART(n); + if (head->tree[h]) { + int m = LMT_SA_M_PART(n); + if (head->tree[h][m]) { + switch (head->bytes) { + case 1 : return (int) head->tree[h][m][LMT_SA_L_PART(n)/4].uchar_value[n%4]; + case 2 : return (int) head->tree[h][m][LMT_SA_L_PART(n)/2].ushort_value[n%2]; + case 4 : return (int) head->tree[h][m][LMT_SA_L_PART(n) ].int_value; + } + } + } + } + switch (head->bytes) { + case 1 : return (int) head->dflt.uchar_value[n%4]; + case 2 : return (int) head->dflt.ushort_value[n%2]; + case 4 : return (int) head->dflt.int_value; + default: return 0; + } +} + +/* +void rawset_sa_item_4(sa_tree head, int n, sa_tree_item v) +{ + head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)][LMT_SA_L_PART(n)] = v; +} +*/ + +void sa_clear_stack(sa_tree a) +{ + if (a) { + a->stack = sa_free_array(a->stack); + a->sa_stack_ptr = 0; + a->sa_stack_size = a->sa_stack_step; + } +} + +void sa_destroy_tree(sa_tree a) +{ + if (a) { + if (a->tree) { + for (int h = 0; h < LMT_SA_HIGHPART; h++) { + if (a->tree[h]) { + for (int m = 0; m < LMT_SA_MIDPART; m++) { + a->tree[h][m] = sa_free_array(a->tree[h][m]); + } + a->tree[h] = sa_free_array(a->tree[h]); + } + } + a->tree = sa_free_array(a->tree); + } + a->stack = sa_free_array(a->stack); + a = sa_free_array(a); + } +} + +sa_tree sa_copy_tree(sa_tree b) +{ + sa_tree a = (sa_tree) sa_malloc_array(sizeof(sa_tree_head), 1); + a->sa_stack_step = b->sa_stack_step; + a->sa_stack_size = b->sa_stack_size; + a->bytes = b->bytes; + a->dflt = b->dflt; + a->stack = NULL; + a->sa_stack_ptr = 0; + a->tree = NULL; + if (b->tree) { + a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(void *), LMT_SA_HIGHPART); + for (int h = 0; h < LMT_SA_HIGHPART; h++) { + if (b->tree[h]) { + int slide = LMT_SA_LOWPART; + switch (b->bytes) { + case 1: slide = LMT_SA_LOWPART/4; break; + case 2: slide = LMT_SA_LOWPART/2; break; + case 4: slide = LMT_SA_LOWPART ; break; + case 8: slide = 2*LMT_SA_LOWPART ; break; + } + a->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(void *), LMT_SA_MIDPART); + for (int m = 0; m < LMT_SA_MIDPART; m++) { + if (b->tree[h][m]) { + a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), slide); + memcpy(a->tree[h][m], b->tree[h][m], sizeof(sa_tree_item) * slide); + } + } + } + } + } + return a; +} + +/*tex + + The main reason to fill in the lowest entry branches here immediately is that most of the sparse + arrays have a bias toward \ASCII\ values. Allocating those here immediately improves the chance + of the structure |a->tree[0][0][x]| being close together in actual memory locations. We could + save less for type 0 stacks. + +*/ + +sa_tree sa_new_tree(int size, int bytes, sa_tree_item dflt) +{ + sa_tree_head *a; + a = (sa_tree_head *) lmt_memory_malloc(sizeof(sa_tree_head)); + a->dflt = dflt; + a->stack = NULL; + a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(sa_tree_item **), LMT_SA_HIGHPART); + a->tree[0] = (sa_tree_item **) sa_calloc_array(sizeof(sa_tree_item *), LMT_SA_MIDPART); + a->sa_stack_size = size; + a->sa_stack_step = size; + a->bytes = bytes; + a->sa_stack_ptr = 0; + return (sa_tree) a; +} + +void sa_restore_stack(sa_tree head, int gl) +{ + if (head->stack) { + sa_stack_item st; + while (head->sa_stack_ptr > 0 && abs(head->stack[head->sa_stack_ptr].level) >= gl) { + st = head->stack[head->sa_stack_ptr]; + if (st.level > 0) { + int code = st.code; + switch (head->bytes) { + case 1: + { + int c = code % 4; + head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][LMT_SA_L_PART(code)/4].uchar_value[c] = st.value_1.uchar_value[c]; + } + break; + case 2: + { + int c = code % 2; + head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][LMT_SA_L_PART(code)/2].ushort_value[c] = st.value_1.ushort_value[c]; + } + break; + case 4: + { + head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][LMT_SA_L_PART(code)] = st.value_1; + } + break; + case 8: + { + int l = 2*LMT_SA_L_PART(code); + head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][l] = st.value_1; + head->tree[LMT_SA_H_PART(code)][LMT_SA_M_PART(code)][l+1] = st.value_2; + } + break; + + } + } + (head->sa_stack_ptr)--; + } + } +} + +void sa_dump_tree(dumpstream f, sa_tree a) +{ + dump_int(f, a->sa_stack_step); + dump_int(f, a->dflt.int_value); + if (a->tree) { + int bytes = a->bytes; + /*tex A marker: */ + dump_via_int(f, 1); + dump_int(f, bytes); + for (int h = 0; h < LMT_SA_HIGHPART; h++) { + if (a->tree[h]) { + dump_via_int(f, 1); + for (int m = 0; m < LMT_SA_MIDPART; m++) { + if (a->tree[h][m]) { + /*tex + It happens a lot that the value is the same as the index, for instance + with case mappings. + + Using mode 3 for the case where all values are the default value saves + In \CONTEXT\ some 128 * 5 dumps which is not worth the trouble but it + is neat anyway. + + 1 : values are kind of unique + 2 : for all values : value == self + 3 : for all values : value == default + + Actually, we could decide not to save at all in the third mode because + unset equals default. + */ + int mode = 1; + if (bytes != 8) { + /*tex Check for default values. */ + int slide = bytes == 1 ? LMT_SA_LOWPART/4 : (bytes == 2 ? LMT_SA_LOWPART/2 : LMT_SA_LOWPART); + mode = 3; + for (int l = 0; l < slide; l++) { + if (a->tree[h][m][l].uint_value != a->dflt.uint_value) { + mode = 1; + break; + } + } + } + if (mode == 1 && bytes == 4) { + /*tex Check for identity values. */ + unsigned int hm = h * LMT_SA_HIGHPART + m * LMT_SA_MIDPART * LMT_SA_LOWPART ; + mode = 2; + for (int l = 0; l < LMT_SA_LOWPART; l++) { + if (a->tree[h][m][l].uint_value == hm) { + hm++; + } else { + mode = 1; + break; + } + } + } + dump_int(f, mode); + if (mode == 1) { + /*tex + We have unique values. By avoiding this branch we save some 85 Kb + on the \CONTEXT\ format. We could actually save this property in + the tree but there is not that much to gain. + */ + int slide = LMT_SA_LOWPART; + switch (bytes) { + case 1: slide = LMT_SA_LOWPART/4; break; + case 2: slide = LMT_SA_LOWPART/2; break; + case 4: slide = LMT_SA_LOWPART ; break; + case 8: slide = 2*LMT_SA_LOWPART ; break; + } + dump_items(f, &a->tree[h][m][0], sizeof(sa_tree_item), slide); + } else { + /*tex We have a self value or defaults. */ + } + } else { + dump_via_int(f, 0); + } + } + } else { + dump_via_int(f, 0); + } + } + } else { + /*tex A marker: */ + dump_via_int(f, 0); + } +} + +sa_tree sa_undump_tree(dumpstream f) +{ + int x; + sa_tree a = (sa_tree) sa_malloc_array(sizeof(sa_tree_head), 1); + undump_int(f,a->sa_stack_step); + undump_int(f,a->dflt.int_value); + a->sa_stack_size = a->sa_stack_step; + a->stack = sa_calloc_array(sizeof(sa_stack_item), a->sa_stack_size); + a->sa_stack_ptr = 0; + a->tree = NULL; + /*tex The marker: */ + undump_int(f, x); + if (x != 0) { + int bytes, mode; + a->tree = (sa_tree_item ***) sa_calloc_array(sizeof(void *), LMT_SA_HIGHPART); + undump_int(f, bytes); + a->bytes = bytes; + for (int h = 0; h < LMT_SA_HIGHPART; h++) { + undump_int(f, mode); /* more a trigger */ + if (mode > 0) { + a->tree[h] = (sa_tree_item **) sa_calloc_array(sizeof(void *), LMT_SA_MIDPART); + for (int m = 0; m < LMT_SA_MIDPART; m++) { + undump_int(f, mode); + switch (mode) { + case 1: + /*tex + We have a unique values. + */ + { + int slide = LMT_SA_LOWPART; + switch (bytes) { + case 1: slide = LMT_SA_LOWPART/4; break; + case 2: slide = LMT_SA_LOWPART/2; break; + case 4: slide = LMT_SA_LOWPART ; break; + case 8: slide = 2*LMT_SA_LOWPART ; break; + } + a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), slide); + undump_items(f, &a->tree[h][m][0], sizeof(sa_tree_item), slide); + } + break; + case 2: + /*tex + We have a self value. We only have this when we have integers. Other + cases are math anyway, so not much to gain. + */ + { + if (bytes == 4) { + int hm = h * 128 * LMT_SA_HIGHPART + m * LMT_SA_MIDPART; + a->tree[h][m] = sa_malloc_array(sizeof(sa_tree_item), LMT_SA_LOWPART); + for (int l = 0; l < LMT_SA_LOWPART; l++) { + a->tree[h][m][l].int_value = hm; + hm++; + } + } else { + printf("\nfatal format error, mode %i, bytes %i\n", mode, bytes); + } + } + break; + case 3: + /*tex + We have all default values. so no need to set them. In fact, we + cannot even end up here. + */ + break; + default: + /*tex + We have no values set. + */ + break; + } + } + } + } + } + return a; +} diff --git a/source/luametatex/source/utilities/auxsparsearray.h b/source/luametatex/source/utilities/auxsparsearray.h new file mode 100644 index 000000000..0a4ce20f1 --- /dev/null +++ b/source/luametatex/source/utilities/auxsparsearray.h @@ -0,0 +1,212 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_UTILITIES_SPARSEARRAY_H +# define LMT_UTILITIES_SPARSEARRAY_H + +/*tex + + This file originally was called |managed-sa| but becauss it kind of a library and also used in + \LUATEX\ it's better to use a different name. In this variant dumping is more sparse (resulting + in somewhat smaller format files). This might be backported but only after testing it here for a + long time. Of course the principles are the same, it's just extended. + +*/ + +/*tex + + The next two sets of three had better match up exactly, but using bare numbers is easier on the + \CCODE\ compiler. Here are some format sizes (for ConTeXt) with different values: + + 64 : 17562942 + 128 : 17548150 <= best value + 256 : 17681398 + +*/ + +# define LMT_SA_HIGHPART 128 +# define LMT_SA_MIDPART 128 +# define LMT_SA_LOWPART 128 + +# define LMT_SA_H_PART(a) (((a)>>14)&127) +# define LMT_SA_M_PART(a) (((a)>> 7)&127) +# define LMT_SA_L_PART(a) ( (a) &127) + +/*tex + + In the early days of \LUATEX\ we had just simple items, all 32 bit values. Then we put the + delcodes in trees too which saved memory and format size but it introduced 32 bit slack in all + the other code arrays. We then also had to dump selectively, but it was no big deal. Eventually, + once it became clear that the concepts would not change a variant was made for \LUAMETATEX: we + just use a two times larger lower array when we have delimiters. This saves some memory. The + price we pay is that a stack entry now has two values but that is not really an issue. + + By packing the math code values we loose the option to store an active state but that's no big + deal. + + todo: consider simple char array for catcodes. + + The code here is somewhat messy because we generalized it a bit. Maybe I'll redo it some day. + + */ + +typedef struct sparse_state_info { + memory_data sparse_data; +} sparse_state_info; + +extern sparse_state_info lmt_sparse_state; + +/* +typedef struct sa_mathblob { + unsigned int character_value:21; + unsigned int class_value:3; + unsigned int family_value:8; +} sa_mathblob; +*/ + +typedef struct sa_mathblob { + unsigned int class_value:math_class_bits; + unsigned int family_value:math_family_bits; + unsigned int character_value:math_character_bits; +} sa_mathblob; + +typedef struct sa_mathspec { + unsigned short properties; + unsigned short group; + unsigned int index; +} sa_mathspec; + +typedef struct packed_math_character { + union { + sa_mathblob sa_value; + unsigned ui_value; + }; +} packed_math_character; + +typedef union sa_tree_item { + unsigned int uint_value; + int int_value; + sa_mathblob math_code_value; + sa_mathspec math_spec_value; + unsigned short ushort_value[2]; + unsigned char uchar_value[4]; +} sa_tree_item; + +typedef struct sa_stack_item { + int code; + int level; + sa_tree_item value_1; + sa_tree_item value_2; +} sa_stack_item; + +typedef struct sa_tree_head { + int sa_stack_size; /*tex initial stack size */ + int sa_stack_step; /*tex increment stack step */ + int sa_stack_ptr; /*tex current stack point */ + sa_tree_item dflt; /*tex default item value */ + sa_tree_item ***tree; /*tex item tree head */ + sa_stack_item *stack; /*tex stack tree head */ + int bytes; /*tex the number of items per entry */ + int padding; +} sa_tree_head; + +typedef sa_tree_head *sa_tree; + +extern int sa_get_item_1 (const sa_tree head, int n); +extern int sa_get_item_2 (const sa_tree head, int n); +extern sa_tree_item sa_get_item_4 (const sa_tree head, int n); +extern sa_tree_item sa_get_item_8 (const sa_tree head, int n, sa_tree_item * v2); +extern void sa_set_item_1 (sa_tree head, int n, int v, int gl); +extern void sa_set_item_2 (sa_tree head, int n, int v, int gl); +extern void sa_set_item_4 (sa_tree head, int n, sa_tree_item v, int gl); +extern void sa_set_item_8 (sa_tree head, int n, sa_tree_item v1, sa_tree_item v2, int gl); +/* void sa_rawset_item_1 (sa_tree head, int n, sa_tree_item v); */ +/* void sa_rawset_item_2 (sa_tree head, int n, sa_tree_item v); */ +/* void sa_rawset_item_4 (sa_tree head, int n, sa_tree_item v); */ +/* void sa_rawset_item_8 (sa_tree head, int n, sa_tree_item v1, sa_tree_item v2); */ +extern sa_tree sa_new_tree (int size, int bytes, sa_tree_item dflt); +extern sa_tree sa_copy_tree (sa_tree head); +extern void sa_destroy_tree (sa_tree head); +extern void sa_dump_tree (dumpstream f, sa_tree a); +extern sa_tree sa_undump_tree (dumpstream f); +extern void sa_restore_stack (sa_tree a, int gl); +extern void sa_clear_stack (sa_tree a); + +extern void sa_set_item_n (const sa_tree head, int n, int v, int gl); +extern int sa_get_item_n (const sa_tree head, int n); + +inline static halfword sa_return_item_1(sa_tree head, halfword n) +{ + if (head->tree) { + int hp = LMT_SA_H_PART(n); + if (head->tree[hp]) { + int mp = LMT_SA_M_PART(n); + if (head->tree[hp][mp]) { + return (halfword) head->tree[hp][mp][LMT_SA_L_PART(n)/4].uchar_value[n%4]; + } + } + } + return (halfword) head->dflt.uchar_value[0]; +} + +inline static halfword sa_return_item_2(sa_tree head, halfword n) +{ + if (head->tree) { + int hp = LMT_SA_H_PART(n); + if (head->tree[hp]) { + int mp = LMT_SA_M_PART(n); + if (head->tree[hp][mp]) { + return (halfword) head->tree[hp][mp][LMT_SA_L_PART(n)/2].ushort_value[n%2]; + } + } + } + return (halfword) head->dflt.ushort_value[0]; +} + +inline static halfword sa_return_item_4(sa_tree head, halfword n) +{ + if (head->tree) { + int hp = LMT_SA_H_PART(n); + if (head->tree[hp]) { + int mp = LMT_SA_M_PART(n); + if (head->tree[hp][mp]) { + return (halfword) head->tree[hp][mp][LMT_SA_L_PART(n)].int_value; + } + } + } + return (halfword) head->dflt.int_value; +} + +inline static void sa_rawset_item_1(sa_tree head, halfword n, unsigned char v) +{ + head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)][LMT_SA_L_PART(n)/4].uchar_value[n%4] = v; +} + +inline static void sa_rawset_item_2(sa_tree head, halfword n, unsigned short v) +{ + head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)][LMT_SA_L_PART(n)/2].ushort_value[n%2] = v; +} + +inline static void sa_rawset_item_4(sa_tree head, halfword n, sa_tree_item v) +{ + head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)][LMT_SA_L_PART(n)] = v; +} + +inline static void sa_rawset_item_8(sa_tree head, halfword n, sa_tree_item v1, sa_tree_item v2) +{ + sa_tree_item *low = head->tree[LMT_SA_H_PART(n)][LMT_SA_M_PART(n)]; + int l = 2*LMT_SA_L_PART(n); + low[l] = v1; + low[l+1] = v2; +} + +// inline them + +extern void *sa_malloc_array (int recordsize, int size); +extern void *sa_realloc_array (void *p, int recordsize, int size, int step); +extern void *sa_calloc_array (int recordsize, int size); +extern void *sa_free_array (void *p); +extern void sa_wipe_array (void *head, int recordsize, int size); + +# endif diff --git a/source/luametatex/source/utilities/auxsystem.c b/source/luametatex/source/utilities/auxsystem.c new file mode 100644 index 000000000..d3d818a85 --- /dev/null +++ b/source/luametatex/source/utilities/auxsystem.c @@ -0,0 +1,155 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex This code is taken from the \LUA\ socket library: |timeout.c|. */ + +# ifdef _WIN32 + + double aux_get_current_time(void) { + FILETIME ft; + double t; + GetSystemTimeAsFileTime(&ft); + /* Windows file time (time since January 1, 1601 (UTC)) */ + t = ft.dwLowDateTime/1.0e7 + ft.dwHighDateTime*(4294967296.0/1.0e7); + /* convert to Unix Epoch time (time since January 1, 1970 (UTC)) */ + return (t - 11644473600.0); + } + +# else + + double aux_get_current_time(void) { + struct timeval v; + gettimeofday(&v, (struct timezone *) NULL); + /* Unix Epoch time (time since January 1, 1970 (UTC)) */ + return v.tv_sec + v.tv_usec/1.0e6; + } + +# endif + +void aux_set_run_time(void) +{ + lmt_main_state.start_time = aux_get_current_time(); +} + +double aux_get_run_time(void) +{ + return aux_get_current_time() - lmt_main_state.start_time; +} + +/*tex + + In order to avoid all kind of time code in the backend code we use a function. The start time + can be overloaded in several ways: + + \startitemize[n] + \startitem + By setting the environmment variable |SOURCE_DATE_EPOCH|. This will influence the \PDF\ + timestamp and \PDF\ id that is derived from the time. This variable is consulted when + the kpse library is enabled which is analogue to other properties. + \stopitem + \startitem + By setting the |texconfig.start_time| variable (as with other variables we use the + internal name there). This has the same effect as (1) and is provided for when kpse is + not used to set these variables or when an overloaded is wanted. This is analogue to + other properties. + \stopitem + \stopitemize + + To some extend a cleaner solution would be to have a flag that disables all variable data in + one go (like filenames and so) but we just follow the method implemented in pdftex where + primitives are used to disable it. + +*/ + +static int start_time = -1; /*tex This will move to one of the structs. */ + +static int aux_get_start_time(void) { + if (start_time < 0) { + start_time = (int) time((time_t *) NULL); + } + return start_time; +} + +/*tex + + This one is used to fetch a value from texconfig which can also be used to set properties. + This might come in handy when one has other ways to get date info in the \PDF\ file. + +*/ + +void aux_set_start_time(int s) { + if (s >= 0) { + start_time = s ; + } +} + +/*tex + + All our interrupt handler has to do is set \TEX's global variable |interrupt|; then they + will do everything needed. + +*/ + +# ifdef _WIN32 + + /* Win32 doesn't set SIGINT ... */ + + static BOOL WINAPI catch_interrupt(DWORD arg) + { + switch (arg) { + case CTRL_C_EVENT: + case CTRL_BREAK_EVENT: + aux_quit_the_program(); + return 1; + default: + /*tex No need to set interrupt as we are exiting anyway. */ + return 0; + } + } + + void aux_set_interrupt_handler(void) + { + SetConsoleCtrlHandler(catch_interrupt, TRUE); + } + +# else + + /* static RETSIGTYPE catch_interrupt(int arg) */ + + static void catch_interrupt(int arg) + { + (void) arg; + aux_quit_the_program(); + (void) signal(SIGINT, catch_interrupt); + } + + void aux_set_interrupt_handler(void) + { + /* RETSIGTYPE (*old_handler) (int); */ + void (*old_handler) (int); + old_handler = signal(SIGINT, catch_interrupt); + if (old_handler != SIG_DFL) { + signal(SIGINT, old_handler); + } + } + +# endif + +void aux_get_date_and_time(int *minutes, int *day, int *month, int *year, int *utc) +{ + time_t myclock = aux_get_start_time(); + struct tm *tmptr ; + if (*utc) { + tmptr = gmtime(&myclock); + } else { + tmptr = localtime(&myclock); + } + *minutes = tmptr->tm_hour * 60 + tmptr->tm_min; + *day = tmptr->tm_mday; + *month = tmptr->tm_mon + 1; + *year = tmptr->tm_year + 1900; + /* set_interrupt_handler(); */ +} diff --git a/source/luametatex/source/utilities/auxsystem.h b/source/luametatex/source/utilities/auxsystem.h new file mode 100644 index 000000000..5b9a5bad0 --- /dev/null +++ b/source/luametatex/source/utilities/auxsystem.h @@ -0,0 +1,17 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_UTILITIES_SYSTEM_H +# define LMT_UTILITIES_SYSTEM_H + +extern void aux_quit_the_program (void); + +extern void aux_set_start_time (int); +extern void aux_set_interrupt_handler (void); +extern void aux_get_date_and_time (int *minutes, int *day, int *month, int *year, int *utc); +extern double aux_get_current_time (void); +extern void aux_set_run_time (void); +extern double aux_get_run_time (void); + +# endif diff --git a/source/luametatex/source/utilities/auxunistring.c b/source/luametatex/source/utilities/auxunistring.c new file mode 100644 index 000000000..e95854a93 --- /dev/null +++ b/source/luametatex/source/utilities/auxunistring.c @@ -0,0 +1,158 @@ +/* + See license.txt in the root of this project. +*/ + +# include "luametatex.h" + +/*tex + + The 5- and 6-byte UTF-8 sequences generate integers that are outside of the valid UCS range, + and therefore unsupported. We recover from an error with |0xFFFD|. + +*/ + +unsigned aux_str2uni(const unsigned char *k) +{ + const unsigned char *text = k; + int ch = *text++; + if (ch < 0x80) { + return (unsigned) ch; + } else if (ch <= 0xbf) { + return 0xFFFD; + } else if (ch <= 0xdf) { + if (text[0] >= 0x80 && text[0] < 0xc0) { + return (unsigned) (((ch & 0x1f) << 6) | (text[0] & 0x3f)); + } + } else if (ch <= 0xef) { + if (text[0] >= 0x80 && text[0] < 0xc0 && text[1] >= 0x80 && text[1] < 0xc0) { + return (unsigned) (((ch & 0xf) << 12) | ((text[0] & 0x3f) << 6) | (text[1] & 0x3f)); + } + } else if (ch <= 0xf7) { + if (text[0] < 0x80 || text[1] < 0x80 || text[2] < 0x80 || + text[0] >= 0xc0 || text[1] >= 0xc0 || text[2] >= 0xc0) { + return 0xFFFD; + } else { + int w1 = (((ch & 0x7) << 2) | ((text[0] & 0x30) >> 4)) - 1; + int w2 = ((text[1] & 0xf) << 6) | (text[2] & 0x3f); + w1 = (w1 << 6) | ((text[0] & 0xf) << 2) | ((text[1] & 0x30) >> 4); + return (unsigned) (w1 * 0x400 + w2 + 0x10000); + } + } + return 0xFFFD; +} + +unsigned char *aux_uni2str(unsigned unic) +{ + unsigned char *buf = lmt_memory_malloc(5); + if (buf) { + if (unic < 0x80) { + buf[0] = (unsigned char) unic; + buf[1] = '\0'; + } else if (unic < 0x800) { + buf[0] = (unsigned char) (0xc0 | (unic >> 6)); + buf[1] = (unsigned char) (0x80 | (unic & 0x3f)); + buf[2] = '\0'; + } else if (unic >= 0x110000) { + buf[0] = (unsigned char) (unic - 0x110000); + buf[1] = '\0'; + } else if (unic < 0x10000) { + buf[0] = (unsigned char) (0xe0 | (unic >> 12)); + buf[1] = (unsigned char) (0x80 | ((unic >> 6) & 0x3f)); + buf[2] = (unsigned char) (0x80 | (unic & 0x3f)); + buf[3] = '\0'; + } else { + unic -= 0x10000; + int u = (int) (((unic & 0xf0000) >> 16) + 1); + buf[0] = (unsigned char) (0xf0 | (u >> 2)); + buf[1] = (unsigned char) (0x80 | ((u & 3) << 4) | ((unic & 0x0f000) >> 12)); + buf[2] = (unsigned char) (0x80 | ((unic & 0x00fc0) >> 6)); + buf[3] = (unsigned char) (0x80 | (unic & 0x0003f)); + buf[4] = '\0'; + } + } + return buf; +} + +/*tex + + Function |buffer_to_unichar| converts a sequence of bytes in the |buffer| into a \UNICODE\ + character value. It does not check for overflow of the |buffer|, but it is careful to check + the validity of the \UTF-8 encoding. For historical reasons all these small helpers look a bit + different but that has a certain charm so we keep it. + +*/ + +char *aux_uni2string(char *utf8_text, unsigned unic) +{ + /*tex Increment and deposit character: */ + if (unic <= 0x7f) { + *utf8_text++ = (char) unic; + } else if (unic <= 0x7ff) { + *utf8_text++ = (char) (0xc0 | (unic >> 6)); + *utf8_text++ = (char) (0x80 | (unic & 0x3f)); + } else if (unic <= 0xffff) { + *utf8_text++ = (char) (0xe0 | (unic >> 12)); + *utf8_text++ = (char) (0x80 | ((unic >> 6) & 0x3f)); + *utf8_text++ = (char) (0x80 | (unic & 0x3f)); + } else if (unic < 0x110000) { + unic -= 0x10000; + unsigned u = ((unic & 0xf0000) >> 16) + 1; + *utf8_text++ = (char) (0xf0 | (u >> 2)); + *utf8_text++ = (char) (0x80 | ((u & 3) << 4) | ((unic & 0x0f000) >> 12)); + *utf8_text++ = (char) (0x80 | ((unic & 0x00fc0) >> 6)); + *utf8_text++ = (char) (0x80 | (unic & 0x0003f)); + } + return (utf8_text); +} + +unsigned aux_splitutf2uni(unsigned int *ubuf, const char *utf8buf) +{ + int len = (int) strlen(utf8buf); + unsigned int *upt = ubuf; + unsigned int *uend = ubuf + len; + const unsigned char *pt = (const unsigned char *) utf8buf; + const unsigned char *end = pt + len; + while (pt < end && *pt != '\0' && upt < uend) { + if (*pt <= 127) { + *upt = *pt++; + } else if (*pt <= 0xdf) { + *upt = (unsigned int) (((*pt & 0x1f) << 6) | (pt[1] & 0x3f)); + pt += 2; + } else if (*pt <= 0xef) { + *upt = (unsigned int) (((*pt & 0xf) << 12) | ((pt[1] & 0x3f) << 6) | (pt[2] & 0x3f)); + pt += 3; + } else { + int w1 = (((*pt & 0x7) << 2) | ((pt[1] & 0x30) >> 4)) - 1; + int w2 = ((pt[2] & 0xf) << 6) | (pt[3] & 0x3f); + w1 = (w1 << 6) | ((pt[1] & 0xf) << 2) | ((pt[2] & 0x30) >> 4); + *upt = (unsigned int) (w1 * 0x400 + w2 + 0x10000); + pt += 4; + } + ++upt; + } + *upt = '\0'; + return (unsigned int) (upt - ubuf); +} + +size_t aux_utf8len(const char *text, size_t size) +{ + size_t ls = size; + size_t ind = 0; + size_t num = 0; + while (ind < ls) { + unsigned char i = (unsigned char) *(text + ind); + if (i < 0x80) { + ind += 1; + } else if (i >= 0xF0) { + ind += 4; + } else if (i >= 0xE0) { + ind += 3; + } else if (i >= 0xC0) { + ind += 2; + } else { + ind += 1; + } + num += 1; + } + return num; +} diff --git a/source/luametatex/source/utilities/auxunistring.h b/source/luametatex/source/utilities/auxunistring.h new file mode 100644 index 000000000..1e6a997b9 --- /dev/null +++ b/source/luametatex/source/utilities/auxunistring.h @@ -0,0 +1,19 @@ +/* + See license.txt in the root of this project. +*/ + +# ifndef LMT_UTILITIES_UNISTRING_H +# define LMT_UTILITIES_UNISTRING_H + +extern unsigned char *aux_uni2str (unsigned); +extern unsigned aux_str2uni (const unsigned char *); +extern char *aux_uni2string (char *utf8_text, unsigned ch); +extern unsigned aux_splitutf2uni (unsigned int *ubuf, const char *utf8buf); +extern size_t aux_utf8len (const char *text, size_t size); + +# define is_utf8_follow(a) (a >= 0x80 && a < 0xC0) +# define utf8_size(a) (a > 0xFFFF ? 4 : (a > 0x7FF ? 3 : (a > 0x7F ? 2 : 1))) +# define buffer_to_unichar(k) aux_str2uni((const unsigned char *)(lmt_fileio_state.io_buffer+k)) + +# endif + diff --git a/source/luametatex/source/utilities/auxzlib.c b/source/luametatex/source/utilities/auxzlib.c new file mode 100644 index 000000000..7444b5944 --- /dev/null +++ b/source/luametatex/source/utilities/auxzlib.c @@ -0,0 +1,18 @@ +/* + See license.txt in the root of this project. +*/ + +# include "auxzlib.h" +# include "auxmemory.h" + +void *lmt_zlib_alloc(void *opaque, size_t items, size_t size) +{ + (void) opaque; + return lmt_memory_malloc((size_t) items * size); +} + +void lmt_zlib_free(void *opaque, void *p) +{ + (void) opaque; + lmt_memory_free(p); +} diff --git a/source/luametatex/source/utilities/auxzlib.h b/source/luametatex/source/utilities/auxzlib.h new file mode 100644 index 000000000..7dfaa058a --- /dev/null +++ b/source/luametatex/source/utilities/auxzlib.h @@ -0,0 +1,24 @@ +/* + See license.txt in the root of this project. +*/ + +/*tex + + This module deals with the memory allocation that plugs in the zipper. Although we could just + use the defaule malloc, it's nicer to use the replacement, when it is enabled. A previous + version had th eoption to choose between zlib and miniz but in 2021 we switched to the later + so the former is now in the attic. + +*/ + +# ifndef LMT_UTILITIES_ZLIB_H +# define LMT_UTILITIES_ZLIB_H + +# include "../libraries/miniz/miniz.h" + +/*tex These plug in the lua library as well as pplib's flate hander. */ + +extern void *lmt_zlib_alloc (void *opaque, size_t items, size_t size); +extern void lmt_zlib_free (void *opaque, void *p); + +# endif |