summaryrefslogtreecommitdiff
path: root/source/luametatex/source/tex/texdumpdata.h
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/tex/texdumpdata.h')
-rw-r--r--source/luametatex/source/tex/texdumpdata.h105
1 files changed, 105 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/texdumpdata.h b/source/luametatex/source/tex/texdumpdata.h
new file mode 100644
index 000000000..6a9e11a7d
--- /dev/null
+++ b/source/luametatex/source/tex/texdumpdata.h
@@ -0,0 +1,105 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# ifndef LMT_DUMPDATA_H
+# define LMT_DUMPDATA_H
+
+/*tex
+
+ Originally the dump file was a memory dump, in \TEX\ called a format and in \METAFONT\ a base
+ and in \METAPOST\ a mem file. The \TEX\ program could reload that dump file and have a fast
+ start. In addition a pool file was used to store strings. Because it was a memory dump, it was
+ also pretty system dependent.
+
+ When \WEBC\ showed up, \TEX\ installations got distributed on \CDROM\ and later \DVD, and
+ because one could run them from that medium, format files were shared. In order to do that the
+ file had to be endian neutral. Unfortunately the choice was such that for the most common
+ architecture (Intel) the dump items had to be swapped. This could slow down a startup, depending
+ on how rigorous a compiler or operating system was in testing (it is a reason why startup on
+ \MSWINDOWS\ was somewhat slower).
+
+ Because in \LUATEX\ we can also store \LUA\ bytecodes it made no sense to take that portability
+ aspect into account. The format file also got gzipped which at that time sped up loading. Later
+ in the project the endian swapping was removed so we gained a bit more.
+
+ Because a format file that doesn't match an engine can actually result in a crash, we decided to
+ come up with a more robust approach: we use a magic number to register the version of the format!
+ Normally this number only increments when we add a new primitive or change command codes. At
+ some point in \LUATEX\ development we started with 907 which is the sum of the values of the
+ bytes of \quote {don knuth}.
+
+ We sometimes also bump when the binary format (bytecode) of \LUA\ has changed in such a way that
+ the loader doesn't detect it. But that doesn't always help either because the cache is still
+ problematic then. There we actually hard code a different number then (a simple patch of a \LUA\
+ file).
+
+ By the time that the \LUAMETATEX\ code was in a state to be released, it became time to think
+ about a number that was definitely different from \LUATEX\ so here it is:
+
+ \starttyping
+ initial = 2020//4 - 2020//100 + 2020//400 = 490
+ \stoptyping
+
+ Although \LUAMETATEX\ is already a bit older, we sort of released in leapyear 2020 so we take
+ the number of leapyears since zero (which is kind of \type {\undefined} as starting point). This
+ number actually jumps whenever something affects the format file (which can be an extra command or
+ some reshuffling of codes) so it is not always an indication of something really new.
+
+ So to summarize: we don't share formats across architectures and operating systems, we use the
+ native endian property of an architecture, we don't compress, and we bump a magic number so that
+ we can intercept a potential crash. So much for a bit of history.
+
+ We also bump the fingerprint when we have a new version of \LUA, just to play safe in case some
+ bytecodes have changed.
+
+*/
+
+# define luametatex_format_fingerprint 670 /* magic number that registers the format version; bumped whenever something affects the format file */
+
+/*
+    Two string numbers kept in |lmt_dump_state|. These end up in the string pool.
+*/
+
+typedef struct dump_state_info {
+ strnumber format_identifier; /* presumably the identification text of the format -- confirm at the point of use */
+ strnumber format_name; /* presumably the name of the format file -- confirm at the point of use */
+} dump_state_info;
+
+extern dump_state_info lmt_dump_state; /* declaration only; the object itself is defined outside this header */
+
+extern void tex_store_fmt_file (void); /* no arguments, no result; presumably writes the format file -- confirm in the definition */
+extern int tex_load_fmt_file (void); /* returns an |int|; presumably reads a format file back, meaning of the result to be confirmed in the definition */
+extern int tex_fatal_undump_error (const char *s); /* takes a read-only string |s| and returns an |int|; presumably reports a failed load -- confirm */
+extern void tex_initialize_dump_state (void); /* no arguments, no result; presumably sets up |lmt_dump_state| -- confirm */
+
+# define dump_items(f,p,item_size,nitems) fwrite((void *) p, (size_t) item_size, (size_t) nitems, f)
+# define undump_items(f,p,item_size,nitems) { if (fread ((void *) p, (size_t) item_size, (size_t) nitems, f)) { } }
+
+# define dump_things(f,base,len) dump_items(f, (char *) &(base), sizeof (base), (int) (len))
+# define undump_things(f,base,len) undump_items(f, (char *) &(base), sizeof (base), (int) (len))
+
+# define dump_int(f,x) dump_things(f,x,1)
+# define undump_int(f,x) undump_things(f,x,1)
+
+/*tex
+
+    A constant or the result of a function call has no address that |dump_int| can take, so
+    |dump_via_int| first copies the value into a local |int| and then dumps that variable. Most
+    integers come from structs and arrays and can be dumped directly. Performance-wise there is
+    not that much gain.
+
+*/
+
+# define dump_via_int(f,x) \
+    do { \
+        int x_val = (x); \
+        dump_int(f, x_val); \
+    } while (0)
+
+# define dump_string(f,a) \
+ if (a) { \
+ int x = (int)strlen(a) + 1; \
+ dump_int(f,x); \
+ dump_things(f,*a, x); \
+ } else { \
+ dump_via_int(f,0); \
+ }
+
+# endif