summaryrefslogtreecommitdiff
path: root/source/luametatex/source/tex/texdumpdata.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/tex/texdumpdata.c')
-rw-r--r--source/luametatex/source/tex/texdumpdata.c331
1 files changed, 331 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/texdumpdata.c b/source/luametatex/source/tex/texdumpdata.c
new file mode 100644
index 000000000..58f9e4442
--- /dev/null
+++ b/source/luametatex/source/tex/texdumpdata.c
@@ -0,0 +1,331 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+dump_state_info lmt_dump_state = {
+ .format_identifier = 0,
+ .format_name = 0
+};
+
+/*tex
+
+ After \INITEX\ has seen a collection of fonts and macros, it can write all the necessary
+ information on an auxiliary file so that production versions of \TEX\ are able to initialize
+ their memory at high speed. The present section of the program takes care of such output and
+ input. We shall consider simultaneously the processes of storing and restoring, so that the
+ inverse relation between them is clear.
+
+ The global variable |format_ident| is a string that is printed right after the |banner| line
+ when \TEX\ is ready to start. For \INITEX\ this string says simply |(INITEX)|; for other
+ versions of \TEX\ it says, for example, |(preloaded format = plain 1982.11.19)|, showing the
+ year, month, and day that the format file was created. We have |format_ident = 0| before \TEX's
+ tables are loaded. |FORMAT_ID| is a new field of type int suitable for the identification of a
+ format: values between 0 and 256 (included) can not be used because in the previous format they
+ are used for the length of the name of the engine.
+
+ Because most used processors are little endian, we flush that way, but after that we just stick
+ to the architecture. This also lets it come out as a readable 12 character (not nul terminated)
+ string on a little endian machine. By using integers we can be sure that when it's generated on
+ a different architecture the format is not seen as valid.
+
+*/
+
+/*
+
+ In \LUAMETATEX\ the code has been overhauled. The sections are better separated and we write
+ less to the file because we try to be sparse. Also, a more dynamic approach is used. In the
+ \CONTEXT\ macro package most of what goes into the format is \LUA\ bytecode.
+
+ We no longer hand endian related code here which saves swapping bytes on the most popular
+ architectures. We also maintain some statistics and have several points where we check if
+ we're still okay.
+
+ Here we only have the main chunk. The specific data sections are implemented where it makes
+ most sense.
+
+*/
+
+# define MAGIC_FORMAT_NUMBER_LE_1 0x58544D4C // 0x4C4D5458 // LMTX
+# define MAGIC_FORMAT_NUMBER_LE_2 0x5845542D // 0x2D544558 // -TEX
+# define MAGIC_FORMAT_NUMBER_LE_3 0x544D462D // 0x2D464D54 // -FMT
+
+static int tex_aux_report_dump_state(dumpstream f, int pos, const char *what)
+{
+ int tmp = ftell(f);
+ tex_print_int(tmp - pos);
+ tex_print_char(' ');
+ tex_print_str(what);
+ fflush(stdout);
+ return tmp;
+}
+
+/* todo: move more dumping to other files, then also the sizes. */
+
+static void tex_aux_dump_fingerprint(dumpstream f)
+{
+ dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_1);
+ dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_2);
+ dump_via_int(f, MAGIC_FORMAT_NUMBER_LE_3);
+ dump_via_int(f, luametatex_format_fingerprint);
+}
+
+static void tex_aux_undump_fingerprint(dumpstream f)
+{
+ int x;
+ undump_int(f, x);
+ if (x == MAGIC_FORMAT_NUMBER_LE_1) {
+ undump_int(f, x);
+ if (x == MAGIC_FORMAT_NUMBER_LE_2) {
+ undump_int(f, x);
+ if (x == MAGIC_FORMAT_NUMBER_LE_3) {
+ undump_int(f, x);
+ if (x == luametatex_format_fingerprint) {
+ return;
+ } else {
+ tex_fatal_undump_error("version id");
+ }
+ }
+ }
+ }
+ tex_fatal_undump_error("initial fingerprint");
+}
+
+static void tex_aux_dump_final_check(dumpstream f)
+{
+ dump_int(f, lmt_dump_state.format_identifier);
+ dump_int(f, lmt_dump_state.format_name);
+ dump_via_int(f, luametatex_format_fingerprint);
+}
+
+static void tex_aux_undump_final_check(dumpstream f)
+{
+ int x;
+ undump_int(f, lmt_dump_state.format_identifier);
+ if (lmt_dump_state.format_identifier < 0 || lmt_dump_state.format_identifier > lmt_string_pool_state.string_pool_data.ptr) {
+ goto BAD;
+ }
+ undump_int(f, lmt_dump_state.format_name);
+ if (lmt_dump_state.format_name < 0 || lmt_dump_state.format_name > lmt_string_pool_state.string_pool_data.ptr) {
+ goto BAD;
+ }
+ undump_int(f, x);
+ if (x == luametatex_format_fingerprint) {
+ return;
+ }
+ BAD:
+ tex_fatal_undump_error("final fingerprint");
+}
+
+static void tex_aux_create_fmt_name(void)
+{
+ lmt_print_state.selector = new_string_selector_code;
+ tex_print_format("%s %i.%i.%i",lmt_fileio_state.fmt_name, year_par, month_par, day_par);
+ lmt_dump_state.format_identifier = tex_make_string();
+ tex_print_str(lmt_fileio_state.job_name);
+ lmt_dump_state.format_name = tex_make_string();
+ lmt_print_state.selector = terminal_and_logfile_selector_code;
+}
+
+static void tex_aux_dump_preamble(dumpstream f)
+{
+ dump_via_int(f, hash_size);
+ dump_via_int(f, hash_prime);
+ dump_via_int(f, prim_size);
+ dump_via_int(f, prim_prime);
+ dump_int(f, lmt_hash_state.hash_data.allocated);
+ dump_int(f, lmt_hash_state.hash_data.ptr);
+ dump_int(f, lmt_hash_state.hash_data.top);
+}
+
+static void tex_aux_undump_preamble(dumpstream f)
+{
+ int x;
+ undump_int(f, x);
+ if (x != hash_size) {
+ goto BAD;
+ }
+ undump_int(f, x);
+ if (x != hash_prime) {
+ goto BAD;
+ }
+ undump_int(f, x);
+ if (x != prim_size) {
+ goto BAD;
+ }
+ undump_int(f, x);
+ if (x != prim_prime) {
+ goto BAD;
+ }
+ undump_int(f, lmt_hash_state.hash_data.allocated);
+ undump_int(f, lmt_hash_state.hash_data.ptr);
+ undump_int(f, lmt_hash_state.hash_data.top);
+ /*tex
+ We can consider moving all these allocaters to the start instead of this exception.
+ */
+ tex_initialize_hash_mem();
+ return;
+ BAD:
+ tex_fatal_undump_error("preamble");
+}
+
+void tex_store_fmt_file(void)
+{
+ int pos = 0;
+ dumpstream f = NULL;
+
+ /*tex
+ If dumping is not allowed, abort. The user is not allowed to dump a format file unless
+ |save_ptr = 0|. This condition implies that |cur_level=level_one|, hence the |xeq_level|
+ array is constant and it need not be dumped.
+ */
+
+ if (lmt_save_state.save_stack_data.ptr != 0) {
+ tex_handle_error(
+ succumb_error_type,
+ "You can't dump inside a group",
+ "'{...\\dump}' is a no-no."
+ );
+ }
+
+ /*tex
+ We don't store some things.
+ */
+
+ tex_dispose_specification_nodes();
+
+ /*tex
+ Create the |format_ident|, open the format file, and inform the user that dumping has begun.
+ */
+
+ {
+ int callback_id = lmt_callback_defined(pre_dump_callback);
+ if (callback_id > 0) {
+ (void) lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "->");
+ }
+ }
+
+ /*tex
+ We report the usual plus some more statistics. When something is wrong the machine just
+ quits, hopefully with some meaningful error. We always create the format in normal log and
+ terminal mode. We create a format name first because we also use that in error reporting.
+ */
+
+ tex_aux_create_fmt_name();
+
+ f = tex_open_fmt_file(1);
+ if (! f) {
+ tex_formatted_error("system", "format file '%s' cannot be opened for writing", lmt_fileio_state.fmt_name);
+ return;
+ }
+
+ tex_print_nlp();
+ tex_print_format("Dumping format '%T' in file '%s': ", lmt_dump_state.format_identifier, lmt_fileio_state.fmt_name);
+ fflush(stdout);
+
+ tex_compact_tokens();
+ tex_compact_string_pool();
+
+ tex_aux_dump_fingerprint(f); pos = tex_aux_report_dump_state(f, pos, "fingerprint + ");
+ lmt_dump_engine_info(f); pos = tex_aux_report_dump_state(f, pos, "engine + ");
+ tex_aux_dump_preamble(f); pos = tex_aux_report_dump_state(f, pos, "preamble + ");
+ tex_dump_constants(f); pos = tex_aux_report_dump_state(f, pos, "constants + ");
+ tex_dump_string_pool(f); pos = tex_aux_report_dump_state(f, pos, "stringpool + ");
+ tex_dump_node_mem(f); pos = tex_aux_report_dump_state(f, pos, "nodes + ");
+ tex_dump_token_mem(f); pos = tex_aux_report_dump_state(f, pos, "tokens + ");
+ tex_dump_equivalents_mem(f); pos = tex_aux_report_dump_state(f, pos, "equivalents + ");
+ tex_dump_math_codes(f); pos = tex_aux_report_dump_state(f, pos, "math codes + ");
+ tex_dump_text_codes(f); pos = tex_aux_report_dump_state(f, pos, "text codes + ");
+ tex_dump_primitives(f); pos = tex_aux_report_dump_state(f, pos, "primitives + ");
+ tex_dump_hashtable(f); pos = tex_aux_report_dump_state(f, pos, "hashtable + ");
+ tex_dump_font_data(f); pos = tex_aux_report_dump_state(f, pos, "fonts + ");
+ tex_dump_math_data(f); pos = tex_aux_report_dump_state(f, pos, "math + ");
+ tex_dump_language_data(f); pos = tex_aux_report_dump_state(f, pos, "language + ");
+ tex_dump_insert_data(f); pos = tex_aux_report_dump_state(f, pos, "insert + ");
+ lmt_dump_registers(f); pos = tex_aux_report_dump_state(f, pos, "bytecodes + ");
+ tex_aux_dump_final_check(f); pos = tex_aux_report_dump_state(f, pos, "housekeeping = ");
+
+ tex_aux_report_dump_state(f, 0, "total.");
+ tex_close_fmt_file(f);
+ tex_print_ln();
+
+}
+
+/*tex
+
+ Corresponding to the procedure that dumps a format file, we have a function that reads one in.
+ The function returns |false| if the dumped format is incompatible with the present \TEX\ table
+ sizes, etc.
+
+ The inverse macros are slightly more complicated, since we need to check the range of the values
+ we are reading in. We say |undump (a) (b) (x)| to read an integer value |x| that is supposed to
+ be in the range |a <= x <= b|.
+
+*/
+
+int tex_fatal_undump_error(const char *s)
+{
+ tex_emergency_message("system", "fatal format error, loading file '%s' failed with bad '%s' data, remake the format", emergency_fmt_name, s);
+ return tex_emergency_exit();
+}
+
+//define undumping(s) printf("undumping: %s\n",s); fflush(stdout);
+# define undumping(s)
+
+static void tex_aux_undump_fmt_data(dumpstream f)
+{
+ undumping("warmingup")
+
+ undumping("fingerprint") tex_aux_undump_fingerprint(f);
+ undumping("engineinfo") lmt_undump_engine_info(f);
+ undumping("preamble") tex_aux_undump_preamble(f);
+ undumping("constants") tex_undump_constants(f);
+ undumping("strings") tex_undump_string_pool(f);
+ undumping("nodes") tex_undump_node_mem(f);
+ undumping("tokens") tex_undump_token_mem(f);
+ undumping("equivalents") tex_undump_equivalents_mem(f);
+ undumping("mathcodes") tex_undump_math_codes(f);
+ undumping("textcodes") tex_undump_text_codes(f);
+ undumping("primitives") tex_undump_primitives(f);
+ undumping("hashtable") tex_undump_hashtable(f);
+ undumping("fonts") tex_undump_font_data(f);
+ undumping("math") tex_undump_math_data(f);
+ undumping("languages") tex_undump_language_data(f);
+ undumping("inserts") tex_undump_insert_data(f);
+ undumping("bytecodes") lmt_undump_registers(f);
+ undumping("finalcheck") tex_aux_undump_final_check(f);
+
+ undumping("done")
+
+ /*tex This should go elsewhere. */
+
+ cur_list.prev_depth = ignore_depth;
+}
+
+/*
+ The next code plays nice but on an error we exit anyway so some code is never reached in that
+ case.
+*/
+
+int tex_load_fmt_file(void)
+{
+ dumpstream f = tex_open_fmt_file(0);
+ if (f) {
+ tex_aux_undump_fmt_data(f);
+ tex_close_fmt_file(f);
+ return 1;
+ } else {
+ return tex_fatal_undump_error("filehandle");
+ }
+}
+
+void tex_initialize_dump_state(void)
+{
+ lmt_dump_state.format_name = get_nullstr();
+ if (lmt_main_state.run_state == initializing_state) {
+ lmt_dump_state.format_identifier = tex_maketexstring(" (INITEX)");
+ } else {
+ lmt_dump_state.format_identifier = 0;
+ }
+}