1 files changed, 913 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/texprimitive.c b/source/luametatex/source/tex/texprimitive.c
new file mode 100644
index 000000000..bbeea1bc0
--- /dev/null
+++ b/source/luametatex/source/tex/texprimitive.c
@@ -0,0 +1,913 @@
+/*
+    See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+/*tex
+
+    Control sequences are stored and retrieved by means of a fairly standard hash table algorithm
+    called the method of \quote {coalescing lists} (cf.\ Algorithm 6.4C in {\em The Art of
+    Computer Programming}). Once a control sequence enters the table, it is never removed, because
+    there are complicated situations involving |\gdef| where the removal of a control sequence at
+    the end of a group would be a mistake preventable only by the introduction of a complicated
+    reference-count mechanism.
+
+    The actual sequence of letters forming a control sequence identifier is stored in the |str_pool|
+    array together with all the other strings. An auxiliary array |hash| consists of items with two
+    halfword fields per word. The first of these, called |next(p)|, points to the next identifier
+    belonging to the same coalesced list as the identifier corresponding to~|p|; and the other,
+    called |text(p)|, points to the |str_start| entry for |p|'s identifier. If position~|p| of the
+    hash table is empty, we have |text(p)=0|; if position |p| is either empty or the end of a
+    coalesced hash list, we have |next(p) = 0|. An auxiliary pointer variable called |hash_used| is
+    maintained in such a way that all locations |p >= hash_used| are nonempty. The global variable
+    |cs_count| tells how many multiletter control sequences have been defined, if statistics are
+    being kept.
+
+    A boolean variable called |no_new_control_sequence| is set to |true| during the time that new
+    hash table entries are forbidden.
+
+    The other variables in the following state structure are: the hash table: |hash|, the allocation
+    pointer |hash_used| for |hash|, |hash_extra| above |eqtb_size|, the maximum of the hash array
+    |hash_top|, the pointer to the next high hash location |hash_high|, the mentioned flag that says
+    if new identifiers are legal |no_new_control_sequence| and the total number of known identifiers:
+    |cs_count|.
+
+*/
+
+hash_state_info lmt_hash_state = {
+    .hash      = NULL,
+    .hash_data = {
+        .minimum   = min_hash_size,
+        .maximum   = max_hash_size,
+        .size      = siz_hash_size,
+        .step      = stp_hash_size,
+        .allocated = 0,
+        .itemsize  = sizeof(memoryword) + sizeof(memoryword),
+        .top       = 0,
+        .ptr       = 0,
+        .initial   = 0,
+        .offset    = 0, // eqtb_size,
+    },
+    .eqtb_data = {
+        .minimum   = min_hash_size,
+        .maximum   = max_hash_size,
+        .size      = siz_hash_size,
+        .step      = stp_hash_size,
+        .allocated = memory_data_unset,
+        .itemsize  = memory_data_unset,
+        .top       = frozen_control_sequence,
+        .ptr       = 0,
+        .initial   = 0,
+        .offset    = 0,
+    },
+    .eqtb      = NULL,
+    .no_new_cs = 1,
+    .padding   = 0,
+};
+
+/*tex
+
+    The arrays |prim| and |prim_eqtb| are used for |name -> cmd, chr| lookups. The are modelled
+    after |hash| and |eqtb|, except that primitives do not have an |eq_level|, that field is
+    replaced by |origin|. Furthermore we have a link for coalesced lists: |prim_next (a)|; the
+    string number for control sequence name: |prim_text (a)|; test if all positions are occupied:
+    |prim_is_full|; some fields: |prim_origin_field (a)|, |prim_eq_type_field (a)| and
+    |prim_equiv_field(a)|; the level of definition: |prim_origin (a)|; the command code for
+    equivalent: |prim_eq_type(a)|; the equivalent value: |prim_equiv(a)|; the allocation pointer
+    for |prim|: |prim_used|; the primitives tables: |two_halves prim [(prim_size + 1)]| and
+    |memoryword prim_eqtb [(prim_size + 1)]|. The array |prim_data| works the other way around, it
+    is used for |cmd, chr| to name lookups.
+
+*/
+
+primitive_state_info lmt_primitive_state;
+
+/*tex Test if all positions are occupied: */
+
+# define prim_base           1
+# define reserved_hash_slots 1
+
+/*tex Initialize the memory arrays: */
+
+void tex_initialize_primitives(void)
+{
+    memset(lmt_primitive_state.prim_data, 0, sizeof(prim_info)  * (last_cmd  + 1));
+    memset(lmt_primitive_state.prim,      0, sizeof(memoryword) * (prim_size + 1));
+    memset(lmt_primitive_state.prim_eqtb, 0, sizeof(memoryword) * (prim_size + 1));
+    for (int k = 0; k <= prim_size; k++) {
+        prim_eq_type(k) = undefined_cs_cmd;
+    }
+    lmt_primitive_state.prim_used = prim_size;
+}
+
+void tex_initialize_hash_mem(void)
+{
+    if (lmt_main_state.run_state == initializing_state) {
+        if (lmt_hash_state.hash_data.minimum == 0) {
+            tex_emergency_message("startup error", "you need at least some hash size");
+        } else {
+            lmt_hash_state.hash_data.allocated = lmt_hash_state.hash_data.minimum;
+            lmt_hash_state.hash_data.top = eqtb_size + lmt_hash_state.hash_data.minimum;
+        }
+    }
+    {
+        int size = lmt_hash_state.hash_data.top + 1;
+        memoryword *hash = aux_allocate_clear_array(sizeof(memoryword), size, reserved_hash_slots);
+        memoryword *eqtb = aux_allocate_clear_array(sizeof(memoryword), size, reserved_hash_slots);
+        if (hash && eqtb) {
+            lmt_hash_state.hash = hash;
+            lmt_hash_state.eqtb = eqtb;
+            if (lmt_main_state.run_state == initializing_state) {
+                /*tex Initialization happens elsewhere. */
+            } else {
+                tex_initialize_undefined_cs();
+                for (int i = eqtb_size + 1; i <= lmt_hash_state.hash_data.top; i++) {
+                    copy_eqtb_entry(i, undefined_control_sequence);
+                }
+            }
+        } else {
+            tex_overflow_error("hash", size);
+        }
+    }
+}
+
+static int tex_aux_room_in_hash(void)
+{
+    if (lmt_hash_state.hash_data.allocated + lmt_hash_state.hash_data.step <= lmt_hash_state.hash_data.size) {
+        int size = lmt_hash_state.hash_data.top + lmt_hash_state.hash_data.step + 1;
+        memoryword *hash = aux_reallocate_array(lmt_hash_state.hash, sizeof(memoryword), size, reserved_hash_slots);
+        memoryword *eqtb = aux_reallocate_array(lmt_hash_state.eqtb, sizeof(memoryword), size, reserved_hash_slots);
+        if (hash && eqtb) {
+            memset(hash + lmt_hash_state.hash_data.top + 1, 0, sizeof(memoryword) * (size_t) lmt_hash_state.hash_data.step);
+            memset(eqtb + lmt_hash_state.hash_data.top + 1, 0, sizeof(memoryword) * (size_t) lmt_hash_state.hash_data.step);
+            lmt_hash_state.hash = hash;
+            lmt_hash_state.eqtb = eqtb;
+            /*tex
+                This is not really needed because we now dp this when a new id is created which
+                is a better place anyway. But we play safe and still do it:
+            */
+            for (int i = lmt_hash_state.hash_data.top + 1; i <= size; i++) {
+                copy_eqtb_entry(i, undefined_control_sequence);
+            }
+            lmt_hash_state.hash_data.allocated += lmt_hash_state.hash_data.step;
+            lmt_hash_state.hash_data.top += lmt_hash_state.hash_data.step;
+            lmt_run_memory_callback("hash", 1);
+            return 1;
+        } else {
+            lmt_run_memory_callback("hash", 0);
+            tex_overflow_error("hash", size);
+        }
+    }
+    return 0;
+}
+
+/*tex
+
+    The value of |hash_prime| should be roughly 85\%! of |hash_size|, and it should be a prime
+    number. The theory of hashing tells us to expect fewer than two table probes, on the average,
+    when the search is successful. [See J.~S. Vitter, {\sl Journal of the ACM\/ \bf30} (1983),
+    231--258.]
+
+    https://en.wikipedia.org/wiki/Coalesced_hashing
+
+    Because we seldom use uppercase we get many misses, multiplying a chr j[k] by k actually gives
+    a better spread.
+
+    Making a \CONTEXT\ format takes some 250.000 hash calculations while the \LUAMETATEX\ needs
+    some 1.7 million for just over 250 pages (with an average string length of 15).
+
+    The primitive hash lookups are needed when we initialize and when we lookup an internal
+    variable.
+
+*/
+
+inline static halfword tex_aux_compute_hash(const char *j, int l)
+{
+    halfword h = (unsigned const char) j[0];
+    for (int k = 1; k < l; k++) {
+        h = (h + h + (unsigned const char) j[k]) % hash_prime;
+    }
+    return h;
+}
+
+inline static halfword tex_aux_compute_prim(const char *j, unsigned l)
+{
+    halfword h = (unsigned const char) j[0];
+    for (unsigned k = 1; k < l; k++) {
+        h = (h + h + (unsigned const char) j[k]) % prim_prime;
+    }
+    return h;
+}
+
+halfword tex_prim_lookup(strnumber s)
+{
+    /*tex The index in the |hash| array: */
+    if (s >= cs_offset_value) {
+        unsigned char *j = str_string(s);
+        unsigned l = (unsigned) str_length(s);
+        halfword h = tex_aux_compute_prim((char *) j, l);
+        /*tex We start searching here; note that |0 <= h < hash_prime|. */
+        halfword p = h + 1;
+        while (1) {
+            if (prim_text(p) > 0 && str_length(prim_text(p)) == l && tex_str_eq_str(prim_text(p), s)) {
+                return p;
+            } else if (prim_next(p)) {
+                p = prim_next(p);
+            } else if (lmt_hash_state.no_new_cs) {
+                return undefined_primitive;
+            } else {
+                /*tex Insert a new primitive after |p|, then make |p| point to it. */
+                if (prim_text(p) > 0) {
+                    /*tex Search for an empty location in |prim| */
+                    do {
+                        if (lmt_primitive_state.prim_used > prim_base) {
+                            --lmt_primitive_state.prim_used;
+                        } else {
+                            tex_overflow_error("primitive size", prim_size);
+                        }
+                    } while (prim_text(lmt_primitive_state.prim_used));
+                    prim_next(p) = lmt_primitive_state.prim_used;
+                    p = lmt_primitive_state.prim_used;
+                }
+                prim_text(p) = s;
+                break;
+            }
+        }
+        return p;
+    } else if ((s < 0) || (s == undefined_control_sequence)) {
+        return undefined_primitive;
+    } else {
+        return s;
+    }
+}
+
+/*tex How to test a csname for primitive-ness? */
+
+/*
+int tex_cs_is_primitive(strnumber csname)
+{
+    int m = prim_lookup(csname);
+    if (m != undefined_primitive) {
+        char *ss = makecstring(csname);
+        int n = string_locate(ss, str_length(csname), 0);
+        lmt_memory_free(ss);
+        return ((n != undefined_cs_cmd) && (eq_type(n) == prim_eq_type(m)) && (eq_value(n) == prim_equiv(m)));
+    } else {
+        return 0;
+    }
+}
+*/
+
+/*tex Dumping and undumping. */
+
+/* We cheat! It should be dump_things(f, prim_state.prim[p], 1); */
+
+void tex_dump_primitives(dumpstream f)
+{
+    /*
+    for (int p = 0; p <= prim_size; p++) {
+        dump_mem(f, prim_state.prim[p]);
+    }
+    for (int p = 0; p <= prim_size; p++) {
+        dump_mem(f, prim_state.prim_eqtb[p]);
+    }
+    */
+    dump_things(f, lmt_primitive_state.prim[0], prim_size + 1);
+    dump_things(f, lmt_primitive_state.prim_eqtb[0], prim_size + 1);
+    for (int p = 0; p <= last_cmd; p++) {
+        dump_int(f, lmt_primitive_state.prim_data[p].offset);
+        dump_int(f, lmt_primitive_state.prim_data[p].subids);
+        for (int q = 0; q < lmt_primitive_state.prim_data[p].subids; q++) {
+            dump_int(f, lmt_primitive_state.prim_data[p].names[q]);
+        }
+    }
+}
+
+void tex_undump_primitives(dumpstream f)
+{
+    undump_things(f, lmt_primitive_state.prim[0], prim_size + 1);
+    undump_things(f, lmt_primitive_state.prim_eqtb[0], prim_size + 1);
+    for (int p = 0; p <= last_cmd; p++) {
+        undump_int(f, lmt_primitive_state.prim_data[p].offset);
+        undump_int(f, lmt_primitive_state.prim_data[p].subids);
+        if (lmt_primitive_state.prim_data[p].subids > 0) {
+            int size = lmt_primitive_state.prim_data[p].subids;
+            strnumber *names = aux_allocate_clear_array(sizeof(strnumber *), size, 1);
+            if (names) {
+                lmt_primitive_state.prim_data[p].names = names;
+                for (int q = 0; q < lmt_primitive_state.prim_data[p].subids; q++) {
+                    undump_int(f, names[q]);
+                }
+            } else {
+                tex_overflow_error("primitives", size * sizeof(strnumber *));
+            }
+        }
+    }
+}
+
+/*tex
+
+    Dump the hash table, A different scheme is used to compress the hash table, since its lower
+    region is usually sparse. When |text (p) <> 0| for |p <= hash_used|, we output two words,
+    |p| and |hash[p]|. The hash table is, of course, densely packed for |p >= hash_used|, so the
+    remaining entries are output in a~block.
+
+*/
+
+void tex_dump_hashtable(dumpstream f)
+{
+    dump_int(f, lmt_hash_state.eqtb_data.top);
+    lmt_hash_state.eqtb_data.ptr = frozen_control_sequence - 1 - lmt_hash_state.eqtb_data.top + lmt_hash_state.hash_data.ptr;
+    /* the root entries, i.e. the direct hash slots */
+    for (halfword p = hash_base; p <= lmt_hash_state.eqtb_data.top; p++) {
+        if (cs_text(p)) {
+            dump_int(f, p);
+            dump_int(f, lmt_hash_state.hash[p]);
+            ++lmt_hash_state.eqtb_data.ptr;
+        }
+    }
+    /* the chain entries, i.e. the follow up list slots => eqtb */
+    dump_things(f, lmt_hash_state.hash[lmt_hash_state.eqtb_data.top + 1], special_sequence_base - lmt_hash_state.eqtb_data.top);
+    if (lmt_hash_state.hash_data.ptr > 0) {
+        dump_things(f, lmt_hash_state.hash[eqtb_size + 1], lmt_hash_state.hash_data.ptr);
+    }
+    dump_int(f, lmt_hash_state.eqtb_data.ptr);
+}
+
+void tex_undump_hashtable(dumpstream f)
+{
+    undump_int(f, lmt_hash_state.eqtb_data.top);
+    if (lmt_hash_state.eqtb_data.top >= hash_base && lmt_hash_state.eqtb_data.top <= frozen_control_sequence) {
+        halfword p = hash_base - 1;
+        do {
+            halfword q;
+            undump_int(f, q);
+            if (q >= (p + 1) && q <= lmt_hash_state.eqtb_data.top) {
+                undump_int(f, lmt_hash_state.hash[q]);
+                p = q;
+            } else {
+                goto BAD;
+            }
+        } while (p != lmt_hash_state.eqtb_data.top);
+        undump_things(f, lmt_hash_state.hash[lmt_hash_state.eqtb_data.top + 1], special_sequence_base - lmt_hash_state.eqtb_data.top);
+        if (lmt_hash_state.hash_data.ptr > 0) {
+            /* we get a warning on possible overrun here */
+            undump_things(f, lmt_hash_state.hash[eqtb_size + 1], lmt_hash_state.hash_data.ptr);
+        }
+        undump_int(f, lmt_hash_state.eqtb_data.ptr);
+        lmt_hash_state.eqtb_data.initial = lmt_hash_state.eqtb_data.ptr;
+        return;
+    }
+  BAD:
+    tex_fatal_undump_error("hash");
+}
+
+/*tex
+
+    We need to put \TEX's \quote {primitive} control sequences into the hash table, together with
+    their command code (which will be the |eq_type|) and an operand (which will be the |equiv|).
+    The |primitive| procedure does this, in a way that no \TEX\ user can. The global value |cur_val|
+    contains the new |eqtb| pointer after |primitive| has acted.
+
+    Because the definitions of the actual user-accessible name of a primitive can be postponed until
+    runtime, the function |primitive_def| is needed that does nothing except creating the control
+    sequence name.
+
+*/
+
+void tex_primitive_def(const char *str, size_t length, singleword cmd, halfword chr)
+{
+    /*tex This creates the |text()| string: */
+    cur_val = tex_string_locate(str, length, 1);
+    set_eq_level(cur_val, level_one);
+    set_eq_type(cur_val, cmd);
+    set_eq_flag(cur_val, primitive_flag_bit);
+    set_eq_value(cur_val, chr);
+}
+
+/*tex
+
+    The function |store_primitive_name| sets up the bookkeeping for the reverse lookup. It is
+    quite paranoid, because it is easy to mess this up accidentally.
+
+    The |offset| is needed because sometimes character codes (in |o|) are indices into |eqtb|
+    or are offset by a magical value to make sure they do not conflict with something else. We
+    don't want the |prim_data[c].names| to have too many entries as it will just be wasted room,
+    so |offset| is substracted from |o| before creating or accessing the array.
+
+*/
+
+static void tex_aux_store_primitive_name(strnumber s, singleword cmd, halfword chr, halfword offset)
+{
+    lmt_primitive_state.prim_data[cmd].offset = offset;
+    if (lmt_primitive_state.prim_data[cmd].subids < (chr + 1)) {
+        /*tex Not that efficient as each primitive triggers this now but only at ini time so ... */
+        strnumber *newstr = aux_allocate_clear_array(sizeof(strnumber *), chr + 1, 1);
+        if (lmt_primitive_state.prim_data[cmd].names) {
+            memcpy(newstr, lmt_primitive_state.prim_data[cmd].names, (unsigned) (lmt_primitive_state.prim_data[cmd].subids) * sizeof(strnumber));
+            aux_deallocate_array(lmt_primitive_state.prim_data[cmd].names);
+        }
+        lmt_primitive_state.prim_data[cmd].names = newstr;
+        lmt_primitive_state.prim_data[cmd].subids = chr + 1;
+    }
+    lmt_primitive_state.prim_data[cmd].names[chr] = s;
+}
+
+/*tex
+
+    Compared to \TEX82, |primitive| has two extra parameters. The |off| is an offset that will be
+    passed on to |store_primitive_name|, the |cmd_origin| is the bit that is used to group
+    primitives by originator. So the next function is called for each primitive and fills |prim_eqtb|.
+
+    Contrary to \LUATEX\ we define (using |primitive_def|) all primitives beforehand, so not only
+    those with |cmd_origin| values |core| and |tex|. As side effect, we don't get redundant string
+    entries as in \LUATEX.
+
+*/
+
+void tex_primitive(int cmd_origin, const char *str, singleword cmd, halfword chr, halfword offset)
+{
+    int prim_val;
+    strnumber ss;
+    if (cmd_origin != no_command) {
+        tex_primitive_def(str, strlen(str), cmd, offset + chr);
+        /*tex Indeed, |cur_val| has the latest primitive. */
+        ss = cs_text(cur_val);
+    } else {
+        ss = tex_maketexstring(str);
+    }
+    prim_val = tex_prim_lookup(ss);
+    prim_origin(prim_val) = (quarterword) cmd_origin;
+    prim_eq_type(prim_val) = cmd;
+    prim_equiv(prim_val) = offset + chr;
+    tex_aux_store_primitive_name(ss, cmd, chr, offset);
+}
+
+/*tex
+
+    Here is a helper that does the actual hash insertion. This code far from ideal: the existence
+    of |hash_extra| changes all the potential (short) coalesced lists into a single (long) one.
+    This will create a slowdown.
+
+    Here |hash_state.hash_used| starts out as the maximum \quote {normal} hash, not extra.
+
+*/
+
+static halfword tex_aux_insert_id(halfword p, const unsigned char *j, unsigned int l)
+{
+    if (cs_text(p) > 0) {
+     RESTART:
+        if (lmt_hash_state.hash_data.ptr < lmt_hash_state.hash_data.allocated) {
+            ++lmt_hash_state.hash_data.ptr;
+            cs_next(p) = lmt_hash_state.hash_data.ptr + eqtb_size;
+            p = cs_next(p);
+        } else if (tex_aux_room_in_hash()) {
+            goto RESTART;
+        } else {
+            /*tex
+                Search for an empty location in |hash|. This actually makes the direct first hit
+                in such a hash slot invalid but we check for the string anyway. As we now use a
+                hash size that is rather minimal, we don't really need this branch. It is a last
+                resort anyway.
+            */
+            do {
+                if (lmt_hash_state.eqtb_data.top == hash_base) {
+                    /*tex We cannot go lower than this. */
+                    tex_overflow_error("hash size", hash_size + lmt_hash_state.hash_data.allocated);
+                }
+                --lmt_hash_state.eqtb_data.top;
+            } while (cs_text(lmt_hash_state.eqtb_data.top) != 0);
+            cs_next(p) = lmt_hash_state.eqtb_data.top;
+            p = lmt_hash_state.eqtb_data.top;
+        }
+    }
+    cs_text(p) = tex_push_string(j, l);
+    copy_eqtb_entry(p, undefined_control_sequence);
+    ++lmt_hash_state.eqtb_data.ptr;
+    return p;
+}
+
+/*tex
+
+    Here is the subroutine that searches the hash table for an identifier that matches a given
+    string of length |l > 1| appearing in |buffer[j .. (j + l - 1)]|. If the identifier is found,
+    the corresponding hash table address is returned. Otherwise, if the global variable
+    |no_new_control_sequence| is |true|, the dummy address |undefined_control_sequence| is returned.
+    Otherwise the identifier is inserted into the hash table and its location is returned.
+
+    On the \LUAMETATEX\ manual we have 250K hits and 400K misses. Adapting the max and prime does
+    bring down the misses but also no gain in performance. In practice we seldom follow the chain.
+
+*/
+
+halfword tex_id_locate(int j, int l, int create)
+{
+    /*tex The index in |hash| array: */
+    halfword h = tex_aux_compute_hash((char *) (lmt_fileio_state.io_buffer + j), l);
+    /*tex We start searching here. Note that |0 <= h < hash_prime|: */
+    halfword p = h + hash_base;
+    /*tex The next one in a list: */
+    while (1) {
+        strnumber s = cs_text(p);
+        if ((s > 0) && (str_length(s) == (unsigned) l) && tex_str_eq_buf(s, j, l)) {
+            return p;
+        } else {
+            halfword n = cs_next(p);
+            if (n) {
+                p = n;
+            } else if (create) {
+                return tex_aux_insert_id(p, (lmt_fileio_state.io_buffer + j), (unsigned) l);
+            } else {
+                break;
+            }
+        }
+    }
+    return undefined_control_sequence;
+}
+
+/*tex
+
+    Here is a similar subroutine for finding a primitive in the hash. This one is based on a \CCODE\
+    string.
+
+*/
+
+halfword tex_string_locate(const char *s, size_t l, int create)
+{
+    /*tex The hash code: */
+    halfword h = tex_aux_compute_hash(s, (int) l);
+    /*tex The index in |hash| array. We start searching here. Note that |0 <= h < hash_prime|: */
+    halfword p = h + hash_base;
+    while (1) {
+        if (cs_text(p) > 0 && tex_str_eq_cstr(cs_text(p), s, (int) l)) {
+            return p;
+        } else {
+            halfword n = cs_next(p);
+            if (n) {
+                p = n;
+            } else if (create) {
+                return tex_aux_insert_id(p, (const unsigned char *) s, (unsigned) l);
+            } else {
+                break;
+            }
+        }
+    }
+    return undefined_control_sequence;
+}
+
+halfword tex_located_string(const char *s)
+{
+    size_t l = strlen(s);
+    return tex_string_locate(s, l, 0);
+}
+
+/*tex
+
+    The |print_cmd_chr| routine prints a symbolic interpretation of a command code and its modifier.
+    This is used in certain \quotation {You can\'t} error messages, and in the implementation of
+    diagnostic routines like |\show|.
+
+    The body of |print_cmd_chr| use to be a rather tedious listing of print commands, and most of it
+    was essentially an inverse to the |primitive| routine that enters a \TEX\ primitive into |eqtb|.
+
+    Thanks to |prim_data|, there is no need for all that tediousness. What is left of |primt_cnd_chr|
+    are just the exceptions to the general rule that the |cmd,chr_code| pair represents in a single
+    primitive command.
+
+*/
+
+static void tex_aux_print_chr_cmd(const char *s, halfword cmd, halfword chr)
+{
+    tex_print_str(s);
+    if (chr) {
+        tex_print_str(cmd == letter_cmd ? " letter " : " character ");
+        tex_print_uhex(chr);
+        tex_print_char(' ');
+        /*
+            By using the the unicode (ascii) names for some we can better support syntax
+            highlighting (which often involves parsing). The names are enclused in single
+            quotes. For the chr codes above 128 we assume \UNICODE\ support.
+        */
+        /*tex
+            We already intercepted the line feed here so that it doesn't give a side effect here
+            in the original |tex_print_tex_str(chr)| call but we have now inlined similar code
+            but without side effects.
+        */
+        if (chr < 32 || chr == 127) {
+            return;
+        } else if (chr <= 0x7F) {
+            switch (chr) {
+                case '\n' : tex_print_str("'line feed'");            return;
+                case '\r' : tex_print_str("'carriage return'");      return;
+                case ' '  : tex_print_str("'space'");                return;
+                case '!'  : tex_print_str("'exclamation mark'");     return;
+                case '\"' : tex_print_str("'quotation mark'");       return;
+                case '#'  : tex_print_str("'hash tag'");             return;
+                case '$'  : tex_print_str("'dollar sign'");          return;
+                case '%'  : tex_print_str("'percent sign'");         return;
+                case '&'  : tex_print_str("'ampersand'");            return;
+                case '\'' : tex_print_str("'apostrophe'");           return;
+                case '('  : tex_print_str("'left parenthesis'");     return;
+                case ')'  : tex_print_str("'right parenthesis'");    return;
+                case '*'  : tex_print_str("'asterisk'");             return;
+                case '+'  : tex_print_str("'plus sign'");            return;
+                case ','  : tex_print_str("'comma'");                return;
+                case '-'  : tex_print_str("'hyphen minus'");         return;
+                case '.'  : tex_print_str("'full stop'");            return;
+                case '/'  : tex_print_str("'slash'");                return;
+                case ':'  : tex_print_str("'colon'");                return;
+                case ';'  : tex_print_str("'semicolon'");            return;
+                case '<'  : tex_print_str("'less than sign'");       return;
+                case '='  : tex_print_str("'equal sign'");           return;
+                case '>'  : tex_print_str("'more than sign'");       return;
+                case '?'  : tex_print_str("'question mark'");        return;
+                case '@'  : tex_print_str("'at sign'");              return;
+                case '['  : tex_print_str("'left square bracket'");  return;
+                case '\\' : tex_print_str("'backslash'");            return;
+                case ']'  : tex_print_str("'right square bracket'"); return;
+                case '^'  : tex_print_str("'circumflex accent'");    return;
+                case '_'  : tex_print_str("'low line'");             return;
+                case '`'  : tex_print_str("'grave accent'");         return;
+                case '{'  : tex_print_str("'left curly bracket'");   return;
+                case '|'  : tex_print_str("'vertical bar'");         return;
+                case '}'  : tex_print_str("'right curly bracket'");  return;
+                case '~'  : tex_print_str("'tilde'");                return;
+            }
+            tex_print_char(chr);
+        } else if (chr <= 0x7FF) {
+            tex_print_char(0xC0 + (chr / 0x40));
+            tex_print_char(0x80 + (chr % 0x40));
+        } else if (chr <= 0xFFFF) {
+            tex_print_char(0xE0 +  (chr / 0x1000));
+            tex_print_char(0x80 + ((chr % 0x1000) / 0x40));
+            tex_print_char(0x80 + ((chr % 0x1000) % 0x40));
+        } else if (chr <= 0x10FFFF) {
+            tex_print_char(0xF0 +   (chr / 0x40000));
+            tex_print_char(0x80 +  ((chr % 0x40000) / 0x1000));
+            tex_print_char(0x80 + (((chr % 0x40000) % 0x1000) / 0x40));
+            tex_print_char(0x80 + (((chr % 0x40000) % 0x1000) % 0x40));
+        }
+    }
+}
+
+/*tex |\TEX82| Didn't print the |cmd,idx| information, but it may be useful. */
+
+static void tex_aux_prim_cmd_chr(quarterword cmd, halfword chr)
+{
+    if (cmd <= last_visible_cmd) {
+        int idx = chr - lmt_primitive_state.prim_data[cmd].offset;
+        if (idx >= 0 && idx < lmt_primitive_state.prim_data[cmd].subids) {
+            if (lmt_primitive_state.prim_data[cmd].names && lmt_primitive_state.prim_data[cmd].names[idx]) {
+                tex_print_tex_str_esc(lmt_primitive_state.prim_data[cmd].names[idx]);
+            } else {
+                tex_print_format("[warning: cmd %i, chr %i, no name]", cmd, idx);
+            }
+        } else if (cmd == internal_int_cmd && idx < number_int_pars) {
+            /* a special case */
+            tex_print_format("[integer: chr %i, class specific]", cmd);
+        } else {
+            tex_print_format("[warning: cmd %i, chr %i, out of range]", cmd, idx);
+        }
+    } else {
+        tex_print_format("[warning: cmd %i, invalid]", cmd);
+    }
+}
+
+static void tex_aux_show_lua_call(const char *what, int slot)
+{
+    int callback_id = lmt_callback_defined(show_lua_call_callback);
+    if (callback_id) {
+        char *ss = NULL;
+        int lua_retval = lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "Sd->S", what, slot, &ss);
+        if (lua_retval && ss && strlen(ss) > 0) {
+            tex_print_str(ss);
+            lmt_memory_free(ss);
+            return;
+        }
+    }
+    tex_print_format("%s %i", what, slot);
+}
+
+void tex_print_cmd_flags(halfword cs, halfword cmd, int flags, int escaped)
+{
+    if (flags) {
+        flags = eq_flag(cs);
+        if (is_frozen   (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("frozen "   ); }
+        if (is_permanent(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("permanent "); }
+        if (is_immutable(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("immutable "); }
+        if (is_primitive(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("primitive "); }
+        if (is_mutable  (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("mutable "  ); }
+        if (is_noaligned(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("noaligned "); }
+        if (is_instance (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("instance " ); }
+        if (is_untraced (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("untraced " ); }
+    }
+    if (is_tolerant_cmd (cmd)) {
+        (escaped ? tex_print_str_esc : tex_print_str)("tolerant " );
+    }
+    if (is_protected_cmd(cmd)) {
+        (escaped ? tex_print_str_esc : tex_print_str)("protected ");
+    } else if (is_semi_protected_cmd(cmd)) {
+        (escaped ? tex_print_str_esc : tex_print_str)("semiprotected ");
+    }
+}
+
+void tex_print_cmd_chr(singleword cmd, halfword chr)
+{
+    switch (cmd) {
+        case left_brace_cmd:
+            tex_aux_print_chr_cmd("begin group", cmd, chr);
+            break;
+        case right_brace_cmd:
+            tex_aux_print_chr_cmd("end group", cmd, chr);
+            break;
+        case math_shift_cmd:
+            tex_aux_print_chr_cmd("math shift", cmd, chr);
+            break;
+        case alignment_tab_cmd:
+            tex_aux_print_chr_cmd("alignment tab", cmd, chr);
+            break;
+        case parameter_cmd:
+            tex_aux_print_chr_cmd("parameter", cmd, chr);
+            break;
+        case superscript_cmd:
+            tex_aux_print_chr_cmd("superscript", cmd, chr);
+            break;
+        case subscript_cmd:
+            tex_aux_print_chr_cmd("subscript", cmd, chr);
+            break;
+        case spacer_cmd:
+            tex_aux_print_chr_cmd("blank space", cmd, chr);
+            break;
+        case letter_cmd:
+        case other_char_cmd:
+            tex_aux_print_chr_cmd("the", cmd, chr);
+            break;
+        /*
+        case active_char_cmd:
+        case comment_cmd:
+        case invalid_char_cmd:
+            break;
+        */
+        case end_template_cmd:
+            /*tex Kind of special: |chr| points to |null_list). */
+            tex_print_str_esc("endtemplate");
+         // tex_print_str("end of alignment template");
+            break;
+        case if_test_cmd:
+            if (chr <= last_if_test_code) {
+                tex_aux_prim_cmd_chr(cmd, chr);
+            } else {
+                tex_aux_show_lua_call("luacondition", chr - last_if_test_code);
+            }
+            break;
+        case char_given_cmd:
+            tex_print_str_esc("char");
+            tex_print_qhex(chr);
+            break;
+     // case math_char_given_cmd:
+     //     /*tex
+     //         Okay, it's better for old macro packages that mess with meaning to report a more
+     //         traditional value. A compromise. But, this might be dropped.
+     //     */
+     //     tex_print_str_esc("mathchar");
+     //     tex_show_mathcode_value(tex_mathchar_from_integer(chr, tex_mathcode), tex_mathcode);
+     //     break;
+     // case math_char_xgiven_cmd:
+     //     tex_print_str_esc("Umathchar");
+     //     tex_show_mathcode_value(tex_mathchar_from_integer(chr, umath_mathcode), umath_mathcode);
+     //     break;
+        case lua_call_cmd:
+            tex_aux_show_lua_call("luacall", chr);
+            break;
+        case lua_local_call_cmd:
+            tex_aux_show_lua_call("local luacall", chr);
+            break;
+        case lua_protected_call_cmd:
+            tex_aux_show_lua_call("protected luacall", chr);
+            break;
+        case lua_value_cmd:
+            tex_aux_show_lua_call("luavalue", chr);
+            break;
+        case set_font_cmd:
+            tex_print_str("select font ");
+            tex_print_font(chr);
+            break;
+        case undefined_cs_cmd:
+            tex_print_str("undefined");
+            break;
+        case call_cmd:
+        case protected_call_cmd:
+        case semi_protected_call_cmd:
+        case tolerant_call_cmd:
+        case tolerant_protected_call_cmd:
+        case tolerant_semi_protected_call_cmd:
+            tex_print_cmd_flags(cur_cs, cur_cmd, 1, 0);
+            tex_print_str("macro");
+            break;
+        case internal_toks_cmd:
+            tex_aux_prim_cmd_chr(cmd, chr);
+            break;
+        case register_toks_cmd:
+            tex_print_str_esc("toks");
+            tex_print_int(register_toks_number(chr));
+            break;
+        case internal_int_cmd:
+            tex_aux_prim_cmd_chr(cmd, chr);
+            break;
+        case register_int_cmd:
+            tex_print_str_esc("count");
+            tex_print_int(register_int_number(chr));
+            break;
+        case internal_attribute_cmd:
+            tex_aux_prim_cmd_chr(cmd, chr);
+            break;
+        case register_attribute_cmd:
+            tex_print_str_esc("attribute");
+            tex_print_int(register_attribute_number(chr));
+            break;
+        case internal_dimen_cmd:
+            tex_aux_prim_cmd_chr(cmd, chr);
+            break;
+        case register_dimen_cmd:
+            tex_print_str_esc("dimen");
+            tex_print_int(register_dimen_number(chr));
+            break;
+        case internal_glue_cmd:
+            tex_aux_prim_cmd_chr(cmd, chr);
+            break;
+        case register_glue_cmd:
+            tex_print_str_esc("skip");
+            tex_print_int(register_glue_number(chr));
+            break;
+        case internal_mu_glue_cmd:
+            tex_aux_prim_cmd_chr(cmd, chr);
+            break;
+        case register_mu_glue_cmd:
+            tex_print_str_esc("muskip");
+            tex_print_int(register_mu_glue_number(chr));
+            break;
+        case node_cmd:
+            tex_print_str(node_token_flagged(chr) ? "large" : "small");
+            tex_print_str(" node reference");
+            break;
+        case integer_cmd:
+            tex_print_str("integer ");
+            tex_print_int(chr);
+            break;
+        case dimension_cmd:
+            tex_print_str("dimension ");
+            tex_print_dimension(chr, pt_unit);
+            break;
+        case gluespec_cmd:
+            tex_print_str("gluespec ");
+            tex_print_spec(chr, pt_unit);
+            break;
+        case mugluespec_cmd:
+            tex_print_str("mugluespec ");
+            tex_print_spec(chr, mu_unit);
+            break;
+        case mathspec_cmd:
+            switch (node_subtype(chr)) {
+                case tex_mathcode:
+                    tex_print_str_esc("mathchar");
+                    break;
+                case umath_mathcode:
+             /* case umathnum_mathcode: */
+                    tex_print_str_esc("Umathchar");
+                    break;
+                case mathspec_mathcode:
+                    tex_print_str("mathspec ");
+            }
+            tex_print_mathspec(chr);
+            break;
+        case fontspec_cmd:
+            {
+                /* We don't check for validity here. */
+                tex_print_str("fontspec ");
+                tex_print_fontspec(chr);
+            }
+            break;
+        case deep_frozen_end_template_cmd:
+            /*tex Kind of special: |chr| points to |null_list). */
+            tex_print_str_esc("endtemplate");
+            break;
+        case deep_frozen_dont_expand_cmd:
+            /*tex Kind of special. */
+            tex_print_str_esc("notexpanded");
+            break;
+        /*
+        case string_cmd:
+            print_str("string:->");
+            print(cs_offset_value + chr);
+            break;
+        */
+        case internal_box_reference_cmd:
+            tex_print_str_esc("hiddenlocalbox");
+            break;
+        default:
+            /*tex These are most commands, actually. Todo: local boxes*/
+            tex_aux_prim_cmd_chr(cmd, chr);
+            break;
+    }
+}