summaryrefslogtreecommitdiff
path: root/source/luametatex/source/tex/texprimitive.c
diff options
context:
space:
mode:
Diffstat (limited to 'source/luametatex/source/tex/texprimitive.c')
-rw-r--r--source/luametatex/source/tex/texprimitive.c913
1 files changed, 913 insertions, 0 deletions
diff --git a/source/luametatex/source/tex/texprimitive.c b/source/luametatex/source/tex/texprimitive.c
new file mode 100644
index 000000000..bbeea1bc0
--- /dev/null
+++ b/source/luametatex/source/tex/texprimitive.c
@@ -0,0 +1,913 @@
+/*
+ See license.txt in the root of this project.
+*/
+
+# include "luametatex.h"
+
+/*tex
+
+ Control sequences are stored and retrieved by means of a fairly standard hash table algorithm
+ called the method of \quote {coalescing lists} (cf.\ Algorithm 6.4C in {\em The Art of
+ Computer Programming}). Once a control sequence enters the table, it is never removed, because
+ there are complicated situations involving |\gdef| where the removal of a control sequence at
+ the end of a group would be a mistake preventable only by the introduction of a complicated
+ reference-count mechanism.
+
+ The actual sequence of letters forming a control sequence identifier is stored in the |str_pool|
+ array together with all the other strings. An auxiliary array |hash| consists of items with two
+ halfword fields per word. The first of these, called |next(p)|, points to the next identifier
+ belonging to the same coalesced list as the identifier corresponding to~|p|; and the other,
+ called |text(p)|, points to the |str_start| entry for |p|'s identifier. If position~|p| of the
+ hash table is empty, we have |text(p)=0|; if position |p| is either empty or the end of a
+ coalesced hash list, we have |next(p) = 0|. An auxiliary pointer variable called |hash_used| is
+ maintained in such a way that all locations |p >= hash_used| are nonempty. The global variable
+ |cs_count| tells how many multiletter control sequences have been defined, if statistics are
+ being kept.
+
+ A boolean variable called |no_new_control_sequence| is set to |true| during the time that new
+ hash table entries are forbidden.
+
+ The other variables in the following state structure are: the hash table: |hash|, the allocation
+ pointer |hash_used| for |hash|, |hash_extra| above |eqtb_size|, the maximum of the hash array
+ |hash_top|, the pointer to the next high hash location |hash_high|, the mentioned flag that says
+ if new identifiers are legal |no_new_control_sequence| and the total number of known identifiers:
+ |cs_count|.
+
+*/
+
+hash_state_info lmt_hash_state = {
+ .hash = NULL,
+ .hash_data = {
+ .minimum = min_hash_size,
+ .maximum = max_hash_size,
+ .size = siz_hash_size,
+ .step = stp_hash_size,
+ .allocated = 0,
+ .itemsize = sizeof(memoryword) + sizeof(memoryword),
+ .top = 0,
+ .ptr = 0,
+ .initial = 0,
+ .offset = 0, // eqtb_size,
+ },
+ .eqtb_data = {
+ .minimum = min_hash_size,
+ .maximum = max_hash_size,
+ .size = siz_hash_size,
+ .step = stp_hash_size,
+ .allocated = memory_data_unset,
+ .itemsize = memory_data_unset,
+ .top = frozen_control_sequence,
+ .ptr = 0,
+ .initial = 0,
+ .offset = 0,
+ },
+ .eqtb = NULL,
+ .no_new_cs = 1,
+ .padding = 0,
+};
+
+/*tex
+
+ The arrays |prim| and |prim_eqtb| are used for |name -> cmd, chr| lookups. The are modelled
+ after |hash| and |eqtb|, except that primitives do not have an |eq_level|, that field is
+ replaced by |origin|. Furthermore we have a link for coalesced lists: |prim_next (a)|; the
+ string number for control sequence name: |prim_text (a)|; test if all positions are occupied:
+ |prim_is_full|; some fields: |prim_origin_field (a)|, |prim_eq_type_field (a)| and
+ |prim_equiv_field(a)|; the level of definition: |prim_origin (a)|; the command code for
+ equivalent: |prim_eq_type(a)|; the equivalent value: |prim_equiv(a)|; the allocation pointer
+ for |prim|: |prim_used|; the primitives tables: |two_halves prim [(prim_size + 1)]| and
+ |memoryword prim_eqtb [(prim_size + 1)]|. The array |prim_data| works the other way around, it
+ is used for |cmd, chr| to name lookups.
+
+*/
+
+primitive_state_info lmt_primitive_state;
+
+/*tex Test if all positions are occupied: */
+
+# define prim_base 1
+# define reserved_hash_slots 1
+
+/*tex Initialize the memory arrays: */
+
+void tex_initialize_primitives(void)
+{
+ memset(lmt_primitive_state.prim_data, 0, sizeof(prim_info) * (last_cmd + 1));
+ memset(lmt_primitive_state.prim, 0, sizeof(memoryword) * (prim_size + 1));
+ memset(lmt_primitive_state.prim_eqtb, 0, sizeof(memoryword) * (prim_size + 1));
+ for (int k = 0; k <= prim_size; k++) {
+ prim_eq_type(k) = undefined_cs_cmd;
+ }
+ lmt_primitive_state.prim_used = prim_size;
+}
+
+void tex_initialize_hash_mem(void)
+{
+ if (lmt_main_state.run_state == initializing_state) {
+ if (lmt_hash_state.hash_data.minimum == 0) {
+ tex_emergency_message("startup error", "you need at least some hash size");
+ } else {
+ lmt_hash_state.hash_data.allocated = lmt_hash_state.hash_data.minimum;
+ lmt_hash_state.hash_data.top = eqtb_size + lmt_hash_state.hash_data.minimum;
+ }
+ }
+ {
+ int size = lmt_hash_state.hash_data.top + 1;
+ memoryword *hash = aux_allocate_clear_array(sizeof(memoryword), size, reserved_hash_slots);
+ memoryword *eqtb = aux_allocate_clear_array(sizeof(memoryword), size, reserved_hash_slots);
+ if (hash && eqtb) {
+ lmt_hash_state.hash = hash;
+ lmt_hash_state.eqtb = eqtb;
+ if (lmt_main_state.run_state == initializing_state) {
+ /*tex Initialization happens elsewhere. */
+ } else {
+ tex_initialize_undefined_cs();
+ for (int i = eqtb_size + 1; i <= lmt_hash_state.hash_data.top; i++) {
+ copy_eqtb_entry(i, undefined_control_sequence);
+ }
+ }
+ } else {
+ tex_overflow_error("hash", size);
+ }
+ }
+}
+
+static int tex_aux_room_in_hash(void)
+{
+ if (lmt_hash_state.hash_data.allocated + lmt_hash_state.hash_data.step <= lmt_hash_state.hash_data.size) {
+ int size = lmt_hash_state.hash_data.top + lmt_hash_state.hash_data.step + 1;
+ memoryword *hash = aux_reallocate_array(lmt_hash_state.hash, sizeof(memoryword), size, reserved_hash_slots);
+ memoryword *eqtb = aux_reallocate_array(lmt_hash_state.eqtb, sizeof(memoryword), size, reserved_hash_slots);
+ if (hash && eqtb) {
+ memset(hash + lmt_hash_state.hash_data.top + 1, 0, sizeof(memoryword) * (size_t) lmt_hash_state.hash_data.step);
+ memset(eqtb + lmt_hash_state.hash_data.top + 1, 0, sizeof(memoryword) * (size_t) lmt_hash_state.hash_data.step);
+ lmt_hash_state.hash = hash;
+ lmt_hash_state.eqtb = eqtb;
+ /*tex
+ This is not really needed because we now dp this when a new id is created which
+ is a better place anyway. But we play safe and still do it:
+ */
+ for (int i = lmt_hash_state.hash_data.top + 1; i <= size; i++) {
+ copy_eqtb_entry(i, undefined_control_sequence);
+ }
+ lmt_hash_state.hash_data.allocated += lmt_hash_state.hash_data.step;
+ lmt_hash_state.hash_data.top += lmt_hash_state.hash_data.step;
+ lmt_run_memory_callback("hash", 1);
+ return 1;
+ } else {
+ lmt_run_memory_callback("hash", 0);
+ tex_overflow_error("hash", size);
+ }
+ }
+ return 0;
+}
+
+/*tex
+
+ The value of |hash_prime| should be roughly 85\%! of |hash_size|, and it should be a prime
+ number. The theory of hashing tells us to expect fewer than two table probes, on the average,
+ when the search is successful. [See J.~S. Vitter, {\sl Journal of the ACM\/ \bf30} (1983),
+ 231--258.]
+
+ https://en.wikipedia.org/wiki/Coalesced_hashing
+
+ Because we seldom use uppercase we get many misses, multiplying a chr j[k] by k actually gives
+ a better spread.
+
+ Making a \CONTEXT\ format takes some 250.000 hash calculations while the \LUAMETATEX\ needs
+ some 1.7 million for just over 250 pages (with an average string length of 15).
+
+ The primitive hash lookups are needed when we initialize and when we lookup an internal
+ variable.
+
+*/
+
+inline static halfword tex_aux_compute_hash(const char *j, int l)
+{
+ halfword h = (unsigned const char) j[0];
+ for (int k = 1; k < l; k++) {
+ h = (h + h + (unsigned const char) j[k]) % hash_prime;
+ }
+ return h;
+}
+
+inline static halfword tex_aux_compute_prim(const char *j, unsigned l)
+{
+ halfword h = (unsigned const char) j[0];
+ for (unsigned k = 1; k < l; k++) {
+ h = (h + h + (unsigned const char) j[k]) % prim_prime;
+ }
+ return h;
+}
+
+halfword tex_prim_lookup(strnumber s)
+{
+ /*tex The index in the |hash| array: */
+ if (s >= cs_offset_value) {
+ unsigned char *j = str_string(s);
+ unsigned l = (unsigned) str_length(s);
+ halfword h = tex_aux_compute_prim((char *) j, l);
+ /*tex We start searching here; note that |0 <= h < hash_prime|. */
+ halfword p = h + 1;
+ while (1) {
+ if (prim_text(p) > 0 && str_length(prim_text(p)) == l && tex_str_eq_str(prim_text(p), s)) {
+ return p;
+ } else if (prim_next(p)) {
+ p = prim_next(p);
+ } else if (lmt_hash_state.no_new_cs) {
+ return undefined_primitive;
+ } else {
+ /*tex Insert a new primitive after |p|, then make |p| point to it. */
+ if (prim_text(p) > 0) {
+ /*tex Search for an empty location in |prim| */
+ do {
+ if (lmt_primitive_state.prim_used > prim_base) {
+ --lmt_primitive_state.prim_used;
+ } else {
+ tex_overflow_error("primitive size", prim_size);
+ }
+ } while (prim_text(lmt_primitive_state.prim_used));
+ prim_next(p) = lmt_primitive_state.prim_used;
+ p = lmt_primitive_state.prim_used;
+ }
+ prim_text(p) = s;
+ break;
+ }
+ }
+ return p;
+ } else if ((s < 0) || (s == undefined_control_sequence)) {
+ return undefined_primitive;
+ } else {
+ return s;
+ }
+}
+
+/*tex How to test a csname for primitive-ness? */
+
+/*
+int tex_cs_is_primitive(strnumber csname)
+{
+ int m = prim_lookup(csname);
+ if (m != undefined_primitive) {
+ char *ss = makecstring(csname);
+ int n = string_locate(ss, str_length(csname), 0);
+ lmt_memory_free(ss);
+ return ((n != undefined_cs_cmd) && (eq_type(n) == prim_eq_type(m)) && (eq_value(n) == prim_equiv(m)));
+ } else {
+ return 0;
+ }
+}
+*/
+
+/*tex Dumping and undumping. */
+
+/* We cheat! It should be dump_things(f, prim_state.prim[p], 1); */
+
+void tex_dump_primitives(dumpstream f)
+{
+ /*
+ for (int p = 0; p <= prim_size; p++) {
+ dump_mem(f, prim_state.prim[p]);
+ }
+ for (int p = 0; p <= prim_size; p++) {
+ dump_mem(f, prim_state.prim_eqtb[p]);
+ }
+ */
+ dump_things(f, lmt_primitive_state.prim[0], prim_size + 1);
+ dump_things(f, lmt_primitive_state.prim_eqtb[0], prim_size + 1);
+ for (int p = 0; p <= last_cmd; p++) {
+ dump_int(f, lmt_primitive_state.prim_data[p].offset);
+ dump_int(f, lmt_primitive_state.prim_data[p].subids);
+ for (int q = 0; q < lmt_primitive_state.prim_data[p].subids; q++) {
+ dump_int(f, lmt_primitive_state.prim_data[p].names[q]);
+ }
+ }
+}
+
+void tex_undump_primitives(dumpstream f)
+{
+ undump_things(f, lmt_primitive_state.prim[0], prim_size + 1);
+ undump_things(f, lmt_primitive_state.prim_eqtb[0], prim_size + 1);
+ for (int p = 0; p <= last_cmd; p++) {
+ undump_int(f, lmt_primitive_state.prim_data[p].offset);
+ undump_int(f, lmt_primitive_state.prim_data[p].subids);
+ if (lmt_primitive_state.prim_data[p].subids > 0) {
+ int size = lmt_primitive_state.prim_data[p].subids;
+ strnumber *names = aux_allocate_clear_array(sizeof(strnumber *), size, 1);
+ if (names) {
+ lmt_primitive_state.prim_data[p].names = names;
+ for (int q = 0; q < lmt_primitive_state.prim_data[p].subids; q++) {
+ undump_int(f, names[q]);
+ }
+ } else {
+ tex_overflow_error("primitives", size * sizeof(strnumber *));
+ }
+ }
+ }
+}
+
+/*tex
+
+ Dump the hash table, A different scheme is used to compress the hash table, since its lower
+ region is usually sparse. When |text (p) <> 0| for |p <= hash_used|, we output two words,
+ |p| and |hash[p]|. The hash table is, of course, densely packed for |p >= hash_used|, so the
+ remaining entries are output in a~block.
+
+*/
+
+void tex_dump_hashtable(dumpstream f)
+{
+ dump_int(f, lmt_hash_state.eqtb_data.top);
+ lmt_hash_state.eqtb_data.ptr = frozen_control_sequence - 1 - lmt_hash_state.eqtb_data.top + lmt_hash_state.hash_data.ptr;
+ /* the root entries, i.e. the direct hash slots */
+ for (halfword p = hash_base; p <= lmt_hash_state.eqtb_data.top; p++) {
+ if (cs_text(p)) {
+ dump_int(f, p);
+ dump_int(f, lmt_hash_state.hash[p]);
+ ++lmt_hash_state.eqtb_data.ptr;
+ }
+ }
+ /* the chain entries, i.e. the follow up list slots => eqtb */
+ dump_things(f, lmt_hash_state.hash[lmt_hash_state.eqtb_data.top + 1], special_sequence_base - lmt_hash_state.eqtb_data.top);
+ if (lmt_hash_state.hash_data.ptr > 0) {
+ dump_things(f, lmt_hash_state.hash[eqtb_size + 1], lmt_hash_state.hash_data.ptr);
+ }
+ dump_int(f, lmt_hash_state.eqtb_data.ptr);
+}
+
+void tex_undump_hashtable(dumpstream f)
+{
+ undump_int(f, lmt_hash_state.eqtb_data.top);
+ if (lmt_hash_state.eqtb_data.top >= hash_base && lmt_hash_state.eqtb_data.top <= frozen_control_sequence) {
+ halfword p = hash_base - 1;
+ do {
+ halfword q;
+ undump_int(f, q);
+ if (q >= (p + 1) && q <= lmt_hash_state.eqtb_data.top) {
+ undump_int(f, lmt_hash_state.hash[q]);
+ p = q;
+ } else {
+ goto BAD;
+ }
+ } while (p != lmt_hash_state.eqtb_data.top);
+ undump_things(f, lmt_hash_state.hash[lmt_hash_state.eqtb_data.top + 1], special_sequence_base - lmt_hash_state.eqtb_data.top);
+ if (lmt_hash_state.hash_data.ptr > 0) {
+ /* we get a warning on possible overrun here */
+ undump_things(f, lmt_hash_state.hash[eqtb_size + 1], lmt_hash_state.hash_data.ptr);
+ }
+ undump_int(f, lmt_hash_state.eqtb_data.ptr);
+ lmt_hash_state.eqtb_data.initial = lmt_hash_state.eqtb_data.ptr;
+ return;
+ }
+ BAD:
+ tex_fatal_undump_error("hash");
+}
+
+/*tex
+
+ We need to put \TEX's \quote {primitive} control sequences into the hash table, together with
+ their command code (which will be the |eq_type|) and an operand (which will be the |equiv|).
+ The |primitive| procedure does this, in a way that no \TEX\ user can. The global value |cur_val|
+ contains the new |eqtb| pointer after |primitive| has acted.
+
+ Because the definitions of the actual user-accessible name of a primitive can be postponed until
+ runtime, the function |primitive_def| is needed that does nothing except creating the control
+ sequence name.
+
+*/
+
+void tex_primitive_def(const char *str, size_t length, singleword cmd, halfword chr)
+{
+ /*tex This creates the |text()| string: */
+ cur_val = tex_string_locate(str, length, 1);
+ set_eq_level(cur_val, level_one);
+ set_eq_type(cur_val, cmd);
+ set_eq_flag(cur_val, primitive_flag_bit);
+ set_eq_value(cur_val, chr);
+}
+
+/*tex
+
+ The function |store_primitive_name| sets up the bookkeeping for the reverse lookup. It is
+ quite paranoid, because it is easy to mess this up accidentally.
+
+ The |offset| is needed because sometimes character codes (in |o|) are indices into |eqtb|
+ or are offset by a magical value to make sure they do not conflict with something else. We
+ don't want the |prim_data[c].names| to have too many entries as it will just be wasted room,
+ so |offset| is substracted from |o| before creating or accessing the array.
+
+*/
+
+static void tex_aux_store_primitive_name(strnumber s, singleword cmd, halfword chr, halfword offset)
+{
+ lmt_primitive_state.prim_data[cmd].offset = offset;
+ if (lmt_primitive_state.prim_data[cmd].subids < (chr + 1)) {
+ /*tex Not that efficient as each primitive triggers this now but only at ini time so ... */
+ strnumber *newstr = aux_allocate_clear_array(sizeof(strnumber *), chr + 1, 1);
+ if (lmt_primitive_state.prim_data[cmd].names) {
+ memcpy(newstr, lmt_primitive_state.prim_data[cmd].names, (unsigned) (lmt_primitive_state.prim_data[cmd].subids) * sizeof(strnumber));
+ aux_deallocate_array(lmt_primitive_state.prim_data[cmd].names);
+ }
+ lmt_primitive_state.prim_data[cmd].names = newstr;
+ lmt_primitive_state.prim_data[cmd].subids = chr + 1;
+ }
+ lmt_primitive_state.prim_data[cmd].names[chr] = s;
+}
+
+/*tex
+
+ Compared to \TEX82, |primitive| has two extra parameters. The |off| is an offset that will be
+ passed on to |store_primitive_name|, the |cmd_origin| is the bit that is used to group
+ primitives by originator. So the next function is called for each primitive and fills |prim_eqtb|.
+
+ Contrary to \LUATEX\ we define (using |primitive_def|) all primitives beforehand, so not only
+ those with |cmd_origin| values |core| and |tex|. As side effect, we don't get redundant string
+ entries as in \LUATEX.
+
+*/
+
+void tex_primitive(int cmd_origin, const char *str, singleword cmd, halfword chr, halfword offset)
+{
+ int prim_val;
+ strnumber ss;
+ if (cmd_origin != no_command) {
+ tex_primitive_def(str, strlen(str), cmd, offset + chr);
+ /*tex Indeed, |cur_val| has the latest primitive. */
+ ss = cs_text(cur_val);
+ } else {
+ ss = tex_maketexstring(str);
+ }
+ prim_val = tex_prim_lookup(ss);
+ prim_origin(prim_val) = (quarterword) cmd_origin;
+ prim_eq_type(prim_val) = cmd;
+ prim_equiv(prim_val) = offset + chr;
+ tex_aux_store_primitive_name(ss, cmd, chr, offset);
+}
+
+/*tex
+
+ Here is a helper that does the actual hash insertion. This code far from ideal: the existence
+ of |hash_extra| changes all the potential (short) coalesced lists into a single (long) one.
+ This will create a slowdown.
+
+ Here |hash_state.hash_used| starts out as the maximum \quote {normal} hash, not extra.
+
+*/
+
+static halfword tex_aux_insert_id(halfword p, const unsigned char *j, unsigned int l)
+{
+ if (cs_text(p) > 0) {
+ RESTART:
+ if (lmt_hash_state.hash_data.ptr < lmt_hash_state.hash_data.allocated) {
+ ++lmt_hash_state.hash_data.ptr;
+ cs_next(p) = lmt_hash_state.hash_data.ptr + eqtb_size;
+ p = cs_next(p);
+ } else if (tex_aux_room_in_hash()) {
+ goto RESTART;
+ } else {
+ /*tex
+ Search for an empty location in |hash|. This actually makes the direct first hit
+ in such a hash slot invalid but we check for the string anyway. As we now use a
+ hash size that is rather minimal, we don't really need this branch. It is a last
+ resort anyway.
+ */
+ do {
+ if (lmt_hash_state.eqtb_data.top == hash_base) {
+ /*tex We cannot go lower than this. */
+ tex_overflow_error("hash size", hash_size + lmt_hash_state.hash_data.allocated);
+ }
+ --lmt_hash_state.eqtb_data.top;
+ } while (cs_text(lmt_hash_state.eqtb_data.top) != 0);
+ cs_next(p) = lmt_hash_state.eqtb_data.top;
+ p = lmt_hash_state.eqtb_data.top;
+ }
+ }
+ cs_text(p) = tex_push_string(j, l);
+ copy_eqtb_entry(p, undefined_control_sequence);
+ ++lmt_hash_state.eqtb_data.ptr;
+ return p;
+}
+
+/*tex
+
+ Here is the subroutine that searches the hash table for an identifier that matches a given
+ string of length |l > 1| appearing in |buffer[j .. (j + l - 1)]|. If the identifier is found,
+ the corresponding hash table address is returned. Otherwise, if the global variable
+ |no_new_control_sequence| is |true|, the dummy address |undefined_control_sequence| is returned.
+ Otherwise the identifier is inserted into the hash table and its location is returned.
+
+ On the \LUAMETATEX\ manual we have 250K hits and 400K misses. Adapting the max and prime does
+ bring down the misses but also no gain in performance. In practice we seldom follow the chain.
+
+*/
+
+halfword tex_id_locate(int j, int l, int create)
+{
+ /*tex The index in |hash| array: */
+ halfword h = tex_aux_compute_hash((char *) (lmt_fileio_state.io_buffer + j), l);
+ /*tex We start searching here. Note that |0 <= h < hash_prime|: */
+ halfword p = h + hash_base;
+ /*tex The next one in a list: */
+ while (1) {
+ strnumber s = cs_text(p);
+ if ((s > 0) && (str_length(s) == (unsigned) l) && tex_str_eq_buf(s, j, l)) {
+ return p;
+ } else {
+ halfword n = cs_next(p);
+ if (n) {
+ p = n;
+ } else if (create) {
+ return tex_aux_insert_id(p, (lmt_fileio_state.io_buffer + j), (unsigned) l);
+ } else {
+ break;
+ }
+ }
+ }
+ return undefined_control_sequence;
+}
+
+/*tex
+
+ Here is a similar subroutine for finding a primitive in the hash. This one is based on a \CCODE\
+ string.
+
+*/
+
+halfword tex_string_locate(const char *s, size_t l, int create)
+{
+ /*tex The hash code: */
+ halfword h = tex_aux_compute_hash(s, (int) l);
+ /*tex The index in |hash| array. We start searching here. Note that |0 <= h < hash_prime|: */
+ halfword p = h + hash_base;
+ while (1) {
+ if (cs_text(p) > 0 && tex_str_eq_cstr(cs_text(p), s, (int) l)) {
+ return p;
+ } else {
+ halfword n = cs_next(p);
+ if (n) {
+ p = n;
+ } else if (create) {
+ return tex_aux_insert_id(p, (const unsigned char *) s, (unsigned) l);
+ } else {
+ break;
+ }
+ }
+ }
+ return undefined_control_sequence;
+}
+
+halfword tex_located_string(const char *s)
+{
+ size_t l = strlen(s);
+ return tex_string_locate(s, l, 0);
+}
+
+/*tex
+
+ The |print_cmd_chr| routine prints a symbolic interpretation of a command code and its modifier.
+ This is used in certain \quotation {You can\'t} error messages, and in the implementation of
+ diagnostic routines like |\show|.
+
+ The body of |print_cmd_chr| use to be a rather tedious listing of print commands, and most of it
+ was essentially an inverse to the |primitive| routine that enters a \TEX\ primitive into |eqtb|.
+
+ Thanks to |prim_data|, there is no need for all that tediousness. What is left of |primt_cnd_chr|
+ are just the exceptions to the general rule that the |cmd,chr_code| pair represents in a single
+ primitive command.
+
+*/
+
+static void tex_aux_print_chr_cmd(const char *s, halfword cmd, halfword chr)
+{
+ tex_print_str(s);
+ if (chr) {
+ tex_print_str(cmd == letter_cmd ? " letter " : " character ");
+ tex_print_uhex(chr);
+ tex_print_char(' ');
+ /*
+ By using the the unicode (ascii) names for some we can better support syntax
+ highlighting (which often involves parsing). The names are enclused in single
+ quotes. For the chr codes above 128 we assume \UNICODE\ support.
+ */
+ /*tex
+ We already intercepted the line feed here so that it doesn't give a side effect here
+ in the original |tex_print_tex_str(chr)| call but we have now inlined similar code
+ but without side effects.
+ */
+ if (chr < 32 || chr == 127) {
+ return;
+ } else if (chr <= 0x7F) {
+ switch (chr) {
+ case '\n' : tex_print_str("'line feed'"); return;
+ case '\r' : tex_print_str("'carriage return'"); return;
+ case ' ' : tex_print_str("'space'"); return;
+ case '!' : tex_print_str("'exclamation mark'"); return;
+ case '\"' : tex_print_str("'quotation mark'"); return;
+ case '#' : tex_print_str("'hash tag'"); return;
+ case '$' : tex_print_str("'dollar sign'"); return;
+ case '%' : tex_print_str("'percent sign'"); return;
+ case '&' : tex_print_str("'ampersand'"); return;
+ case '\'' : tex_print_str("'apostrophe'"); return;
+ case '(' : tex_print_str("'left parenthesis'"); return;
+ case ')' : tex_print_str("'right parenthesis'"); return;
+ case '*' : tex_print_str("'asterisk'"); return;
+ case '+' : tex_print_str("'plus sign'"); return;
+ case ',' : tex_print_str("'comma'"); return;
+ case '-' : tex_print_str("'hyphen minus'"); return;
+ case '.' : tex_print_str("'full stop'"); return;
+ case '/' : tex_print_str("'slash'"); return;
+ case ':' : tex_print_str("'colon'"); return;
+ case ';' : tex_print_str("'semicolon'"); return;
+ case '<' : tex_print_str("'less than sign'"); return;
+ case '=' : tex_print_str("'equal sign'"); return;
+ case '>' : tex_print_str("'more than sign'"); return;
+ case '?' : tex_print_str("'question mark'"); return;
+ case '@' : tex_print_str("'at sign'"); return;
+ case '[' : tex_print_str("'left square bracket'"); return;
+ case '\\' : tex_print_str("'backslash'"); return;
+ case ']' : tex_print_str("'right square bracket'"); return;
+ case '^' : tex_print_str("'circumflex accent'"); return;
+ case '_' : tex_print_str("'low line'"); return;
+ case '`' : tex_print_str("'grave accent'"); return;
+ case '{' : tex_print_str("'left curly bracket'"); return;
+ case '|' : tex_print_str("'vertical bar'"); return;
+ case '}' : tex_print_str("'right curly bracket'"); return;
+ case '~' : tex_print_str("'tilde'"); return;
+ }
+ tex_print_char(chr);
+ } else if (chr <= 0x7FF) {
+ tex_print_char(0xC0 + (chr / 0x40));
+ tex_print_char(0x80 + (chr % 0x40));
+ } else if (chr <= 0xFFFF) {
+ tex_print_char(0xE0 + (chr / 0x1000));
+ tex_print_char(0x80 + ((chr % 0x1000) / 0x40));
+ tex_print_char(0x80 + ((chr % 0x1000) % 0x40));
+ } else if (chr <= 0x10FFFF) {
+ tex_print_char(0xF0 + (chr / 0x40000));
+ tex_print_char(0x80 + ((chr % 0x40000) / 0x1000));
+ tex_print_char(0x80 + (((chr % 0x40000) % 0x1000) / 0x40));
+ tex_print_char(0x80 + (((chr % 0x40000) % 0x1000) % 0x40));
+ }
+ }
+}
+
+/*tex |\TEX82| Didn't print the |cmd,idx| information, but it may be useful. */
+
+static void tex_aux_prim_cmd_chr(quarterword cmd, halfword chr)
+{
+ if (cmd <= last_visible_cmd) {
+ int idx = chr - lmt_primitive_state.prim_data[cmd].offset;
+ if (idx >= 0 && idx < lmt_primitive_state.prim_data[cmd].subids) {
+ if (lmt_primitive_state.prim_data[cmd].names && lmt_primitive_state.prim_data[cmd].names[idx]) {
+ tex_print_tex_str_esc(lmt_primitive_state.prim_data[cmd].names[idx]);
+ } else {
+ tex_print_format("[warning: cmd %i, chr %i, no name]", cmd, idx);
+ }
+ } else if (cmd == internal_int_cmd && idx < number_int_pars) {
+ /* a special case */
+ tex_print_format("[integer: chr %i, class specific]", cmd);
+ } else {
+ tex_print_format("[warning: cmd %i, chr %i, out of range]", cmd, idx);
+ }
+ } else {
+ tex_print_format("[warning: cmd %i, invalid]", cmd);
+ }
+}
+
+static void tex_aux_show_lua_call(const char *what, int slot)
+{
+ int callback_id = lmt_callback_defined(show_lua_call_callback);
+ if (callback_id) {
+ char *ss = NULL;
+ int lua_retval = lmt_run_callback(lmt_lua_state.lua_instance, callback_id, "Sd->S", what, slot, &ss);
+ if (lua_retval && ss && strlen(ss) > 0) {
+ tex_print_str(ss);
+ lmt_memory_free(ss);
+ return;
+ }
+ }
+ tex_print_format("%s %i", what, slot);
+}
+
+void tex_print_cmd_flags(halfword cs, halfword cmd, int flags, int escaped)
+{
+ if (flags) {
+ flags = eq_flag(cs);
+ if (is_frozen (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("frozen " ); }
+ if (is_permanent(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("permanent "); }
+ if (is_immutable(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("immutable "); }
+ if (is_primitive(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("primitive "); }
+ if (is_mutable (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("mutable " ); }
+ if (is_noaligned(flags)) { (escaped ? tex_print_str_esc : tex_print_str)("noaligned "); }
+ if (is_instance (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("instance " ); }
+ if (is_untraced (flags)) { (escaped ? tex_print_str_esc : tex_print_str)("untraced " ); }
+ }
+ if (is_tolerant_cmd (cmd)) {
+ (escaped ? tex_print_str_esc : tex_print_str)("tolerant " );
+ }
+ if (is_protected_cmd(cmd)) {
+ (escaped ? tex_print_str_esc : tex_print_str)("protected ");
+ } else if (is_semi_protected_cmd(cmd)) {
+ (escaped ? tex_print_str_esc : tex_print_str)("semiprotected ");
+ }
+}
+
+void tex_print_cmd_chr(singleword cmd, halfword chr)
+{
+ switch (cmd) {
+ case left_brace_cmd:
+ tex_aux_print_chr_cmd("begin group", cmd, chr);
+ break;
+ case right_brace_cmd:
+ tex_aux_print_chr_cmd("end group", cmd, chr);
+ break;
+ case math_shift_cmd:
+ tex_aux_print_chr_cmd("math shift", cmd, chr);
+ break;
+ case alignment_tab_cmd:
+ tex_aux_print_chr_cmd("alignment tab", cmd, chr);
+ break;
+ case parameter_cmd:
+ tex_aux_print_chr_cmd("parameter", cmd, chr);
+ break;
+ case superscript_cmd:
+ tex_aux_print_chr_cmd("superscript", cmd, chr);
+ break;
+ case subscript_cmd:
+ tex_aux_print_chr_cmd("subscript", cmd, chr);
+ break;
+ case spacer_cmd:
+ tex_aux_print_chr_cmd("blank space", cmd, chr);
+ break;
+ case letter_cmd:
+ case other_char_cmd:
+ tex_aux_print_chr_cmd("the", cmd, chr);
+ break;
+ /*
+ case active_char_cmd:
+ case comment_cmd:
+ case invalid_char_cmd:
+ break;
+ */
+ case end_template_cmd:
+ /*tex Kind of special: |chr| points to |null_list). */
+ tex_print_str_esc("endtemplate");
+ // tex_print_str("end of alignment template");
+ break;
+ case if_test_cmd:
+ if (chr <= last_if_test_code) {
+ tex_aux_prim_cmd_chr(cmd, chr);
+ } else {
+ tex_aux_show_lua_call("luacondition", chr - last_if_test_code);
+ }
+ break;
+ case char_given_cmd:
+ tex_print_str_esc("char");
+ tex_print_qhex(chr);
+ break;
+ // case math_char_given_cmd:
+ // /*tex
+ // Okay, it's better for old macro packages that mess with meaning to report a more
+ // traditional value. A compromise. But, this might be dropped.
+ // */
+ // tex_print_str_esc("mathchar");
+ // tex_show_mathcode_value(tex_mathchar_from_integer(chr, tex_mathcode), tex_mathcode);
+ // break;
+ // case math_char_xgiven_cmd:
+ // tex_print_str_esc("Umathchar");
+ // tex_show_mathcode_value(tex_mathchar_from_integer(chr, umath_mathcode), umath_mathcode);
+ // break;
+ case lua_call_cmd:
+ tex_aux_show_lua_call("luacall", chr);
+ break;
+ case lua_local_call_cmd:
+ tex_aux_show_lua_call("local luacall", chr);
+ break;
+ case lua_protected_call_cmd:
+ tex_aux_show_lua_call("protected luacall", chr);
+ break;
+ case lua_value_cmd:
+ tex_aux_show_lua_call("luavalue", chr);
+ break;
+ case set_font_cmd:
+ tex_print_str("select font ");
+ tex_print_font(chr);
+ break;
+ case undefined_cs_cmd:
+ tex_print_str("undefined");
+ break;
+ case call_cmd:
+ case protected_call_cmd:
+ case semi_protected_call_cmd:
+ case tolerant_call_cmd:
+ case tolerant_protected_call_cmd:
+ case tolerant_semi_protected_call_cmd:
+ tex_print_cmd_flags(cur_cs, cur_cmd, 1, 0);
+ tex_print_str("macro");
+ break;
+ case internal_toks_cmd:
+ tex_aux_prim_cmd_chr(cmd, chr);
+ break;
+ case register_toks_cmd:
+ tex_print_str_esc("toks");
+ tex_print_int(register_toks_number(chr));
+ break;
+ case internal_int_cmd:
+ tex_aux_prim_cmd_chr(cmd, chr);
+ break;
+ case register_int_cmd:
+ tex_print_str_esc("count");
+ tex_print_int(register_int_number(chr));
+ break;
+ case internal_attribute_cmd:
+ tex_aux_prim_cmd_chr(cmd, chr);
+ break;
+ case register_attribute_cmd:
+ tex_print_str_esc("attribute");
+ tex_print_int(register_attribute_number(chr));
+ break;
+ case internal_dimen_cmd:
+ tex_aux_prim_cmd_chr(cmd, chr);
+ break;
+ case register_dimen_cmd:
+ tex_print_str_esc("dimen");
+ tex_print_int(register_dimen_number(chr));
+ break;
+ case internal_glue_cmd:
+ tex_aux_prim_cmd_chr(cmd, chr);
+ break;
+ case register_glue_cmd:
+ tex_print_str_esc("skip");
+ tex_print_int(register_glue_number(chr));
+ break;
+ case internal_mu_glue_cmd:
+ tex_aux_prim_cmd_chr(cmd, chr);
+ break;
+ case register_mu_glue_cmd:
+ tex_print_str_esc("muskip");
+ tex_print_int(register_mu_glue_number(chr));
+ break;
+ case node_cmd:
+ tex_print_str(node_token_flagged(chr) ? "large" : "small");
+ tex_print_str(" node reference");
+ break;
+ case integer_cmd:
+ tex_print_str("integer ");
+ tex_print_int(chr);
+ break;
+ case dimension_cmd:
+ tex_print_str("dimension ");
+ tex_print_dimension(chr, pt_unit);
+ break;
+ case gluespec_cmd:
+ tex_print_str("gluespec ");
+ tex_print_spec(chr, pt_unit);
+ break;
+ case mugluespec_cmd:
+ tex_print_str("mugluespec ");
+ tex_print_spec(chr, mu_unit);
+ break;
+ case mathspec_cmd:
+ switch (node_subtype(chr)) {
+ case tex_mathcode:
+ tex_print_str_esc("mathchar");
+ break;
+ case umath_mathcode:
+ /* case umathnum_mathcode: */
+ tex_print_str_esc("Umathchar");
+ break;
+ case mathspec_mathcode:
+ tex_print_str("mathspec ");
+ }
+ tex_print_mathspec(chr);
+ break;
+ case fontspec_cmd:
+ {
+ /* We don't check for validity here. */
+ tex_print_str("fontspec ");
+ tex_print_fontspec(chr);
+ }
+ break;
+ case deep_frozen_end_template_cmd:
+ /*tex Kind of special: |chr| points to |null_list). */
+ tex_print_str_esc("endtemplate");
+ break;
+ case deep_frozen_dont_expand_cmd:
+ /*tex Kind of special. */
+ tex_print_str_esc("notexpanded");
+ break;
+ /*
+ case string_cmd:
+ print_str("string:->");
+ print(cs_offset_value + chr);
+ break;
+ */
+ case internal_box_reference_cmd:
+ tex_print_str_esc("hiddenlocalbox");
+ break;
+ default:
+ /*tex These are most commands, actually. Todo: local boxes*/
+ tex_aux_prim_cmd_chr(cmd, chr);
+ break;
+ }
+}